ecs_deploy 0.3.0 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,12 +6,16 @@ module EcsDeploy
6
6
  :secret_access_key,
7
7
  :default_region,
8
8
  :deploy_wait_timeout,
9
- :ecs_service_role
9
+ :ecs_service_role,
10
+ :ecs_wait_until_services_stable_max_attempts,
11
+ :ecs_wait_until_services_stable_delay
10
12
 
11
13
  def initialize
12
14
  @log_level = :info
13
15
  @deploy_wait_timeout = 300
14
- @ecs_service_role = "ecsServiceRole"
16
+ # The following values are the default values of Aws::ECS::Waiters::ServicesStable
17
+ @ecs_wait_until_services_stable_max_attempts = 40
18
+ @ecs_wait_until_services_stable_delay = 15
15
19
  end
16
20
  end
17
21
  end
@@ -0,0 +1,195 @@
1
+ require "aws-sdk-autoscaling"
2
+ require "aws-sdk-ec2"
3
+ require "aws-sdk-ecs"
4
+
5
+ module EcsDeploy
6
+ class InstanceFluctuationManager
7
+ attr_reader :logger
8
+
9
+ MAX_UPDATABLE_ECS_CONTAINER_COUNT = 10
10
+ MAX_DETACHEABLE_EC2_INSTACE_COUNT = 20
11
+ MAX_DESCRIBABLE_ECS_TASK_COUNT = 100
12
+
13
+ def initialize(region:, cluster:, auto_scaling_group_name:, desired_capacity:, logger:)
14
+ @region = region
15
+ @cluster = cluster
16
+ @auto_scaling_group_name = auto_scaling_group_name
17
+ @desired_capacity = desired_capacity
18
+ @logger = logger
19
+ end
20
+
21
+ def increase
22
+ asg = fetch_auto_scaling_group
23
+
24
+ @logger.info("Increase desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{asg.max_size}")
25
+ as_client.update_auto_scaling_group(auto_scaling_group_name: @auto_scaling_group_name, desired_capacity: asg.max_size)
26
+
27
+ # Run in background because increasing instances may take time
28
+ Thread.new do
29
+ loop do
30
+ cluster = ecs_client.describe_clusters(clusters: [@cluster]).clusters.first
31
+ instance_count = cluster.registered_container_instances_count
32
+ if instance_count == asg.max_size
33
+ @logger.info("Succeeded in increasing instances!")
34
+ break
35
+ end
36
+ @logger.info("Current registered instance count: #{instance_count}")
37
+ sleep 5
38
+ end
39
+ end
40
+ end
41
+
42
+ def decrease
43
+ asg = fetch_auto_scaling_group
44
+
45
+ decrease_count = asg.desired_capacity - @desired_capacity
46
+ if decrease_count <= 0
47
+ @logger.info("The capacity is already #{asg.desired_capacity}")
48
+ return
49
+ end
50
+ @logger.info("Decrease desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{@desired_capacity}")
51
+
52
+ container_instances = ecs_client.list_container_instances(cluster: @cluster).flat_map do |resp|
53
+ ecs_client.describe_container_instances(
54
+ cluster: @cluster,
55
+ container_instances: resp.container_instance_arns
56
+ ).container_instances
57
+ end
58
+
59
+ az_to_container_instances = container_instances.sort_by {|ci| - ci.running_tasks_count }.group_by do |ci|
60
+ ci.attributes.find {|attribute| attribute.name == "ecs.availability-zone" }.value
61
+ end
62
+ if az_to_container_instances.empty?
63
+ @logger.info("There are no instances to terminate.")
64
+ return
65
+ end
66
+
67
+ target_container_instances = extract_target_container_instances(decrease_count, az_to_container_instances)
68
+
69
+ @logger.info("running tasks: #{ecs_client.list_tasks(cluster: @cluster).task_arns.size}")
70
+ all_running_task_arns = []
71
+ target_container_instances.map(&:container_instance_arn).each_slice(MAX_UPDATABLE_ECS_CONTAINER_COUNT) do |arns|
72
+ @logger.info(arns)
73
+ ecs_client.update_container_instances_state(
74
+ cluster: @cluster,
75
+ container_instances: arns,
76
+ status: "DRAINING"
77
+ )
78
+ arns.each do |arn|
79
+ all_running_task_arns.concat(list_running_task_arns(arn))
80
+ end
81
+ end
82
+
83
+ stop_tasks_not_belonging_service(all_running_task_arns)
84
+ wait_until_tasks_stopped(all_running_task_arns)
85
+
86
+ instance_ids = target_container_instances.map(&:ec2_instance_id)
87
+ terminate_instances(instance_ids)
88
+ @logger.info("Succeeded in decreasing instances!")
89
+ end
90
+
91
+ private
92
+
93
+ def aws_params
94
+ {
95
+ access_key_id: EcsDeploy.config.access_key_id,
96
+ secret_access_key: EcsDeploy.config.secret_access_key,
97
+ region: @region,
98
+ logger: @logger
99
+ }.reject do |_key, value|
100
+ value.nil?
101
+ end
102
+ end
103
+
104
+ def as_client
105
+ @as_client ||= Aws::AutoScaling::Client.new(aws_params)
106
+ end
107
+
108
+ def ec2_client
109
+ @ec2_client ||= Aws::EC2::Client.new(aws_params)
110
+ end
111
+
112
+ def ecs_client
113
+ @ecs_client ||= Aws::ECS::Client.new(aws_params)
114
+ end
115
+
116
+ def fetch_auto_scaling_group
117
+ as_client.describe_auto_scaling_groups(auto_scaling_group_names: [@auto_scaling_group_name]).auto_scaling_groups.first
118
+ end
119
+
120
+ # Extract container instances to terminate considering AZ balance
121
+ def extract_target_container_instances(decrease_count, az_to_container_instances)
122
+ target_container_instances = []
123
+ decrease_count.times do
124
+ @logger.debug do
125
+ "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
126
+ end
127
+ az = az_to_container_instances.max_by {|_az, instances| instances.size }.first
128
+ target_container_instances << az_to_container_instances[az].pop
129
+ end
130
+ @logger.info do
131
+ "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
132
+ end
133
+
134
+ target_container_instances
135
+ end
136
+
137
+ # list tasks whose desired_status is "RUNNING" or
138
+ # whose desired_status is "STOPPED" but last_status is "RUNNING" on the ECS container
139
+ def list_running_task_arns(container_instance_arn)
140
+ running_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn).flat_map(&:task_arns)
141
+ stopped_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn, desired_status: "STOPPED").flat_map(&:task_arns)
142
+ stopped_running_task_arns = stopped_tasks_arn.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).flat_map do |arns|
143
+ ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.select do |task|
144
+ task.desired_status == "STOPPED" && task.last_status == "RUNNING"
145
+ end
146
+ end.map(&:task_arn)
147
+ running_tasks_arn + stopped_running_task_arns
148
+ end
149
+
150
+ def wait_until_tasks_stopped(task_arns)
151
+ @logger.info("All old tasks: #{task_arns.size}")
152
+ task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
153
+ ecs_client.wait_until(:tasks_stopped, cluster: @cluster, tasks: arns)
154
+ end
155
+ @logger.info("All old tasks are stopped")
156
+ end
157
+
158
+ def stop_tasks_not_belonging_service(running_task_arns)
159
+ @logger.info("Running tasks: #{running_task_arns.size}")
160
+ unless running_task_arns.empty?
161
+ running_task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
162
+ ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.each do |task|
163
+ ecs_client.stop_task(cluster: @cluster, task: task.task_arn) if task.group.start_with?("family:")
164
+ end
165
+ end
166
+ end
167
+ end
168
+
169
+ def terminate_instances(instance_ids)
170
+ if instance_ids.empty?
171
+ @logger.info("There are no instances to terminate.")
172
+ return
173
+ end
174
+ instance_ids.each_slice(MAX_DETACHEABLE_EC2_INSTACE_COUNT) do |ids|
175
+ as_client.detach_instances(
176
+ auto_scaling_group_name: @auto_scaling_group_name,
177
+ instance_ids: ids,
178
+ should_decrement_desired_capacity: true
179
+ )
180
+ end
181
+
182
+ ec2_client.terminate_instances(instance_ids: instance_ids)
183
+
184
+ ec2_client.wait_until(:instance_terminated, instance_ids: instance_ids) do |w|
185
+ w.before_wait do |attempts, response|
186
+ @logger.info("Waiting for stopping all instances...#{attempts}")
187
+ instances = response.reservations.flat_map(&:instances)
188
+ instances.sort_by(&:instance_id).each do |instance|
189
+ @logger.info("#{instance.instance_id}\t#{instance.state.name}")
190
+ end
191
+ end
192
+ end
193
+ end
194
+ end
195
+ end
@@ -1,3 +1,4 @@
1
+ require 'aws-sdk-cloudwatchevents'
1
2
  require 'timeout'
2
3
 
3
4
  module EcsDeploy
@@ -8,8 +9,8 @@ module EcsDeploy
8
9
 
9
10
  def initialize(
10
11
  cluster:, rule_name:, schedule_expression:, enabled: true, description: nil, target_id: nil,
11
- task_definition_name:, revision: nil, task_count: nil, role_arn:,
12
- region: nil
12
+ task_definition_name:, revision: nil, task_count: nil, role_arn:, network_configuration: nil, launch_type: nil, platform_version: nil, group: nil,
13
+ region: nil, container_overrides: nil
13
14
  )
14
15
  @cluster = cluster
15
16
  @rule_name = rule_name
@@ -21,9 +22,15 @@ module EcsDeploy
21
22
  @task_count = task_count || 1
22
23
  @revision = revision
23
24
  @role_arn = role_arn
24
- @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
25
+ @network_configuration = network_configuration
26
+ @launch_type = launch_type || "EC2"
27
+ @platform_version = platform_version
28
+ @group = group
29
+ region ||= EcsDeploy.config.default_region
30
+ @container_overrides = container_overrides
25
31
 
26
- @client = Aws::ECS::Client.new(region: @region)
32
+ @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
33
+ @region = @client.config.region
27
34
  @cloud_watch_events = Aws::CloudWatchEvents::Client.new(region: @region)
28
35
  end
29
36
 
@@ -58,19 +65,28 @@ module EcsDeploy
58
65
  end
59
66
 
60
67
  def put_targets
68
+ target = {
69
+ id: @target_id,
70
+ arn: cluster_arn,
71
+ role_arn: @role_arn,
72
+ ecs_parameters: {
73
+ task_definition_arn: task_definition_arn,
74
+ task_count: @task_count,
75
+ network_configuration: @network_configuration,
76
+ launch_type: @launch_type,
77
+ platform_version: @platform_version,
78
+ group: @group,
79
+ },
80
+ }
81
+ target[:ecs_parameters].compact!
82
+
83
+ if @container_overrides
84
+ target.merge!(input: { containerOverrides: @container_overrides }.to_json)
85
+ end
86
+
61
87
  res = @cloud_watch_events.put_targets(
62
88
  rule: @rule_name,
63
- targets: [
64
- {
65
- id: @target_id,
66
- arn: cluster_arn,
67
- role_arn: @role_arn,
68
- ecs_parameters: {
69
- task_definition_arn: task_definition_arn,
70
- task_count: @task_count,
71
- },
72
- }
73
- ]
89
+ targets: [target]
74
90
  )
75
91
  if res.failed_entry_count.zero?
76
92
  EcsDeploy.logger.info "create cloudwatch event target [#{@target_id}] [#{@region}] [#{Paint['OK', :green]}]"
@@ -5,13 +5,26 @@ module EcsDeploy
5
5
  CHECK_INTERVAL = 5
6
6
  MAX_DESCRIBE_SERVICES = 10
7
7
 
8
- attr_reader :cluster, :region, :service_name
8
+ class TooManyAttemptsError < StandardError; end
9
+
10
+ attr_reader :cluster, :region, :service_name, :delete
9
11
 
10
12
  def initialize(
11
13
  cluster:, service_name:, task_definition_name: nil, revision: nil,
12
14
  load_balancers: nil,
13
15
  desired_count: nil, deployment_configuration: {maximum_percent: 200, minimum_healthy_percent: 100},
14
- region: nil
16
+ launch_type: nil,
17
+ placement_constraints: [],
18
+ placement_strategy: [],
19
+ network_configuration: nil,
20
+ health_check_grace_period_seconds: nil,
21
+ scheduling_strategy: 'REPLICA',
22
+ enable_ecs_managed_tags: nil,
23
+ tags: nil,
24
+ propagate_tags: nil,
25
+ region: nil,
26
+ delete: false,
27
+ enable_execute_command: false
15
28
  )
16
29
  @cluster = cluster
17
30
  @service_name = service_name
@@ -19,11 +32,25 @@ module EcsDeploy
19
32
  @load_balancers = load_balancers
20
33
  @desired_count = desired_count
21
34
  @deployment_configuration = deployment_configuration
35
+ @launch_type = launch_type
36
+ @placement_constraints = placement_constraints
37
+ @placement_strategy = placement_strategy
38
+ @network_configuration = network_configuration
39
+ @health_check_grace_period_seconds = health_check_grace_period_seconds
40
+ @scheduling_strategy = scheduling_strategy
22
41
  @revision = revision
23
- @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
42
+ @enable_ecs_managed_tags = enable_ecs_managed_tags
43
+ @tags = tags
44
+ @propagate_tags = propagate_tags
45
+ @enable_execute_command = enable_execute_command
46
+
24
47
  @response = nil
25
48
 
26
- @client = Aws::ECS::Client.new(region: @region)
49
+ region ||= EcsDeploy.config.default_region
50
+ @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
51
+ @region = @client.config.region
52
+
53
+ @delete = delete
27
54
  end
28
55
 
29
56
  def current_task_definition_arn
@@ -37,53 +64,105 @@ module EcsDeploy
37
64
  cluster: @cluster,
38
65
  task_definition: task_definition_name_with_revision,
39
66
  deployment_configuration: @deployment_configuration,
67
+ network_configuration: @network_configuration,
68
+ health_check_grace_period_seconds: @health_check_grace_period_seconds,
69
+ enable_execute_command: @enable_execute_command,
40
70
  }
41
71
  if res.services.select{ |s| s.status == 'ACTIVE' }.empty?
72
+ return if @delete
73
+
42
74
  service_options.merge!({
43
75
  service_name: @service_name,
44
76
  desired_count: @desired_count.to_i,
77
+ launch_type: @launch_type,
78
+ placement_constraints: @placement_constraints,
79
+ placement_strategy: @placement_strategy,
80
+ enable_ecs_managed_tags: @enable_ecs_managed_tags,
81
+ tags: @tags,
82
+ propagate_tags: @propagate_tags,
45
83
  })
46
- if @load_balancers
84
+
85
+ if @load_balancers && EcsDeploy.config.ecs_service_role
47
86
  service_options.merge!({
48
87
  role: EcsDeploy.config.ecs_service_role,
88
+ })
89
+ end
90
+
91
+ if @load_balancers
92
+ service_options.merge!({
49
93
  load_balancers: @load_balancers,
50
94
  })
51
95
  end
96
+
97
+ if @scheduling_strategy == 'DAEMON'
98
+ service_options[:scheduling_strategy] = @scheduling_strategy
99
+ service_options.delete(:desired_count)
100
+ end
52
101
  @response = @client.create_service(service_options)
53
102
  EcsDeploy.logger.info "create service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
54
103
  else
104
+ return delete_service if @delete
105
+
55
106
  service_options.merge!({service: @service_name})
56
107
  service_options.merge!({desired_count: @desired_count}) if @desired_count
108
+ update_tags(@service_name, @tags)
57
109
  @response = @client.update_service(service_options)
58
110
  EcsDeploy.logger.info "update service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
59
111
  end
60
112
  end
61
113
 
62
- def wait_running
63
- return if @response.nil?
114
+ def delete_service
115
+ if @scheduling_strategy != 'DAEMON'
116
+ @client.update_service(cluster: @cluster, service: @service_name, desired_count: 0)
117
+ sleep 1
118
+ end
119
+ @client.delete_service(cluster: @cluster, service: @service_name)
120
+ EcsDeploy.logger.info "delete service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
121
+ end
64
122
 
65
- service = @response.service
123
+ def update_tags(service_name, tags)
124
+ service_arn = @client.describe_services(cluster: @cluster, services: [service_name]).services.first.service_arn
125
+ if service_arn.split('/').size == 2
126
+ if tags
127
+ EcsDeploy.logger.warn "#{service_name} doesn't support tagging operations, so tags are ignored. Long arn format must be used for tagging operations."
128
+ end
129
+ return
130
+ end
66
131
 
67
- @client.wait_until(:services_stable, cluster: @cluster, services: [service.service_name]) do |w|
68
- w.delay = 10
132
+ tags ||= []
133
+ current_tag_keys = @client.list_tags_for_resource(resource_arn: service_arn).tags.map(&:key)
134
+ deleted_tag_keys = current_tag_keys - tags.map { |t| t[:key] }
69
135
 
70
- w.before_attempt do
71
- EcsDeploy.logger.info "wait service stable [#{service.service_name}]"
72
- end
136
+ unless deleted_tag_keys.empty?
137
+ @client.untag_resource(resource_arn: service_arn, tag_keys: deleted_tag_keys)
138
+ end
139
+
140
+ unless tags.empty?
141
+ @client.tag_resource(resource_arn: service_arn, tags: tags)
73
142
  end
74
143
  end
75
144
 
76
145
  def self.wait_all_running(services)
77
- services.group_by { |s| [s.cluster, s.region] }.each do |(cl, region), ss|
146
+ services.group_by { |s| [s.cluster, s.region] }.flat_map do |(cl, region), ss|
78
147
  client = Aws::ECS::Client.new(region: region)
79
- ss.map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES) do |chunked_service_names|
80
- client.wait_until(:services_stable, cluster: cl, services: chunked_service_names) do |w|
81
- w.before_attempt do
148
+ ss.reject(&:delete).map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES).map do |chunked_service_names|
149
+ Thread.new do
150
+ EcsDeploy.config.ecs_wait_until_services_stable_max_attempts.times do
82
151
  EcsDeploy.logger.info "wait service stable [#{chunked_service_names.join(", ")}]"
152
+ resp = client.describe_services(cluster: cl, services: chunked_service_names)
153
+ resp.services.each do |s|
154
+ # cf. https://github.com/aws/aws-sdk-ruby/blob/master/gems/aws-sdk-ecs/lib/aws-sdk-ecs/waiters.rb#L91-L96
155
+ if s.deployments.size == 1 && s.running_count == s.desired_count
156
+ chunked_service_names.delete(s.service_name)
157
+ end
158
+ end
159
+ break if chunked_service_names.empty?
160
+ sleep EcsDeploy.config.ecs_wait_until_services_stable_delay
83
161
  end
162
+ raise TooManyAttemptsError unless chunked_service_names.empty?
84
163
  end
85
164
  end
86
- end
165
+ end.each(&:join)
87
166
  end
88
167
 
89
168
  private