RubyGems - ecs_deploy - Versions diffs - 0.3.2 → 1.0.3 - Mend

ecs_deploy 0.3.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

checksums.yaml +5 -5
data/.gitignore +1 -0
data/.travis.yml +5 -0
data/CHANGELOG.md +150 -0
data/README.md +272 -23
data/Rakefile +4 -0
data/ecs_deploy.gemspec +9 -3
data/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +209 -0
data/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb +149 -0
data/lib/ecs_deploy/auto_scaler/config_base.rb +16 -0
data/lib/ecs_deploy/auto_scaler/instance_drainer.rb +134 -0
data/lib/ecs_deploy/auto_scaler/service_config.rb +223 -0
data/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb +102 -0
data/lib/ecs_deploy/auto_scaler/trigger_config.rb +42 -0
data/lib/ecs_deploy/auto_scaler.rb +105 -339
data/lib/ecs_deploy/capistrano.rb +73 -3
data/lib/ecs_deploy/configuration.rb +6 -2
data/lib/ecs_deploy/instance_fluctuation_manager.rb +198 -0
data/lib/ecs_deploy/scheduled_task.rb +15 -3
data/lib/ecs_deploy/service.rb +100 -21
data/lib/ecs_deploy/task_definition.rb +30 -9
data/lib/ecs_deploy/version.rb +1 -1
data/lib/ecs_deploy.rb +1 -1
metadata +113 -14

data/lib/ecs_deploy/instance_fluctuation_manager.rb ADDED

@@ -0,0 +1,198 @@
+require "aws-sdk-autoscaling"
+require "aws-sdk-ec2"
+require "aws-sdk-ecs"
+module EcsDeploy
+  class InstanceFluctuationManager
+    attr_reader :logger
+    MAX_UPDATABLE_ECS_CONTAINER_COUNT = 10
+    MAX_DETACHEABLE_EC2_INSTACE_COUNT = 20
+    MAX_DESCRIBABLE_ECS_TASK_COUNT = 100
+    def initialize(region:, cluster:, auto_scaling_group_name:, desired_capacity:, logger:)
+      @region = region
+      @cluster = cluster
+      @auto_scaling_group_name = auto_scaling_group_name
+      @desired_capacity = desired_capacity
+      @logger = logger
+    end
+    def increase
+      asg = fetch_auto_scaling_group
+      @logger.info("Increase desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{asg.max_size}")
+      as_client.update_auto_scaling_group(auto_scaling_group_name: @auto_scaling_group_name, desired_capacity: asg.max_size)
+      # Run in background because increasing instances may take time
+      Thread.new do
+        loop do
+          cluster = ecs_client.describe_clusters(clusters: [@cluster]).clusters.first
+          instance_count = cluster.registered_container_instances_count
+          if instance_count == asg.max_size
+            @logger.info("Succeeded in increasing instances!")
+            break
+          end
+          @logger.info("Current registered instance count: #{instance_count}")
+          sleep 5
+        end
+      end
+    end
+    def decrease
+      asg = fetch_auto_scaling_group
+      decrease_count = asg.desired_capacity - @desired_capacity
+      if decrease_count <= 0
+        @logger.info("The capacity is already #{asg.desired_capacity}")
+        return
+      end
+      @logger.info("Decrease desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{@desired_capacity}")
+      container_instances = ecs_client.list_container_instances(cluster: @cluster).flat_map do |resp|
+        ecs_client.describe_container_instances(
+          cluster: @cluster,
+          container_instances: resp.container_instance_arns
+        ).container_instances
+      end
+      # The status of ECS instances sometimes seems to remain 'DEREGISTERING' for a few minutes after they are terminated.
+      container_instances.reject! { |ci| ci.status == 'DEREGISTERING' }
+      az_to_container_instances = container_instances.sort_by {|ci| - ci.running_tasks_count }.group_by do |ci|
+        ci.attributes.find {|attribute| attribute.name == "ecs.availability-zone" }.value
+      end
+      if az_to_container_instances.empty?
+        @logger.info("There are no instances to terminate.")
+        return
+      end
+      target_container_instances = extract_target_container_instances(decrease_count, az_to_container_instances)
+      @logger.info("running tasks: #{ecs_client.list_tasks(cluster: @cluster).task_arns.size}")
+      all_running_task_arns = []
+      target_container_instances.map(&:container_instance_arn).each_slice(MAX_UPDATABLE_ECS_CONTAINER_COUNT) do |arns|
+        @logger.info(arns)
+        ecs_client.update_container_instances_state(
+          cluster: @cluster,
+          container_instances: arns,
+          status: "DRAINING"
+        )
+        arns.each do |arn|
+          all_running_task_arns.concat(list_running_task_arns(arn))
+        end
+      end
+      stop_tasks_not_belonging_service(all_running_task_arns)
+      wait_until_tasks_stopped(all_running_task_arns)
+      instance_ids = target_container_instances.map(&:ec2_instance_id)
+      terminate_instances(instance_ids)
+      @logger.info("Succeeded in decreasing instances!")
+    end
+    private
+    def aws_params
+      {
+        access_key_id: EcsDeploy.config.access_key_id,
+        secret_access_key: EcsDeploy.config.secret_access_key,
+        region: @region,
+        logger: @logger
+      }.reject do |_key, value|
+        value.nil?
+      end
+    end
+    def as_client
+      @as_client ||= Aws::AutoScaling::Client.new(aws_params)
+    end
+    def ec2_client
+      @ec2_client ||= Aws::EC2::Client.new(aws_params)
+    end
+    def ecs_client
+      @ecs_client ||= Aws::ECS::Client.new(aws_params)
+    end
+    def fetch_auto_scaling_group
+      as_client.describe_auto_scaling_groups(auto_scaling_group_names: [@auto_scaling_group_name]).auto_scaling_groups.first
+    end
+    # Extract container instances to terminate considering AZ balance
+    def extract_target_container_instances(decrease_count, az_to_container_instances)
+      target_container_instances = []
+      decrease_count.times do
+        @logger.debug do
+          "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
+        end
+        az = az_to_container_instances.max_by {|_az, instances| instances.size }.first
+        target_container_instances << az_to_container_instances[az].pop
+      end
+      @logger.info do
+        "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
+      end
+      target_container_instances
+    end
+    # list tasks whose desired_status is "RUNNING" or
+    # whoose desired_status is "STOPPED" but last_status is "RUNNING" on the ECS container
+    def list_running_task_arns(container_instance_arn)
+      running_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn).flat_map(&:task_arns)
+      stopped_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn, desired_status: "STOPPED").flat_map(&:task_arns)
+      stopped_running_task_arns = stopped_tasks_arn.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).flat_map do |arns|
+        ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.select do |task|
+          task.desired_status == "STOPPED" && task.last_status == "RUNNING"
+        end
+      end.map(&:task_arn)
+      running_tasks_arn + stopped_running_task_arns
+    end
+    def wait_until_tasks_stopped(task_arns)
+      @logger.info("All old tasks: #{task_arns.size}")
+      task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
+        ecs_client.wait_until(:tasks_stopped, cluster: @cluster, tasks: arns)
+      end
+      @logger.info("All old tasks are stopped")
+    end
+    def stop_tasks_not_belonging_service(running_task_arns)
+      @logger.info("Running tasks: #{running_task_arns.size}")
+      unless running_task_arns.empty?
+        running_task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
+          ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.each do |task|
+            ecs_client.stop_task(cluster: @cluster, task: task.task_arn) if task.group.start_with?("family:")
+          end
+        end
+      end
+    end
+    def terminate_instances(instance_ids)
+      if instance_ids.empty?
+        @logger.info("There are no instances to terminate.")
+        return
+      end
+      instance_ids.each_slice(MAX_DETACHEABLE_EC2_INSTACE_COUNT) do |ids|
+        as_client.detach_instances(
+          auto_scaling_group_name: @auto_scaling_group_name,
+          instance_ids: ids,
+          should_decrement_desired_capacity: true
+        )
+      end
+      ec2_client.terminate_instances(instance_ids: instance_ids)
+      ec2_client.wait_until(:instance_terminated, instance_ids: instance_ids) do |w|
+        w.before_wait do |attempts, response|
+          @logger.info("Waiting for stopping all instances...#{attempts}")
+          instances = response.reservations.flat_map(&:instances)
+          instances.sort_by(&:instance_id).each do |instance|
+            @logger.info("#{instance.instance_id}\t#{instance.state.name}")
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ecs_deploy/scheduled_task.rb CHANGED

@@ -1,3 +1,4 @@
+require 'aws-sdk-cloudwatchevents'
 require 'timeout'
 module EcsDeploy
@@ -8,7 +9,7 @@ module EcsDeploy
     def initialize(
       cluster:, rule_name:, schedule_expression:, enabled: true, description: nil, target_id: nil,
-      task_definition_name:, revision: nil, task_count: nil, role_arn:,
+      task_definition_name:, revision: nil, task_count: nil, role_arn:, network_configuration: nil, launch_type: nil, platform_version: nil, group: nil,
       region: nil, container_overrides: nil
     )
       @cluster = cluster
@@ -21,10 +22,15 @@ module EcsDeploy
       @task_count = task_count || 1
       @revision = revision
       @role_arn = role_arn
-      @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
+      @network_configuration = network_configuration
+      @launch_type = launch_type || "EC2"
+      @platform_version = platform_version
+      @group = group
+      region ||= EcsDeploy.config.default_region
       @container_overrides = container_overrides
-      @client = Aws::ECS::Client.new(region: @region)
+      @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
+      @region = @client.config.region
       @cloud_watch_events = Aws::CloudWatchEvents::Client.new(region: @region)
     end
@@ -66,8 +72,14 @@ module EcsDeploy
         ecs_parameters: {
           task_definition_arn: task_definition_arn,
           task_count: @task_count,
+          network_configuration: @network_configuration,
+          launch_type: @launch_type,
+          platform_version: @platform_version,
+          group: @group,
         },
       }
+      target[:ecs_parameters].compact!
       if @container_overrides
         target.merge!(input: { containerOverrides: @container_overrides }.to_json)
       end

data/lib/ecs_deploy/service.rb CHANGED

@@ -5,13 +5,26 @@ module EcsDeploy
     CHECK_INTERVAL = 5
     MAX_DESCRIBE_SERVICES = 10
-    attr_reader :cluster, :region, :service_name
+    class TooManyAttemptsError < StandardError; end
+    attr_reader :cluster, :region, :service_name, :delete
     def initialize(
       cluster:, service_name:, task_definition_name: nil, revision: nil,
       load_balancers: nil,
       desired_count: nil, deployment_configuration: {maximum_percent: 200, minimum_healthy_percent: 100},
-      region: nil
+      launch_type: nil,
+      placement_constraints: [],
+      placement_strategy: [],
+      network_configuration: nil,
+      health_check_grace_period_seconds: nil,
+      scheduling_strategy: 'REPLICA',
+      enable_ecs_managed_tags: nil,
+      tags: nil,
+      propagate_tags: nil,
+      region: nil,
+      delete: false,
+      enable_execute_command: false
     )
       @cluster = cluster
       @service_name = service_name
@@ -19,11 +32,25 @@ module EcsDeploy
       @load_balancers = load_balancers
       @desired_count = desired_count
       @deployment_configuration = deployment_configuration
+      @launch_type = launch_type
+      @placement_constraints = placement_constraints
+      @placement_strategy = placement_strategy
+      @network_configuration = network_configuration
+      @health_check_grace_period_seconds = health_check_grace_period_seconds
+      @scheduling_strategy = scheduling_strategy
       @revision = revision
-      @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
+      @enable_ecs_managed_tags = enable_ecs_managed_tags
+      @tags = tags
+      @propagate_tags = propagate_tags
+      @enable_execute_command = enable_execute_command
       @response = nil
-      @client = Aws::ECS::Client.new(region: @region)
+      region ||= EcsDeploy.config.default_region
+      @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
+      @region = @client.config.region
+      @delete = delete
     end
     def current_task_definition_arn
@@ -37,53 +64,105 @@ module EcsDeploy
         cluster: @cluster,
         task_definition: task_definition_name_with_revision,
         deployment_configuration: @deployment_configuration,
+        network_configuration: @network_configuration,
+        health_check_grace_period_seconds: @health_check_grace_period_seconds,
+        enable_execute_command: @enable_execute_command,
       }
       if res.services.select{ |s| s.status == 'ACTIVE' }.empty?
+        return if @delete
         service_options.merge!({
           service_name: @service_name,
           desired_count: @desired_count.to_i,
+          launch_type: @launch_type,
+          placement_constraints: @placement_constraints,
+          placement_strategy: @placement_strategy,
+          enable_ecs_managed_tags: @enable_ecs_managed_tags,
+          tags: @tags,
+          propagate_tags: @propagate_tags,
         })
-        if @load_balancers
+        if @load_balancers && EcsDeploy.config.ecs_service_role
           service_options.merge!({
             role: EcsDeploy.config.ecs_service_role,
+          })
+        end
+        if @load_balancers
+          service_options.merge!({
             load_balancers: @load_balancers,
           })
         end
+        if @scheduling_strategy == 'DAEMON'
+          service_options[:scheduling_strategy] = @scheduling_strategy
+          service_options.delete(:desired_count)
+        end
         @response = @client.create_service(service_options)
-        EcsDeploy.logger.info "create service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
+        EcsDeploy.logger.info "create service [#{@service_name}] [#{@cluster}] [#{@region}] [#{Paint['OK', :green]}]"
       else
+        return delete_service if @delete
         service_options.merge!({service: @service_name})
         service_options.merge!({desired_count: @desired_count}) if @desired_count
+        update_tags(@service_name, @tags)
         @response = @client.update_service(service_options)
-        EcsDeploy.logger.info "update service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
+        EcsDeploy.logger.info "update service [#{@service_name}] [#{@cluster}] [#{@region}] [#{Paint['OK', :green]}]"
       end
     end
-    def wait_running
-      return if @response.nil?
+    def delete_service
+      if @scheduling_strategy != 'DAEMON'
+        @client.update_service(cluster: @cluster, service: @service_name, desired_count: 0)
+        sleep 1
+      end
+      @client.delete_service(cluster: @cluster, service: @service_name)
+      EcsDeploy.logger.info "delete service [#{@service_name}] [#{@cluster}] [#{@region}] [#{Paint['OK', :green]}]"
+    end
-      service = @response.service
+    def update_tags(service_name, tags)
+      service_arn = @client.describe_services(cluster: @cluster, services: [service_name]).services.first.service_arn
+      if service_arn.split('/').size == 2
+        if tags
+          EcsDeploy.logger.warn "#{service_name} doesn't support tagging operations, so tags are ignored. Long arn format must be used for tagging operations."
+        end
+        return
+      end
-      @client.wait_until(:services_stable, cluster: @cluster, services: [service.service_name]) do |w|
-        w.delay = 10
+      tags ||= []
+      current_tag_keys = @client.list_tags_for_resource(resource_arn: service_arn).tags.map(&:key)
+      deleted_tag_keys = current_tag_keys - tags.map { |t| t[:key] }
-        w.before_attempt do
-          EcsDeploy.logger.info "wait service stable [#{service.service_name}]"
-        end
+      unless deleted_tag_keys.empty?
+        @client.untag_resource(resource_arn: service_arn, tag_keys: deleted_tag_keys)
+      end
+      unless tags.empty?
+        @client.tag_resource(resource_arn: service_arn, tags: tags)
       end
     end
     def self.wait_all_running(services)
-      services.group_by { |s| [s.cluster, s.region] }.each do |(cl, region), ss|
+      services.group_by { |s| [s.cluster, s.region] }.flat_map do |(cl, region), ss|
         client = Aws::ECS::Client.new(region: region)
-        ss.map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES) do |chunked_service_names|
-          client.wait_until(:services_stable, cluster: cl, services: chunked_service_names) do |w|
-            w.before_attempt do
-              EcsDeploy.logger.info "wait service stable [#{chunked_service_names.join(", ")}]"
+        ss.reject(&:delete).map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES).map do |chunked_service_names|
+          Thread.new do
+            EcsDeploy.config.ecs_wait_until_services_stable_max_attempts.times do
+              EcsDeploy.logger.info "wait service stable [#{chunked_service_names.join(", ")}] [#{cl}]"
+              resp = client.describe_services(cluster: cl, services: chunked_service_names)
+              resp.services.each do |s|
+                # cf. https://github.com/aws/aws-sdk-ruby/blob/master/gems/aws-sdk-ecs/lib/aws-sdk-ecs/waiters.rb#L91-L96
+                if s.deployments.size == 1 && s.running_count == s.desired_count
+                  chunked_service_names.delete(s.service_name)
+                end
+              end
+              break if chunked_service_names.empty?
+              sleep EcsDeploy.config.ecs_wait_until_services_stable_delay
             end
+            raise TooManyAttemptsError unless chunked_service_names.empty?
           end
         end
-      end
+      end.each(&:join)
     end
     private

data/lib/ecs_deploy/task_definition.rb CHANGED

@@ -1,37 +1,54 @@
 module EcsDeploy
   class TaskDefinition
+    RETRY_BACKOFF = lambda do |c|
+      sleep(1)
+    end
+    RETRY_LIMIT = 10
     def self.deregister(arn, region: nil)
-      region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
-      client = Aws::ECS::Client.new(region: region)
+      region ||= EcsDeploy.config.default_region
+      param = {retry_backoff: RETRY_BACKOFF, retry_limit: RETRY_LIMIT}
+      client = region ? Aws::ECS::Client.new(param.merge(region: region)) : Aws::ECS::Client.new(param)
       client.deregister_task_definition({
         task_definition: arn,
       })
-      EcsDeploy.logger.info "deregister task definition [#{arn}] [#{region}] [#{Paint['OK', :green]}]"
+      EcsDeploy.logger.info "deregister task definition [#{arn}] [#{client.config.region}] [#{Paint['OK', :green]}]"
     end
     def initialize(
       task_definition_name:, region: nil,
       network_mode: "bridge", volumes: [], container_definitions: [], placement_constraints: [],
-      task_role_arn: nil
+      task_role_arn: nil,
+      execution_role_arn: nil,
+      requires_compatibilities: nil,
+      cpu: nil, memory: nil,
+      tags: nil
     )
       @task_definition_name = task_definition_name
       @task_role_arn        = task_role_arn
-      @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
+      @execution_role_arn   = execution_role_arn
+      region ||= EcsDeploy.config.default_region
       @container_definitions = container_definitions.map do |cd|
         if cd[:docker_labels]
           cd[:docker_labels] = cd[:docker_labels].map { |k, v| [k.to_s, v] }.to_h
         end
-        if cd[:log_configuration] && cd[:log_configuration][:options]
-          cd[:log_configuration][:options] = cd[:log_configuration][:options].map { |k, v| [k.to_s, v] }.to_h
+        if cd.dig(:log_configuration, :options)
+          cd[:log_configuration][:options] = cd.dig(:log_configuration, :options).map { |k, v| [k.to_s, v] }.to_h
         end
         cd
       end
       @volumes = volumes
       @network_mode = network_mode
       @placement_constraints = placement_constraints
-      @client = Aws::ECS::Client.new(region: @region)
+      @requires_compatibilities = requires_compatibilities
+      @cpu = cpu&.to_s
+      @memory = memory&.to_s
+      @tags = tags
+      param = {retry_backoff: RETRY_BACKOFF, retry_limit: RETRY_LIMIT}
+      @client = region ? Aws::ECS::Client.new(param.merge(region: region)) : Aws::ECS::Client.new(param)
+      @region = @client.config.region
     end
     def recent_task_definition_arns
@@ -52,6 +69,10 @@ module EcsDeploy
         volumes: @volumes,
         placement_constraints: @placement_constraints,
         task_role_arn: @task_role_arn,
+        execution_role_arn: @execution_role_arn,
+        requires_compatibilities: @requires_compatibilities,
+        cpu: @cpu, memory: @memory,
+        tags: @tags
       })
       EcsDeploy.logger.info "register task definition [#{@task_definition_name}] [#{@region}] [#{Paint['OK', :green]}]"
       res.task_definition

data/lib/ecs_deploy/version.rb CHANGED

@@ -1,3 +1,3 @@
 module EcsDeploy
-  VERSION = "0.3.2"
+  VERSION = "1.0.3"
 end

data/lib/ecs_deploy.rb CHANGED

@@ -1,7 +1,7 @@
 require "ecs_deploy/version"
 require "ecs_deploy/configuration"
-require 'aws-sdk'
+require 'aws-sdk-ecs'
 require 'logger'
 require 'terminal-table'
 require 'paint'