RubyGems - ecs_deploy - Versions diffs - 0.3.0 → 1.0.2 - Mend

ecs_deploy 0.3.0 → 1.0.2

Files changed (24)

checksums.yaml +5 -5
data/.gitignore +1 -0
data/.travis.yml +5 -0
data/CHANGELOG.md +139 -0
data/README.md +290 -27
data/Rakefile +4 -0
data/ecs_deploy.gemspec +9 -3
data/lib/ecs_deploy.rb +1 -1
data/lib/ecs_deploy/auto_scaler.rb +105 -340
data/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +209 -0
data/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb +149 -0
data/lib/ecs_deploy/auto_scaler/config_base.rb +16 -0
data/lib/ecs_deploy/auto_scaler/instance_drainer.rb +134 -0
data/lib/ecs_deploy/auto_scaler/service_config.rb +223 -0
data/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb +102 -0
data/lib/ecs_deploy/auto_scaler/trigger_config.rb +42 -0
data/lib/ecs_deploy/capistrano.rb +77 -15
data/lib/ecs_deploy/configuration.rb +6 -2
data/lib/ecs_deploy/instance_fluctuation_manager.rb +195 -0
data/lib/ecs_deploy/scheduled_task.rb +31 -15
data/lib/ecs_deploy/service.rb +97 -18
data/lib/ecs_deploy/task_definition.rb +30 -45
data/lib/ecs_deploy/version.rb +1 -1
metadata +113 -14

data/lib/ecs_deploy/configuration.rb CHANGED Viewed

@@ -6,12 +6,16 @@ module EcsDeploy
       :secret_access_key,
       :default_region,
       :deploy_wait_timeout,
-      :ecs_service_role
+      :ecs_service_role,
+      :ecs_wait_until_services_stable_max_attempts,
+      :ecs_wait_until_services_stable_delay
     def initialize
       @log_level = :info
       @deploy_wait_timeout = 300
-      @ecs_service_role = "ecsServiceRole"
+      # The following values are the default values of Aws::ECS::Waiters::ServicesStable
+      @ecs_wait_until_services_stable_max_attempts = 40
+      @ecs_wait_until_services_stable_delay = 15
     end
   end
 end

data/lib/ecs_deploy/instance_fluctuation_manager.rb ADDED Viewed

@@ -0,0 +1,195 @@
+require "aws-sdk-autoscaling"
+require "aws-sdk-ec2"
+require "aws-sdk-ecs"
+module EcsDeploy
+  class InstanceFluctuationManager
+    attr_reader :logger
+    MAX_UPDATABLE_ECS_CONTAINER_COUNT = 10
+    MAX_DETACHEABLE_EC2_INSTACE_COUNT = 20
+    MAX_DESCRIBABLE_ECS_TASK_COUNT = 100
+    def initialize(region:, cluster:, auto_scaling_group_name:, desired_capacity:, logger:)
+      @region = region
+      @cluster = cluster
+      @auto_scaling_group_name = auto_scaling_group_name
+      @desired_capacity = desired_capacity
+      @logger = logger
+    end
+    def increase
+      asg = fetch_auto_scaling_group
+      @logger.info("Increase desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{asg.max_size}")
+      as_client.update_auto_scaling_group(auto_scaling_group_name: @auto_scaling_group_name, desired_capacity: asg.max_size)
+      # Run in background because increasing instances may take time
+      Thread.new do
+        loop do
+          cluster = ecs_client.describe_clusters(clusters: [@cluster]).clusters.first
+          instance_count = cluster.registered_container_instances_count
+          if instance_count == asg.max_size
+            @logger.info("Succeeded in increasing instances!")
+            break
+          end
+          @logger.info("Current registered instance count: #{instance_count}")
+          sleep 5
+        end
+      end
+    end
+    def decrease
+      asg = fetch_auto_scaling_group
+      decrease_count = asg.desired_capacity - @desired_capacity
+      if decrease_count <= 0
+        @logger.info("The capacity is already #{asg.desired_capacity}")
+        return
+      end
+      @logger.info("Decrease desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{@desired_capacity}")
+      container_instances = ecs_client.list_container_instances(cluster: @cluster).flat_map do |resp|
+        ecs_client.describe_container_instances(
+          cluster: @cluster,
+          container_instances: resp.container_instance_arns
+        ).container_instances
+      end
+      az_to_container_instances = container_instances.sort_by {|ci| - ci.running_tasks_count }.group_by do |ci|
+        ci.attributes.find {|attribute| attribute.name == "ecs.availability-zone" }.value
+      end
+      if az_to_container_instances.empty?
+        @logger.info("There are no instances to terminate.")
+        return
+      end
+      target_container_instances = extract_target_container_instances(decrease_count, az_to_container_instances)
+      @logger.info("running tasks: #{ecs_client.list_tasks(cluster: @cluster).task_arns.size}")
+      all_running_task_arns = []
+      target_container_instances.map(&:container_instance_arn).each_slice(MAX_UPDATABLE_ECS_CONTAINER_COUNT) do |arns|
+        @logger.info(arns)
+        ecs_client.update_container_instances_state(
+          cluster: @cluster,
+          container_instances: arns,
+          status: "DRAINING"
+        )
+        arns.each do |arn|
+          all_running_task_arns.concat(list_running_task_arns(arn))
+        end
+      end
+      stop_tasks_not_belonging_service(all_running_task_arns)
+      wait_until_tasks_stopped(all_running_task_arns)
+      instance_ids = target_container_instances.map(&:ec2_instance_id)
+      terminate_instances(instance_ids)
+      @logger.info("Succeeded in decreasing instances!")
+    end
+    private
+    def aws_params
+      {
+        access_key_id: EcsDeploy.config.access_key_id,
+        secret_access_key: EcsDeploy.config.secret_access_key,
+        region: @region,
+        logger: @logger
+      }.reject do |_key, value|
+        value.nil?
+      end
+    end
+    def as_client
+      @as_client ||= Aws::AutoScaling::Client.new(aws_params)
+    end
+    def ec2_client
+      @ec2_client ||= Aws::EC2::Client.new(aws_params)
+    end
+    def ecs_client
+      @ecs_client ||= Aws::ECS::Client.new(aws_params)
+    end
+    def fetch_auto_scaling_group
+      as_client.describe_auto_scaling_groups(auto_scaling_group_names: [@auto_scaling_group_name]).auto_scaling_groups.first
+    end
+    # Extract container instances to terminate considering AZ balance
+    def extract_target_container_instances(decrease_count, az_to_container_instances)
+      target_container_instances = []
+      decrease_count.times do
+        @logger.debug do
+          "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
+        end
+        az = az_to_container_instances.max_by {|_az, instances| instances.size }.first
+        target_container_instances << az_to_container_instances[az].pop
+      end
+      @logger.info do
+        "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
+      end
+      target_container_instances
+    end
+    # list tasks whose desired_status is "RUNNING" or
+    # whose desired_status is "STOPPED" but last_status is "RUNNING" on the ECS container
+    def list_running_task_arns(container_instance_arn)
+      running_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn).flat_map(&:task_arns)
+      stopped_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn, desired_status: "STOPPED").flat_map(&:task_arns)
+      stopped_running_task_arns = stopped_tasks_arn.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).flat_map do |arns|
+        ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.select do |task|
+          task.desired_status == "STOPPED" && task.last_status == "RUNNING"
+        end
+      end.map(&:task_arn)
+      running_tasks_arn + stopped_running_task_arns
+    end
+    def wait_until_tasks_stopped(task_arns)
+      @logger.info("All old tasks: #{task_arns.size}")
+      task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
+        ecs_client.wait_until(:tasks_stopped, cluster: @cluster, tasks: arns)
+      end
+      @logger.info("All old tasks are stopped")
+    end
+    def stop_tasks_not_belonging_service(running_task_arns)
+      @logger.info("Running tasks: #{running_task_arns.size}")
+      unless running_task_arns.empty?
+        running_task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
+          ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.each do |task|
+            ecs_client.stop_task(cluster: @cluster, task: task.task_arn) if task.group.start_with?("family:")
+          end
+        end
+      end
+    end
+    def terminate_instances(instance_ids)
+      if instance_ids.empty?
+        @logger.info("There are no instances to terminate.")
+        return
+      end
+      instance_ids.each_slice(MAX_DETACHEABLE_EC2_INSTACE_COUNT) do |ids|
+        as_client.detach_instances(
+          auto_scaling_group_name: @auto_scaling_group_name,
+          instance_ids: ids,
+          should_decrement_desired_capacity: true
+        )
+      end
+      ec2_client.terminate_instances(instance_ids: instance_ids)
+      ec2_client.wait_until(:instance_terminated, instance_ids: instance_ids) do |w|
+        w.before_wait do |attempts, response|
+          @logger.info("Waiting for stopping all instances...#{attempts}")
+          instances = response.reservations.flat_map(&:instances)
+          instances.sort_by(&:instance_id).each do |instance|
+            @logger.info("#{instance.instance_id}\t#{instance.state.name}")
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ecs_deploy/scheduled_task.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'aws-sdk-cloudwatchevents'
 require 'timeout'
 module EcsDeploy
@@ -8,8 +9,8 @@ module EcsDeploy
     def initialize(
       cluster:, rule_name:, schedule_expression:, enabled: true, description: nil, target_id: nil,
-      task_definition_name:, revision: nil, task_count: nil, role_arn:,
-      region: nil
+      task_definition_name:, revision: nil, task_count: nil, role_arn:, network_configuration: nil, launch_type: nil, platform_version: nil, group: nil,
+      region: nil, container_overrides: nil
     )
       @cluster = cluster
       @rule_name = rule_name
@@ -21,9 +22,15 @@ module EcsDeploy
       @task_count = task_count || 1
       @revision = revision
       @role_arn = role_arn
-      @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
+      @network_configuration = network_configuration
+      @launch_type = launch_type || "EC2"
+      @platform_version = platform_version
+      @group = group
+      region ||= EcsDeploy.config.default_region
+      @container_overrides = container_overrides
-      @client = Aws::ECS::Client.new(region: @region)
+      @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
+      @region = @client.config.region
       @cloud_watch_events = Aws::CloudWatchEvents::Client.new(region: @region)
     end
@@ -58,19 +65,28 @@ module EcsDeploy
     end
     def put_targets
+      target = {
+        id: @target_id,
+        arn: cluster_arn,
+        role_arn: @role_arn,
+        ecs_parameters: {
+          task_definition_arn: task_definition_arn,
+          task_count: @task_count,
+          network_configuration: @network_configuration,
+          launch_type: @launch_type,
+          platform_version: @platform_version,
+          group: @group,
+        },
+      }
+      target[:ecs_parameters].compact!
+      if @container_overrides
+        target.merge!(input: { containerOverrides: @container_overrides }.to_json)
+      end
       res = @cloud_watch_events.put_targets(
         rule: @rule_name,
-        targets: [
-          {
-            id: @target_id,
-            arn: cluster_arn,
-            role_arn: @role_arn,
-            ecs_parameters: {
-              task_definition_arn: task_definition_arn,
-              task_count: @task_count,
-            },
-          }
-        ]
+        targets: [target]
       )
       if res.failed_entry_count.zero?
         EcsDeploy.logger.info "create cloudwatch event target [#{@target_id}] [#{@region}] [#{Paint['OK', :green]}]"

data/lib/ecs_deploy/service.rb CHANGED Viewed

@@ -5,13 +5,26 @@ module EcsDeploy
     CHECK_INTERVAL = 5
     MAX_DESCRIBE_SERVICES = 10
-    attr_reader :cluster, :region, :service_name
+    class TooManyAttemptsError < StandardError; end
+    attr_reader :cluster, :region, :service_name, :delete
     def initialize(
       cluster:, service_name:, task_definition_name: nil, revision: nil,
       load_balancers: nil,
       desired_count: nil, deployment_configuration: {maximum_percent: 200, minimum_healthy_percent: 100},
-      region: nil
+      launch_type: nil,
+      placement_constraints: [],
+      placement_strategy: [],
+      network_configuration: nil,
+      health_check_grace_period_seconds: nil,
+      scheduling_strategy: 'REPLICA',
+      enable_ecs_managed_tags: nil,
+      tags: nil,
+      propagate_tags: nil,
+      region: nil,
+      delete: false,
+      enable_execute_command: false
     )
       @cluster = cluster
       @service_name = service_name
@@ -19,11 +32,25 @@ module EcsDeploy
       @load_balancers = load_balancers
       @desired_count = desired_count
       @deployment_configuration = deployment_configuration
+      @launch_type = launch_type
+      @placement_constraints = placement_constraints
+      @placement_strategy = placement_strategy
+      @network_configuration = network_configuration
+      @health_check_grace_period_seconds = health_check_grace_period_seconds
+      @scheduling_strategy = scheduling_strategy
       @revision = revision
-      @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
+      @enable_ecs_managed_tags = enable_ecs_managed_tags
+      @tags = tags
+      @propagate_tags = propagate_tags
+      @enable_execute_command = enable_execute_command
       @response = nil
-      @client = Aws::ECS::Client.new(region: @region)
+      region ||= EcsDeploy.config.default_region
+      @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
+      @region = @client.config.region
+      @delete = delete
     end
     def current_task_definition_arn
@@ -37,53 +64,105 @@ module EcsDeploy
         cluster: @cluster,
         task_definition: task_definition_name_with_revision,
         deployment_configuration: @deployment_configuration,
+        network_configuration: @network_configuration,
+        health_check_grace_period_seconds: @health_check_grace_period_seconds,
+        enable_execute_command: @enable_execute_command,
       }
       if res.services.select{ |s| s.status == 'ACTIVE' }.empty?
+        return if @delete
         service_options.merge!({
           service_name: @service_name,
           desired_count: @desired_count.to_i,
+          launch_type: @launch_type,
+          placement_constraints: @placement_constraints,
+          placement_strategy: @placement_strategy,
+          enable_ecs_managed_tags: @enable_ecs_managed_tags,
+          tags: @tags,
+          propagate_tags: @propagate_tags,
         })
-        if @load_balancers
+        if @load_balancers && EcsDeploy.config.ecs_service_role
           service_options.merge!({
             role: EcsDeploy.config.ecs_service_role,
+          })
+        end
+        if @load_balancers
+          service_options.merge!({
             load_balancers: @load_balancers,
           })
         end
+        if @scheduling_strategy == 'DAEMON'
+          service_options[:scheduling_strategy] = @scheduling_strategy
+          service_options.delete(:desired_count)
+        end
         @response = @client.create_service(service_options)
         EcsDeploy.logger.info "create service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
       else
+        return delete_service if @delete
         service_options.merge!({service: @service_name})
         service_options.merge!({desired_count: @desired_count}) if @desired_count
+        update_tags(@service_name, @tags)
         @response = @client.update_service(service_options)
         EcsDeploy.logger.info "update service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
       end
     end
-    def wait_running
-      return if @response.nil?
+    def delete_service
+      if @scheduling_strategy != 'DAEMON'
+        @client.update_service(cluster: @cluster, service: @service_name, desired_count: 0)
+        sleep 1
+      end
+      @client.delete_service(cluster: @cluster, service: @service_name)
+      EcsDeploy.logger.info "delete service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
+    end
-      service = @response.service
+    def update_tags(service_name, tags)
+      service_arn = @client.describe_services(cluster: @cluster, services: [service_name]).services.first.service_arn
+      if service_arn.split('/').size == 2
+        if tags
+          EcsDeploy.logger.warn "#{service_name} doesn't support tagging operations, so tags are ignored. Long arn format must be used for tagging operations."
+        end
+        return
+      end
-      @client.wait_until(:services_stable, cluster: @cluster, services: [service.service_name]) do |w|
-        w.delay = 10
+      tags ||= []
+      current_tag_keys = @client.list_tags_for_resource(resource_arn: service_arn).tags.map(&:key)
+      deleted_tag_keys = current_tag_keys - tags.map { |t| t[:key] }
-        w.before_attempt do
-          EcsDeploy.logger.info "wait service stable [#{service.service_name}]"
-        end
+      unless deleted_tag_keys.empty?
+        @client.untag_resource(resource_arn: service_arn, tag_keys: deleted_tag_keys)
+      end
+      unless tags.empty?
+        @client.tag_resource(resource_arn: service_arn, tags: tags)
       end
     end
     def self.wait_all_running(services)
-      services.group_by { |s| [s.cluster, s.region] }.each do |(cl, region), ss|
+      services.group_by { |s| [s.cluster, s.region] }.flat_map do |(cl, region), ss|
         client = Aws::ECS::Client.new(region: region)
-        ss.map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES) do |chunked_service_names|
-          client.wait_until(:services_stable, cluster: cl, services: chunked_service_names) do |w|
-            w.before_attempt do
+        ss.reject(&:delete).map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES).map do |chunked_service_names|
+          Thread.new do
+            EcsDeploy.config.ecs_wait_until_services_stable_max_attempts.times do
               EcsDeploy.logger.info "wait service stable [#{chunked_service_names.join(", ")}]"
+              resp = client.describe_services(cluster: cl, services: chunked_service_names)
+              resp.services.each do |s|
+                # cf. https://github.com/aws/aws-sdk-ruby/blob/master/gems/aws-sdk-ecs/lib/aws-sdk-ecs/waiters.rb#L91-L96
+                if s.deployments.size == 1 && s.running_count == s.desired_count
+                  chunked_service_names.delete(s.service_name)
+                end
+              end
+              break if chunked_service_names.empty?
+              sleep EcsDeploy.config.ecs_wait_until_services_stable_delay
             end
+            raise TooManyAttemptsError unless chunked_service_names.empty?
           end
         end
-      end
+      end.each(&:join)
     end
     private