RubyGems - ecs_deploy - Versions diffs - 0.2.0 → 1.0.1 - Mend

ecs_deploy 0.2.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +5 -5
data/.gitignore +1 -0
data/.travis.yml +5 -0
data/CHANGELOG.md +132 -0
data/README.md +291 -28
data/Rakefile +4 -0
data/ecs_deploy.gemspec +9 -3
data/lib/ecs_deploy.rb +2 -1
data/lib/ecs_deploy/auto_scaler.rb +107 -358
data/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +209 -0
data/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb +149 -0
data/lib/ecs_deploy/auto_scaler/config_base.rb +16 -0
data/lib/ecs_deploy/auto_scaler/instance_drainer.rb +134 -0
data/lib/ecs_deploy/auto_scaler/service_config.rb +223 -0
data/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb +102 -0
data/lib/ecs_deploy/auto_scaler/trigger_config.rb +42 -0
data/lib/ecs_deploy/capistrano.rb +108 -11
data/lib/ecs_deploy/configuration.rb +6 -2
data/lib/ecs_deploy/instance_fluctuation_manager.rb +195 -0
data/lib/ecs_deploy/scheduled_task.rb +101 -0
data/lib/ecs_deploy/service.rb +99 -20
data/lib/ecs_deploy/task_definition.rb +37 -47
data/lib/ecs_deploy/version.rb +1 -1
metadata +114 -14

data/lib/ecs_deploy/configuration.rb CHANGED Viewed

@@ -6,12 +6,16 @@ module EcsDeploy
       :secret_access_key,
       :default_region,
       :deploy_wait_timeout,
-      :ecs_service_role
+      :ecs_service_role,
+      :ecs_wait_until_services_stable_max_attempts,
+      :ecs_wait_until_services_stable_delay
     def initialize
       @log_level = :info
       @deploy_wait_timeout = 300
-      @ecs_service_role = "ecsServiceRole"
+      # The following values are the default values of Aws::ECS::Waiters::ServicesStable
+      @ecs_wait_until_services_stable_max_attempts = 40
+      @ecs_wait_until_services_stable_delay = 15
     end
   end
 end

data/lib/ecs_deploy/instance_fluctuation_manager.rb ADDED Viewed

@@ -0,0 +1,195 @@
+require "aws-sdk-autoscaling"
+require "aws-sdk-ec2"
+require "aws-sdk-ecs"
+module EcsDeploy
+  # Temporarily grows, and later shrinks, the EC2 Auto Scaling group that backs an ECS cluster.
+  #
+  # #increase raises the group's desired capacity to its max size. #decrease drains surplus
+  # container instances, waits for their tasks to stop, then detaches and terminates them so
+  # that the group ends up at the desired capacity given to the constructor.
+  class InstanceFluctuationManager
+    attr_reader :logger
+    # Batch sizes used when slicing ARNs / instance ids across several API calls.
+    # NOTE(review): presumably these mirror the per-request limits of the AWS APIs -- confirm.
+    MAX_UPDATABLE_ECS_CONTAINER_COUNT = 10
+    MAX_DETACHABLE_EC2_INSTANCE_COUNT = 20
+    # Original (misspelled) name, kept so that existing references keep resolving.
+    MAX_DETACHEABLE_EC2_INSTACE_COUNT = MAX_DETACHABLE_EC2_INSTANCE_COUNT
+    MAX_DESCRIBABLE_ECS_TASK_COUNT = 100
+    # @param region [String, nil] AWS region handed to the API clients (left out when nil)
+    # @param cluster [String] ECS cluster whose container instances are managed
+    # @param auto_scaling_group_name [String] Auto Scaling group to resize
+    # @param desired_capacity [Integer] capacity the group should have after #decrease
+    # @param logger [Logger] receives progress messages; also handed to the AWS clients
+    def initialize(region:, cluster:, auto_scaling_group_name:, desired_capacity:, logger:)
+      @region = region
+      @cluster = cluster
+      @auto_scaling_group_name = auto_scaling_group_name
+      @desired_capacity = desired_capacity
+      @logger = logger
+    end
+    # Sets the group's desired capacity to its max size, then polls the cluster from a
+    # background thread until that many container instances are registered.
+    #
+    # @return [Thread] the polling thread; callers may join it or let it run
+    def increase
+      asg = fetch_auto_scaling_group
+      @logger.info("Increase desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{asg.max_size}")
+      as_client.update_auto_scaling_group(auto_scaling_group_name: @auto_scaling_group_name, desired_capacity: asg.max_size)
+      # Run in background because increasing instances may take time
+      Thread.new do
+        loop do
+          cluster = ecs_client.describe_clusters(clusters: [@cluster]).clusters.first
+          instance_count = cluster.registered_container_instances_count
+          # ">=" instead of "==": if the registered count ever overshoots max_size, an
+          # equality test can never become true and the thread would poll forever.
+          if instance_count >= asg.max_size
+            @logger.info("Succeeded in increasing instances!")
+            break
+          end
+          @logger.info("Current registered instance count: #{instance_count}")
+          sleep 5
+        end
+      end
+    end
+    # Shrinks the group back to the configured desired capacity: picks the surplus container
+    # instances (keeping availability zones balanced), sets them to DRAINING, stops tasks
+    # whose group starts with "family:", waits for all tasks on them to stop, and finally
+    # detaches and terminates the EC2 instances.
+    #
+    # Does nothing when the group is already at or below the desired capacity, or when the
+    # cluster has no container instances.
+    def decrease
+      asg = fetch_auto_scaling_group
+      decrease_count = asg.desired_capacity - @desired_capacity
+      if decrease_count <= 0
+        @logger.info("The capacity is already #{asg.desired_capacity}")
+        return
+      end
+      @logger.info("Decrease desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{@desired_capacity}")
+      container_instances = ecs_client.list_container_instances(cluster: @cluster).flat_map do |resp|
+        # A cluster without instances yields a page with no ARNs. Skip it instead of sending an
+        # empty list to describe_container_instances, so the "no instances" guard below is reachable.
+        next [] if resp.container_instance_arns.empty?
+        ecs_client.describe_container_instances(
+          cluster: @cluster,
+          container_instances: resp.container_instance_arns
+        ).container_instances
+      end
+      # Sorted by descending running task count, so the instances popped off the end of each
+      # AZ list are the ones running the fewest tasks.
+      az_to_container_instances = container_instances.sort_by {|ci| - ci.running_tasks_count }.group_by do |ci|
+        ci.attributes.find {|attribute| attribute.name == "ecs.availability-zone" }.value
+      end
+      if az_to_container_instances.empty?
+        @logger.info("There are no instances to terminate.")
+        return
+      end
+      target_container_instances = extract_target_container_instances(decrease_count, az_to_container_instances)
+      # Count across every result page, not just the first one.
+      @logger.info("running tasks: #{ecs_client.list_tasks(cluster: @cluster).flat_map(&:task_arns).size}")
+      all_running_task_arns = []
+      target_container_instances.map(&:container_instance_arn).each_slice(MAX_UPDATABLE_ECS_CONTAINER_COUNT) do |arns|
+        @logger.info(arns)
+        ecs_client.update_container_instances_state(
+          cluster: @cluster,
+          container_instances: arns,
+          status: "DRAINING"
+        )
+        arns.each do |arn|
+          all_running_task_arns.concat(list_running_task_arns(arn))
+        end
+      end
+      stop_tasks_not_belonging_service(all_running_task_arns)
+      wait_until_tasks_stopped(all_running_task_arns)
+      instance_ids = target_container_instances.map(&:ec2_instance_id)
+      terminate_instances(instance_ids)
+      @logger.info("Succeeded in decreasing instances!")
+    end
+    private
+    # Options shared by all AWS clients; entries whose value is nil are left out.
+    def aws_params
+      {
+        access_key_id: EcsDeploy.config.access_key_id,
+        secret_access_key: EcsDeploy.config.secret_access_key,
+        region: @region,
+        logger: @logger
+      }.reject do |_key, value|
+        value.nil?
+      end
+    end
+    # Memoized Auto Scaling client.
+    def as_client
+      @as_client ||= Aws::AutoScaling::Client.new(aws_params)
+    end
+    # Memoized EC2 client.
+    def ec2_client
+      @ec2_client ||= Aws::EC2::Client.new(aws_params)
+    end
+    # Memoized ECS client.
+    def ecs_client
+      @ecs_client ||= Aws::ECS::Client.new(aws_params)
+    end
+    # Returns the first group described for @auto_scaling_group_name (nil when none is returned).
+    def fetch_auto_scaling_group
+      as_client.describe_auto_scaling_groups(auto_scaling_group_names: [@auto_scaling_group_name]).auto_scaling_groups.first
+    end
+    # Extract container instances to terminate considering AZ balance.
+    #
+    # Repeatedly removes one instance from whichever AZ currently holds the most, mutating the
+    # lists inside az_to_container_instances. Returns fewer than decrease_count instances when
+    # the cluster does not have that many registered.
+    def extract_target_container_instances(decrease_count, az_to_container_instances)
+      target_container_instances = []
+      decrease_count.times do
+        @logger.debug { az_balance_message(az_to_container_instances) }
+        _az, instances = az_to_container_instances.max_by {|_name, list| list.size }
+        # Every AZ list is exhausted: stop rather than collecting nil placeholders, which
+        # would later fail when their ARNs and instance ids are read.
+        if instances.nil? || instances.empty?
+          @logger.warn("Only #{target_container_instances.size} of #{decrease_count} instances are available to terminate")
+          break
+        end
+        target_container_instances << instances.pop
+      end
+      @logger.info { az_balance_message(az_to_container_instances) }
+      target_container_instances
+    end
+    # Formats the number of remaining container instances per AZ, ordered by AZ name.
+    def az_balance_message(az_to_container_instances)
+      "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
+    end
+    # list tasks whose desired_status is "RUNNING" or
+    # whose desired_status is "STOPPED" but last_status is "RUNNING" on the ECS container
+    def list_running_task_arns(container_instance_arn)
+      running_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn).flat_map(&:task_arns)
+      stopped_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn, desired_status: "STOPPED").flat_map(&:task_arns)
+      stopped_running_task_arns = stopped_tasks_arn.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).flat_map do |arns|
+        ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.select do |task|
+          task.desired_status == "STOPPED" && task.last_status == "RUNNING"
+        end
+      end.map(&:task_arn)
+      running_tasks_arn + stopped_running_task_arns
+    end
+    # Blocks, one batch of task ARNs at a time, until the :tasks_stopped waiter succeeds.
+    def wait_until_tasks_stopped(task_arns)
+      @logger.info("All old tasks: #{task_arns.size}")
+      task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
+        ecs_client.wait_until(:tasks_stopped, cluster: @cluster, tasks: arns)
+      end
+      @logger.info("All old tasks are stopped")
+    end
+    # Stops every given task whose group starts with "family:".
+    # NOTE(review): going by the method name, these are the tasks not started by a service -- confirm.
+    def stop_tasks_not_belonging_service(running_task_arns)
+      @logger.info("Running tasks: #{running_task_arns.size}")
+      unless running_task_arns.empty?
+        running_task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
+          ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.each do |task|
+            ecs_client.stop_task(cluster: @cluster, task: task.task_arn) if task.group.start_with?("family:")
+          end
+        end
+      end
+    end
+    # Detaches the instances from the Auto Scaling group (decrementing its desired capacity),
+    # terminates them, and waits until EC2 reports them terminated.
+    def terminate_instances(instance_ids)
+      if instance_ids.empty?
+        @logger.info("There are no instances to terminate.")
+        return
+      end
+      instance_ids.each_slice(MAX_DETACHABLE_EC2_INSTANCE_COUNT) do |ids|
+        as_client.detach_instances(
+          auto_scaling_group_name: @auto_scaling_group_name,
+          instance_ids: ids,
+          should_decrement_desired_capacity: true
+        )
+      end
+      ec2_client.terminate_instances(instance_ids: instance_ids)
+      ec2_client.wait_until(:instance_terminated, instance_ids: instance_ids) do |w|
+        # Log the state of every instance before each wait between polls.
+        w.before_wait do |attempts, response|
+          @logger.info("Waiting for stopping all instances...#{attempts}")
+          instances = response.reservations.flat_map(&:instances)
+          instances.sort_by(&:instance_id).each do |instance|
+            @logger.info("#{instance.instance_id}\t#{instance.state.name}")
+          end
+        end
+      end
+    end
+  end
+end

data/lib/ecs_deploy/scheduled_task.rb ADDED Viewed

@@ -0,0 +1,101 @@
+require 'aws-sdk-cloudwatchevents'
+require 'timeout'
+module EcsDeploy
+  # Deploys an ECS task as the target of a scheduled CloudWatch Events rule:
+  # #deploy creates or updates the rule, then registers the task definition as its target.
+  class ScheduledTask
+    # Raised when put_targets reports at least one failed entry.
+    class PutTargetsFailure < StandardError; end
+    attr_reader :cluster, :region
+    # @param cluster [String] ECS cluster the scheduled task runs on
+    # @param rule_name [String] name of the CloudWatch Events rule
+    # @param schedule_expression [String] schedule expression passed to put_rule
+    # @param enabled [Boolean] whether the rule state is "ENABLED" (otherwise "DISABLED")
+    # @param description [String, nil] rule description
+    # @param target_id [String, nil] target id; defaults to task_definition_name
+    # @param task_definition_name [String] task definition family to run
+    # @param revision [Integer, String, nil] revision appended as ":<revision>" when given
+    # @param task_count [Integer, nil] defaults to 1
+    # @param role_arn [String] role ARN set on the target
+    # @param launch_type [String, nil] defaults to "EC2"
+    # @param region [String, nil] falls back to EcsDeploy.config.default_region, and when that
+    #   is also nil the ECS client is built without an explicit region
+    # @param container_overrides [Array, nil] sent as the target's JSON input under "containerOverrides"
+    # network_configuration, platform_version and group are passed through to the target's
+    # ecs_parameters and omitted there when nil.
+    def initialize(
+      cluster:, rule_name:, schedule_expression:, enabled: true, description: nil, target_id: nil,
+      task_definition_name:, revision: nil, task_count: nil, role_arn:, network_configuration: nil, launch_type: nil, platform_version: nil, group: nil,
+      region: nil, container_overrides: nil
+    )
+      @cluster = cluster
+      @rule_name = rule_name
+      @schedule_expression = schedule_expression
+      @enabled = enabled
+      @description = description
+      @target_id = target_id || task_definition_name
+      @task_definition_name = task_definition_name
+      @task_count = task_count || 1
+      @revision = revision
+      @role_arn = role_arn
+      @network_configuration = network_configuration
+      @launch_type = launch_type || "EC2"
+      @platform_version = platform_version
+      @group = group
+      region ||= EcsDeploy.config.default_region
+      @container_overrides = container_overrides
+      @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
+      # Read the region back from the client so it is known even when none was given.
+      @region = @client.config.region
+      @cloud_watch_events = Aws::CloudWatchEvents::Client.new(region: @region)
+    end
+    # @return [String] the rule name given to the constructor.
+    # Defined explicitly because the value is stored in @rule_name; a plain
+    # attr_reader :schedule_rule_name reads @schedule_rule_name, which is never assigned,
+    # and therefore always returned nil.
+    def schedule_rule_name
+      @rule_name
+    end
+    # Creates or updates the rule, then its target.
+    #
+    # @raise [PutTargetsFailure] when registering the target fails
+    def deploy
+      put_rule
+      put_targets
+    end
+    private
+    # ARN of the configured cluster, or nil when describe_clusters returns no cluster.
+    def cluster_arn
+      cl = @client.describe_clusters(clusters: [@cluster]).clusters[0]
+      cl.cluster_arn if cl
+    end
+    # ARN of the task definition, looked up as "<name>" or "<name>:<revision>".
+    def task_definition_arn
+      suffix = @revision ? ":#{@revision}" : ""
+      name = "#{@task_definition_name}#{suffix}"
+      @client.describe_task_definition(task_definition: name).task_definition.task_definition_arn
+    end
+    # Puts the scheduled rule and logs its ARN.
+    def put_rule
+      res = @cloud_watch_events.put_rule(
+        name: @rule_name,
+        schedule_expression: @schedule_expression,
+        state: @enabled ? "ENABLED" : "DISABLED",
+        description: @description,
+      )
+      EcsDeploy.logger.info "create cloudwatch event rule [#{res.rule_arn}] [#{@region}] [#{Paint['OK', :green]}]"
+    end
+    # Registers the task definition as the rule's single target; logs every failed entry
+    # and raises PutTargetsFailure when the call reports failures.
+    def put_targets
+      target = {
+        id: @target_id,
+        arn: cluster_arn,
+        role_arn: @role_arn,
+        ecs_parameters: {
+          task_definition_arn: task_definition_arn,
+          task_count: @task_count,
+          network_configuration: @network_configuration,
+          launch_type: @launch_type,
+          platform_version: @platform_version,
+          group: @group,
+        },
+      }
+      # Drop the optional ECS parameters that were not configured.
+      target[:ecs_parameters].compact!
+      if @container_overrides
+        target.merge!(input: { containerOverrides: @container_overrides }.to_json)
+      end
+      res = @cloud_watch_events.put_targets(
+        rule: @rule_name,
+        targets: [target]
+      )
+      if res.failed_entry_count.zero?
+        EcsDeploy.logger.info "create cloudwatch event target [#{@target_id}] [#{@region}] [#{Paint['OK', :green]}]"
+      else
+        res.failed_entries.each do |entry|
+          EcsDeploy.logger.error "failed to create cloudwatch event target [#{@region}] target_id=#{entry.target_id} error_code=#{entry.error_code} error_message=#{entry.error_message}"
+        end
+        raise PutTargetsFailure
+      end
+    end
+  end
+end

data/lib/ecs_deploy/service.rb CHANGED Viewed

@@ -3,13 +3,27 @@ require 'timeout'
 module EcsDeploy
   class Service
     CHECK_INTERVAL = 5
-    attr_reader :cluster, :region, :service_name
+    MAX_DESCRIBE_SERVICES = 10
+    class TooManyAttemptsError < StandardError; end
+    attr_reader :cluster, :region, :service_name, :delete
     def initialize(
       cluster:, service_name:, task_definition_name: nil, revision: nil,
       load_balancers: nil,
       desired_count: nil, deployment_configuration: {maximum_percent: 200, minimum_healthy_percent: 100},
-      region: nil
+      launch_type: nil,
+      placement_constraints: [],
+      placement_strategy: [],
+      network_configuration: nil,
+      health_check_grace_period_seconds: nil,
+      scheduling_strategy: 'REPLICA',
+      enable_ecs_managed_tags: nil,
+      tags: nil,
+      propagate_tags: nil,
+      region: nil,
+      delete: false
     )
       @cluster = cluster
       @service_name = service_name
@@ -17,11 +31,24 @@ module EcsDeploy
       @load_balancers = load_balancers
       @desired_count = desired_count
       @deployment_configuration = deployment_configuration
+      @launch_type = launch_type
+      @placement_constraints = placement_constraints
+      @placement_strategy = placement_strategy
+      @network_configuration = network_configuration
+      @health_check_grace_period_seconds = health_check_grace_period_seconds
+      @scheduling_strategy = scheduling_strategy
       @revision = revision
-      @region = region || EcsDeploy.config.default_region || ENV["AWS_DEFAULT_REGION"]
+      @enable_ecs_managed_tags = enable_ecs_managed_tags
+      @tags = tags
+      @propagate_tags = propagate_tags
       @response = nil
-      @client = Aws::ECS::Client.new(region: @region)
+      region ||= EcsDeploy.config.default_region
+      @client = region ? Aws::ECS::Client.new(region: region) : Aws::ECS::Client.new
+      @region = @client.config.region
+      @delete = delete
     end
     def current_task_definition_arn
@@ -35,52 +62,104 @@ module EcsDeploy
         cluster: @cluster,
         task_definition: task_definition_name_with_revision,
         deployment_configuration: @deployment_configuration,
+        network_configuration: @network_configuration,
+        health_check_grace_period_seconds: @health_check_grace_period_seconds,
       }
-      if res.services.empty?
+      if res.services.select{ |s| s.status == 'ACTIVE' }.empty?
+        return if @delete
         service_options.merge!({
           service_name: @service_name,
           desired_count: @desired_count.to_i,
+          launch_type: @launch_type,
+          placement_constraints: @placement_constraints,
+          placement_strategy: @placement_strategy,
+          enable_ecs_managed_tags: @enable_ecs_managed_tags,
+          tags: @tags,
+          propagate_tags: @propagate_tags,
         })
-        if @load_balancers
+        if @load_balancers && EcsDeploy.config.ecs_service_role
           service_options.merge!({
             role: EcsDeploy.config.ecs_service_role,
+          })
+        end
+        if @load_balancers
+          service_options.merge!({
             load_balancers: @load_balancers,
           })
         end
+        if @scheduling_strategy == 'DAEMON'
+          service_options[:scheduling_strategy] = @scheduling_strategy
+          service_options.delete(:desired_count)
+        end
         @response = @client.create_service(service_options)
         EcsDeploy.logger.info "create service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
       else
+        return delete_service if @delete
         service_options.merge!({service: @service_name})
         service_options.merge!({desired_count: @desired_count}) if @desired_count
+        update_tags(@service_name, @tags)
         @response = @client.update_service(service_options)
         EcsDeploy.logger.info "update service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
       end
     end
-    def wait_running
-      return if @response.nil?
+    def delete_service
+      if @scheduling_strategy != 'DAEMON'
+        @client.update_service(cluster: @cluster, service: @service_name, desired_count: 0)
+        sleep 1
+      end
+      @client.delete_service(cluster: @cluster, service: @service_name)
+      EcsDeploy.logger.info "delete service [#{@service_name}] [#{@region}] [#{Paint['OK', :green]}]"
+    end
-      service = @response.service
+    def update_tags(service_name, tags)
+      service_arn = @client.describe_services(cluster: @cluster, services: [service_name]).services.first.service_arn
+      if service_arn.split('/').size == 2
+        if tags
+          EcsDeploy.logger.warn "#{service_name} doesn't support tagging operations, so tags are ignored. Long arn format must be used for tagging operations."
+        end
+        return
+      end
-      @client.wait_until(:services_stable, cluster: @cluster, services: [service.service_name]) do |w|
-        w.delay = 10
+      tags ||= []
+      current_tag_keys = @client.list_tags_for_resource(resource_arn: service_arn).tags.map(&:key)
+      deleted_tag_keys = current_tag_keys - tags.map { |t| t[:key] }
-        w.before_attempt do
-          EcsDeploy.logger.info "wait service stable [#{service.service_name}]"
-        end
+      unless deleted_tag_keys.empty?
+        @client.untag_resource(resource_arn: service_arn, tag_keys: deleted_tag_keys)
+      end
+      unless tags.empty?
+        @client.tag_resource(resource_arn: service_arn, tags: tags)
       end
     end
     def self.wait_all_running(services)
-      services.group_by { |s| [s.cluster, s.region] }.each do |(cl, region), ss|
+      services.group_by { |s| [s.cluster, s.region] }.flat_map do |(cl, region), ss|
         client = Aws::ECS::Client.new(region: region)
-        service_names = ss.map(&:service_name)
-        client.wait_until(:services_stable, cluster: cl, services: service_names) do |w|
-          w.before_attempt do
-            EcsDeploy.logger.info "wait service stable [#{service_names.join(", ")}]"
+        ss.reject(&:delete).map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES).map do |chunked_service_names|
+          Thread.new do
+            EcsDeploy.config.ecs_wait_until_services_stable_max_attempts.times do
+              EcsDeploy.logger.info "wait service stable [#{chunked_service_names.join(", ")}]"
+              resp = client.describe_services(cluster: cl, services: chunked_service_names)
+              resp.services.each do |s|
+                # cf. https://github.com/aws/aws-sdk-ruby/blob/master/gems/aws-sdk-ecs/lib/aws-sdk-ecs/waiters.rb#L91-L96
+                if s.deployments.size == 1 && s.running_count == s.desired_count
+                  chunked_service_names.delete(s.service_name)
+                end
+              end
+              break if chunked_service_names.empty?
+              sleep EcsDeploy.config.ecs_wait_until_services_stable_delay
+            end
+            raise TooManyAttemptsError unless chunked_service_names.empty?
           end
         end
-      end
+      end.each(&:join)
     end
     private