cfn_manage 0.7.0 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +8 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +20 -0
  5. data/Dockerfile +7 -0
  6. data/Gemfile +9 -0
  7. data/LICENSE +21 -0
  8. data/README.md +311 -0
  9. data/Rakefile +9 -0
  10. data/cfn_manage.gemspec +58 -0
  11. data/{bin → exe}/cfn_manage +78 -7
  12. data/{bin → exe}/usage.txt +31 -0
  13. data/lib/cfn_manage/cf_start_stop_environment.rb +84 -56
  14. data/lib/cfn_manage/globals.rb +90 -0
  15. data/lib/cfn_manage/handlers/alarm.rb +45 -0
  16. data/lib/cfn_manage/handlers/asg.rb +311 -0
  17. data/lib/cfn_manage/handlers/aurora_cluster.rb +97 -0
  18. data/lib/cfn_manage/handlers/documentdb.rb +89 -0
  19. data/lib/cfn_manage/handlers/ec2.rb +42 -0
  20. data/lib/cfn_manage/handlers/ecs_cluster.rb +219 -0
  21. data/lib/cfn_manage/handlers/rds.rb +142 -0
  22. data/lib/cfn_manage/handlers/spot_fleet.rb +56 -0
  23. data/lib/cfn_manage/handlers/transfer.rb +96 -0
  24. data/lib/cfn_manage/start_stop_handler_factory.rb +19 -19
  25. data/lib/cfn_manage/tag_finder.rb +77 -0
  26. data/lib/cfn_manage/version.rb +1 -1
  27. metadata +56 -21
  28. data/lib/cfn_manage/alarm_start_stop_handler.rb +0 -44
  29. data/lib/cfn_manage/asg_start_stop_handler.rb +0 -181
  30. data/lib/cfn_manage/aurora_cluster_start_stop_handler.rb +0 -97
  31. data/lib/cfn_manage/documentdb_cluster_start_stop_handler.rb +0 -89
  32. data/lib/cfn_manage/ec2_start_stop_handler.rb +0 -43
  33. data/lib/cfn_manage/ecs_cluster_start_stop_handler.rb +0 -80
  34. data/lib/cfn_manage/rds_start_stop_handler.rb +0 -134
  35. data/lib/cfn_manage/spot_fleet_start_stop_handler.rb +0 -57
  36. data/lib/cfn_manage/transfer_start_stop_handler.rb +0 -97
@@ -0,0 +1,89 @@
1
+ require 'aws-sdk-docdb'
2
+ require 'cfn_manage/aws_credentials'
3
+
4
+ module CfnManage
5
+ module StartStopHandler
6
class DocumentDb

  # Start/stop handler for a single DocumentDB cluster.
  #
  # On construction the handler looks the cluster up once and caches the
  # returned description in @docdb_cluster; #start and #stop decide what to
  # do based on that cached status.
  #
  # @param cluster_id [String] DocumentDB cluster identifier
  # @param options [Hash] accepted for interface parity with other handlers; not read here
  def initialize(cluster_id, options = {})
    @cluster_id = cluster_id
    credentials = CfnManage::AWSCredentials.get_session_credentials("startstopcluster_#{cluster_id}")

    # Build exactly one client: only pass :credentials when session
    # credentials were actually returned, otherwise let the SDK use its defaults.
    client_options = { retry_limit: 20 }
    client_options[:credentials] = credentials unless credentials.nil?
    @docdb_client = Aws::DocDB::Client.new(**client_options)

    response = @docdb_client.describe_db_clusters({ db_cluster_identifier: @cluster_id })
    @docdb_cluster = response.db_clusters.first
  end

  # Starts the cluster if it is currently 'stopped'.
  #
  # Does nothing (other than logging) when the cluster is already
  # 'available' or is in any other non-stopped state. Unless
  # CfnManage.skip_wait? is true, blocks until the cluster is 'available'.
  #
  # @param configuration [Hash] unused by this handler
  # @return [void]
  def start(configuration)
    status = @docdb_cluster.status

    if status == 'available'
      $log.info("DocDB Cluster #{@cluster_id} is already in available state")
      return
    end

    unless status == 'stopped'
      $log.info("DocDB Cluster #{@cluster_id} is not in a stopped state. State: #{status}")
      return
    end

    $log.info("Starting DocDB cluster #{@cluster_id}")
    @docdb_client.start_db_cluster({ db_cluster_identifier: @cluster_id })
    return if CfnManage.skip_wait?

    $log.info("Waiting DocDB cluster to become available #{@cluster_id}")
    wait('available')
  end

  # Stops the cluster if it is currently 'available'.
  #
  # Unless CfnManage.skip_wait? is true, blocks until the cluster is 'stopped'.
  #
  # @return [Hash] always an empty hash (this handler saves no configuration)
  def stop
    status = @docdb_cluster.status

    if status == 'stopped'
      $log.info("DocDB Cluster #{@cluster_id} is already stopped")
      return {}
    end

    if status != 'available'
      $log.info("DocDB Cluster #{@cluster_id} is not in a available state. State: #{status}")
      return {}
    end

    $log.info("Stopping DocDB cluster #{@cluster_id}")
    @docdb_client.stop_db_cluster({ db_cluster_identifier: @cluster_id })
    unless CfnManage.skip_wait?
      $log.info("Waiting DocDB cluster to be stopped #{@cluster_id}")
      wait('stopped')
    end
    {}
  end

  # Polls the cluster every 15 seconds until it has reported
  # +completed_state+ on 4 consecutive polls, or 360 polls have been made.
  # A timeout is logged as an error but does not raise.
  #
  # @param completed_state [String] status to wait for, e.g. 'available'
  # @return [void]
  def wait(completed_state)
    steady_count = 4        # consecutive matching polls required
    max_attempts = 60 * 6   # upper bound on polls
    state_count = 0
    attempts = 0

    until attempts == max_attempts
      # Re-describe the cluster on every iteration so the status is fresh.
      response = @docdb_client.describe_db_clusters({ db_cluster_identifier: @cluster_id })
      cluster = response.db_clusters.first
      $log.info("DocDB Cluster #{cluster.db_cluster_identifier} state: #{cluster.status}, waiting for #{completed_state}")

      if cluster.status == completed_state.to_s
        state_count += 1
        $log.info("#{state_count}/#{steady_count}")
      else
        # any other status resets the steady-state counter
        state_count = 0
      end
      break if state_count == steady_count

      attempts += 1
      sleep(15)
    end

    return unless attempts == max_attempts

    $log.error("DocDB Cluster #{@cluster_id} did not enter #{completed_state} state, however continuing operations...")
  end

end
88
+ end
89
+ end
@@ -0,0 +1,42 @@
1
+ require 'aws-sdk-ec2'
2
+ require 'cfn_manage/aws_credentials'
3
+
4
+ module CfnManage
5
+ module StartStopHandler
6
class Ec2

  # Start/stop handler for a single EC2 instance.
  #
  # @param instance_id [String] EC2 instance id
  # @param options [Hash] accepted for interface parity with other handlers; not read here
  def initialize(instance_id, options = {})
    @instance_id = instance_id
    session = CfnManage::AWSCredentials.get_session_credentials("stoprun_#{instance_id}")
    client = Aws::EC2::Client.new(credentials: session, retry_limit: 20)
    resource = Aws::EC2::Resource.new(client: client, retry_limit: 20)
    @instance = resource.instance(instance_id)
  end

  # Starts the instance unless it already reports the 'running' state.
  #
  # @param configuration [Hash] unused by this handler
  # @return [Object, nil] result of the start call, or nil if already running
  def start(configuration)
    unless @instance.state.name == 'running'
      $log.info("Starting instance #{@instance_id}")
      return @instance.start
    end

    $log.info("Instance #{@instance_id} already running")
    nil
  end

  # Stops the instance unless it is already 'stopped' or 'stopping'.
  #
  # @return [Hash, nil] an empty hash after issuing the stop (EC2 instances
  #   have no configuration to save), or nil when no stop was needed
  def stop
    current = @instance.state.name
    if current == 'stopped' || current == 'stopping'
      $log.info("Instance #{@instance_id} already stopping or stopped")
      return nil
    end

    $log.info("Stopping instance #{@instance_id}")
    @instance.stop

    {}
  end

  # No-op: this handler never waits on EC2 instances, it only logs that fact.
  #
  # @param wait_states [Array] ignored
  def wait(wait_states=[])
    $log.debug("Not waiting for EC2 instance #{@instance_id}")
  end

end
41
+ end
42
+ end
@@ -0,0 +1,219 @@
1
+ require 'aws-sdk-ecs'
2
+ require 'cfn_manage/aws_credentials'
3
+
4
+ module CfnManage
5
+ module StartStopHandler
6
class EcsCluster

  # Start/stop handler for the REPLICA services of an ECS cluster.
  #
  # Each option falls back to the corresponding CfnManage global when the
  # key is absent from +options+.
  #
  # @param cluster_id [String] ECS cluster name or ARN
  # @param options [Hash]
  # @option options [String] :wait_state 'Running', 'HealthyInTargetGroup' or 'Skip'
  # @option options [Object] :skip_wait interpreted through CfnManage.true?
  # @option options [Object] :wait_container_instances interpreted through CfnManage.true?
  # @option options [Object] :ignore_missing_ecs_config interpreted through CfnManage.true?
  def initialize(cluster_id, options = {})
    @wait_state = options.fetch(:wait_state) { CfnManage.ecs_wait_state }
    @skip_wait = options.key?(:skip_wait) ? CfnManage.true?(options[:skip_wait]) : CfnManage.skip_wait?
    @wait_container_instances = options.key?(:wait_container_instances) ? CfnManage.true?(options[:wait_container_instances]) : CfnManage.ecs_wait_container_instances?
    @ignore_missing_ecs_config = options.key?(:ignore_missing_ecs_config) ? CfnManage.true?(options[:ignore_missing_ecs_config]) : CfnManage.ignore_missing_ecs_config?

    credentials = CfnManage::AWSCredentials.get_session_credentials("stoprun_#{cluster_id}")
    @ecs_client = Aws::ECS::Client.new(credentials: credentials, retry_limit: 20)
    @elb_client = Aws::ElasticLoadBalancingV2::Client.new(credentials: credentials, retry_limit: 20)

    # Collect the service ARNs from every page of the listing.
    @services = []
    @ecs_client.list_services(cluster: cluster_id, scheduling_strategy: 'REPLICA', max_results: 100).each do |results|
      @services.push(*results.service_arns)
    end
    $log.info("Found #{@services.count} services in ECS cluster #{cluster_id}")
    @cluster = cluster_id
  end

  # Restores the desired count of every service that currently has a
  # desired count of 0, then (unless waiting is skipped) waits for each
  # started service according to the configured wait state.
  #
  # @param configuration [Hash, nil] map of service name to a hash holding
  #   the saved desired count, as returned by #stop. The count is read from
  #   either the 'desired_count' string key or the :desired_count symbol key.
  def start(configuration)
    configuration = {} if configuration.nil?
    wait_for_instances if @wait_container_instances

    @services.each do |service_arn|
      $log.info("Searching for ECS service #{service_arn} in cluster #{@cluster}")
      service = describe_service(service_arn)

      if service.desired_count != 0
        $log.info("ECS service #{service.service_name} is already running")
        next
      end

      desired_count = resolve_desired_count(service, configuration)
      next if desired_count.nil?

      $log.info("Starting ECS service #{service.service_name} with desired count of #{desired_count}")
      @ecs_client.update_service({
        desired_count: desired_count,
        service: service_arn,
        cluster: @cluster
      })
    end

    return if @skip_wait

    @services.each do |service_arn|
      service = describe_service(service_arn)

      if service.desired_count == 0
        $log.info("ECS service #{service.service_name} was not started so will not wait for it")
        next
      end

      wait(@wait_state, service_arn)
    end
  end

  # Sets the desired count of every running service to 0 and records the
  # previous desired count.
  #
  # @return [Hash, nil] service name => { desired_count: Integer } for each
  #   service that was stopped, or nil when nothing was stopped
  def stop
    configuration = {}

    @services.each do |service_arn|
      $log.info("Searching for ECS service #{service_arn} in cluster #{@cluster}")
      service = describe_service(service_arn)

      if service.desired_count == 0
        $log.info("ECS service #{service.service_name} is already stopped")
        next
      end

      configuration[service.service_name] = { desired_count: service.desired_count }
      $log.info("Stopping ECS service #{service.service_name}")
      @ecs_client.update_service({
        desired_count: 0,
        service: service_arn,
        cluster: @cluster
      })
    end

    configuration.empty? ? nil : configuration
  end

  # Polls a service every 15 seconds, up to 360 times, until the check
  # selected by +type+ succeeds. A timeout is logged as an error.
  #
  # @param type [String] 'Running', 'HealthyInTargetGroup' or 'Skip'; any
  #   other value is logged as unknown and not waited on
  # @param service_arn [String, nil] service to wait for; nil returns immediately
  def wait(type,service_arn=nil)
    if service_arn.nil?
      $log.warn("Unable to wait for #{service_arn} service")
      return
    end

    check =
      case type
      when 'Running' then :wait_till_running
      when 'HealthyInTargetGroup' then :wait_till_healthy_in_target_group
      when 'Skip'
        $log.info("Skipping wait for #{service_arn} service")
        return
      else
        $log.warn("Unknown ECS service wait type #{type}. Skipping...")
        return
      end

    max_attempts = 60 * 6
    attempts = 0

    until attempts == max_attempts
      break if send(check, service_arn)

      attempts += 1
      sleep(15)
    end

    $log.error("Failed to wait for ecs service with wait type #{type}") if attempts == max_attempts
  end

  # Polls every 5 seconds, up to 180 times, until the cluster lists at
  # least one ACTIVE container instance. A timeout is logged as an error.
  def wait_for_instances
    max_attempts = 60 * 3
    attempts = 0

    until attempts == max_attempts
      resp = @ecs_client.list_container_instances({
        cluster: @cluster,
        status: "ACTIVE"
      })

      if resp.container_instance_arns.any?
        $log.info("A container instances has joined ecs cluster #{@cluster}")
        break
      end

      attempts += 1
      sleep(5)
    end

    $log.error("Failed to wait for container instances to join ecs cluster #{@cluster}") if attempts == max_attempts
  end

  # Single check used by #wait for the 'Running' wait state.
  #
  # @param service_arn [String]
  # @return [Boolean] true once the service reports at least one running task
  def wait_till_running(service_arn)
    service_name = service_arn.split('/').last
    service = describe_service(service_arn)

    if service.running_count > 0
      $log.info("ecs service #{service_name} has #{service.running_count} running tasks")
      return true
    end

    $log.info("waiting for ecs service #{service_name} to reach a running state")
    false
  end

  # Single check used by #wait for the 'HealthyInTargetGroup' wait state.
  #
  # @param service_arn [String]
  # @return [Boolean] true when the service has no target groups, or when
  #   every target in every associated target group is 'healthy'; false when
  #   any target group is still empty or any target is not yet healthy
  def wait_till_healthy_in_target_group(service_arn)
    service = describe_service(service_arn)
    target_groups = service.load_balancers.map { |lb| lb.target_group_arn }

    if target_groups.empty?
      # nothing to check when the ECS service is not associated with any target group
      $log.info("ecs aervice #{service_arn} is not associated with any target groups")
      return true
    end

    target_health = []
    target_groups.each do |tg|
      resp = @elb_client.describe_target_health({
        target_group_arn: tg,
      })
      if resp.target_health_descriptions.empty?
        # a task must be registered in the target group before its health
        # can be checked, so keep waiting
        $log.info("ECS service #{service_arn} hasn't been placed into target group #{tg.split('/')[1]} yet")
        return false
      end
      target_health.push(*resp.target_health_descriptions)
    end

    if target_health.all? { |th| th.target_health.state == 'healthy' }
      $log.info("All ecs tasks are in a healthy state in target groups #{target_groups.map {|tg| tg.split('/')[1] }}")
      return true
    end

    target_health.each do |th|
      next if th.target_health.state == 'healthy'

      $log.info("waiting for ecs task #{th.target.id} to be healthy in target group. Current state is #{th.target_health.state}")
    end

    false
  end

  private

  # Fetches the current description of one service in this cluster.
  def describe_service(service_arn)
    @ecs_client.describe_services(services:[service_arn], cluster: @cluster).services.first
  end

  # Works out the desired count to start +service+ with.
  #
  # Returns nil when the service must be skipped: either the saved count is
  # missing or 0, or there is no saved entry and missing config is not ignored.
  def resolve_desired_count(service, configuration)
    name = service.service_name

    if configuration.has_key?(name)
      saved = configuration[name] || {}
      # #stop writes a symbol key; also accept the string key that #start
      # originally read.
      count = saved['desired_count'] || saved[:desired_count] || 0
      if count == 0
        $log.info("Desired count is #{count}, skipping starting of service #{name}")
        return nil
      end
      count
    elsif @ignore_missing_ecs_config
      # honour the per-resource option resolved in the constructor, not only the global flag
      $log.info("ECS service #{name} wasn't previously stopped by cfn_manage. Option --ignore-missing-ecs-config set and setting desired count to 1")
      1
    else
      $log.warn("ECS service #{name} wasn't previously stopped by cfn_manage. Skipping ...")
      nil
    end
  end

end
218
+ end
219
+ end
@@ -0,0 +1,142 @@
1
+ require 'aws-sdk-rds'
2
+ require 'cfn_manage/aws_credentials'
3
+
4
+ module CfnManage
5
+ module StartStopHandler
6
class Rds

  # Start/stop handler for a single RDS DB instance.
  #
  # @param instance_id [String] DB instance identifier
  # @param options [Hash] accepted for interface parity with other handlers; not read here
  def initialize(instance_id, options = {})
    @instance_id = instance_id
    # engines that cannot be stopped/started at instance level by this handler
    @excluded_engines = %w(aurora aurora-mysql aurora-postgresql)
    credentials = CfnManage::AWSCredentials.get_session_credentials("startstoprds_#{instance_id}")

    # Build exactly one client: only pass :credentials when session
    # credentials were actually returned.
    client_options = { retry_limit: 20 }
    client_options[:credentials] = credentials unless credentials.nil?
    @rds_client = Aws::RDS::Client.new(**client_options)

    @rds_instance = Aws::RDS::Resource.new(client: @rds_client).db_instance(instance_id)
  end

  # Starts the instance if it is 'stopped', otherwise just waits for it to
  # be 'available' (waiting is skipped when CfnManage.skip_wait? is true).
  # Afterwards converts the instance back to Multi-AZ when the saved
  # configuration says it was Multi-AZ before being stopped.
  #
  # @param configuration [Hash, nil] configuration returned by #stop; the
  #   Multi-AZ flag is read from 'is_multi_az' or :is_multi_az
  # @return [void]
  def start(configuration)
    if @excluded_engines.include?(@rds_instance.engine)
      $log.info("RDS Instance #{@instance_id} engine is #{@rds_instance.engine} and cannot be started by instance.")
      return
    end

    status = @rds_instance.db_instance_status
    $log.info("RDS Instance #{@instance_id} is already in available state") if status == 'available'

    if status == 'stopped'
      $log.info("Starting db instance #{@instance_id}")
      @rds_client.start_db_instance({ db_instance_identifier: @instance_id })

      unless CfnManage.skip_wait?
        $log.info("Waiting db instance to become available #{@instance_id}")
        wait('available')
      end
    else
      # not stopped: still wait so any Multi-AZ conversion below runs
      # against an available instance
      wait('available') unless CfnManage.skip_wait?
    end

    return if configuration.nil?

    # #stop writes a symbol key; also accept the string key originally read here.
    if configuration['is_multi_az'] || configuration[:is_multi_az]
      $log.info("Converting to Multi-AZ instance after start (instance #{@instance_id})")
      set_rds_instance_multi_az(true)
    end
  end

  # Stops the instance if it is 'available', first converting a Multi-AZ
  # instance to single-AZ.
  #
  # @return [Hash, nil] { is_multi_az: Boolean } captured before any change;
  #   nil when the stop call was rejected because the instance is a read replica
  # @raise [Aws::RDS::Errors::InvalidDBInstanceState] for any other invalid-state error
  def stop
    configuration = {
      is_multi_az: @rds_instance.multi_az
    }

    if @excluded_engines.include?(@rds_instance.engine)
      $log.info("RDS Instance #{@instance_id} engine is #{@rds_instance.engine} and cannot be stoped by instance.")
      return configuration
    end

    status = @rds_instance.db_instance_status

    # This check must precede the "not available" check, otherwise a
    # stopped instance is reported as a warning and this branch is never reached.
    if status == 'stopped'
      $log.info("RDS Instance #{@instance_id} is already stopped")
      return configuration
    end

    if status != 'available'
      $log.warn("RDS Instance #{@instance_id} not in available state, and thus can not be stopped")
      $log.warn("RDS Instance #{@instance_id} state: #{status}")
      return configuration
    end

    if @rds_instance.multi_az
      $log.info("Converting to Non-Multi-AZ instance before stop (instance #{@instance_id}")
      set_rds_instance_multi_az(false)
    end

    $log.info("Stopping instance #{@instance_id}")
    begin
      @rds_client.stop_db_instance({ db_instance_identifier: @instance_id })
    rescue Aws::RDS::Errors::InvalidDBInstanceState => e
      raise unless e.message == "Cannot stop or start a Read-Replica instance"

      $log.warn("Skipping due to error: #{e.message}")
      return
    end

    unless CfnManage.skip_wait?
      $log.info("Waiting db instance to be stopped #{@instance_id}")
      wait('stopped')
    end
    configuration
  end

  # Switches the instance to/from Multi-AZ and waits for it to be
  # 'available' again. Does nothing if the instance already matches.
  #
  # @param multi_az [Boolean] desired Multi-AZ setting
  def set_rds_instance_multi_az(multi_az)
    if @rds_instance.multi_az == multi_az
      $log.info("Rds instance #{@rds_instance.db_instance_identifier} already multi-az=#{multi_az}")
      return
    end

    @rds_instance.modify({ multi_az: multi_az, apply_immediately: true })
    # #wait polls up to 360 times at 15 second intervals (about 90 minutes)
    wait('available')
  end

  # Polls the instance every 15 seconds until it has reported
  # +completed_state+ on 4 consecutive polls, or 360 polls have been made.
  # A timeout is logged as an error but does not raise.
  #
  # @param completed_state [String] status to wait for, e.g. 'available'
  # @return [void]
  def wait(completed_state)
    # The state must hold steady across several polls: a Multi-AZ
    # modification can briefly report the target state before changing again.
    steady_count = 4
    max_attempts = 60 * 6
    state_count = 0
    attempts = 0
    rds = Aws::RDS::Resource.new(client: @rds_client)

    until attempts == max_attempts
      # fetch a fresh resource object each time so the status is current
      instance = rds.db_instance(@instance_id)
      $log.info("Instance #{instance.db_instance_identifier} state: #{instance.db_instance_status}, waiting for #{completed_state}")

      if instance.db_instance_status == completed_state.to_s
        state_count += 1
        $log.info("#{state_count}/#{steady_count}")
      else
        state_count = 0
      end
      break if state_count == steady_count

      attempts += 1
      sleep(15)
    end

    return unless attempts == max_attempts

    # report the state that was awaited (the original interpolated an undefined local)
    $log.error("RDS Database Instance #{@instance_id} did not enter #{completed_state} state, however continuing operations...")
  end

  private :set_rds_instance_multi_az

end
141
+ end
142
+ end