cfn_manage 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
# Namespace for cfn_manage plus a small registry of process-wide options.
#
# Options live in instance variables on the module itself. The two wait
# states have defaults and full accessors; every other option is a one-way
# switch: calling the bare method sets it to true, and the `?` reader returns
# the stored value (nil until the switch has been set).
module CfnManage
  # Default wait states; both can be reassigned through the accessors below.
  @asg_wait_state = 'HealthyInASG'
  @ecs_wait_state = 'Skip'

  class << self
    # Return (or replace) the value of the wait-state options.
    attr_accessor :asg_wait_state, :ecs_wait_state
  end

  # Converts string-based booleans (e.g. AWS tag values) to real booleans.
  # True only when the lower-cased string form of +obj+ is "true" or "1".
  def self.true?(obj)
    %w[true 1].include?(obj.to_s.downcase)
  end

  # Find options set on resource tags.
  def self.find_tags
    @find_tags = true
  end

  def self.find_tags?
    @find_tags
  end

  # Don't stop or start resources.
  def self.dry_run
    @dry_run = true
  end

  def self.dry_run?
    @dry_run
  end

  # Don't wait for resources to become healthy.
  def self.skip_wait
    @skip_wait = true
  end

  def self.skip_wait?
    @skip_wait
  end

  # Wait for resources based upon priority groups.
  def self.wait_async
    @wait_async = true
  end

  def self.wait_async?
    @wait_async
  end

  # Described upstream only as a "dirty hack"; this module just records the flag.
  def self.ignore_missing_ecs_config
    @ignore_missing_ecs_config = true
  end

  def self.ignore_missing_ecs_config?
    @ignore_missing_ecs_config
  end

  # Disable termination on the ASG when stopping EC2 instances in an ASG.
  def self.asg_suspend_termination
    @asg_suspend_termination = true
  end

  def self.asg_suspend_termination?
    @asg_suspend_termination
  end

  # Continue if a resource fails to stop or start.
  def self.continue_on_error
    @continue_on_error = true
  end

  def self.continue_on_error?
    @continue_on_error
  end

  # Wait for container instances to join an ECS cluster.
  def self.ecs_wait_container_instances
    @ecs_wait_container_instances = true
  end

  def self.ecs_wait_container_instances?
    @ecs_wait_container_instances
  end
end
@@ -0,0 +1,45 @@
1
+ require 'aws-sdk-cloudwatch'
2
+ require 'cfn_manage/aws_credentials'
3
+
4
module CfnManage
  module StartStopHandler
    # Start/stop handler for a CloudWatch alarm: "stopping" disables the
    # alarm's actions and "starting" enables them again.
    class Alarm

      # @param alarm_name [String] name of the CloudWatch alarm to manage
      # @param options [Hash] accepted for signature parity with the other
      #   handlers; not read by this class
      def initialize(alarm_name, options = {})
        @alarm_id = alarm_name
        # The session name previously interpolated @asg_name, which is never
        # set in this class, so every session was named "startstopalarm_".
        credentials = CfnManage::AWSCredentials.get_session_credentials("startstopalarm_#{@alarm_id}")

        # Build the client once; session credentials are added only when the
        # credentials helper returned some.
        client_options = { retry_limit: 20 }
        client_options[:credentials] = credentials unless credentials.nil?
        @cwclient = Aws::CloudWatch::Client.new(client_options)

        @cwresource = Aws::CloudWatch::Resource.new(client: @cwclient)
        @alarm = @cwresource.alarm(alarm_name)
      end

      # Enables the alarm's actions unless they are already enabled.
      #
      # @param configuration [Object] unused by this handler
      def start(configuration)
        if @alarm.actions_enabled
          $log.info("Alarm #{@alarm.alarm_arn} actions already enabled")
          return
        end

        $log.info("Enabling alarm #{@alarm.alarm_arn}")
        @alarm.enable_actions({})
      end

      # Disables the alarm's actions unless they are already disabled.
      #
      # @return [Hash] always an empty hash; this handler has no state to
      #   carry over to a later start
      def stop
        unless @alarm.actions_enabled
          $log.info("Alarm #{@alarm.alarm_arn} actions already disabled")
          return {}
        end

        $log.info("Disabling actions on alarm #{@alarm.alarm_arn}")
        @alarm.disable_actions({})
        {}
      end

      # Alarms have nothing to wait for; only logs at debug level.
      def wait(wait_states=[])
        $log.debug("Not waiting for alarm #{@alarm_id}")
      end

    end
  end
end
@@ -0,0 +1,311 @@
1
+ require 'cfn_manage/aws_credentials'
2
+
3
+ require 'aws-sdk-autoscaling'
4
+ require 'aws-sdk-ec2'
5
+ require 'aws-sdk-elasticloadbalancingv2'
6
+
7
+ module CfnManage
8
+ module StartStopHandler
9
+ class Asg
10
+
11
# Sets up the handler for one auto scaling group.
#
# Per-resource options (:wait_state, :skip_wait, :suspend_termination) take
# precedence; when a key is absent the matching CfnManage module setting is
# used. Raises a RuntimeError when the group cannot be found.
def initialize(asg_id, options = {})
  @asg_name = asg_id

  @wait_state = options.fetch(:wait_state) { CfnManage.asg_wait_state }
  @skip_wait =
    if options.key?(:skip_wait)
      CfnManage.true?(options[:skip_wait])
    else
      CfnManage.skip_wait?
    end
  @suspend_termination =
    if options.key?(:suspend_termination)
      CfnManage.true?(options[:suspend_termination])
    else
      CfnManage.asg_suspend_termination?
    end

  session = CfnManage::AWSCredentials.get_session_credentials("stopasg_#{@asg_name}")

  # Default clients first; replaced below when session credentials exist.
  @asg_client = Aws::AutoScaling::Client.new(retry_limit: 20)
  @ec2_client = Aws::EC2::Client.new(retry_limit: 20)
  @elb_client = Aws::ElasticLoadBalancingV2::Client.new(retry_limit: 20)
  unless session.nil?
    @asg_client = Aws::AutoScaling::Client.new(credentials: session, retry_limit: 20)
    @ec2_client = Aws::EC2::Client.new(credentials: session, retry_limit: 20)
    @elb_client = Aws::ElasticLoadBalancingV2::Client.new(credentials: session, retry_limit: 20)
  end

  # Look the group up once and keep its description for stop/start.
  groups = @asg_client.describe_auto_scaling_groups(
    auto_scaling_group_names: [@asg_name]
  ).auto_scaling_groups
  raise "Couldn't find ASG #{@asg_name}" if groups.empty?

  @asg = groups.first
end
35
+
36
# Stops the auto scaling group and returns the settings needed to start it
# again.
#
# Two modes, selected by @suspend_termination:
# * falsy - min/max/desired are set to 0.
# * truthy - all ASG processes are suspended and every instance in the group
#   is stopped individually.
#
# @return [Hash, nil] the previous sizes (plus :suspended_processes in
#   suspend mode), or nil when the group is already at 0/0/0
def stop
  # check if already stopped
  if @asg.min_size == 0 && @asg.max_size == 0 && @asg.desired_capacity == 0
    $log.info("ASG #{@asg_name} already stopped")
    # nil and false configurations are not saved
    return nil
  end

  # configuration handed back to the caller (the original comment says it is
  # stored to S3)
  configuration = {
    desired_capacity: @asg.desired_capacity,
    min_size: @asg.min_size,
    max_size: @asg.max_size
  }
  group_name = "#{@asg.auto_scaling_group_name}"

  unless @suspend_termination
    $log.info("Setting desired capacity to 0/0/0 for ASG #{group_name}")

    @asg_client.update_auto_scaling_group({
      auto_scaling_group_name: group_name,
      min_size: 0,
      max_size: 0,
      desired_capacity: 0
    })
    return configuration
  end

  configuration[:suspended_processes] = @asg.suspended_processes

  $log.info("Suspending processes for ASG #{group_name}")
  @asg_client.suspend_processes({
    auto_scaling_group_name: group_name,
  })

  $log.info("Stopping all instances in ASG #{group_name}")
  ec2 = Aws::EC2::Resource.new(client: @ec2_client, retry_limit: 20)
  @asg.instances.each do |member|
    instance_id = member.instance_id
    instance = ec2.instance(instance_id)

    if %w(stopped stopping).include?(instance.state.name)
      $log.info("Instance #{instance_id} already stopping or stopped")
      # Skip only this instance. A bare `return` here abandoned the remaining
      # instances and dropped the configuration.
      next
    end

    $log.info("Stopping instance #{instance_id}")
    instance.stop
  end

  configuration
end
98
+
99
# Starts the auto scaling group from the configuration returned by #stop.
#
# With @suspend_termination falsy the stored min/max/desired sizes are
# restored. With it truthy, every instance in the group is started, the
# method waits for them, and the ASG processes are resumed afterwards.
#
# @param configuration [Hash, nil] read with string keys ('min_size',
#   'max_size', 'desired_capacity'); nil means nothing was stored, in which
#   case the group is skipped
def start(configuration)
  if configuration.nil?
    $log.warn("No configuration found for #{@asg_name}, skipping..")
    return
  end
  $log.info("Starting ASG #{@asg_name} with following configuration\n#{configuration}")

  if @suspend_termination
    $log.info("Starting instances for ASG #{@asg_name}...")

    ec2 = Aws::EC2::Resource.new(client: @ec2_client, retry_limit: 20)
    @asg.instances.each do |member|
      instance_id = member.instance_id
      instance = ec2.instance(instance_id)

      if instance.state.name == 'running'
        $log.info("Instance #{instance_id} already running")
        # Skip only this instance. A bare `return` here skipped the wait and
        # the resume_processes call below, leaving the ASG suspended.
        next
      end

      $log.info("Starting instance #{instance_id}")
      instance.start
    end
  else
    # restore asg sizes
    @asg_client.update_auto_scaling_group({
      auto_scaling_group_name: @asg_name,
      min_size: configuration['min_size'],
      max_size: configuration['max_size'],
      desired_capacity: configuration['desired_capacity']
    })
  end

  if configuration['desired_capacity'] == 0
    # if ASG desired count is purposefully set to 0 and we want to wait for other ASG's
    # in the stack, then we need to skip wait for this ASG.
    $log.info("Desired capacity is 0, skipping wait for asg #{@asg_name}")
  elsif @skip_wait && @suspend_termination
    # If wait is skipped we still need to wait until the instances are healthy in the asg
    # before resuming the processes. This will avoid the asg terminating the instances.
    wait('HealthyInASG')
  elsif !@skip_wait
    # if we are waiting for the instances to reach a desired state
    $log.info("Waiting for ASG instances wait state #{@wait_state}")
    wait(@wait_state)
  end

  if @suspend_termination
    # resume the asg processes after we've waited for them to become healthy
    $log.info("Resuming all processes for ASG #{@asg_name}")

    @asg_client.resume_processes({
      auto_scaling_group_name: "#{@asg.auto_scaling_group_name}",
    })

    # NOTE(review): the rest of this method reads the configuration with
    # string keys, but this check uses a symbol key, so it looks like it never
    # matches a string-keyed configuration. Left unchanged because the stored
    # shape of 'suspended_processes' is not visible here - confirm before
    # enabling, since an empty list may suspend every process.
    if configuration.key?(:suspended_processes)

      $log.info("Suspending processes stored in configuration for ASG #{@asg_name}")

      @asg_client.suspend_processes({
        auto_scaling_group_name: "#{@asg.auto_scaling_group_name}",
        scaling_processes: configuration['suspended_processes'],
      })
    end

  end

end
168
+
169
# Polls the check that matches +type+ until it succeeds or the attempt budget
# (360 polls, sleeping 15 seconds after each failed poll) is used up.
#
# Recognised types are 'HealthyInASG', 'Running' and 'HealthyInTargetGroup';
# any other value logs a warning and returns without polling. Running out of
# attempts only logs an error, nothing is raised.
def wait(type)
  checks = {
    'HealthyInASG' => :wait_till_healthy_in_asg,
    'Running' => :wait_till_running,
    'HealthyInTargetGroup' => :wait_till_healthy_in_target_group
  }
  check = checks[type]

  max_attempts = 60 * 6
  attempts = 0

  if check.nil?
    $log.warn("unknown asg wait type #{type}. skipping...")
  else
    while attempts < max_attempts
      break if send(check)

      attempts += 1
      sleep(15)
    end
  end

  $log.error("Failed to wait for asg with wait type #{type}") if attempts == max_attempts
end
199
+
200
# Single poll: re-reads the group and reports whether every instance in it
# has health status "Healthy".
#
# @return [Boolean] false while the group has no instances or any instance is
#   not "Healthy"; true otherwise
def wait_till_healthy_in_asg
  members = @asg_client.describe_auto_scaling_groups(
    auto_scaling_group_names: [@asg_name]
  ).auto_scaling_groups.first.instances

  health_status = members.map { |member| member.health_status }
  $log.info("ASG #{@asg_name} health status is currently #{health_status}")

  if health_status.empty?
    $log.info("ASG #{@asg_name} has not started any instances yet")
    return false
  end

  unless health_status.all? { |status| status == "Healthy" }
    # Only instances explicitly reported as "Unhealthy" are named in the log.
    unhealthy = members.select { |member| member.health_status == "Unhealthy" }
                       .map { |member| member.instance_id }
    $log.info("waiting for instances #{unhealthy} to become healthy in asg #{@asg_name}")
    return false
  end

  $log.info("All instances healthy in ASG #{@asg_name}")
  true
end
225
+
226
# Single poll: re-reads the group and reports whether every instance in it is
# in the EC2 "running" state.
#
# @return [Boolean] false while the group has no instances, no statuses are
#   reported, or any instance is not running; true otherwise
def wait_till_running
  group = @asg_client.describe_auto_scaling_groups(
    auto_scaling_group_names: [@asg_name]
  ).auto_scaling_groups.first
  instance_ids = group.instances.map { |member| member.instance_id }

  if instance_ids.empty?
    $log.info("ASG #{@asg_name} has not started any instances yet")
    return false
  end

  # describe_instance_status reports only running instances by default, so
  # without include_all_instances a stopped or pending instance is simply
  # missing from the result and the "all running" test can pass by omission.
  statuses = @ec2_client.describe_instance_status({
    instance_ids: instance_ids,
    include_all_instances: true
  }).instance_statuses

  if statuses.empty?
    # An empty list would make the "all running" test vacuously true.
    $log.info("No instance statuses reported yet for ASG #{@asg_name}")
    return false
  end

  # Uses the local response; the previous code read an unset @status here and
  # raised NoMethodError whenever an instance was not running.
  not_running = statuses.reject { |status| status.instance_state.name == "running" }

  if not_running.empty?
    $log.info("All instances in a running state from ASG #{@asg_name}")
    return true
  end

  not_running.each do |status|
    $log.info("waiting for instances #{status.instance_id} to be running. Current state is #{status.instance_state.name}")
  end

  false
end
258
+
259
# Single poll: reports whether every instance of the group is registered and
# "healthy" in each target group attached to the ASG.
#
# @return [Boolean] true when all targets are healthy, or when the ASG has no
#   target groups at all; false while instances are missing, not yet
#   registered in a target group, or not healthy
def wait_till_healthy_in_target_group
  group = @asg_client.describe_auto_scaling_groups(
    auto_scaling_group_names: [@asg_name]
  ).auto_scaling_groups.first
  instance_ids = group.instances.map { |member| member.instance_id }
  tg_arns = group.target_group_arns

  if instance_ids.empty?
    $log.info("ASG #{@asg_name} has not started any instances yet")
    return false
  end

  if tg_arns.empty?
    # we want to skip here if the asg is not associated with any target groups
    $log.info("ASG #{@asg_name} is not associated with any target groups")
    return true
  end

  descriptions = []
  tg_arns.each do |arn|
    found = @elb_client.describe_target_health({
      target_group_arn: arn,
    }).target_health_descriptions

    unless found.length == instance_ids.length
      # every ASG instance has to be registered in the target group before
      # its health can be judged
      $log.info("All ASG instances haven't been placed into target group #{arn.split('/')[1]} yet")
      return false
    end

    descriptions.concat(found)
  end

  pending = descriptions.reject { |desc| desc.target_health.state == 'healthy' }

  if pending.empty?
    $log.info("All instances are in a healthy state in target groups #{tg_arns.map { |arn| arn.split('/')[1] }}")
    return true
  end

  pending.each do |desc|
    $log.info("waiting for instances #{desc.target.id} to be healthy in target group. Current state is #{desc.target_health.state}")
  end

  false
end
308
+
309
+ end
310
+ end
311
+ end
@@ -0,0 +1,97 @@
1
+ require 'aws-sdk-rds'
2
+ require 'cfn_manage/aws_credentials'
3
+
4
module CfnManage
  module StartStopHandler
    # Start/stop handler for an Aurora DB cluster. Only clusters whose
    # engine mode is 'provisioned' are started or stopped.
    class AuroraCluster

      # @param cluster_id [String] DB cluster identifier
      # @param options [Hash] accepted for signature parity with the other
      #   handlers; not read by this class
      def initialize(cluster_id, options = {})
        @cluster_id = cluster_id
        credentials = CfnManage::AWSCredentials.get_session_credentials("startstopcluster_#{cluster_id}")

        # Build the client once; session credentials are added only when the
        # credentials helper returned some.
        client_options = { retry_limit: 20 }
        client_options[:credentials] = credentials unless credentials.nil?
        @rds_client = Aws::RDS::Client.new(client_options)

        rds = Aws::RDS::Resource.new(client: @rds_client)
        @rds_cluster = rds.db_cluster(cluster_id)
      end

      # Starts the cluster when it is currently 'stopped', then waits for it
      # to become 'available' unless CfnManage.skip_wait? is set.
      #
      # @param configuration [Object] unused by this handler
      def start(configuration)
        if @rds_cluster.status == 'available'
          $log.info("Aurora Cluster #{@cluster_id} is already in available state")
          return
        end

        if @rds_cluster.engine_mode != 'provisioned'
          $log.info("Aurora Cluster #{@cluster_id} is not a provisioned cluster and cannot be started using this method.")
          return
        end

        # start rds cluster
        if @rds_cluster.status == 'stopped'
          $log.info("Starting Aurora cluster #{@cluster_id}")
          @rds_client.start_db_cluster({ db_cluster_identifier: @cluster_id })
          unless CfnManage.skip_wait?
            # wait cluster to become available
            $log.info("Waiting Aurora cluster to become available #{@cluster_id}")
            wait('available')
          end
        else
          $log.info("Aurora Cluster #{@cluster_id} is not in a stopped state. State: #{@rds_cluster.status}")
        end
      end

      # Stops the cluster when it is 'available' and provisioned, then waits
      # for it to be 'stopped' unless CfnManage.skip_wait? is set.
      #
      # @return [Hash] always an empty hash; this handler has no state to
      #   carry over to a later start
      def stop
        if @rds_cluster.status == 'stopped'
          $log.info("Aurora Cluster #{@cluster_id} is already stopped")
          return {}
        end

        if @rds_cluster.status != 'available'
          $log.info("Aurora Cluster #{@cluster_id} is not in a available state. State: #{@rds_cluster.status}")
          return {}
        end

        if @rds_cluster.engine_mode != 'provisioned'
          $log.info("Aurora Cluster #{@cluster_id} is not a provisioned cluster and cannot be stopped using this method.")
          return {}
        end

        # stop rds cluster and wait for it to be fully stopped
        $log.info("Stopping aurora cluster #{@cluster_id}")
        @rds_client.stop_db_cluster({ db_cluster_identifier: @cluster_id })
        unless CfnManage.skip_wait?
          $log.info("Waiting aurora cluster to be stopped #{@cluster_id}")
          wait('stopped')
        end
        {}
      end

      # Polls the cluster until it has reported +completed_state+ on four
      # consecutive polls, sleeping 15 seconds after each poll, for at most
      # 360 polls. Running out of attempts only logs an error.
      #
      # @param completed_state [String] status to wait for
      def wait(completed_state)
        # reached state must be steady before we call it done
        steady_count = 4
        max_attempts = 60 * 6
        state_count = 0
        attempts = 0
        rds = Aws::RDS::Resource.new(client: @rds_client)

        until attempts == max_attempts
          cluster = rds.db_cluster(@cluster_id)
          $log.info("Aurora Cluster #{cluster.db_cluster_identifier} state: #{cluster.status}, waiting for #{completed_state}")

          if cluster.status == completed_state.to_s
            state_count += 1
            $log.info("#{state_count}/#{steady_count}")
          else
            # any other status resets the streak
            state_count = 0
          end
          break if state_count == steady_count

          attempts += 1
          sleep(15)
        end

        if attempts == max_attempts
          # The message previously interpolated an undefined local `state`,
          # raising NameError on timeout instead of logging and continuing.
          $log.error("RDS Aurora Cluster #{@cluster_id} did not enter #{completed_state} state, however continuing operations...")
        end
      end

    end
  end
end