cfn_manage 0.7.1 → 0.8.0

@@ -0,0 +1,90 @@
+ module CfnManage
+
+   # set default options here
+   @asg_wait_state = 'HealthyInASG'
+   @ecs_wait_state = 'Skip'
+
+   class << self
+
+     # readers and writers for the options above
+     attr_accessor :asg_wait_state, :ecs_wait_state
+
+     # converts string-based booleans from AWS tag values to booleans
+     def true?(obj)
+       ["true","1"].include? obj.to_s.downcase
+     end
+
+     # find options set on resource tags
+     def find_tags
+       @find_tags = true
+     end
+
+     def find_tags?
+       @find_tags
+     end
+
+     # don't stop or start resources
+     def dry_run
+       @dry_run = true
+     end
+
+     def dry_run?
+       @dry_run
+     end
+
+     # don't wait for resources to become healthy
+     def skip_wait
+       @skip_wait = true
+     end
+
+     def skip_wait?
+       @skip_wait
+     end
+
+     # wait for resources based upon priority groups
+     def wait_async
+       @wait_async = true
+     end
+
+     def wait_async?
+       @wait_async
+     end
+
+     # workaround: ignore a missing ECS service configuration
+     def ignore_missing_ecs_config
+       @ignore_missing_ecs_config = true
+     end
+
+     def ignore_missing_ecs_config?
+       @ignore_missing_ecs_config
+     end
+
+     # disable termination on the ASG when stopping EC2 instances in an ASG
+     def asg_suspend_termination
+       @asg_suspend_termination = true
+     end
+
+     def asg_suspend_termination?
+       @asg_suspend_termination
+     end
+
+     # continue if a resource fails to stop or start
+     def continue_on_error
+       @continue_on_error = true
+     end
+
+     def continue_on_error?
+       @continue_on_error
+     end
+
+     # Wait for container instances to join an ECS cluster
+     def ecs_wait_container_instances
+       @ecs_wait_container_instances = true
+     end
+
+     def ecs_wait_container_instances?
+       @ecs_wait_container_instances
+     end
+
+   end
+ end
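
For context, the flags above are process-wide switches meant to be set once before the start/stop handlers run. A minimal usage sketch, assuming the gem's usual `require 'cfn_manage'` entry point; everything else only uses the methods defined above:

require 'cfn_manage'

CfnManage.asg_wait_state = 'Running'   # override the 'HealthyInASG' default
CfnManage.skip_wait                    # one-way flag: sets @skip_wait = true
CfnManage.dry_run                      # log what would happen without stopping/starting

CfnManage.skip_wait?      # => true
CfnManage.dry_run?        # => true
CfnManage.true?('True')   # => true (tag-value helper)
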
@@ -0,0 +1,45 @@
+ require 'aws-sdk-cloudwatch'
+ require 'cfn_manage/aws_credentials'
+
+ module CfnManage
+   module StartStopHandler
+     class Alarm
+
+       def initialize(alarm_name, options = {})
+         @alarm_id = alarm_name
+         credentials = CfnManage::AWSCredentials.get_session_credentials("startstopalarm_#{@alarm_id}")
+         @cwclient = Aws::CloudWatch::Client.new(retry_limit: 20)
+         if credentials != nil
+           @cwclient = Aws::CloudWatch::Client.new(credentials: credentials, retry_limit: 20)
+         end
+
+         @cwresource = Aws::CloudWatch::Resource.new(client: @cwclient)
+         @alarm = @cwresource.alarm(alarm_name)
+       end
+
+       def start(configuration)
+         if @alarm.actions_enabled
+           $log.info("Alarm #{@alarm.alarm_arn} actions already enabled")
+           return
+         end
+         $log.info("Enabling actions on alarm #{@alarm.alarm_arn}")
+         @alarm.enable_actions({})
+       end
+
+       def stop
+         unless @alarm.actions_enabled
+           $log.info("Alarm #{@alarm.alarm_arn} actions already disabled")
+           return {}
+         end
+         $log.info("Disabling actions on alarm #{@alarm.alarm_arn}")
+         @alarm.disable_actions({})
+         return {}
+       end
+
+       def wait(wait_states=[])
+         $log.debug("Not waiting for alarm #{@alarm_id}")
+       end
+
+     end
+   end
+ end
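
A minimal sketch of driving the alarm handler directly, assuming the class above is loaded, AWS credentials are resolvable, and the caller provides the global `$log` the handlers write to; the alarm name is a placeholder:

require 'logger'

$log = Logger.new(STDOUT)

alarm = CfnManage::StartStopHandler::Alarm.new('prod-high-cpu')
saved = alarm.stop     # disables alarm actions, returns an empty configuration hash
alarm.start(saved)     # re-enables alarm actions
alarm.wait             # no-op for alarms
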
@@ -0,0 +1,311 @@
+ require 'cfn_manage/aws_credentials'
+
+ require 'aws-sdk-autoscaling'
+ require 'aws-sdk-ec2'
+ require 'aws-sdk-elasticloadbalancingv2'
+
+ module CfnManage
+   module StartStopHandler
+     class Asg
+
+       def initialize(asg_id, options = {})
+         @asg_name = asg_id
+         @wait_state = options.has_key?(:wait_state) ? options[:wait_state] : CfnManage.asg_wait_state
+         @skip_wait = options.has_key?(:skip_wait) ? CfnManage.true?(options[:skip_wait]) : CfnManage.skip_wait?
+         @suspend_termination = options.has_key?(:suspend_termination) ? CfnManage.true?(options[:suspend_termination]) : CfnManage.asg_suspend_termination?
+
+         credentials = CfnManage::AWSCredentials.get_session_credentials("stopasg_#{@asg_name}")
+         @asg_client = Aws::AutoScaling::Client.new(retry_limit: 20)
+         @ec2_client = Aws::EC2::Client.new(retry_limit: 20)
+         @elb_client = Aws::ElasticLoadBalancingV2::Client.new(retry_limit: 20)
+         if credentials != nil
+           @asg_client = Aws::AutoScaling::Client.new(credentials: credentials, retry_limit: 20)
+           @ec2_client = Aws::EC2::Client.new(credentials: credentials, retry_limit: 20)
+           @elb_client = Aws::ElasticLoadBalancingV2::Client.new(credentials: credentials, retry_limit: 20)
+         end
+
+         asg_details = @asg_client.describe_auto_scaling_groups(
+           auto_scaling_group_names: [@asg_name]
+         )
+         if asg_details.auto_scaling_groups.size() == 0
+           raise "Couldn't find ASG #{@asg_name}"
+         end
+         @asg = asg_details.auto_scaling_groups[0]
+       end
+
+       def stop
+         # check if already stopped
+         if @asg.min_size == 0 && @asg.max_size == 0 && @asg.desired_capacity == 0
+           $log.info("ASG #{@asg_name} already stopped")
+           # nil and false configurations are not saved
+           return nil
+         else
+
+           unless @suspend_termination
+             # store asg configuration to S3
+             configuration = {
+               desired_capacity: @asg.desired_capacity,
+               min_size: @asg.min_size,
+               max_size: @asg.max_size
+             }
+
+             $log.info("Setting desired capacity to 0/0/0 for ASG #{@asg.auto_scaling_group_name}")
+
+             @asg_client.update_auto_scaling_group({
+               auto_scaling_group_name: @asg.auto_scaling_group_name,
+               min_size: 0,
+               max_size: 0,
+               desired_capacity: 0
+             })
+             return configuration
+           else
+
+             configuration = {
+               desired_capacity: @asg.desired_capacity,
+               min_size: @asg.min_size,
+               max_size: @asg.max_size,
+               suspended_processes: @asg.suspended_processes
+             }
+
+             $log.info("Suspending processes for ASG #{@asg.auto_scaling_group_name}")
+
+             @asg_client.suspend_processes({
+               auto_scaling_group_name: @asg.auto_scaling_group_name,
+             })
+
+             $log.info("Stopping all instances in ASG #{@asg.auto_scaling_group_name}")
+
+             @asg.instances.each do |instance|
+               @instance_id = instance.instance_id
+               @instance = Aws::EC2::Resource.new(client: @ec2_client, retry_limit: 20).instance(@instance_id)
+
+               if %w(stopped stopping).include?(@instance.state.name)
+                 $log.info("Instance #{@instance_id} already stopping or stopped")
+                 next
+               end
+
+               $log.info("Stopping instance #{@instance_id}")
+               @instance.stop()
+             end
+
+             return configuration
+
+           end
+
+         end
+
+       end
+
+       def start(configuration)
+         if configuration.nil?
+           $log.warn("No configuration found for #{@asg_name}, skipping...")
+           return
+         end
+         $log.info("Starting ASG #{@asg_name} with the following configuration\n#{configuration}")
+
+         unless @suspend_termination
+           # restore asg sizes
+           @asg_client.update_auto_scaling_group({
+             auto_scaling_group_name: @asg_name,
+             min_size: configuration['min_size'],
+             max_size: configuration['max_size'],
+             desired_capacity: configuration['desired_capacity']
+           })
+
+         else
+
+           $log.info("Starting instances for ASG #{@asg_name}...")
+
+           @asg.instances.each do |instance|
+             @instance_id = instance.instance_id
+             @instance = Aws::EC2::Resource.new(client: @ec2_client, retry_limit: 20).instance(@instance_id)
+
+             if %w(running).include?(@instance.state.name)
+               $log.info("Instance #{@instance_id} already running")
+               next
+             end
+             $log.info("Starting instance #{@instance_id}")
+             @instance.start()
+           end
+
+         end
+
+         if configuration['desired_capacity'] == 0
+           # if the ASG desired capacity is deliberately set to 0 and we want to wait for other ASGs
+           # in the stack, then we need to skip the wait for this ASG.
+           $log.info("Desired capacity is 0, skipping wait for asg #{@asg_name}")
+         elsif @skip_wait && @suspend_termination
+           # If wait is skipped we still need to wait until the instances are healthy in the asg
+           # before resuming the processes. This will avoid the asg terminating the instances.
+           wait('HealthyInASG')
+         elsif !@skip_wait
+           # if we are waiting for the instances to reach a desired state
+           $log.info("Waiting for ASG instances to reach wait state #{@wait_state}")
+           wait(@wait_state)
+         end
+
+         if @suspend_termination
+           # resume the asg processes after we've waited for them to become healthy
+           $log.info("Resuming all processes for ASG #{@asg_name}")
+
+           @asg_client.resume_processes({
+             auto_scaling_group_name: @asg.auto_scaling_group_name,
+           })
+
+           if configuration.key?('suspended_processes')
+
+             $log.info("Suspending processes stored in configuration for ASG #{@asg_name}")
+
+             @asg_client.suspend_processes({
+               auto_scaling_group_name: @asg.auto_scaling_group_name,
+               scaling_processes: configuration['suspended_processes'],
+             })
+           end
+
+         end
+
+       end
+
+       def wait(type)
+
+         attempts = 0
+
+         until attempts == (max_attempts = 60 * 6)
+
+           case type
+           when 'HealthyInASG'
+             success = wait_till_healthy_in_asg()
+           when 'Running'
+             success = wait_till_running()
+           when 'HealthyInTargetGroup'
+             success = wait_till_healthy_in_target_group()
+           else
+             $log.warn("Unknown ASG wait type #{type}. Skipping...")
+             break
+           end
+
+           if success
+             break
+           end
+
+           attempts = attempts + 1
+           sleep(15)
+         end
+
+         if attempts == max_attempts
+           $log.error("Failed to wait for asg with wait type #{type}")
+         end
+       end
+
+       def wait_till_healthy_in_asg
+
+         asg_curr_details = @asg_client.describe_auto_scaling_groups(
+           auto_scaling_group_names: [@asg_name]
+         )
+
+         asg_status = asg_curr_details.auto_scaling_groups.first
+         health_status = asg_status.instances.collect { |inst| inst.health_status }
+         $log.info("ASG #{@asg_name} health status is currently #{health_status}")
+
+         if health_status.empty?
+           $log.info("ASG #{@asg_name} has not started any instances yet")
+           return false
+         end
+
+         if health_status.all? { |status| status == "Healthy" }
+           $log.info("All instances healthy in ASG #{@asg_name}")
+           return true
+         end
+
+         unhealthy = asg_status.instances.select {|inst| inst.health_status == "Unhealthy" }.collect {|inst| inst.instance_id }
+         $log.info("waiting for instances #{unhealthy} to become healthy in asg #{@asg_name}")
+         return false
+
+       end
+
+       def wait_till_running
+
+         asg_curr_details = @asg_client.describe_auto_scaling_groups(
+           auto_scaling_group_names: [@asg_name]
+         )
+         asg_status = asg_curr_details.auto_scaling_groups.first
+         instances = asg_status.instances.collect { |inst| inst.instance_id }
+
+         if instances.empty?
+           $log.info("ASG #{@asg_name} has not started any instances yet")
+           return false
+         end
+
+         status = @ec2_client.describe_instance_status({
+           instance_ids: instances
+         })
+
+         state = status.instance_statuses.collect {|inst| inst.instance_state.name}
+
+         if state.all? { |s| s == "running" }
+           $log.info("All instances in ASG #{@asg_name} are in a running state")
+           return true
+         end
+
+         not_running = status.instance_statuses.select {|inst| inst.instance_state.name != "running" }
+         not_running.each do |inst|
+           $log.info("waiting for instance #{inst.instance_id} to be running. Current state is #{inst.instance_state.name}")
+         end
+
+         return false
+
+       end
+
+       def wait_till_healthy_in_target_group
+
+         asg_curr_details = @asg_client.describe_auto_scaling_groups(
+           auto_scaling_group_names: [@asg_name]
+         )
+         asg_status = asg_curr_details.auto_scaling_groups.first
+         asg_instances = asg_status.instances.collect { |inst| inst.instance_id }
+         target_groups = asg_status.target_group_arns
+
+         if asg_instances.empty?
+           $log.info("ASG #{@asg_name} has not started any instances yet")
+           return false
+         end
+
+         if target_groups.empty?
+           # we want to skip here if the asg is not associated with any target groups
+           $log.info("ASG #{@asg_name} is not associated with any target groups")
+           return true
+         end
+
+         target_health = []
+         target_groups.each do |tg|
+           resp = @elb_client.describe_target_health({
+             target_group_arn: tg,
+           })
+           if resp.target_health_descriptions.length != asg_instances.length
+             # we need to wait until all asg instances have been placed into the target group
+             # before we can check they're healthy
+             $log.info("Not all ASG instances have been placed into target group #{tg.split('/')[1]} yet")
+             return false
+           end
+           target_health.push(*resp.target_health_descriptions)
+         end
+
+         state = target_health.collect {|tg| tg.target_health.state}
+
+         if state.all? { |s| s == 'healthy' }
+           $log.info("All instances are in a healthy state in target groups #{target_groups.map {|tg| tg.split('/')[1] }}")
+           return true
+         end
+
+         unhealthy = target_health.select {|tg| tg.target_health.state != 'healthy'}
+         unhealthy.each do |tg|
+           $log.info("waiting for instance #{tg.target.id} to be healthy in target group. Current state is #{tg.target_health.state}")
+         end
+
+         return false
+
+       end
+
+     end
+   end
+ end
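
The stop/start contract above: `stop` returns a sizes hash that the caller is expected to persist (the code's own comment says it is stored in S3, which is why `start` reads string keys), and `start` restores the group and then waits according to the configured wait state. A hedged sketch, with a JSON round trip standing in for the real S3 persistence and a placeholder ASG name:

require 'json'
require 'logger'

$log = Logger.new(STDOUT)   # the handlers write to a global $log provided by the caller

# 'app-asg' is a placeholder; 'HealthyInTargetGroup' is one of the wait types handled in wait()
asg = CfnManage::StartStopHandler::Asg.new('app-asg', wait_state: 'HealthyInTargetGroup')

config = asg.stop                                        # e.g. { desired_capacity: 2, min_size: 1, max_size: 2 }, or nil if already stopped
stored = JSON.parse(config.to_json) unless config.nil?   # mimic the S3 save/load, which stringifies the keys

asg.start(stored)   # restores the sizes, then polls every 15s until targets are healthy (up to 360 attempts)
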
@@ -0,0 +1,97 @@
+ require 'aws-sdk-rds'
+ require 'cfn_manage/aws_credentials'
+
+ module CfnManage
+   module StartStopHandler
+     class AuroraCluster
+
+       def initialize(cluster_id, options = {})
+         @cluster_id = cluster_id
+         credentials = CfnManage::AWSCredentials.get_session_credentials("startstopcluster_#{cluster_id}")
+         @rds_client = Aws::RDS::Client.new(retry_limit: 20)
+         if credentials != nil
+           @rds_client = Aws::RDS::Client.new(credentials: credentials, retry_limit: 20)
+         end
+         rds = Aws::RDS::Resource.new(client: @rds_client)
+         @rds_cluster = rds.db_cluster(cluster_id)
+       end
+
+       def start(configuration)
+         if @rds_cluster.status == 'available'
+           $log.info("Aurora Cluster #{@cluster_id} is already in an available state")
+           return
+         end
+
+         if @rds_cluster.engine_mode != 'provisioned'
+           $log.info("Aurora Cluster #{@cluster_id} is not a provisioned cluster and cannot be started using this method.")
+           return
+         end
+
+         # start rds cluster
+         if @rds_cluster.status == 'stopped'
+           $log.info("Starting Aurora cluster #{@cluster_id}")
+           @rds_client.start_db_cluster({ db_cluster_identifier: @cluster_id })
+           unless CfnManage.skip_wait?
+             # wait for the cluster to become available
+             $log.info("Waiting for Aurora cluster #{@cluster_id} to become available")
+             wait('available')
+           end
+         else
+           $log.info("Aurora Cluster #{@cluster_id} is not in a stopped state. State: #{@rds_cluster.status}")
+         end
+       end
+
+       def stop
+         if @rds_cluster.status == 'stopped'
+           $log.info("Aurora Cluster #{@cluster_id} is already stopped")
+           return {}
+         end
+
+         if @rds_cluster.status != 'available'
+           $log.info("Aurora Cluster #{@cluster_id} is not in an available state. State: #{@rds_cluster.status}")
+           return {}
+         end
+
+         if @rds_cluster.engine_mode != 'provisioned'
+           $log.info("Aurora Cluster #{@cluster_id} is not a provisioned cluster and cannot be stopped using this method.")
+           return {}
+         end
+         # stop rds cluster and wait for it to be fully stopped
+         $log.info("Stopping Aurora cluster #{@cluster_id}")
+         @rds_client.stop_db_cluster({ db_cluster_identifier: @cluster_id })
+         unless CfnManage.skip_wait?
+           $log.info("Waiting for Aurora cluster #{@cluster_id} to be stopped")
+           wait('stopped')
+         end
+         return {}
+       end
+
+       def wait(completed_state)
+         # the reached state must hold steady for at least a minute
+         state_count = 0
+         steady_count = 4
+         attempts = 0
+         rds = Aws::RDS::Resource.new(client: @rds_client)
+         until attempts == (max_attempts = 60 * 6)
+           cluster = rds.db_cluster(@cluster_id)
+           $log.info("Aurora Cluster #{cluster.db_cluster_identifier} state: #{cluster.status}, waiting for #{completed_state}")
+
+           if cluster.status == completed_state
+             state_count = state_count + 1
+             $log.info("#{state_count}/#{steady_count}")
+           else
+             state_count = 0
+           end
+           break if state_count == steady_count
+           attempts = attempts + 1
+           sleep(15)
+         end
+
+         if attempts == max_attempts
+           $log.error("RDS Aurora Cluster #{@cluster_id} did not enter the #{completed_state} state, however continuing operations...")
+         end
+       end
+
+     end
+   end
+ end
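
The Aurora handler only acts on provisioned clusters and polls every 15 seconds until the cluster has held the target state for four consecutive checks (roughly a minute). A minimal sketch of driving it, assuming the class above is loaded and using a placeholder cluster identifier:

require 'logger'

$log = Logger.new(STDOUT)

# optional: skip the polling loop entirely
# CfnManage.skip_wait

cluster = CfnManage::StartStopHandler::AuroraCluster.new('app-aurora-cluster')
cluster.stop        # calls stop_db_cluster and waits until 'stopped' holds steady, returns {}
cluster.start({})   # calls start_db_cluster only when the cluster is currently 'stopped'
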