cloud-mu 3.1.4 → 3.3.1

Files changed (203)
  1. checksums.yaml +4 -4
  2. data/Dockerfile +5 -1
  3. data/ansible/roles/mu-windows/README.md +33 -0
  4. data/ansible/roles/mu-windows/defaults/main.yml +2 -0
  5. data/ansible/roles/mu-windows/files/LaunchConfig.json +9 -0
  6. data/ansible/roles/mu-windows/files/config.xml +76 -0
  7. data/ansible/roles/mu-windows/handlers/main.yml +2 -0
  8. data/ansible/roles/mu-windows/meta/main.yml +53 -0
  9. data/ansible/roles/mu-windows/tasks/main.yml +36 -0
  10. data/ansible/roles/mu-windows/tests/inventory +2 -0
  11. data/ansible/roles/mu-windows/tests/test.yml +5 -0
  12. data/ansible/roles/mu-windows/vars/main.yml +2 -0
  13. data/bin/mu-adopt +16 -12
  14. data/bin/mu-azure-tests +57 -0
  15. data/bin/mu-cleanup +2 -4
  16. data/bin/mu-configure +52 -0
  17. data/bin/mu-deploy +3 -3
  18. data/bin/mu-findstray-tests +25 -0
  19. data/bin/mu-gen-docs +2 -4
  20. data/bin/mu-load-config.rb +2 -1
  21. data/bin/mu-node-manage +15 -16
  22. data/bin/mu-run-tests +37 -12
  23. data/cloud-mu.gemspec +5 -3
  24. data/cookbooks/mu-activedirectory/resources/domain.rb +4 -4
  25. data/cookbooks/mu-activedirectory/resources/domain_controller.rb +4 -4
  26. data/cookbooks/mu-tools/libraries/helper.rb +1 -1
  27. data/cookbooks/mu-tools/recipes/apply_security.rb +14 -14
  28. data/cookbooks/mu-tools/recipes/aws_api.rb +9 -0
  29. data/cookbooks/mu-tools/recipes/eks.rb +2 -2
  30. data/cookbooks/mu-tools/recipes/selinux.rb +2 -1
  31. data/cookbooks/mu-tools/recipes/windows-client.rb +163 -164
  32. data/cookbooks/mu-tools/resources/windows_users.rb +44 -43
  33. data/extras/clean-stock-amis +25 -19
  34. data/extras/generate-stock-images +1 -0
  35. data/extras/image-generators/AWS/win2k12.yaml +18 -13
  36. data/extras/image-generators/AWS/win2k16.yaml +18 -13
  37. data/extras/image-generators/AWS/win2k19.yaml +21 -0
  38. data/modules/mommacat.ru +1 -1
  39. data/modules/mu.rb +158 -107
  40. data/modules/mu/adoption.rb +386 -59
  41. data/modules/mu/cleanup.rb +214 -303
  42. data/modules/mu/cloud.rb +128 -1632
  43. data/modules/mu/cloud/database.rb +49 -0
  44. data/modules/mu/cloud/dnszone.rb +44 -0
  45. data/modules/mu/cloud/machine_images.rb +212 -0
  46. data/modules/mu/cloud/providers.rb +81 -0
  47. data/modules/mu/cloud/resource_base.rb +926 -0
  48. data/modules/mu/cloud/server.rb +40 -0
  49. data/modules/mu/cloud/server_pool.rb +1 -0
  50. data/modules/mu/cloud/ssh_sessions.rb +228 -0
  51. data/modules/mu/cloud/winrm_sessions.rb +237 -0
  52. data/modules/mu/cloud/wrappers.rb +169 -0
  53. data/modules/mu/config.rb +135 -82
  54. data/modules/mu/config/alarm.rb +2 -6
  55. data/modules/mu/config/bucket.rb +32 -3
  56. data/modules/mu/config/cache_cluster.rb +2 -2
  57. data/modules/mu/config/cdn.rb +100 -0
  58. data/modules/mu/config/collection.rb +1 -1
  59. data/modules/mu/config/container_cluster.rb +7 -2
  60. data/modules/mu/config/database.rb +84 -105
  61. data/modules/mu/config/database.yml +1 -2
  62. data/modules/mu/config/dnszone.rb +5 -4
  63. data/modules/mu/config/doc_helpers.rb +5 -6
  64. data/modules/mu/config/endpoint.rb +2 -1
  65. data/modules/mu/config/firewall_rule.rb +3 -19
  66. data/modules/mu/config/folder.rb +1 -1
  67. data/modules/mu/config/function.rb +17 -8
  68. data/modules/mu/config/group.rb +1 -1
  69. data/modules/mu/config/habitat.rb +1 -1
  70. data/modules/mu/config/job.rb +89 -0
  71. data/modules/mu/config/loadbalancer.rb +57 -11
  72. data/modules/mu/config/log.rb +1 -1
  73. data/modules/mu/config/msg_queue.rb +1 -1
  74. data/modules/mu/config/nosqldb.rb +1 -1
  75. data/modules/mu/config/notifier.rb +8 -19
  76. data/modules/mu/config/ref.rb +92 -14
  77. data/modules/mu/config/role.rb +1 -1
  78. data/modules/mu/config/schema_helpers.rb +38 -37
  79. data/modules/mu/config/search_domain.rb +1 -1
  80. data/modules/mu/config/server.rb +12 -13
  81. data/modules/mu/config/server.yml +1 -0
  82. data/modules/mu/config/server_pool.rb +3 -7
  83. data/modules/mu/config/storage_pool.rb +1 -1
  84. data/modules/mu/config/tail.rb +11 -0
  85. data/modules/mu/config/user.rb +1 -1
  86. data/modules/mu/config/vpc.rb +27 -23
  87. data/modules/mu/config/vpc.yml +0 -1
  88. data/modules/mu/defaults/AWS.yaml +91 -68
  89. data/modules/mu/defaults/Azure.yaml +1 -0
  90. data/modules/mu/defaults/Google.yaml +1 -0
  91. data/modules/mu/deploy.rb +33 -19
  92. data/modules/mu/groomer.rb +16 -1
  93. data/modules/mu/groomers/ansible.rb +123 -21
  94. data/modules/mu/groomers/chef.rb +64 -11
  95. data/modules/mu/logger.rb +120 -144
  96. data/modules/mu/master.rb +97 -4
  97. data/modules/mu/master/ssl.rb +0 -1
  98. data/modules/mu/mommacat.rb +154 -867
  99. data/modules/mu/mommacat/daemon.rb +23 -14
  100. data/modules/mu/mommacat/naming.rb +110 -3
  101. data/modules/mu/mommacat/search.rb +495 -0
  102. data/modules/mu/mommacat/storage.rb +225 -192
  103. data/modules/mu/{clouds → providers}/README.md +1 -1
  104. data/modules/mu/{clouds → providers}/aws.rb +281 -64
  105. data/modules/mu/{clouds → providers}/aws/alarm.rb +3 -3
  106. data/modules/mu/{clouds → providers}/aws/bucket.rb +275 -41
  107. data/modules/mu/{clouds → providers}/aws/cache_cluster.rb +14 -50
  108. data/modules/mu/providers/aws/cdn.rb +782 -0
  109. data/modules/mu/{clouds → providers}/aws/collection.rb +5 -5
  110. data/modules/mu/{clouds → providers}/aws/container_cluster.rb +708 -749
  111. data/modules/mu/providers/aws/database.rb +1744 -0
  112. data/modules/mu/{clouds → providers}/aws/dnszone.rb +75 -57
  113. data/modules/mu/providers/aws/endpoint.rb +1072 -0
  114. data/modules/mu/{clouds → providers}/aws/firewall_rule.rb +212 -242
  115. data/modules/mu/{clouds → providers}/aws/folder.rb +1 -1
  116. data/modules/mu/{clouds → providers}/aws/function.rb +289 -134
  117. data/modules/mu/{clouds → providers}/aws/group.rb +18 -20
  118. data/modules/mu/{clouds → providers}/aws/habitat.rb +3 -3
  119. data/modules/mu/providers/aws/job.rb +466 -0
  120. data/modules/mu/{clouds → providers}/aws/loadbalancer.rb +50 -41
  121. data/modules/mu/{clouds → providers}/aws/log.rb +5 -5
  122. data/modules/mu/{clouds → providers}/aws/msg_queue.rb +14 -11
  123. data/modules/mu/{clouds → providers}/aws/nosqldb.rb +96 -5
  124. data/modules/mu/{clouds → providers}/aws/notifier.rb +135 -63
  125. data/modules/mu/{clouds → providers}/aws/role.rb +94 -57
  126. data/modules/mu/{clouds → providers}/aws/search_domain.rb +173 -42
  127. data/modules/mu/{clouds → providers}/aws/server.rb +782 -1107
  128. data/modules/mu/{clouds → providers}/aws/server_pool.rb +36 -46
  129. data/modules/mu/{clouds → providers}/aws/storage_pool.rb +21 -38
  130. data/modules/mu/{clouds → providers}/aws/user.rb +12 -16
  131. data/modules/mu/{clouds → providers}/aws/userdata/README.md +0 -0
  132. data/modules/mu/{clouds → providers}/aws/userdata/linux.erb +5 -4
  133. data/modules/mu/{clouds → providers}/aws/userdata/windows.erb +2 -1
  134. data/modules/mu/{clouds → providers}/aws/vpc.rb +429 -849
  135. data/modules/mu/providers/aws/vpc_subnet.rb +286 -0
  136. data/modules/mu/{clouds → providers}/azure.rb +13 -0
  137. data/modules/mu/{clouds → providers}/azure/container_cluster.rb +1 -5
  138. data/modules/mu/{clouds → providers}/azure/firewall_rule.rb +8 -1
  139. data/modules/mu/{clouds → providers}/azure/habitat.rb +0 -0
  140. data/modules/mu/{clouds → providers}/azure/loadbalancer.rb +0 -0
  141. data/modules/mu/{clouds → providers}/azure/role.rb +0 -0
  142. data/modules/mu/{clouds → providers}/azure/server.rb +32 -24
  143. data/modules/mu/{clouds → providers}/azure/user.rb +1 -1
  144. data/modules/mu/{clouds → providers}/azure/userdata/README.md +0 -0
  145. data/modules/mu/{clouds → providers}/azure/userdata/linux.erb +0 -0
  146. data/modules/mu/{clouds → providers}/azure/userdata/windows.erb +0 -0
  147. data/modules/mu/{clouds → providers}/azure/vpc.rb +4 -6
  148. data/modules/mu/{clouds → providers}/cloudformation.rb +10 -0
  149. data/modules/mu/{clouds → providers}/cloudformation/alarm.rb +3 -3
  150. data/modules/mu/{clouds → providers}/cloudformation/cache_cluster.rb +3 -3
  151. data/modules/mu/{clouds → providers}/cloudformation/collection.rb +3 -3
  152. data/modules/mu/{clouds → providers}/cloudformation/database.rb +6 -17
  153. data/modules/mu/{clouds → providers}/cloudformation/dnszone.rb +3 -3
  154. data/modules/mu/{clouds → providers}/cloudformation/firewall_rule.rb +3 -3
  155. data/modules/mu/{clouds → providers}/cloudformation/loadbalancer.rb +3 -3
  156. data/modules/mu/{clouds → providers}/cloudformation/log.rb +3 -3
  157. data/modules/mu/{clouds → providers}/cloudformation/server.rb +7 -7
  158. data/modules/mu/{clouds → providers}/cloudformation/server_pool.rb +5 -5
  159. data/modules/mu/{clouds → providers}/cloudformation/vpc.rb +3 -3
  160. data/modules/mu/{clouds → providers}/docker.rb +0 -0
  161. data/modules/mu/{clouds → providers}/google.rb +29 -6
  162. data/modules/mu/{clouds → providers}/google/bucket.rb +5 -5
  163. data/modules/mu/{clouds → providers}/google/container_cluster.rb +59 -37
  164. data/modules/mu/{clouds → providers}/google/database.rb +5 -12
  165. data/modules/mu/{clouds → providers}/google/firewall_rule.rb +5 -5
  166. data/modules/mu/{clouds → providers}/google/folder.rb +5 -9
  167. data/modules/mu/{clouds → providers}/google/function.rb +14 -8
  168. data/modules/mu/{clouds → providers}/google/group.rb +9 -17
  169. data/modules/mu/{clouds → providers}/google/habitat.rb +4 -8
  170. data/modules/mu/{clouds → providers}/google/loadbalancer.rb +5 -5
  171. data/modules/mu/{clouds → providers}/google/role.rb +50 -31
  172. data/modules/mu/{clouds → providers}/google/server.rb +142 -55
  173. data/modules/mu/{clouds → providers}/google/server_pool.rb +14 -14
  174. data/modules/mu/{clouds → providers}/google/user.rb +34 -24
  175. data/modules/mu/{clouds → providers}/google/userdata/README.md +0 -0
  176. data/modules/mu/{clouds → providers}/google/userdata/linux.erb +0 -0
  177. data/modules/mu/{clouds → providers}/google/userdata/windows.erb +0 -0
  178. data/modules/mu/{clouds → providers}/google/vpc.rb +46 -15
  179. data/modules/tests/aws-jobs-functions.yaml +46 -0
  180. data/modules/tests/centos6.yaml +15 -0
  181. data/modules/tests/centos7.yaml +15 -0
  182. data/modules/tests/centos8.yaml +12 -0
  183. data/modules/tests/ecs.yaml +23 -0
  184. data/modules/tests/eks.yaml +1 -1
  185. data/modules/tests/functions/node-function/lambda_function.js +10 -0
  186. data/modules/tests/functions/python-function/lambda_function.py +12 -0
  187. data/modules/tests/includes-and-params.yaml +2 -1
  188. data/modules/tests/microservice_app.yaml +288 -0
  189. data/modules/tests/rds.yaml +108 -0
  190. data/modules/tests/regrooms/rds.yaml +123 -0
  191. data/modules/tests/server-with-scrub-muisms.yaml +2 -1
  192. data/modules/tests/super_complex_bok.yml +2 -2
  193. data/modules/tests/super_simple_bok.yml +3 -5
  194. data/modules/tests/win2k12.yaml +25 -0
  195. data/modules/tests/win2k16.yaml +25 -0
  196. data/modules/tests/win2k19.yaml +25 -0
  197. data/requirements.txt +1 -0
  198. data/spec/mu/clouds/azure_spec.rb +2 -2
  199. metadata +169 -93
  200. data/extras/image-generators/AWS/windows.yaml +0 -18
  201. data/modules/mu/clouds/aws/database.rb +0 -1974
  202. data/modules/mu/clouds/aws/endpoint.rb +0 -596
  203. data/modules/tests/needwork/win2k12.yaml +0 -13
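
Most of the hunks that follow repeat one refactoring pattern: direct references to provider implementation classes are replaced by a runtime lookup through MU::Cloud.resourceClass. A minimal before/after sketch of that call shape, assuming the cloud-mu library is loaded; the node_name and instance_id variables are hypothetical placeholders, and only the two call forms themselves are taken from the diff below:

  # cloud-mu 3.1.x style: the AWS implementation class is referenced directly
  instance = MU::Cloud::AWS::Server.notifyDeploy(node_name, instance_id)

  # cloud-mu 3.3.x style: the class is resolved by provider name and resource type
  instance = MU::Cloud.resourceClass("AWS", "Server").notifyDeploy(node_name, instance_id)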
@@ -152,7 +152,7 @@ module MU
  instance_name = MU.deploy_id+"-"+@config['name']+"-"+resource.logical_resource_id
  MU::Cloud::AWS.createTag(resource.physical_resource_id, "Name", instance_name, credentials: @config['credentials'])

- instance = MU::Cloud::AWS::Server.notifyDeploy(
+ instance = MU::Cloud.resourceClass("AWS", "Server").notifyDeploy(
  @config['name']+"-"+resource.logical_resource_id,
  resource.physical_resource_id
  )
@@ -170,7 +170,7 @@ module MU

  mu_zone, _junk = MU::Cloud::DNSZone.find(name: "mu")
  if !mu_zone.nil?
- MU::Cloud::AWS::DNSZone.genericMuDNSEntry(instance_name, instance["private_ip_address"], MU::Cloud::Server)
+ MU::Cloud.resourceClass("AWS", "DNSZone").genericMuDNSEntry(instance_name, instance["private_ip_address"], MU::Cloud::Server)
  else
  MU::Master.addInstanceToEtcHosts(instance["public_ip_address"], instance_name)
  end
@@ -178,7 +178,7 @@ module MU
  when "AWS::EC2::SecurityGroup"
  MU::Cloud::AWS.createStandardTags(resource.physical_resource_id)
  MU::Cloud::AWS.createTag(resource.physical_resource_id, "Name", MU.deploy_id+"-"+@config['name']+'-'+resource.logical_resource_id, credentials: @config['credentials'])
- MU::Cloud::AWS::FirewallRule.notifyDeploy(
+ MU::Cloud.resourceClass("AWS", "FirewallRule").notifyDeploy(
  @config['name']+"-"+resource.logical_resource_id,
  resource.physical_resource_id
  )
@@ -242,7 +242,7 @@ module MU
  # @param region [String]: The cloud provider region
  # @param wait [Boolean]: Block on the removal of this stack; AWS deletion will continue in the background otherwise if false.
  # @return [void]
- def self.cleanup(noop: false, ignoremaster: false, region: MU.curRegion, wait: false, credentials: nil, flags: {})
+ def self.cleanup(noop: false, deploy_id: MU.deploy_id, ignoremaster: false, region: MU.curRegion, wait: false, credentials: nil, flags: {})
  MU.log "AWS::Collection.cleanup: need to support flags['known']", MU::DEBUG, details: flags
  MU.log "Placeholder: AWS Collection artifacts do not support tags, so ignoremaster cleanup flag has no effect", MU::DEBUG, details: ignoremaster

@@ -251,7 +251,7 @@ module MU
  resp.stacks.each { |stack|
  ok = false
  stack.tags.each { |tag|
- ok = true if (tag.key == "MU-ID") and tag.value == MU.deploy_id
+ ok = true if (tag.key == "MU-ID") and tag.value == deploy_id
  }
  if ok
  MU.log "Deleting CloudFormation stack #{stack.stack_name})"
@@ -39,123 +39,81 @@ module MU
  def create
  if @config['flavor'] == "EKS" or
  (@config['flavor'] == "Fargate" and !@config['containers'])
- subnet_ids = []
- @config["vpc"]["subnets"].each { |subnet|
- subnet_obj = @vpc.getSubnet(cloud_id: subnet["subnet_id"].to_s, name: subnet["subnet_name"].to_s)
- raise MuError, "Couldn't find a live subnet matching #{subnet} in #{@vpc} (#{@vpc.subnets})" if subnet_obj.nil?
- subnet_ids << subnet_obj.cloud_id
- }

- role_arn = @deploy.findLitterMate(name: @config['name']+"controlplane", type: "roles").arn
+ subnet_ids = mySubnets.map { |s| s.cloud_id }

- security_groups = []
- if @dependencies.has_key?("firewall_rule")
- @dependencies['firewall_rule'].values.each { |sg|
- security_groups << sg.cloud_id
+ params = {
+ :name => @mu_name,
+ :version => @config['kubernetes']['version'],
+ :role_arn => @deploy.findLitterMate(name: @config['name']+"controlplane", type: "roles").arn,
+ :resources_vpc_config => {
+ :security_group_ids => myFirewallRules.map { |fw| fw.cloud_id },
+ :subnet_ids => subnet_ids
+ }
+ }
+ if @config['logging'] and @config['logging'].size > 0
+ params[:logging] = {
+ :cluster_logging => [
+ {
+ :types => @config['logging'],
+ :enabled => true
+ }
+ ]
  }
  end
+ params.delete(:version) if params[:version] == "latest"

- resp = nil
- begin
- params = {
- :name => @mu_name,
- :version => @config['kubernetes']['version'],
- :role_arn => role_arn,
- :resources_vpc_config => {
- :security_group_ids => security_groups,
- :subnet_ids => subnet_ids
+ on_retry = Proc.new { |e|
+ # soul-crushing, yet effective
+ if e.message.match(/because (#{Regexp.quote(@config['region'])}[a-z]), the targeted availability zone, does not currently have sufficient capacity/)
+ bad_az = Regexp.last_match(1)
+ deletia = []
+ mySubnets.each { |subnet|
+ deletia << subnet.cloud_id if subnet.az == bad_az
  }
- }
- if @config['logging'] and @config['logging'].size > 0
- params[:logging] = {
- :cluster_logging => [
- {
- :types => @config['logging'],
- :enabled => true
- }
- ]
+ raise e if deletia.empty?
+ MU.log "#{bad_az} does not have EKS capacity. Dropping unsupported subnets from ContainerCluster '#{@config['name']}' and retrying.", MU::NOTICE, details: deletia
+ deletia.each { |subnet|
+ params[:resources_vpc_config][:subnet_ids].delete(subnet)
  }
  end
- params.delete(:version) if params[:version] == "latest"
+ }

+ MU.retrier([Aws::EKS::Errors::UnsupportedAvailabilityZoneException, Aws::EKS::Errors::InvalidParameterException], on_retry: on_retry, max: subnet_ids.size) {
  MU.log "Creating EKS cluster #{@mu_name}", details: params
- resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).create_cluster(params)
- rescue Aws::EKS::Errors::UnsupportedAvailabilityZoneException => e
- # this isn't the dumbest thing we've ever done, but it's up there
- if e.message.match(/because (#{Regexp.quote(@config['region'])}[a-z]), the targeted availability zone, does not currently have sufficient capacity/)
- bad_az = Regexp.last_match(1)
- deletia = nil
- subnet_ids.each { |subnet|
- subnet_obj = @vpc.getSubnet(cloud_id: subnet)
- if subnet_obj.az == bad_az
- deletia = subnet
- break
- end
- }
- raise e if deletia.nil?
- MU.log "#{bad_az} does not have EKS capacity. Dropping #{deletia} from ContainerCluster '#{@config['name']}' and retrying.", MU::NOTICE
- subnet_ids.delete(deletia)
- retry
- end
- rescue Aws::EKS::Errors::InvalidParameterException => e
- if e.message.match(/role with arn: #{Regexp.quote(role_arn)}.*?(could not be assumed|does not exist)/i)
- sleep 5
- retry
- else
- MU.log e.message, MU::WARN, details: params
- sleep 5
- retry
- end
- end
+ MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).create_cluster(params)
+ }
+ @cloud_id = @mu_name

- status = nil
- retries = 0
- begin
- resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_cluster(
- name: @mu_name
- )
- status = resp.cluster.status
- if status == "FAILED"
+ loop_if = Proc.new {
+ cloud_desc(use_cache: false).status != "ACTIVE"
+ }
+
+ MU.retrier(ignoreme: [Aws::EKS::Errors::ResourceNotFoundException], wait: 30, max: 60, loop_if: loop_if) { |retries, _wait|
+ if cloud_desc.status == "FAILED"
  raise MuError, "EKS cluster #{@mu_name} had FAILED status"
  end
- if retries > 0 and (retries % 3) == 0 and status != "ACTIVE"
- MU.log "Waiting for EKS cluster #{@mu_name} to become active (currently #{status})", MU::NOTICE
+ if retries > 0 and (retries % 3) == 0 and cloud_desc.status != "ACTIVE"
+ MU.log "Waiting for EKS cluster #{@mu_name} to become active (currently #{cloud_desc.status})", MU::NOTICE
  end
- sleep 30
- retries += 1
- rescue Aws::EKS::Errors::ResourceNotFoundException => e
- if retries < 30
- if retries > 0 and (retries % 3) == 0
- MU.log "Got #{e.message} trying to describe EKS cluster #{@mu_name}, waiting and retrying", MU::WARN, details: resp
- end
- sleep 30
- retries += 1
- retry
- else
- raise e
- end
- end while status != "ACTIVE"
+ }

  MU.log "Creation of EKS cluster #{@mu_name} complete"
  else
  MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).create_cluster(
  cluster_name: @mu_name
  )
-
+ @cloud_id = @mu_name
  end
- @cloud_id = @mu_name
  end

  # Called automatically by {MU::Deploy#createResources}
  def groom

- serverpool = if ['EKS', 'ECS'].include?(@config['flavor'])
- @deploy.findLitterMate(type: "server_pools", name: @config["name"]+"workers")
- end
- resource_lookup = MU::Cloud::AWS.listInstanceTypes(@config['region'])[@config['region']]
-
+ # EKS or Fargate-EKS: do Kubernetes things
  if @config['flavor'] == "EKS" or
  (@config['flavor'] == "Fargate" and !@config['containers'])
+
  # This will be needed if a loadbalancer has never been created in
  # this account; EKS applications might want one, but will fail in
  # confusing ways if this hasn't been done.
@@ -166,239 +124,17 @@ module MU
  rescue ::Aws::IAM::Errors::InvalidInput
  end

- kube = ERB.new(File.read(MU.myRoot+"/cookbooks/mu-tools/templates/default/kubeconfig-eks.erb"))
- configmap = ERB.new(File.read(MU.myRoot+"/extras/aws-auth-cm.yaml.erb"))
- tagme = [@vpc.cloud_id]
- tagme_elb = []
- @vpc.subnets.each { |s|
- tagme << s.cloud_id
- tagme_elb << s.cloud_id if !s.private?
- }
- rtbs = MU::Cloud::AWS.ec2(region: @config['region'], credentials: @config['credentials']).describe_route_tables(
- filters: [ { name: "vpc-id", values: [@vpc.cloud_id] } ]
- ).route_tables
- tagme.concat(rtbs.map { |r| r.route_table_id } )
- main_sg = @deploy.findLitterMate(type: "firewall_rules", name: "server_pool#{@config['name']}workers")
- tagme << main_sg.cloud_id if main_sg
- MU.log "Applying kubernetes.io tags to VPC resources", details: tagme
- MU::Cloud::AWS.createTag(tagme, "kubernetes.io/cluster/#{@mu_name}", "shared", credentials: @config['credentials'])
- MU::Cloud::AWS.createTag(tagme_elb, "kubernetes.io/cluster/elb", @mu_name, credentials: @config['credentials'])
-
- if @config['flavor'] == "Fargate"
- fargate_subnets = []
- @config["vpc"]["subnets"].each { |subnet|
- subnet_obj = @vpc.getSubnet(cloud_id: subnet["subnet_id"].to_s, name: subnet["subnet_name"].to_s)
- raise MuError, "Couldn't find a live subnet matching #{subnet} in #{@vpc} (#{@vpc.subnets})" if subnet_obj.nil?
- next if !subnet_obj.private?
- fargate_subnets << subnet_obj.cloud_id
- }
- podrole_arn = @deploy.findLitterMate(name: @config['name']+"pods", type: "roles").arn
- poolnum = 0
- poolthreads =[]
- @config['kubernetes_pools'].each { |selectors|
- profname = @mu_name+"-"+poolnum.to_s
- poolnum += 1
- desc = {
- :fargate_profile_name => profname,
- :cluster_name => @mu_name,
- :pod_execution_role_arn => podrole_arn,
- :selectors => selectors,
- :subnets => fargate_subnets.sort,
- :tags => @tags
- }
- begin
- resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_fargate_profile(
- cluster_name: @mu_name,
- fargate_profile_name: profname
- )
- if resp and resp.fargate_profile
- old_desc = MU.structToHash(resp.fargate_profile, stringify_keys: true)
- new_desc = MU.structToHash(desc, stringify_keys: true)
- ["created_at", "status", "fargate_profile_arn"].each { |k|
- old_desc.delete(k)
- }
- old_desc["subnets"].sort!
- if !old_desc.eql?(new_desc)
- MU.log "Deleting Fargate profile #{profname} in order to apply changes", MU::WARN, details: desc
- MU::Cloud::AWS::ContainerCluster.purge_fargate_profile(profname, @mu_name, @config['region'], @credentials)
- else
- next
- end
- end
- rescue Aws::EKS::Errors::ResourceNotFoundException
- # This is just fine!
- end
- MU.log "Creating EKS Fargate profile #{profname}", details: desc
- resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).create_fargate_profile(desc)
- begin
- resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_fargate_profile(
- cluster_name: @mu_name,
- fargate_profile_name: profname
- )
- sleep 1 if resp.fargate_profile.status == "CREATING"
- end while resp.fargate_profile.status == "CREATING"
- MU.log "Creation of EKS Fargate profile #{profname} complete"
- }
- end
-
- me = cloud_desc
- @endpoint = me.endpoint
- @cacert = me.certificate_authority.data
- @cluster = @mu_name
- if @config['flavor'] != "Fargate"
- resp = MU::Cloud::AWS.iam(credentials: @config['credentials']).get_role(role_name: @mu_name+"WORKERS")
- @worker_role_arn = resp.role.arn
- end
- kube_conf = @deploy.deploy_dir+"/kubeconfig-#{@config['name']}"
- gitlab_helper = @deploy.deploy_dir+"/gitlab-eks-helper-#{@config['name']}.sh"
-
- File.open(kube_conf, "w"){ |k|
- k.puts kube.result(binding)
- }
- gitlab = ERB.new(File.read(MU.myRoot+"/extras/gitlab-eks-helper.sh.erb"))
- File.open(gitlab_helper, "w"){ |k|
- k.puts gitlab.result(binding)
- }
+ apply_kubernetes_tags
+ create_fargate_kubernetes_profile if @config['flavor'] == "Fargate"
+ apply_kubernetes_resources

- if @config['flavor'] != "Fargate"
- eks_auth = @deploy.deploy_dir+"/eks-auth-cm-#{@config['name']}.yaml"
- File.open(eks_auth, "w"){ |k|
- k.puts configmap.result(binding)
- }
- authmap_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{eks_auth}"}
- MU.log "Configuring Kubernetes <=> IAM mapping for worker nodes", MU::NOTICE, details: authmap_cmd
- # maybe guard this mess
- retries = 0
- begin
- puts %x{#{authmap_cmd}}
- if $?.exitstatus != 0
- if retries >= 10
- raise MuError, "Failed to apply #{authmap_cmd}"
- end
- sleep 10
- retries += 1
- end
- end while $?.exitstatus != 0
-
- end
-
- # and this one
- admin_user_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{MU.myRoot}/extras/admin-user.yaml"}
- admin_role_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{MU.myRoot}/extras/admin-role-binding.yaml"}
- MU.log "Configuring Kubernetes admin-user and role", MU::NOTICE, details: admin_user_cmd+"\n"+admin_role_cmd
- %x{#{admin_user_cmd}}
- %x{#{admin_role_cmd}}
-
- if @config['kubernetes_resources']
- MU::Master.applyKubernetesResources(
- @config['name'],
- @config['kubernetes_resources'],
- kubeconfig: kube_conf,
- outputdir: @deploy.deploy_dir
- )
- end
-
- MU.log %Q{How to interact with your EKS cluster\nkubectl --kubeconfig "#{kube_conf}" get all\nkubectl --kubeconfig "#{kube_conf}" create -f some_k8s_deploy.yml\nkubectl --kubeconfig "#{kube_conf}" get nodes}, MU::SUMMARY
  elsif @config['flavor'] != "Fargate"
- resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).list_container_instances({
- cluster: @mu_name
- })
- existing = {}
- if resp
- uuids = []
- resp.container_instance_arns.each { |arn|
- uuids << arn.sub(/^.*?:container-instance\//, "")
- }
- if uuids.size > 0
- resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).describe_container_instances({
- cluster: @mu_name,
- container_instances: uuids
- })
- resp.container_instances.each { |i|
- existing[i.ec2_instance_id] = i
- }
- end
- end
-
- threads = []
- serverpool.listNodes.each { |mynode|
- resources = resource_lookup[node.cloud_desc.instance_type]
- threads << Thread.new(mynode) { |node|
- ident_doc = nil
- ident_doc_sig = nil
- if !node.windows?
- session = node.getSSHSession(10, 30)
- ident_doc = session.exec!("curl -s http://169.254.169.254/latest/dynamic/instance-identity/document/")
- ident_doc_sig = session.exec!("curl -s http://169.254.169.254/latest/dynamic/instance-identity/signature/")
- # else
- # begin
- # session = node.getWinRMSession(1, 60)
- # rescue StandardError # XXX
- # session = node.getSSHSession(1, 60)
- # end
- end
- MU.log "Identity document for #{node}", MU::DEBUG, details: ident_doc
- MU.log "Identity document signature for #{node}", MU::DEBUG, details: ident_doc_sig
- params = {
- :cluster => @mu_name,
- :instance_identity_document => ident_doc,
- :instance_identity_document_signature => ident_doc_sig,
- :total_resources => [
- {
- :name => "CPU",
- :type => "INTEGER",
- :integer_value => resources["vcpu"].to_i
- },
- {
- :name => "MEMORY",
- :type => "INTEGER",
- :integer_value => (resources["memory"]*1024*1024).to_i
- }
- ]
- }
- if !existing.has_key?(node.cloud_id)
- MU.log "Registering ECS instance #{node} in cluster #{@mu_name}", details: params
- else
- params[:container_instance_arn] = existing[node.cloud_id].container_instance_arn
- MU.log "Updating ECS instance #{node} in cluster #{@mu_name}", MU::NOTICE, details: params
- end
- MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).register_container_instance(params)
-
- }
- }
- threads.each { |t|
- t.join
- }
+ manage_ecs_workers
  end

+ # ECS: manage containers/services/tasks
  if @config['flavor'] != "EKS" and @config['containers']

- security_groups = []
- if @dependencies.has_key?("firewall_rule")
- @dependencies['firewall_rule'].values.each { |sg|
- security_groups << sg.cloud_id
- }
- end
-
- tasks_registered = 0
- retries = 0
- svc_resp = begin
- MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).list_services(
- cluster: arn
- )
- rescue Aws::ECS::Errors::ClusterNotFoundException => e
- if retries < 10
- sleep 5
- retries += 1
- retry
- else
- raise e
- end
- end
- existing_svcs = svc_resp.service_arns.map { |s|
- s.gsub(/.*?:service\/(.*)/, '\1')
- }
-
  # Reorganize things so that we have services and task definitions
  # mapped to the set of containers they must contain
  tasks = {}
@@ -409,238 +145,35 @@ module MU
  tasks[service_name] << c
  }

+ existing_svcs = list_ecs_services
+
  tasks.each_pair { |service_name, containers|
- launch_type = @config['flavor'] == "ECS" ? "EC2" : "FARGATE"
- cpu_total = 0
- mem_total = 0
  role_arn = nil
- lbs = []

- container_definitions = containers.map { |c|
- container_name = @mu_name+"-"+c['name'].upcase
+ container_definitions, role, lbs = get_ecs_container_definitions(containers)
+ role_arn ||= role
+
+ cpu_total = mem_total = 0
+ containers.each { |c|
  cpu_total += c['cpu']
  mem_total += c['memory']
-
- if c["role"] and !role_arn
- found = MU::MommaCat.findStray(
- @config['cloud'],
- "role",
- cloud_id: c["role"]["id"],
- name: c["role"]["name"],
- deploy_id: c["role"]["deploy_id"] || @deploy.deploy_id,
- dummy_ok: false
- )
- if found
- found = found.first
- if found and found.cloudobj
- role_arn = found.cloudobj.arn
- end
- else
- raise MuError, "Unable to find execution role from #{c["role"]}"
- end
- end
-
- if c['loadbalancers'] != []
- c['loadbalancers'].each {|lb|
- found = @deploy.findLitterMate(name: lb['name'], type: "loadbalancer")
- if found
- MU.log "Mapping LB #{found.mu_name} to service #{c['name']}", MU::INFO
- if found.cloud_desc.type != "classic"
- elb_groups = MU::Cloud::AWS.elb2(region: @config['region'], credentials: @config['credentials']).describe_target_groups({
- load_balancer_arn: found.cloud_desc.load_balancer_arn
- })
- matching_target_groups = []
- elb_groups.target_groups.each { |tg|
- if tg.port.to_i == lb['container_port'].to_i
- matching_target_groups << {
- arn: tg['target_group_arn'],
- name: tg['target_group_name']
- }
- end
- }
- if matching_target_groups.length >= 1
- MU.log "#{matching_target_groups.length} matching target groups found. Mapping #{container_name} to target group #{matching_target_groups.first['name']}", MU::INFO
- lbs << {
- container_name: container_name,
- container_port: lb['container_port'],
- target_group_arn: matching_target_groups.first[:arn]
- }
- else
- raise MuError, "No matching target groups found"
- end
- elsif @config['flavor'] == "Fargate" && found.cloud_desc.type == "classic"
- raise MuError, "Classic Load Balancers are not supported with Fargate."
- else
- MU.log "Mapping Classic LB #{found.mu_name} to service #{container_name}", MU::INFO
- lbs << {
- container_name: container_name,
- container_port: lb['container_port'],
- load_balancer_name: found.mu_name
- }
- end
- else
- raise MuError, "Unable to find loadbalancers from #{c["loadbalancers"].first['name']}"
- end
- }
- end
-
- params = {
- name: @mu_name+"-"+c['name'].upcase,
- image: c['image'],
- memory: c['memory'],
- cpu: c['cpu']
- }
- if !@config['vpc']
- c['hostname'] ||= @mu_name+"-"+c['name'].upcase
- end
- [:essential, :hostname, :start_timeout, :stop_timeout, :user, :working_directory, :disable_networking, :privileged, :readonly_root_filesystem, :interactive, :pseudo_terminal, :links, :entry_point, :command, :dns_servers, :dns_search_domains, :docker_security_options, :port_mappings, :repository_credentials, :mount_points, :environment, :volumes_from, :secrets, :depends_on, :extra_hosts, :docker_labels, :ulimits, :system_controls, :health_check, :resource_requirements].each { |param|
- if c.has_key?(param.to_s)
- params[param] = if !c[param.to_s].nil? and (c[param.to_s].is_a?(Hash) or c[param.to_s].is_a?(Array))
- MU.strToSym(c[param.to_s])
- else
- c[param.to_s]
- end
- end
- }
- if @config['vpc']
- [:hostname, :dns_servers, :dns_search_domains, :links].each { |param|
- if params[param]
- MU.log "Container parameter #{param.to_s} not supported in VPC clusters, ignoring", MU::WARN
- params.delete(param)
- end
- }
- end
- if @config['flavor'] == "Fargate"
- [:privileged, :docker_security_options].each { |param|
- if params[param]
- MU.log "Container parameter #{param.to_s} not supported in Fargate clusters, ignoring", MU::WARN
- params.delete(param)
- end
- }
- end
- if c['log_configuration']
- log_obj = @deploy.findLitterMate(name: c['log_configuration']['options']['awslogs-group'], type: "logs")
- if log_obj
- c['log_configuration']['options']['awslogs-group'] = log_obj.mu_name
- end
- params[:log_configuration] = MU.strToSym(c['log_configuration'])
- end
- params
  }
-
  cpu_total = 2 if cpu_total == 0
  mem_total = 2 if mem_total == 0

- task_params = {
- family: @deploy.deploy_id,
- container_definitions: container_definitions,
- requires_compatibilities: [launch_type]
- }
-
- if @config['volumes']
- task_params[:volumes] = []
- @config['volumes'].each { |v|
- vol = { :name => v['name'] }
- if v['type'] == "host"
- vol[:host] = {}
- if v['host_volume_source_path']
- vol[:host][:source_path] = v['host_volume_source_path']
- end
- elsif v['type'] == "docker"
- vol[:docker_volume_configuration] = MU.strToSym(v['docker_volume_configuration'])
- else
- raise MuError, "Invalid volume type '#{v['type']}' specified in ContainerCluster '#{@mu_name}'"
- end
- task_params[:volumes] << vol
- }
- end
-
- if role_arn
- task_params[:execution_role_arn] = role_arn
- task_params[:task_role_arn] = role_arn
- end
- if @config['flavor'] == "Fargate"
- task_params[:network_mode] = "awsvpc"
- task_params[:cpu] = cpu_total.to_i.to_s
- task_params[:memory] = mem_total.to_i.to_s
- end
-
- tasks_registered += 1
- MU.log "Registering task definition #{service_name} with #{container_definitions.size.to_s} containers"
+ task_def = register_ecs_task(container_definitions, service_name, cpu_total, mem_total, role_arn: role_arn)

- # XXX this helpfully keeps revisions, but let's compare anyway and avoid cluttering with identical ones
- resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).register_task_definition(task_params)
+ create_update_ecs_service(task_def, service_name, lbs, existing_svcs)
+ existing_svcs << service_name
+ }

- task_def = resp.task_definition.task_definition_arn
- service_params = {
- :cluster => @mu_name,
- :desired_count => @config['instance_count'], # XXX this makes no sense
- :service_name => service_name,
- :launch_type => launch_type,
- :task_definition => task_def,
- :load_balancers => lbs
+ if tasks.size > 0
+ tasks_failing = false
+ MU.retrier(wait: 15, max: 10, loop_if: Proc.new { tasks_failing }){ |retries, _wait|
+ tasks_failing = !MU::Cloud::AWS::ContainerCluster.tasksRunning?(@mu_name, log: (retries > 0), region: @config['region'], credentials: @config['credentials'])
  }
- if @config['vpc']
- subnet_ids = []
- all_public = true
-
- subnets =
- if @config["vpc"]["subnets"].empty?
- @vpc.subnets
- else
- subnet_objects= []
- @config["vpc"]["subnets"].each { |subnet|
- sobj = @vpc.getSubnet(cloud_id: subnet["subnet_id"], name: subnet["subnet_name"])
- if sobj.nil?
- MU.log "Got nil result from @vpc.getSubnet(cloud_id: #{subnet["subnet_id"]}, name: #{subnet["subnet_name"]})", MU::WARN
- else
- subnet_objects << sobj
- end
- }
- subnet_objects
- end
-
- subnets.each { |subnet_obj|
- subnet_ids << subnet_obj.cloud_id
- all_public = false if subnet_obj.private?
- }
-
- service_params[:network_configuration] = {
- :awsvpc_configuration => {
- :subnets => subnet_ids,
- :security_groups => security_groups,
- :assign_public_ip => all_public ? "ENABLED" : "DISABLED"
- }
- }
- end
-
- if !existing_svcs.include?(service_name)
- MU.log "Creating Service #{service_name}"
-
- resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).create_service(service_params)
- else
- service_params[:service] = service_params[:service_name].dup
- service_params.delete(:service_name)
- service_params.delete(:launch_type)
- MU.log "Updating Service #{service_name}", MU::NOTICE, details: service_params
-
- resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).update_service(service_params)
- end
- existing_svcs << service_name
- }

- max_retries = 10
- retries = 0
- if tasks_registered > 0
- retry_me = false
- begin
- retry_me = !MU::Cloud::AWS::ContainerCluster.tasksRunning?(@mu_name, log: (retries > 0), region: @config['region'], credentials: @config['credentials'])
- retries += 1
- sleep 15 if retry_me
- end while retry_me and retries < max_retries
- tasks = nil
-
- if retry_me
+ if tasks_failing
  MU.log "Not all tasks successfully launched in cluster #{@mu_name}", MU::WARN
  end
  end
@@ -754,6 +287,7 @@ MU.log c.name, MU::NOTICE, details: t
  # @return [OpenStruct]
  def cloud_desc(use_cache: true)
  return @cloud_desc_cache if @cloud_desc_cache and use_cache
+ return nil if !@cloud_id
  @cloud_desc_cache = if @config['flavor'] == "EKS" or
  (@config['flavor'] == "Fargate" and !@config['containers'])
  resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_cluster(
@@ -793,7 +327,7 @@ MU.log c.name, MU::NOTICE, details: t
  end

  @@eks_versions = {}
- @@eks_version_semaphore = Mutex.new
+ @@eks_version_semaphores = {}
  # Use the AWS SSM API to fetch the current version of the Amazon Linux
  # ECS-optimized AMI, so we can use it as a default AMI for ECS deploys.
  # @param flavor [String]: ECS or EKS
@@ -806,24 +340,22 @@ MU.log c.name, MU::NOTICE, details: t
  names: ["/aws/service/#{flavor.downcase}/optimized-ami/amazon-linux/recommended"]
  )
  else
- @@eks_version_semaphore.synchronize {
+ @@eks_version_semaphores[region] ||= Mutex.new
+
+ @@eks_version_semaphores[region].synchronize {
  if !@@eks_versions[region]
  @@eks_versions[region] ||= []
  versions = {}
- resp = nil
- next_token = nil
- begin
- resp = MU::Cloud::AWS.ssm(region: region).get_parameters_by_path(
- path: "/aws/service/#{flavor.downcase}",
- recursive: true,
- next_token: next_token
- )
- resp.parameters.each { |p|
- p.name.match(/\/aws\/service\/eks\/optimized-ami\/([^\/]+?)\//)
- versions[Regexp.last_match[1]] = true
- }
- next_token = resp.next_token
- end while !next_token.nil?
+ resp = MU::Cloud::AWS.ssm(region: region).get_parameters_by_path(
+ path: "/aws/service/#{flavor.downcase}/optimized-ami",
+ recursive: true,
+ max_results: 10 # as high as it goes, ugh
+ )
+
+ resp.parameters.each { |p|
+ p.name.match(/\/aws\/service\/eks\/optimized-ami\/([^\/]+?)\//)
+ versions[Regexp.last_match[1]] = true
+ }
  @@eks_versions[region] = versions.keys.sort { |a, b| MU.version_sort(a, b) }
  end
  }
@@ -843,15 +375,31 @@ MU.log c.name, MU::NOTICE, details: t
  nil
  end

+ @@supported_eks_region_cache = []
+ @@eks_region_semaphore = Mutex.new
+
  # Return the list of regions where we know EKS is supported.
  def self.EKSRegions(credentials = nil)
- eks_regions = []
- MU::Cloud::AWS.listRegions(credentials: credentials).each { |r|
- ami = getStandardImage("EKS", r)
- eks_regions << r if ami
- }
+ @@eks_region_semaphore.synchronize {
+ if @@supported_eks_region_cache and !@@supported_eks_region_cache.empty?
+ return @@supported_eks_region_cache
+ end
+ start = Time.now
+ # the SSM API is painfully slow for large result sets, so thread
+ # these and do them in parallel
+ @@supported_eks_region_cache = []
+ region_threads = []
+ MU::Cloud::AWS.listRegions(credentials: credentials).each { |region|
+ region_threads << Thread.new(region) { |r|
+ r_start = Time.now
+ ami = getStandardImage("EKS", r)
+ @@supported_eks_region_cache << r if ami
+ }
+ }
+ region_threads.each { |t| t.join }

- eks_regions
+ @@supported_eks_region_cache
+ }
  end

  # Does this resource type exist as a global (cloud-wide) artifact, or
@@ -872,158 +420,143 @@ MU.log c.name, MU::NOTICE, details: t
  # @param ignoremaster [Boolean]: If true, will remove resources not flagged as originating from this Mu server
  # @param region [String]: The cloud provider region
  # @return [void]
- def self.cleanup(noop: false, ignoremaster: false, region: MU.curRegion, credentials: nil, flags: {})
+ def self.cleanup(noop: false, deploy_id: MU.deploy_id, ignoremaster: false, region: MU.curRegion, credentials: nil, flags: {})
  MU.log "AWS::ContainerCluster.cleanup: need to support flags['known']", MU::DEBUG, details: flags
  MU.log "Placeholder: AWS ContainerCluster artifacts do not support tags, so ignoremaster cleanup flag has no effect", MU::DEBUG, details: ignoremaster

- resp = MU::Cloud::AWS.ecs(credentials: credentials, region: region).list_clusters
-
- if resp and resp.cluster_arns and resp.cluster_arns.size > 0
- resp.cluster_arns.each { |arn|
- if arn.match(/:cluster\/(#{MU.deploy_id}[^:]+)$/)
- cluster = Regexp.last_match[1]
-
- svc_resp = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_services(
- cluster: arn
- )
- if svc_resp and svc_resp.service_arns
- svc_resp.service_arns.each { |svc_arn|
- svc_name = svc_arn.gsub(/.*?:service\/(.*)/, '\1')
- MU.log "Deleting Service #{svc_name} from ECS Cluster #{cluster}"
- if !noop
- MU::Cloud::AWS.ecs(region: region, credentials: credentials).delete_service(
- cluster: arn,
- service: svc_name,
- force: true # man forget scaling up and down if we're just deleting the cluster
- )
- end
- }
- end
-
- instances = MU::Cloud::AWS.ecs(credentials: credentials, region: region).list_container_instances({
- cluster: cluster
- })
- if instances
- instances.container_instance_arns.each { |instance_arn|
- uuid = instance_arn.sub(/^.*?:container-instance\//, "")
- MU.log "Deregistering instance #{uuid} from ECS Cluster #{cluster}"
- if !noop
- resp = MU::Cloud::AWS.ecs(credentials: credentials, region: region).deregister_container_instance({
- cluster: cluster,
- container_instance: uuid,
- force: true,
- })
- end
- }
- end
- MU.log "Deleting ECS Cluster #{cluster}"
- if !noop
- # TODO de-register container instances
- begin
- MU::Cloud::AWS.ecs(credentials: credentials, region: region).delete_cluster(
- cluster: cluster
- )
- rescue Aws::ECS::Errors::ClusterContainsTasksException
- sleep 5
- retry
- end
- end
- end
- }
- end
-
- tasks = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_task_definitions(
- family_prefix: MU.deploy_id
- )
+ purge_ecs_clusters(noop: noop, region: region, credentials: credentials, deploy_id: deploy_id)

- if tasks and tasks.task_definition_arns
- tasks.task_definition_arns.each { |arn|
- MU.log "Deregistering Fargate task definition #{arn}"
- if !noop
- MU::Cloud::AWS.ecs(region: region, credentials: credentials).deregister_task_definition(
- task_definition: arn
- )
- end
- }
- end
+ purge_eks_clusters(noop: noop, region: region, credentials: credentials, deploy_id: deploy_id)

- return if !MU::Cloud::AWS::ContainerCluster.EKSRegions.include?(region)
+ end

+ def self.purge_eks_clusters(noop: false, region: MU.curRegion, credentials: nil, deploy_id: MU.deploy_id)
  resp = begin
  MU::Cloud::AWS.eks(credentials: credentials, region: region).list_clusters
  rescue Aws::EKS::Errors::AccessDeniedException
  # EKS isn't actually live in this region, even though SSM lists
  # base images for it
+ if @@supported_eks_region_cache
+ @@supported_eks_region_cache.delete(region)
+ end
  return
  end

+ return if !resp or !resp.clusters

- if resp and resp.clusters
- resp.clusters.each { |cluster|
- if cluster.match(/^#{MU.deploy_id}-/)
+ resp.clusters.each { |cluster|
+ if cluster.match(/^#{deploy_id}-/)

- desc = MU::Cloud::AWS.eks(credentials: credentials, region: region).describe_cluster(
- name: cluster
- ).cluster
+ desc = MU::Cloud::AWS.eks(credentials: credentials, region: region).describe_cluster(
+ name: cluster
+ ).cluster

- profiles = MU::Cloud::AWS.eks(region: region, credentials: credentials).list_fargate_profiles(
- cluster_name: cluster
- )
- if profiles and profiles.fargate_profile_names
- profiles.fargate_profile_names.each { |profile|
- MU.log "Deleting Fargate EKS profile #{profile}"
- next if noop
- MU::Cloud::AWS::ContainerCluster.purge_fargate_profile(profile, cluster, region, credentials)
- }
- end
+ profiles = MU::Cloud::AWS.eks(region: region, credentials: credentials).list_fargate_profiles(
+ cluster_name: cluster
+ )
+ if profiles and profiles.fargate_profile_names
+ profiles.fargate_profile_names.each { |profile|
+ MU.log "Deleting Fargate EKS profile #{profile}"
+ next if noop
+ MU::Cloud::AWS::ContainerCluster.purge_fargate_profile(profile, cluster, region, credentials)
+ }
+ end

- untag = []
- untag << desc.resources_vpc_config.vpc_id
- subnets = MU::Cloud::AWS.ec2(credentials: credentials, region: region).describe_subnets(
- filters: [ { name: "vpc-id", values: [desc.resources_vpc_config.vpc_id] } ]
- ).subnets
-
- # subnets
- untag.concat(subnets.map { |s| s.subnet_id } )
- rtbs = MU::Cloud::AWS.ec2(credentials: credentials, region: region).describe_route_tables(
- filters: [ { name: "vpc-id", values: [desc.resources_vpc_config.vpc_id] } ]
- ).route_tables
- untag.concat(rtbs.map { |r| r.route_table_id } )
- untag.concat(desc.resources_vpc_config.subnet_ids)
- untag.concat(desc.resources_vpc_config.security_group_ids)
- MU.log "Removing Kubernetes tags from VPC resources for #{cluster}", details: untag
- if !noop
- MU::Cloud::AWS.removeTag("kubernetes.io/cluster/#{cluster}", "shared", untag)
- MU::Cloud::AWS.removeTag("kubernetes.io/cluster/elb", cluster, untag)
+ remove_kubernetes_tags(cluster, desc, region: region, credentials: credentials, noop: noop)
+
+ MU.log "Deleting EKS Cluster #{cluster}"
+ next if noop
+ MU::Cloud::AWS.eks(credentials: credentials, region: region).delete_cluster(
+ name: cluster
+ )
+
+ status = nil
+ loop_if = Proc.new {
+ status != "FAILED"
+ }
+
+ MU.retrier(ignoreme: [Aws::EKS::Errors::ResourceNotFoundException], wait: 60){ |retries, _wait|
+ status = MU::Cloud::AWS.eks(credentials: credentials, region: region).describe_cluster(
+ name: cluster
+ ).cluster.status
+ if retries > 0 and (retries % 3) == 0
+ MU.log "Waiting for EKS cluster #{cluster} to finish deleting (status #{status})", MU::NOTICE
  end
- MU.log "Deleting EKS Cluster #{cluster}"
- if !noop
- MU::Cloud::AWS.eks(credentials: credentials, region: region).delete_cluster(
- name: cluster
+ }
+ # MU::Cloud.resourceClass("AWS", "Server").removeIAMProfile(cluster)
+ end
+ }
+ end
+ private_class_method :purge_eks_clusters
+
+ def self.purge_ecs_clusters(noop: false, region: MU.curRegion, credentials: nil, deploy_id: MU.deploy_id)
+ start = Time.now
+ resp = MU::Cloud::AWS.ecs(credentials: credentials, region: region).list_clusters
+
+ return if !resp or !resp.cluster_arns or resp.cluster_arns.empty?
+
+ resp.cluster_arns.each { |arn|
+ if arn.match(/:cluster\/(#{deploy_id}[^:]+)$/)
+ cluster = Regexp.last_match[1]
+
+ svc_resp = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_services(
+ cluster: arn
+ )
+ if svc_resp and svc_resp.service_arns
+ svc_resp.service_arns.each { |svc_arn|
+ svc_name = svc_arn.gsub(/.*?:service\/(.*)/, '\1')
+ MU.log "Deleting Service #{svc_name} from ECS Cluster #{cluster}"
+ next if noop
+ MU::Cloud::AWS.ecs(region: region, credentials: credentials).delete_service(
+ cluster: arn,
+ service: svc_name,
+ force: true # man forget scaling up and down if we're just deleting the cluster
  )
- begin
- status = nil
- retries = 0
- begin
- deletion = MU::Cloud::AWS.eks(credentials: credentials, region: region).describe_cluster(
- name: cluster
- )
- status = deletion.cluster.status
- if retries > 0 and (retries % 3) == 0
- MU.log "Waiting for EKS cluster #{cluster} to finish deleting (status #{status})", MU::NOTICE
- end
- retries += 1
- sleep 30
- end while status
- rescue Aws::EKS::Errors::ResourceNotFoundException
- # this is what we want
- end
- # MU::Cloud::AWS::Server.removeIAMProfile(cluster)
- end
+ }
+ end
+
+ instances = MU::Cloud::AWS.ecs(credentials: credentials, region: region).list_container_instances({
+ cluster: cluster
+ })
+ if instances
+ instances.container_instance_arns.each { |instance_arn|
+ uuid = instance_arn.sub(/^.*?:container-instance\//, "")
+ MU.log "Deregistering instance #{uuid} from ECS Cluster #{cluster}"
+ next if noop
+ resp = MU::Cloud::AWS.ecs(credentials: credentials, region: region).deregister_container_instance({
+ cluster: cluster,
+ container_instance: uuid,
+ force: true,
+ })
+ }
+ end
+ MU.log "Deleting ECS Cluster #{cluster}"
+ next if noop
+ MU.retrier([Aws::ECS::Errors::ClusterContainsTasksException], wait: 5){
+ # TODO de-register container instances
+ MU::Cloud::AWS.ecs(credentials: credentials, region: region).delete_cluster(
+ cluster: cluster
+ )
+ }
+ end
+ }
+
+ tasks = MU::Cloud::AWS.ecs(region: region, credentials: credentials).list_task_definitions(
+ family_prefix: deploy_id
+ )
+
+ if tasks and tasks.task_definition_arns
+ tasks.task_definition_arns.each { |arn|
+ MU.log "Deregistering Fargate task definition #{arn}"
+ if !noop
+ MU::Cloud::AWS.ecs(region: region, credentials: credentials).deregister_task_definition(
+ task_definition: arn
+ )
  end
  }
  end
  end
+ private_class_method :purge_ecs_clusters

  # Locate an existing container_cluster.
  # @return [Hash<String,OpenStruct>]: The cloud provider's complete descriptions of matching container_clusters.
@@ -1034,10 +567,8 @@ MU.log c.name, MU::NOTICE, details: t
1034
567
  resp = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).describe_clusters(clusters: [args[:cloud_id]])
1035
568
  if resp.clusters and resp.clusters.size > 0
1036
569
  found[args[:cloud_id]] = resp.clusters.first
1037
- end
1038
-
1039
- # XXX name collision is possible here
1040
- if found.size == 0
570
+ else
571
+ # XXX misses due to name collision are possible here
1041
572
  desc = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).describe_cluster(name: args[:cloud_id])
1042
573
  found[args[:cloud_id]] = desc.cluster if desc and desc.cluster
1043
574
  end
@@ -1045,14 +576,14 @@ MU.log c.name, MU::NOTICE, details: t
1045
576
  next_token = nil
1046
577
  begin
1047
578
  resp = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).list_clusters(next_token: next_token)
1048
- if resp and resp.cluster_arns and resp.cluster_arns.size > 0
1049
- names = resp.cluster_arns.map { |a| a.sub(/.*?:cluster\//, '') }
1050
- descs = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).describe_clusters(clusters: names)
1051
- if descs and descs.clusters
1052
- descs.clusters.each { |c|
1053
- found[c.cluster_name] = c
1054
- }
1055
- end
579
+ break if !resp or !resp.cluster_arns
580
+ next_token = resp.next_token
581
+ names = resp.cluster_arns.map { |a| a.sub(/.*?:cluster\//, '') }
582
+ descs = MU::Cloud::AWS.ecs(region: args[:region], credentials: args[:credentials]).describe_clusters(clusters: names)
583
+ if descs and descs.clusters
584
+ descs.clusters.each { |c|
585
+ found[c.cluster_name] = c
586
+ }
1056
587
  end
1057
588
  end while next_token
1058
589
 
@@ -1060,14 +591,12 @@ MU.log c.name, MU::NOTICE, details: t
1060
591
  next_token = nil
1061
592
  begin
1062
593
  resp = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).list_clusters(next_token: next_token)
1063
- if resp and resp.clusters
1064
- resp.clusters.each { |c|
1065
- puts c
1066
- desc = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).describe_cluster(name: c)
1067
- found[c] = desc.cluster if desc and desc.cluster
1068
- }
1069
- next_token = resp.next_token
1070
- end
594
+ break if !resp or !resp.clusters
595
+ resp.clusters.each { |c|
596
+ desc = MU::Cloud::AWS.eks(region: args[:region], credentials: args[:credentials]).describe_cluster(name: c)
597
+ found[c] = desc.cluster if desc and desc.cluster
598
+ }
599
+ next_token = resp.next_token
1071
600
  rescue Aws::EKS::Errors::AccessDeniedException
1072
601
  # not all regions support EKS
1073
602
  end while next_token
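Both listing loops now share the same pagination shape: bail out when a page comes back empty, record next_token before doing any per-item work, and repeat until the token runs out. A minimal sketch of that idiom with a bare EKS client (region is a placeholder):

    require 'aws-sdk-eks'

    eks = Aws::EKS::Client.new(region: "us-east-1")    # placeholder region
    found = {}
    next_token = nil
    begin
      resp = eks.list_clusters(next_token: next_token)
      break if !resp or !resp.clusters
      next_token = resp.next_token                     # save the cursor before the slow per-cluster calls
      resp.clusters.each { |name|
        desc = eks.describe_cluster(name: name)
        found[name] = desc.cluster if desc and desc.cluster
      }
    end while next_token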
@@ -1703,18 +1232,18 @@ MU.log c.name, MU::NOTICE, details: t
1703
1232
  # @return [Boolean]: True if validation succeeded, False otherwise
1704
1233
  def self.validateConfig(cluster, configurator)
1705
1234
  ok = true
1706
-
1707
- cluster['size'] = MU::Cloud::AWS::Server.validateInstanceType(cluster["instance_type"], cluster["region"])
1235
+ start = Time.now
1236
+ cluster['size'] = MU::Cloud.resourceClass("AWS", "Server").validateInstanceType(cluster["instance_type"], cluster["region"])
1708
1237
  ok = false if cluster['size'].nil?
1709
1238
 
1710
1239
  cluster["flavor"] = "EKS" if cluster["flavor"].match(/^Kubernetes$/i)
1711
1240
 
1712
- if cluster["flavor"] == "ECS" and cluster["kubernetes"] and !MU::Cloud::AWS.isGovCloud?(cluster["region"])
1241
+ if cluster["flavor"] == "ECS" and cluster["kubernetes"] and !MU::Cloud::AWS.isGovCloud?(cluster["region"]) and !cluster["containers"] and MU::Cloud::AWS::ContainerCluster.EKSRegions(cluster['credentials']).include?(cluster['region'])
1713
1242
  cluster["flavor"] = "EKS"
1714
1243
  MU.log "Setting flavor of ContainerCluster '#{cluster['name']}' to EKS ('kubernetes' stanza was specified)", MU::NOTICE
1715
1244
  end
1716
1245
 
1717
- if cluster["flavor"] == "EKS" and !MU::Cloud::AWS::ContainerCluster.EKSRegions.include?(cluster['region'])
1246
+ if cluster["flavor"] == "EKS" and !MU::Cloud::AWS::ContainerCluster.EKSRegions(cluster['credentials']).include?(cluster['region'])
1718
1247
  MU.log "EKS is only available in some regions", MU::ERR, details: MU::Cloud::AWS::ContainerCluster.EKSRegions
1719
1248
  ok = false
1720
1249
  end
@@ -1784,7 +1313,7 @@ MU.log c.name, MU::NOTICE, details: t
1784
1313
  end
1785
1314
 
1786
1315
  if !created_generic_loggroup
1787
- cluster["dependencies"] << { "type" => "log", "name" => logname }
1316
+ MU::Config.addDependency(cluster, logname, "log")
1788
1317
  logdesc = {
1789
1318
  "name" => logname,
1790
1319
  "region" => cluster["region"],
@@ -1823,10 +1352,7 @@ MU.log c.name, MU::NOTICE, details: t
1823
1352
  }
1824
1353
  configurator.insertKitten(roledesc, "roles")
1825
1354
 
1826
- cluster["dependencies"] << {
1827
- "type" => "role",
1828
- "name" => rolename
1829
- }
1355
+ MU::Config.addDependency(cluster, rolename, "role")
1830
1356
  end
1831
1357
 
1832
1358
  created_generic_loggroup = true
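The inline dependency hashes give way to MU::Config.addDependency throughout this file. Judging from the hashes it replaces, the helper presumably amounts to something like the sketch below (an assumption for readability, not the actual implementation in modules/mu/config.rb):

    # Assumed behavior, reconstructed from the removed lines above.
    def self.addDependency(resource, name, type, phase: nil)
      resource["dependencies"] ||= []
      dep = { "type" => type, "name" => name }
      dep["phase"] = phase if phase
      resource["dependencies"] << dep unless resource["dependencies"].include?(dep)
    end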
@@ -1855,11 +1381,10 @@ MU.log c.name, MU::NOTICE, details: t
1855
1381
  role["tags"] = cluster["tags"] if !cluster["tags"].nil?
1856
1382
  role["optional_tags"] = cluster["optional_tags"] if !cluster["optional_tags"].nil?
1857
1383
  configurator.insertKitten(role, "roles")
1858
- cluster['dependencies'] << {
1859
- "type" => "role",
1860
- "name" => cluster["name"]+"pods",
1861
- "phase" => "groom"
1862
- }
1384
+ MU::Config.addDependency(cluster, cluster["name"]+"pods", "role", phase: "groom")
1385
+ if !MU::Master.kubectl
1386
+ MU.log "Since I can't find a kubectl executable, you will have to handle all service account, user, and role bindings manually!", MU::WARN
1387
+ end
1863
1388
  end
1864
1389
 
1865
1390
  if MU::Cloud::AWS.isGovCloud?(cluster["region"]) and cluster["flavor"] == "EKS"
@@ -1869,7 +1394,8 @@ MU.log c.name, MU::NOTICE, details: t
1869
1394
 
1870
1395
 
1871
1396
  if ["ECS", "EKS"].include?(cluster["flavor"])
1872
- std_ami = getStandardImage(cluster["flavor"], cluster['region'], version: cluster['kubernetes']['version'], gpu: cluster['gpu'])
1397
+ version = cluster["kubernetes"] ? cluster['kubernetes']['version'] : nil
1398
+ std_ami = getStandardImage(cluster["flavor"], cluster['region'], version: version, gpu: cluster['gpu'])
1873
1399
  cluster["host_image"] ||= std_ami
1874
1400
  if cluster["host_image"] != std_ami
1875
1401
  if cluster["flavor"] == "ECS"
@@ -1957,17 +1483,17 @@ MU.log c.name, MU::NOTICE, details: t
1957
1483
  end
1958
1484
 
1959
1485
  if cluster["flavor"] == "EKS"
1486
+
1487
+ if !MU::Master.kubectl
1488
+ MU.log "Without a kubectl executable, I cannot bind IAM roles to EKS worker nodes", MU::ERR
1489
+ ok = false
1490
+ end
1960
1491
  worker_pool["canned_iam_policies"] = [
1961
1492
  "AmazonEKSWorkerNodePolicy",
1962
1493
  "AmazonEKS_CNI_Policy",
1963
1494
  "AmazonEC2ContainerRegistryReadOnly"
1964
1495
  ]
1965
- worker_pool["dependencies"] = [
1966
- {
1967
- "type" => "container_cluster",
1968
- "name" => cluster['name']
1969
- }
1970
- ]
1496
+ MU::Config.addDependency(worker_pool, cluster["name"], "container_cluster")
1971
1497
  worker_pool["run_list"] = ["recipe[mu-tools::eks]"]
1972
1498
  worker_pool["run_list"].concat(cluster["run_list"]) if cluster["run_list"]
1973
1499
  MU::Config::Server.common_properties.keys.each { |k|
@@ -1975,16 +1501,14 @@ MU.log c.name, MU::NOTICE, details: t
1975
1501
  worker_pool[k] = cluster[k]
1976
1502
  end
1977
1503
  }
1978
-
1504
+ else
1505
+ worker_pool["groom"] = false # don't meddle with ECS workers unnecessarily
1979
1506
  end
1980
1507
 
1981
1508
  configurator.insertKitten(worker_pool, "server_pools")
1982
1509
 
1983
1510
  if cluster["flavor"] == "ECS"
1984
- cluster["dependencies"] << {
1985
- "name" => cluster["name"]+"workers",
1986
- "type" => "server_pool",
1987
- }
1511
+ MU::Config.addDependency(cluster, cluster["name"]+"workers", "server_pool")
1988
1512
  end
1989
1513
 
1990
1514
  end
@@ -2006,11 +1530,7 @@ MU.log c.name, MU::NOTICE, details: t
2006
1530
  role["tags"] = cluster["tags"] if !cluster["tags"].nil?
2007
1531
  role["optional_tags"] = cluster["optional_tags"] if !cluster["optional_tags"].nil?
2008
1532
  configurator.insertKitten(role, "roles")
2009
- cluster['dependencies'] << {
2010
- "type" => "role",
2011
- "name" => cluster["name"]+"controlplane",
2012
- "phase" => "groom"
2013
- }
1533
+ MU::Config.addDependency(cluster, cluster["name"]+"controlplane", "role", phase: "groom")
2014
1534
  end
2015
1535
 
2016
1536
  ok
@@ -2033,28 +1553,467 @@ MU.log c.name, MU::NOTICE, details: t
2033
1553
  sleep 10
2034
1554
  retry
2035
1555
  end
2036
- sleep 5
2037
- retries = 0
2038
- begin
2039
- begin
1556
+
1557
+ loop_if = Proc.new {
2040
1558
  check = MU::Cloud::AWS.eks(region: region, credentials: credentials).describe_fargate_profile(
2041
1559
  cluster_name: cluster,
2042
1560
  fargate_profile_name: profile
2043
1561
  )
2044
- rescue Aws::EKS::Errors::ResourceNotFoundException
2045
- break
2046
- end
1562
+ check.fargate_profile.status == "DELETING"
1563
+ }
2047
1564
 
1565
+ MU.retrier(ignoreme: [Aws::EKS::Errors::ResourceNotFoundException], wait: 30, max: 40, loop_if: loop_if) { |retries, _wait|
2048
1566
  if check.fargate_profile.status != "DELETING"
2049
- MU.log "Failed to delete Fargate EKS profile #{profile}", MU::ERR, details: check
2050
1567
  break
2051
- end
2052
- if retries > 0 and (retries % 3) == 0
1568
+ elsif retries > 0 and (retries % 3) == 0
2053
1569
  MU.log "Waiting for Fargate EKS profile #{profile} to delete (status #{check.fargate_profile.status})", MU::NOTICE
2054
1570
  end
2055
- sleep 30
2056
- retries += 1
2057
- end while retries < 40
1571
+ }
1572
+ end
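The hand-rolled sleep/retry loop for Fargate profile deletion is replaced by MU.retrier with a loop_if predicate. A rough mental model of the retrier as used here, assuming it yields the retry count, re-evaluates loop_if between passes, and swallows the exception classes in ignoreme (an illustrative sketch, not the real helper in modules/mu.rb):

    def retrier_sketch(ignoreme: [], wait: 30, max: 40, loop_if: nil)
      retries = 0
      loop do
        begin
          yield(retries, wait) if block_given?
          break unless loop_if && loop_if.call   # stop once the predicate goes false
        rescue *ignoreme
          break                                  # e.g. the profile is already gone
        end
        retries += 1
        break if retries >= max
        sleep wait
      end
    end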
1573
+
1574
+ private
1575
+
1576
+ def apply_kubernetes_resources
1577
+ kube = ERB.new(File.read(MU.myRoot+"/cookbooks/mu-tools/templates/default/kubeconfig-eks.erb"))
1578
+ configmap = ERB.new(File.read(MU.myRoot+"/extras/aws-auth-cm.yaml.erb"))
1579
+ @endpoint = cloud_desc.endpoint
1580
+ @cacert = cloud_desc.certificate_authority.data
1581
+ @cluster = @mu_name
1582
+ if @config['flavor'] != "Fargate"
1583
+ resp = MU::Cloud::AWS.iam(credentials: @config['credentials']).get_role(role_name: @mu_name+"WORKERS")
1584
+ @worker_role_arn = resp.role.arn
1585
+ end
1586
+ kube_conf = @deploy.deploy_dir+"/kubeconfig-#{@config['name']}"
1587
+ gitlab_helper = @deploy.deploy_dir+"/gitlab-eks-helper-#{@config['name']}.sh"
1588
+
1589
+ File.open(kube_conf, "w"){ |k|
1590
+ k.puts kube.result(binding)
1591
+ }
1592
+ gitlab = ERB.new(File.read(MU.myRoot+"/extras/gitlab-eks-helper.sh.erb"))
1593
+ File.open(gitlab_helper, "w"){ |k|
1594
+ k.puts gitlab.result(binding)
1595
+ }
1596
+
1597
+ if @config['flavor'] != "Fargate"
1598
+ eks_auth = @deploy.deploy_dir+"/eks-auth-cm-#{@config['name']}.yaml"
1599
+ File.open(eks_auth, "w"){ |k|
1600
+ k.puts configmap.result(binding)
1601
+ }
1602
+ authmap_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{eks_auth}"}
1603
+
1604
+ MU.log "Configuring Kubernetes <=> IAM mapping for worker nodes", MU::NOTICE, details: authmap_cmd
1605
+
1606
+ MU.retrier(max: 10, wait: 10, loop_if: Proc.new {$?.exitstatus != 0}){
1607
+ puts %x{#{authmap_cmd}}
1608
+ }
1609
+ raise MuError, "Failed to apply #{authmap_cmd}" if $?.exitstatus != 0
1610
+ end
1611
+
1612
+ if MU::Master.kubectl
1613
+ admin_user_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{MU.myRoot}/extras/admin-user.yaml"}
1614
+ admin_role_cmd = %Q{#{MU::Master.kubectl} --kubeconfig "#{kube_conf}" apply -f "#{MU.myRoot}/extras/admin-role-binding.yaml"}
1615
+ MU.log "Configuring Kubernetes admin-user and role", MU::NOTICE, details: admin_user_cmd+"\n"+admin_role_cmd
1616
+ %x{#{admin_user_cmd}}
1617
+ %x{#{admin_role_cmd}}
1618
+
1619
+ if @config['kubernetes_resources']
1620
+ MU::Master.applyKubernetesResources(
1621
+ @config['name'],
1622
+ @config['kubernetes_resources'],
1623
+ kubeconfig: kube_conf,
1624
+ outputdir: @deploy.deploy_dir
1625
+ )
1626
+ end
1627
+ end
1628
+
1629
+ MU.log %Q{How to interact with your EKS cluster\nkubectl --kubeconfig "#{kube_conf}" get all\nkubectl --kubeconfig "#{kube_conf}" create -f some_k8s_deploy.yml\nkubectl --kubeconfig "#{kube_conf}" get nodes}, MU::SUMMARY
1630
+ end
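apply_kubernetes_resources hands any user-supplied manifests in the kubernetes_resources stanza to MU::Master.applyKubernetesResources via the generated kubeconfig. A hypothetical stanza, expressed here as a Ruby hash purely to illustrate the shape this method expects (names and the manifest itself are placeholders):

    cluster_config = {
      "name"   => "mycluster",                   # placeholder
      "flavor" => "EKS",
      "kubernetes_resources" => [
        {
          "apiVersion" => "v1",
          "kind"       => "Namespace",
          "metadata"   => { "name" => "my-app" } # placeholder manifest
        }
      ]
    }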
1631
+
1632
+ def create_fargate_kubernetes_profile
1633
+ fargate_subnets = mySubnets.map { |s| s.cloud_id }
1634
+
1635
+ podrole_arn = @deploy.findLitterMate(name: @config['name']+"pods", type: "roles").arn
1636
+ poolnum = 0
1637
+
1638
+ @config['kubernetes_pools'].each { |selectors|
1639
+ profname = @mu_name+"-"+poolnum.to_s
1640
+ poolnum += 1
1641
+ desc = {
1642
+ :fargate_profile_name => profname,
1643
+ :cluster_name => @mu_name,
1644
+ :pod_execution_role_arn => podrole_arn,
1645
+ :selectors => selectors,
1646
+ :subnets => fargate_subnets.sort,
1647
+ :tags => @tags
1648
+ }
1649
+ begin
1650
+ resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_fargate_profile(
1651
+ cluster_name: @mu_name,
1652
+ fargate_profile_name: profname
1653
+ )
1654
+ if resp and resp.fargate_profile
1655
+ old_desc = MU.structToHash(resp.fargate_profile, stringify_keys: true)
1656
+ new_desc = MU.structToHash(desc, stringify_keys: true)
1657
+ ["created_at", "status", "fargate_profile_arn"].each { |k|
1658
+ old_desc.delete(k)
1659
+ }
1660
+ old_desc["subnets"].sort!
1661
+ if !old_desc.eql?(new_desc)
1662
+ MU.log "Deleting Fargate profile #{profname} in order to apply changes", MU::WARN, details: desc
1663
+ MU::Cloud::AWS::ContainerCluster.purge_fargate_profile(profname, @mu_name, @config['region'], @credentials)
1664
+ else
1665
+ next
1666
+ end
1667
+ end
1668
+ rescue Aws::EKS::Errors::ResourceNotFoundException
1669
+ # This is just fine!
1670
+ end
1671
+ MU.log "Creating EKS Fargate profile #{profname}", details: desc
1672
+ resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).create_fargate_profile(desc)
1673
+ begin
1674
+ resp = MU::Cloud::AWS.eks(region: @config['region'], credentials: @config['credentials']).describe_fargate_profile(
1675
+ cluster_name: @mu_name,
1676
+ fargate_profile_name: profname
1677
+ )
1678
+ sleep 1 if resp.fargate_profile.status == "CREATING"
1679
+ end while resp.fargate_profile.status == "CREATING"
1680
+ MU.log "Creation of EKS Fargate profile #{profname} complete"
1681
+ }
1682
+ end
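create_fargate_kubernetes_profile builds one EKS Fargate profile per entry in kubernetes_pools, recreating a profile only when its selectors, role, or subnets drift. The underlying API call, stripped down to a standalone sketch with placeholder names and IDs:

    require 'aws-sdk-eks'

    eks = Aws::EKS::Client.new(region: "us-east-1")    # placeholder region
    eks.create_fargate_profile(
      fargate_profile_name: "mycluster-0",             # placeholder
      cluster_name: "mycluster",                       # placeholder
      pod_execution_role_arn: "arn:aws:iam::111122223333:role/example-pods", # placeholder
      selectors: [{ namespace: "default", labels: { "app" => "frontend" } }],
      subnets: ["subnet-0abc0abc0abc0abc0", "subnet-0def0def0def0def0"]      # placeholders
    )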
1683
+
1684
+ def self.remove_kubernetes_tags(cluster, desc, region: MU.myRegion, credentials: nil, noop: false)
1685
+ untag = []
1686
+ untag << desc.resources_vpc_config.vpc_id
1687
+ subnets = MU::Cloud::AWS.ec2(credentials: credentials, region: region).describe_subnets(
1688
+ filters: [ { name: "vpc-id", values: [desc.resources_vpc_config.vpc_id] } ]
1689
+ ).subnets
1690
+
1691
+ # subnets
1692
+ untag.concat(subnets.map { |s| s.subnet_id } )
1693
+ rtbs = MU::Cloud::AWS.ec2(credentials: credentials, region: region).describe_route_tables(
1694
+ filters: [ { name: "vpc-id", values: [desc.resources_vpc_config.vpc_id] } ]
1695
+ ).route_tables
1696
+ untag.concat(rtbs.map { |r| r.route_table_id } )
1697
+ untag.concat(desc.resources_vpc_config.subnet_ids)
1698
+ untag.concat(desc.resources_vpc_config.security_group_ids)
1699
+ MU.log "Removing Kubernetes tags from VPC resources for #{cluster}", details: untag
1700
+ if !noop
1701
+ MU::Cloud::AWS.removeTag("kubernetes.io/cluster/#{cluster}", "shared", untag)
1702
+ MU::Cloud::AWS.removeTag("kubernetes.io/cluster/elb", cluster, untag)
1703
+ end
1704
+ end
1705
+ private_class_method :remove_kubernetes_tags
1706
+
1707
+ def apply_kubernetes_tags
1708
+ tagme = [@vpc.cloud_id]
1709
+ tagme_elb = []
1710
+ @vpc.subnets.each { |s|
1711
+ tagme << s.cloud_id
1712
+ tagme_elb << s.cloud_id if !s.private?
1713
+ }
1714
+ rtbs = MU::Cloud::AWS.ec2(region: @config['region'], credentials: @config['credentials']).describe_route_tables(
1715
+ filters: [ { name: "vpc-id", values: [@vpc.cloud_id] } ]
1716
+ ).route_tables
1717
+ tagme.concat(rtbs.map { |r| r.route_table_id } )
1718
+ main_sg = @deploy.findLitterMate(type: "firewall_rules", name: "server_pool#{@config['name']}workers")
1719
+ tagme << main_sg.cloud_id if main_sg
1720
+ MU.log "Applying kubernetes.io tags to VPC resources", details: tagme
1721
+ MU::Cloud::AWS.createTag(tagme, "kubernetes.io/cluster/#{@mu_name}", "shared", credentials: @config['credentials'])
1722
+ MU::Cloud::AWS.createTag(tagme_elb, "kubernetes.io/cluster/elb", @mu_name, credentials: @config['credentials'])
1723
+ end
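apply_kubernetes_tags marks the VPC, subnets, route tables, and worker security group with the kubernetes.io/cluster/<name> tag that EKS and its load balancers look for. The same effect with the bare EC2 API, for reference (all IDs and the cluster name are placeholders):

    require 'aws-sdk-ec2'

    ec2 = Aws::EC2::Client.new(region: "us-east-1")    # placeholder region
    ec2.create_tags(
      resources: ["vpc-0abc", "subnet-0def", "rtb-0123"],                  # placeholders
      tags: [{ key: "kubernetes.io/cluster/mycluster", value: "shared" }]
    )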
1724
+
1725
+ def manage_ecs_workers
1726
+ resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).list_container_instances({
1727
+ cluster: @mu_name
1728
+ })
1729
+ existing = {}
1730
+ if resp
1731
+ uuids = []
1732
+ resp.container_instance_arns.each { |arn|
1733
+ uuids << arn.sub(/^.*?:container-instance\//, "")
1734
+ }
1735
+ if uuids.size > 0
1736
+ resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).describe_container_instances({
1737
+ cluster: @mu_name,
1738
+ container_instances: uuids
1739
+ })
1740
+ resp.container_instances.each { |i|
1741
+ existing[i.ec2_instance_id] = i
1742
+ }
1743
+ end
1744
+ end
1745
+
1746
+ threads = []
1747
+ resource_lookup = MU::Cloud::AWS.listInstanceTypes(@config['region'])[@config['region']]
1748
+ serverpool = if ['EKS', 'ECS'].include?(@config['flavor'])
1749
+ @deploy.findLitterMate(type: "server_pools", name: @config["name"]+"workers")
1750
+ end
1751
+ serverpool.listNodes.each { |mynode|
1752
+ resources = resource_lookup[mynode.cloud_desc.instance_type]
1753
+ threads << Thread.new(mynode) { |node|
1754
+ ident_doc = nil
1755
+ ident_doc_sig = nil
1756
+ if !node.windows?
1757
+ session = node.getSSHSession(10, 30)
1758
+ ident_doc = session.exec!("curl -s http://169.254.169.254/latest/dynamic/instance-identity/document/")
1759
+ ident_doc_sig = session.exec!("curl -s http://169.254.169.254/latest/dynamic/instance-identity/signature/")
1760
+ # else
1761
+ # begin
1762
+ # session = node.getWinRMSession(1, 60)
1763
+ # rescue StandardError # XXX
1764
+ # session = node.getSSHSession(1, 60)
1765
+ # end
1766
+ end
1767
+ MU.log "Identity document for #{node}", MU::DEBUG, details: ident_doc
1768
+ MU.log "Identity document signature for #{node}", MU::DEBUG, details: ident_doc_sig
1769
+ params = {
1770
+ :cluster => @mu_name,
1771
+ :instance_identity_document => ident_doc,
1772
+ :instance_identity_document_signature => ident_doc_sig,
1773
+ :total_resources => [
1774
+ {
1775
+ :name => "CPU",
1776
+ :type => "INTEGER",
1777
+ :integer_value => resources["vcpu"].to_i
1778
+ },
1779
+ {
1780
+ :name => "MEMORY",
1781
+ :type => "INTEGER",
1782
+ :integer_value => (resources["memory"]*1024*1024).to_i
1783
+ }
1784
+ ]
1785
+ }
1786
+ if !existing.has_key?(node.cloud_id)
1787
+ MU.log "Registering ECS instance #{node} in cluster #{@mu_name}", details: params
1788
+ else
1789
+ params[:container_instance_arn] = existing[node.cloud_id].container_instance_arn
1790
+ MU.log "Updating ECS instance #{node} in cluster #{@mu_name}", MU::NOTICE, details: params
1791
+ end
1792
+ MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).register_container_instance(params)
1793
+
1794
+ }
1795
+ }
1796
+ threads.each { |t|
1797
+ t.join
1798
+ }
1799
+ end
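manage_ecs_workers SSHes to each worker and curls the instance metadata service for the identity document and signature that register_container_instance requires. Run locally on an instance, the same lookup is just two IMDSv1 GETs (sketch; the paths are the ones used above):

    require 'net/http'

    base = "http://169.254.169.254/latest/dynamic/instance-identity"
    ident_doc = Net::HTTP.get(URI("#{base}/document/"))
    ident_sig = Net::HTTP.get(URI("#{base}/signature/"))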
1800
+
1801
+ def get_ecs_loadbalancers(container_name)
1802
+ lbs = []
1803
+
1804
+ if @loadbalancers and !@loadbalancers.empty?
1805
+ @loadbalancers.each {|lb|
1806
+ MU.log "Mapping LB #{lb.mu_name} to service #{container_name}", MU::INFO
1807
+ if lb.cloud_desc.type != "classic"
1808
+ elb_groups = MU::Cloud::AWS.elb2(region: @config['region'], credentials: @config['credentials']).describe_target_groups({
1809
+ load_balancer_arn: lb.cloud_desc.load_balancer_arn
1810
+ })
1811
+ matching_target_groups = []
1812
+ elb_groups.target_groups.each { |tg|
1813
+ if tg.port.to_i == lb['container_port'].to_i
1814
+ matching_target_groups << {
1815
+ arn: tg['target_group_arn'],
1816
+ name: tg['target_group_name']
1817
+ }
1818
+ end
1819
+ }
1820
+ if matching_target_groups.length >= 1
1821
+ MU.log "Found #{matching_target_groups.length} matching target group(s) on LB #{lb.mu_name}; mapping #{container_name} to target group #{matching_target_groups.first[:name]}", MU::INFO
1822
+ lbs << {
1823
+ container_name: container_name,
1824
+ container_port: lb['container_port'],
1825
+ target_group_arn: matching_target_groups.first[:arn]
1826
+ }
1827
+ else
1828
+ raise MuError, "No matching target groups found on LB #{lb.mu_name} for #{container_name}"
1829
+ end
1830
+ elsif @config['flavor'] == "Fargate" && lb.cloud_desc.type == "classic"
1831
+ raise MuError, "Classic Load Balancers are not supported with Fargate."
1832
+ else
1833
+ MU.log "Mapping Classic LB #{lb.mu_name} to service #{container_name}", MU::INFO
1834
+ lbs << {
1835
+ container_name: container_name,
1836
+ container_port: lb['container_port'],
1837
+ load_balancer_name: lb.mu_name
1838
+ }
1839
+ end
1840
+ }
1841
+ end
1842
+
1843
+ lbs
1844
+ end
1845
+
1846
+ def get_ecs_container_definitions(containers)
1847
+ role_arn = nil
1848
+ lbs = []
1849
+
1850
+ defs = containers.map { |c|
1851
+ container_name = @mu_name+"-"+c['name'].upcase
1852
+ lbs.concat(get_ecs_loadbalancers(container_name))
1853
+
1854
+ if c["role"] and !role_arn
1855
+ found = MU::MommaCat.findStray(
1856
+ @config['cloud'],
1857
+ "role",
1858
+ cloud_id: c["role"]["id"],
1859
+ name: c["role"]["name"],
1860
+ deploy_id: c["role"]["deploy_id"] || @deploy.deploy_id,
1861
+ dummy_ok: false
1862
+ )
1863
+ if found
1864
+ found = found.first
1865
+ if found and found.cloudobj
1866
+ role_arn = found.cloudobj.arn
1867
+ end
1868
+ else
1869
+ raise MuError, "Unable to find execution role from #{c["role"]}"
1870
+ end
1871
+ end
1872
+
1873
+ params = {
1874
+ name: container_name,
1875
+ image: c['image'],
1876
+ memory: c['memory'],
1877
+ cpu: c['cpu']
1878
+ }
1879
+ if !@config['vpc']
1880
+ c['hostname'] ||= @mu_name+"-"+c['name'].upcase
1881
+ end
1882
+ [:essential, :hostname, :start_timeout, :stop_timeout, :user, :working_directory, :disable_networking, :privileged, :readonly_root_filesystem, :interactive, :pseudo_terminal, :links, :entry_point, :command, :dns_servers, :dns_search_domains, :docker_security_options, :port_mappings, :repository_credentials, :mount_points, :environment, :volumes_from, :secrets, :depends_on, :extra_hosts, :docker_labels, :ulimits, :system_controls, :health_check, :resource_requirements].each { |param|
1883
+ if c.has_key?(param.to_s)
1884
+ params[param] = if !c[param.to_s].nil? and (c[param.to_s].is_a?(Hash) or c[param.to_s].is_a?(Array))
1885
+ MU.strToSym(c[param.to_s])
1886
+ else
1887
+ c[param.to_s]
1888
+ end
1889
+ end
1890
+ }
1891
+ if @config['vpc']
1892
+ [:hostname, :dns_servers, :dns_search_domains, :links].each { |param|
1893
+ if params[param]
1894
+ MU.log "Container parameter #{param.to_s} not supported in VPC clusters, ignoring", MU::WARN
1895
+ params.delete(param)
1896
+ end
1897
+ }
1898
+ end
1899
+ if @config['flavor'] == "Fargate"
1900
+ [:privileged, :docker_security_options].each { |param|
1901
+ if params[param]
1902
+ MU.log "Container parameter #{param.to_s} not supported in Fargate clusters, ignoring", MU::WARN
1903
+ params.delete(param)
1904
+ end
1905
+ }
1906
+ end
1907
+ if c['log_configuration']
1908
+ log_obj = @deploy.findLitterMate(name: c['log_configuration']['options']['awslogs-group'], type: "logs")
1909
+ if log_obj
1910
+ c['log_configuration']['options']['awslogs-group'] = log_obj.mu_name
1911
+ end
1912
+ params[:log_configuration] = MU.strToSym(c['log_configuration'])
1913
+ end
1914
+ params
1915
+ }
1916
+
1917
+ [defs, role_arn, lbs]
1918
+ end
1919
+
1920
+ def register_ecs_task(container_definitions, service_name, cpu_total = 2, mem_total = 2, role_arn: nil)
1921
+ task_params = {
1922
+ family: @deploy.deploy_id,
1923
+ container_definitions: container_definitions,
1924
+ requires_compatibilities: [@config['flavor'] == "ECS" ? "EC2" : "FARGATE"]
1925
+ }
1926
+
1927
+ if @config['volumes']
1928
+ task_params[:volumes] = []
1929
+ @config['volumes'].each { |v|
1930
+ vol = { :name => v['name'] }
1931
+ if v['type'] == "host"
1932
+ vol[:host] = {}
1933
+ if v['host_volume_source_path']
1934
+ vol[:host][:source_path] = v['host_volume_source_path']
1935
+ end
1936
+ elsif v['type'] == "docker"
1937
+ vol[:docker_volume_configuration] = MU.strToSym(v['docker_volume_configuration'])
1938
+ else
1939
+ raise MuError, "Invalid volume type '#{v['type']}' specified in ContainerCluster '#{@mu_name}'"
1940
+ end
1941
+ task_params[:volumes] << vol
1942
+ }
1943
+ end
1944
+
1945
+ if role_arn
1946
+ task_params[:execution_role_arn] = role_arn
1947
+ task_params[:task_role_arn] = role_arn
1948
+ end
1949
+ if @config['flavor'] == "Fargate"
1950
+ task_params[:network_mode] = "awsvpc"
1951
+ task_params[:cpu] = cpu_total.to_i.to_s
1952
+ task_params[:memory] = mem_total.to_i.to_s
1953
+ elsif @config['vpc']
1954
+ task_params[:network_mode] = "awsvpc"
1955
+ end
1956
+
1957
+ MU.log "Registering task definition #{service_name} with #{container_definitions.size.to_s} containers"
1958
+
1959
+ # XXX this helpfully keeps revisions, but let's compare anyway and avoid cluttering with identical ones
1960
+ resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).register_task_definition(task_params)
1961
+
1962
+ resp.task_definition.task_definition_arn
1963
+ end
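register_ecs_task folds the per-container definitions, volumes, and (for Fargate) task-level cpu/memory strings into a single register_task_definition call. A minimal Fargate-flavored registration against the raw SDK looks roughly like this (every name and size is a placeholder):

    require 'aws-sdk-ecs'

    ecs = Aws::ECS::Client.new(region: "us-east-1")    # placeholder region
    resp = ecs.register_task_definition(
      family: "mydeploy",                              # placeholder
      requires_compatibilities: ["FARGATE"],
      network_mode: "awsvpc",
      cpu: "256",                                      # Fargate takes these as strings
      memory: "512",
      container_definitions: [
        { name: "web", image: "nginx:stable", essential: true,
          port_mappings: [{ container_port: 80 }] }
      ]
    )
    puts resp.task_definition.task_definition_arn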
1964
+
1965
+ def list_ecs_services
1966
+ svc_resp = nil
1967
+ MU.retrier([Aws::ECS::Errors::ClusterNotFoundException], wait: 5, max: 10){
1968
+ svc_resp = MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).list_services(
1969
+ cluster: arn
1970
+ )
1971
+ }
1972
+
1973
+ svc_resp.service_arns.map { |s|
1974
+ s.gsub(/.*?:service\/(.*)/, '\1')
1975
+ }
1976
+ end
1977
+
1978
+ def create_update_ecs_service(task_def, service_name, lbs, existing_svcs)
1979
+ service_params = {
1980
+ :cluster => @mu_name,
1981
+ :desired_count => @config['instance_count'], # XXX this makes no sense
1982
+ :service_name => service_name,
1983
+ :launch_type => @config['flavor'] == "ECS" ? "EC2" : "FARGATE",
1984
+ :task_definition => task_def,
1985
+ :load_balancers => lbs
1986
+ }
1987
+ if @config['vpc']
1988
+ subnet_ids = []
1989
+ all_public = true
1990
+
1991
+ mySubnets.each { |subnet|
1992
+ subnet_ids << subnet.cloud_id
1993
+ all_public = false if subnet.private?
1994
+ }
1995
+
1996
+ service_params[:network_configuration] = {
1997
+ :awsvpc_configuration => {
1998
+ :subnets => subnet_ids,
1999
+ :security_groups => myFirewallRules.map { |fw| fw.cloud_id },
2000
+ :assign_public_ip => all_public ? "ENABLED" : "DISABLED"
2001
+ }
2002
+ }
2003
+ end
2004
+
2005
+ if !existing_svcs.include?(service_name)
2006
+ MU.log "Creating Service #{service_name}"
2007
+
2008
+ MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).create_service(service_params)
2009
+ else
2010
+ service_params[:service] = service_params[:service_name].dup
2011
+ service_params.delete(:service_name)
2012
+ service_params.delete(:launch_type)
2013
+ MU.log "Updating Service #{service_name}", MU::NOTICE, details: service_params
2014
+
2015
+ MU::Cloud::AWS.ecs(region: @config['region'], credentials: @config['credentials']).update_service(service_params)
2016
+ end
2058
2017
  end
2059
2018
 
2060
2019
  end