cloud-mu 3.1.6 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/bin/mu-adopt +4 -12
  3. data/bin/mu-azure-tests +57 -0
  4. data/bin/mu-cleanup +2 -4
  5. data/bin/mu-configure +37 -1
  6. data/bin/mu-deploy +3 -3
  7. data/bin/mu-findstray-tests +25 -0
  8. data/bin/mu-gen-docs +2 -4
  9. data/bin/mu-run-tests +23 -10
  10. data/cloud-mu.gemspec +2 -2
  11. data/cookbooks/mu-tools/libraries/helper.rb +1 -1
  12. data/cookbooks/mu-tools/recipes/apply_security.rb +14 -14
  13. data/cookbooks/mu-tools/recipes/aws_api.rb +9 -0
  14. data/extras/generate-stock-images +1 -0
  15. data/modules/mu.rb +82 -95
  16. data/modules/mu/adoption.rb +356 -56
  17. data/modules/mu/cleanup.rb +21 -20
  18. data/modules/mu/cloud.rb +79 -1753
  19. data/modules/mu/cloud/database.rb +49 -0
  20. data/modules/mu/cloud/dnszone.rb +46 -0
  21. data/modules/mu/cloud/machine_images.rb +212 -0
  22. data/modules/mu/cloud/providers.rb +81 -0
  23. data/modules/mu/cloud/resource_base.rb +920 -0
  24. data/modules/mu/cloud/server.rb +40 -0
  25. data/modules/mu/cloud/server_pool.rb +1 -0
  26. data/modules/mu/cloud/ssh_sessions.rb +228 -0
  27. data/modules/mu/cloud/winrm_sessions.rb +237 -0
  28. data/modules/mu/cloud/wrappers.rb +165 -0
  29. data/modules/mu/config.rb +122 -80
  30. data/modules/mu/config/alarm.rb +2 -6
  31. data/modules/mu/config/bucket.rb +1 -1
  32. data/modules/mu/config/cache_cluster.rb +1 -1
  33. data/modules/mu/config/collection.rb +1 -1
  34. data/modules/mu/config/container_cluster.rb +2 -2
  35. data/modules/mu/config/database.rb +83 -104
  36. data/modules/mu/config/database.yml +1 -2
  37. data/modules/mu/config/dnszone.rb +1 -1
  38. data/modules/mu/config/doc_helpers.rb +4 -5
  39. data/modules/mu/config/endpoint.rb +1 -1
  40. data/modules/mu/config/firewall_rule.rb +3 -19
  41. data/modules/mu/config/folder.rb +1 -1
  42. data/modules/mu/config/function.rb +1 -1
  43. data/modules/mu/config/group.rb +1 -1
  44. data/modules/mu/config/habitat.rb +1 -1
  45. data/modules/mu/config/loadbalancer.rb +57 -11
  46. data/modules/mu/config/log.rb +1 -1
  47. data/modules/mu/config/msg_queue.rb +1 -1
  48. data/modules/mu/config/nosqldb.rb +1 -1
  49. data/modules/mu/config/notifier.rb +1 -1
  50. data/modules/mu/config/ref.rb +30 -4
  51. data/modules/mu/config/role.rb +1 -1
  52. data/modules/mu/config/schema_helpers.rb +30 -34
  53. data/modules/mu/config/search_domain.rb +1 -1
  54. data/modules/mu/config/server.rb +4 -12
  55. data/modules/mu/config/server_pool.rb +3 -7
  56. data/modules/mu/config/storage_pool.rb +1 -1
  57. data/modules/mu/config/tail.rb +10 -0
  58. data/modules/mu/config/user.rb +1 -1
  59. data/modules/mu/config/vpc.rb +12 -17
  60. data/modules/mu/defaults/AWS.yaml +32 -32
  61. data/modules/mu/defaults/Azure.yaml +1 -0
  62. data/modules/mu/defaults/Google.yaml +1 -0
  63. data/modules/mu/deploy.rb +16 -15
  64. data/modules/mu/groomer.rb +15 -0
  65. data/modules/mu/groomers/chef.rb +3 -0
  66. data/modules/mu/logger.rb +120 -144
  67. data/modules/mu/master.rb +1 -1
  68. data/modules/mu/mommacat.rb +54 -25
  69. data/modules/mu/mommacat/daemon.rb +10 -7
  70. data/modules/mu/mommacat/naming.rb +82 -3
  71. data/modules/mu/mommacat/search.rb +47 -15
  72. data/modules/mu/mommacat/storage.rb +72 -41
  73. data/modules/mu/{clouds → providers}/README.md +1 -1
  74. data/modules/mu/{clouds → providers}/aws.rb +114 -47
  75. data/modules/mu/{clouds → providers}/aws/alarm.rb +1 -1
  76. data/modules/mu/{clouds → providers}/aws/bucket.rb +2 -2
  77. data/modules/mu/{clouds → providers}/aws/cache_cluster.rb +10 -46
  78. data/modules/mu/{clouds → providers}/aws/collection.rb +3 -3
  79. data/modules/mu/{clouds → providers}/aws/container_cluster.rb +15 -33
  80. data/modules/mu/providers/aws/database.rb +1744 -0
  81. data/modules/mu/{clouds → providers}/aws/dnszone.rb +2 -5
  82. data/modules/mu/{clouds → providers}/aws/endpoint.rb +2 -11
  83. data/modules/mu/{clouds → providers}/aws/firewall_rule.rb +33 -29
  84. data/modules/mu/{clouds → providers}/aws/folder.rb +0 -0
  85. data/modules/mu/{clouds → providers}/aws/function.rb +2 -10
  86. data/modules/mu/{clouds → providers}/aws/group.rb +9 -13
  87. data/modules/mu/{clouds → providers}/aws/habitat.rb +1 -1
  88. data/modules/mu/{clouds → providers}/aws/loadbalancer.rb +41 -33
  89. data/modules/mu/{clouds → providers}/aws/log.rb +2 -2
  90. data/modules/mu/{clouds → providers}/aws/msg_queue.rb +2 -8
  91. data/modules/mu/{clouds → providers}/aws/nosqldb.rb +0 -0
  92. data/modules/mu/{clouds → providers}/aws/notifier.rb +0 -0
  93. data/modules/mu/{clouds → providers}/aws/role.rb +7 -7
  94. data/modules/mu/{clouds → providers}/aws/search_domain.rb +8 -13
  95. data/modules/mu/{clouds → providers}/aws/server.rb +55 -90
  96. data/modules/mu/{clouds → providers}/aws/server_pool.rb +10 -33
  97. data/modules/mu/{clouds → providers}/aws/storage_pool.rb +19 -36
  98. data/modules/mu/{clouds → providers}/aws/user.rb +8 -12
  99. data/modules/mu/{clouds → providers}/aws/userdata/README.md +0 -0
  100. data/modules/mu/{clouds → providers}/aws/userdata/linux.erb +0 -0
  101. data/modules/mu/{clouds → providers}/aws/userdata/windows.erb +0 -0
  102. data/modules/mu/{clouds → providers}/aws/vpc.rb +135 -70
  103. data/modules/mu/{clouds → providers}/aws/vpc_subnet.rb +0 -0
  104. data/modules/mu/{clouds → providers}/azure.rb +4 -1
  105. data/modules/mu/{clouds → providers}/azure/container_cluster.rb +1 -5
  106. data/modules/mu/{clouds → providers}/azure/firewall_rule.rb +8 -1
  107. data/modules/mu/{clouds → providers}/azure/habitat.rb +0 -0
  108. data/modules/mu/{clouds → providers}/azure/loadbalancer.rb +0 -0
  109. data/modules/mu/{clouds → providers}/azure/role.rb +0 -0
  110. data/modules/mu/{clouds → providers}/azure/server.rb +30 -23
  111. data/modules/mu/{clouds → providers}/azure/user.rb +1 -1
  112. data/modules/mu/{clouds → providers}/azure/userdata/README.md +0 -0
  113. data/modules/mu/{clouds → providers}/azure/userdata/linux.erb +0 -0
  114. data/modules/mu/{clouds → providers}/azure/userdata/windows.erb +0 -0
  115. data/modules/mu/{clouds → providers}/azure/vpc.rb +4 -6
  116. data/modules/mu/{clouds → providers}/cloudformation.rb +1 -1
  117. data/modules/mu/{clouds → providers}/cloudformation/alarm.rb +3 -3
  118. data/modules/mu/{clouds → providers}/cloudformation/cache_cluster.rb +3 -3
  119. data/modules/mu/{clouds → providers}/cloudformation/collection.rb +3 -3
  120. data/modules/mu/{clouds → providers}/cloudformation/database.rb +6 -17
  121. data/modules/mu/{clouds → providers}/cloudformation/dnszone.rb +3 -3
  122. data/modules/mu/{clouds → providers}/cloudformation/firewall_rule.rb +3 -3
  123. data/modules/mu/{clouds → providers}/cloudformation/loadbalancer.rb +3 -3
  124. data/modules/mu/{clouds → providers}/cloudformation/log.rb +3 -3
  125. data/modules/mu/{clouds → providers}/cloudformation/server.rb +7 -7
  126. data/modules/mu/{clouds → providers}/cloudformation/server_pool.rb +5 -5
  127. data/modules/mu/{clouds → providers}/cloudformation/vpc.rb +3 -3
  128. data/modules/mu/{clouds → providers}/docker.rb +0 -0
  129. data/modules/mu/{clouds → providers}/google.rb +14 -6
  130. data/modules/mu/{clouds → providers}/google/bucket.rb +1 -1
  131. data/modules/mu/{clouds → providers}/google/container_cluster.rb +28 -13
  132. data/modules/mu/{clouds → providers}/google/database.rb +1 -8
  133. data/modules/mu/{clouds → providers}/google/firewall_rule.rb +2 -2
  134. data/modules/mu/{clouds → providers}/google/folder.rb +4 -8
  135. data/modules/mu/{clouds → providers}/google/function.rb +3 -3
  136. data/modules/mu/{clouds → providers}/google/group.rb +8 -16
  137. data/modules/mu/{clouds → providers}/google/habitat.rb +3 -7
  138. data/modules/mu/{clouds → providers}/google/loadbalancer.rb +1 -1
  139. data/modules/mu/{clouds → providers}/google/role.rb +42 -34
  140. data/modules/mu/{clouds → providers}/google/server.rb +25 -10
  141. data/modules/mu/{clouds → providers}/google/server_pool.rb +10 -10
  142. data/modules/mu/{clouds → providers}/google/user.rb +31 -21
  143. data/modules/mu/{clouds → providers}/google/userdata/README.md +0 -0
  144. data/modules/mu/{clouds → providers}/google/userdata/linux.erb +0 -0
  145. data/modules/mu/{clouds → providers}/google/userdata/windows.erb +0 -0
  146. data/modules/mu/{clouds → providers}/google/vpc.rb +37 -2
  147. data/modules/tests/centos6.yaml +11 -0
  148. data/modules/tests/centos7.yaml +11 -0
  149. data/modules/tests/centos8.yaml +12 -0
  150. data/modules/tests/rds.yaml +108 -0
  151. data/modules/tests/regrooms/rds.yaml +123 -0
  152. data/spec/mu/clouds/azure_spec.rb +2 -2
  153. metadata +108 -89
  154. data/modules/mu/clouds/aws/database.rb +0 -1974
@@ -0,0 +1,40 @@
1
+ # Copyright:: Copyright (c) 2020 eGlobalTech, Inc., all rights reserved
2
+ #
3
+ # Licensed under the BSD-3 license (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License in the root of the project or at
6
+ #
7
+ # http://egt-labs.com/mu/LICENSE.html
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module MU
16
+ # Plugins under this namespace serve as interfaces to cloud providers and
17
+ # other provisioning layers.
18
+ class Cloud
19
+
20
+ # Generic methods for all Server/ServerPool implementations
21
+ [:Server, :ServerPool].each { |name|
22
+ Object.const_get("MU").const_get("Cloud").const_get(name).class_eval {
23
+
24
# Report whether this node is a Windows machine. The configured platform
# name is consulted first; failing that, we ask the cloud descriptor, if it
# exposes a +platform+ attribute.
# @return [Boolean]
def windows?
  windows_platforms = %w{win2k16 win2k12r2 win2k12 win2k8 win2k8r2 win2k19 windows}
  if windows_platforms.include?(@config['platform'])
    return true
  end

  begin
    # XXX this is AWS-speak, doesn't cover GCP or anything else; maybe we
    # should require cloud layers to implement this so we can just call
    # @cloudobj.windows?
    if cloud_desc.respond_to?(:platform) && cloud_desc.platform == "Windows"
      return true
    end
  rescue MU::MuError
    # Couldn't get a cloud descriptor; treat the node as non-Windows
    return false
  end

  false
end
34
+
35
+ }
36
+ }
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1 @@
1
+ modules/mu/cloud/server.rb
@@ -0,0 +1,228 @@
1
+ # Copyright:: Copyright (c) 2020 eGlobalTech, Inc., all rights reserved
2
+ #
3
+ # Licensed under the BSD-3 license (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License in the root of the project or at
6
+ #
7
+ # http://egt-labs.com/mu/LICENSE.html
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module MU
16
+ # Plugins under this namespace serve as interfaces to cloud providers and
17
+ # other provisioning layers.
18
+ class Cloud
19
+
20
# Non-fatal exception type reserved for transient Net::SSH errors, which
# need special handling because of their obnoxious asynchronous interrupt
# behaviors.
class NetSSHFail < MuNonFatal
end
24
+
25
# Net::SSH exceptions seem to have their own behavior vis a vis threads,
# and our regular call stack gets circumvented when they're thrown. Cheat
# here to catch them gracefully.
#
# As written, this masks asynchronous Net::SSH::Exception delivery on the
# calling thread only for the duration of the nested blocks, and emits one
# DEBUG-level log line each time it is called.
def self.handleNetSSHExceptions
  Thread.handle_interrupt(Net::SSH::Exception => :never) do
    begin
      Thread.handle_interrupt(Net::SSH::Exception => :immediate) do
        current = Thread.current
        MU.log "(Probably harmless) Caught a Net::SSH Exception in #{current.inspect}", MU::DEBUG, details: current.backtrace
      end
    ensure
      # raise NetSSHFail, "Net::SSH had a nutty"
    end
  end
end
39
+
40
+ [:Server, :ServerPool].each { |name|
41
+ Object.const_get("MU").const_get("Cloud").const_get(name).class_eval {
42
+
43
# Basic setup tasks performed on a new node during its first initial
# ssh connection. Most of this is terrible Windows glue.
#
# On Windows: optionally sets the admin password, drops a chef-client
# wrapper into the Cygwin home, waits for Windows Installer to go quiet and
# renames the host (which triggers a reboot). On Linux: waits for the
# package manager and, unless disabled in config, for userdata updates.
#
# @param ssh [Net::SSH::Connection::Session]: The active SSH session to the new node.
# @raise [MU::Cloud::BootstrapTempFail] when the node is not ready yet and the caller should wait and try again
def initialSSHTasks(ssh)
  win_env_fix = %q{echo 'export PATH="$PATH:/cygdrive/c/opscode/chef/embedded/bin"' > "$HOME/chef-client"; echo 'prev_dir="`pwd`"; for __dir in /proc/registry/HKEY_LOCAL_MACHINE/SYSTEM/CurrentControlSet/Control/Session\ Manager/Environment;do cd "$__dir"; for __var in `ls * | grep -v TEMP | grep -v TMP`;do __var=`echo $__var | tr "[a-z]" "[A-Z]"`; test -z "${!__var}" && export $__var="`cat $__var`" >/dev/null 2>&1; done; done; cd "$prev_dir"; /cygdrive/c/opscode/chef/bin/chef-client.bat $@' >> "$HOME/chef-client"; chmod 700 "$HOME/chef-client"; ( grep "^alias chef-client=" "$HOME/.bashrc" || echo 'alias chef-client="$HOME/chef-client"' >> "$HOME/.bashrc" ) ; ( grep "^alias mu-groom=" "$HOME/.bashrc" || echo 'alias mu-groom="powershell -File \"c:/Program Files/Amazon/Ec2ConfigService/Scripts/UserScript.ps1\""' >> "$HOME/.bashrc" )}
  win_installer_check = %q{ls /proc/registry/HKEY_LOCAL_MACHINE/SOFTWARE/Microsoft/Windows/CurrentVersion/Installer/}
  lnx_installer_check = %q{ps auxww | awk '{print $11}' | egrep '(/usr/bin/yum|apt-get|dpkg)'}
  lnx_updates_check = %q{( test -f /.mu-installer-ran-updates || ! test -d /var/lib/cloud/instance ) || echo "userdata still running"}
  win_check_for_pw = nil
  win_set_pw = nil
  is_windows = windows?

  if is_windows && !@config['use_cloud_provider_windows_password']
    # This covers both the case where we have a windows password passed from
    # a vault and where we need to use a random Windows Admin password
    # generated by MU::Cloud::Server.generateWindowsPassword
    pw = @groomer.getSecret(
      vault: @config['mu_name'],
      item: "windows_credentials",
      field: "password"
    )
    # NOTE(review): the password is interpolated into a shell command line
    # unescaped; a password containing quotes would break these commands.
    win_check_for_pw = %Q{powershell -Command '& {Add-Type -AssemblyName System.DirectoryServices.AccountManagement; $Creds = (New-Object System.Management.Automation.PSCredential("#{@config["windows_admin_username"]}", (ConvertTo-SecureString "#{pw}" -AsPlainText -Force)));$DS = New-Object System.DirectoryServices.AccountManagement.PrincipalContext([System.DirectoryServices.AccountManagement.ContextType]::Machine); $DS.ValidateCredentials($Creds.GetNetworkCredential().UserName, $Creds.GetNetworkCredential().password); echo $Result}'}
    win_set_pw = %Q{powershell -Command "& {(([adsi]('WinNT://./#{@config["windows_admin_username"]}, user')).psbase.invoke('SetPassword', '#{pw}'))}"}
  end

  # There shouldn't be a use case where a domain joined computer goes
  # through initialSSHTasks, so there is no Active Directory specific
  # computer rename here; a domain controller may still carry its own name.
  hostname = @mu_windows_name
  ad_cfg = @config['active_directory']
  if !ad_cfg.nil? && ad_cfg['node_type'] == "domain_controller" && ad_cfg['domain_controller_hostname']
    hostname = ad_cfg['domain_controller_hostname']
    @mu_windows_name = hostname
  end
  win_check_for_hostname = %Q{powershell -Command '& {hostname}'}
  win_set_hostname = %Q{powershell -Command "& {Rename-Computer -NewName '#{hostname}' -Force -PassThru -Restart; Restart-Computer -Force }"}
  nil_output_msg = "Got nil output from ssh session, waiting and retrying"

  begin
    if is_windows
      # Set our admin password first, if we need to
      if !win_set_pw.nil? && !win_check_for_pw.nil?
        output = ssh.exec!(win_check_for_pw)
        raise MU::Cloud::BootstrapTempFail, nil_output_msg if output.nil?
        if !output.match(/True/)
          MU.log "Setting Windows password for user #{@config['windows_admin_username']}", details: ssh.exec!(win_set_pw)
        end
      end

      # Check each result for nil *before* concatenating them; a dropped
      # session must surface as a retryable BootstrapTempFail rather than a
      # NoMethodError/TypeError.
      env_output = ssh.exec!(win_env_fix)
      installer_output = ssh.exec!(win_installer_check)
      if env_output.nil? || installer_output.nil?
        raise MU::Cloud::BootstrapTempFail, nil_output_msg
      end
      if (env_output + installer_output).match(/InProgress/)
        raise MU::Cloud::BootstrapTempFail, "Windows Installer service is still doing something, need to wait"
      end

      if !@hostname_set && @mu_windows_name
        output = ssh.exec!(win_check_for_hostname)
        raise MU::Cloud::BootstrapTempFail, nil_output_msg if output.nil?
        if !output.match(/#{@mu_windows_name}/)
          MU.log "Setting Windows hostname to #{@mu_windows_name}", details: ssh.exec!(win_set_hostname)
          @hostname_set = true
          # Reboot from the API too, in case Windows is flailing
          if !@cloudobj.nil?
            @cloudobj.reboot
          else
            reboot
          end
          raise MU::Cloud::BootstrapTempFail, "Set hostname in Windows, waiting for reboot"
        end
      end
    else
      output = ssh.exec!(lnx_installer_check)
      if !output.nil? && !output.empty?
        raise MU::Cloud::BootstrapTempFail, "Linux package manager is still doing something, need to wait (#{output})"
      end
      if !@config['skipinitialupdates'] &&
         !@config['scrub_mu_isms'] &&
         !@config['userdata_script']
        output = ssh.exec!(lnx_updates_check)
        if !output.nil? && output.match(/userdata still running/)
          raise MU::Cloud::BootstrapTempFail, "Waiting for initial userdata system updates to complete"
        end
      end
    end
  rescue RuntimeError => e
    raise MU::Cloud::BootstrapTempFail, "Got #{e.inspect} performing initial SSH connect tasks, will try again"
  end

end
133
+
134
# Open an SSH session to this node, going through a NAT/bastion host when
# getSSHConfig reports one, and retrying on connection-level failures.
# @param max_retries [Integer]: Number of connection attempts to make before giving up
# @param retry_interval [Integer]: Number of seconds to wait between connection attempts
# @return [Net::SSH::Connection::Session]
# @raise [MuError] once max_retries connection attempts have failed
def getSSHSession(max_retries = 12, retry_interval = 30)
  ssh_keydir = Etc.getpwnam(@deploy.mu_user).dir+"/.ssh"
  nat_ssh_key, nat_ssh_user, nat_ssh_host, canonical_ip, ssh_user, _ssh_key_name = getSSHConfig
  session = nil
  retries = 0

  begin
    MU::Cloud.handleNetSSHExceptions
    ssh_opts = {
      :config => false,
      :keys_only => true,
      :keys => [ssh_keydir+"/"+@deploy.ssh_key_name],
      :verify_host_key => false,
#      :verbose => :info,
      :host_key => "ssh-rsa",
      :port => 22,
      :auth_methods => ['publickey']
    }
    if !nat_ssh_host.nil?
      proxy_cmd = "ssh -q -o StrictHostKeyChecking=no -W %h:%p #{nat_ssh_user}@#{nat_ssh_host}"
      MU.log "Attempting SSH to #{canonical_ip} (#{@mu_name}) as #{ssh_user} with key #{@deploy.ssh_key_name} using proxy '#{proxy_cmd}'" if retries == 0
      # The bastion's key is offered first, then our deploy key
      ssh_opts[:keys].unshift(ssh_keydir+"/"+nat_ssh_key)
      ssh_opts[:proxy] = Net::SSH::Proxy::Command.new(proxy_cmd)
    else
      MU.log "Attempting SSH to #{canonical_ip} (#{@mu_name}) as #{ssh_user} with key #{ssh_keydir}/#{@deploy.ssh_key_name}" if retries == 0
    end
    session = Net::SSH.start(canonical_ip, ssh_user, ssh_opts)
  rescue Net::SSH::HostKeyMismatch => e
    MU.log("Remembering new key: #{e.fingerprint}")
    e.remember_host!
    # Net::SSH.start raised, so there is usually no session to close yet
    session.close if !session.nil?
    retry
  rescue SystemExit, Timeout::Error, Net::SSH::AuthenticationFailed, Net::SSH::Disconnect, Net::SSH::ConnectionTimeout, Net::SSH::Proxy::ConnectError, Net::SSH::Exception, Errno::ECONNRESET, Errno::EHOSTUNREACH, Errno::ECONNREFUSED, Errno::EPIPE, SocketError, IOError => e
    begin
      session.close if !session.nil?
    rescue Net::SSH::Disconnect, IOError
      # Deliberately not bound to a variable: the outer exception is the one
      # reported in the retry message and the final raise below.
      if windows?
        MU.log "Windows has probably closed the ssh session before we could. Waiting before trying again", MU::NOTICE
      else
        MU.log "ssh session was closed unexpectedly, waiting before trying again", MU::NOTICE
      end
      sleep 10
    end

    if retries < max_retries
      retries = retries + 1
      msg = "ssh #{ssh_user}@#{@mu_name}: #{e.message}, waiting #{retry_interval}s (attempt #{retries}/#{max_retries})"
      # Float division: with Integer operands the ratio would truncate to 0
      # until the very last attempt.
      progress = retries.to_f / max_retries
      if retries == 1 || (progress <= 0.5 && (retries % 3) == 0)
        MU.log msg, MU::NOTICE
        if !MU::Cloud.resourceClass(@cloud, "VPC").haveRouteToInstance?(cloud_desc, credentials: @credentials) &&
           canonical_ip.match(/(^127\.)|(^192\.168\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^::1$)|(^[fF][cCdD])/) &&
           !nat_ssh_host
          MU.log "Node #{@mu_name} at #{canonical_ip} looks like it's in a private address space, and I don't appear to have a direct route to it. It may not be possible to connect with this routing!", MU::WARN
        end
      elsif progress > 0.5
        # Past the halfway mark, escalate and include the exception
        MU.log msg, MU::WARN, details: e.inspect
      end
      sleep retry_interval
      retry
    else
      raise MuError, "#{@mu_name}: #{e.inspect} trying to connect with SSH, max_retries exceeded", e.backtrace
    end
  end
  return session
end
222
+ }
223
+
224
+ }
225
+
226
+ end
227
+
228
+ end
@@ -0,0 +1,237 @@
1
+ # Copyright:: Copyright (c) 2020 eGlobalTech, Inc., all rights reserved
2
+ #
3
+ # Licensed under the BSD-3 license (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License in the root of the project or at
6
+ #
7
+ # http://egt-labs.com/mu/LICENSE.html
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ autoload :WinRM, "winrm"
16
+
17
+ module MU
18
+ # Plugins under this namespace serve as interfaces to cloud providers and
19
+ # other provisioning layers.
20
+ class Cloud
21
+
22
+ [:Server, :ServerPool].each { |name|
23
+ Object.const_get("MU").const_get("Cloud").const_get(name).class_eval {
24
+
25
# Gracefully message and attempt to accommodate the common transient errors peculiar to Windows nodes
# @param e [Exception]: The exception that we're handling
# @param retries [Integer]: The current number of retries, which we'll increment and pass back to the caller
# @param rebootable_fails [Integer]: The current number of reboot-worthy failures, which we'll increment and pass back to the caller
# @param max_retries [Integer]: Maximum number of retries to attempt; we'll raise an exception if this is exceeded
# @param reboot_on_problems [Boolean]: Whether we should try to reboot a "stuck" machine
# @param retry_interval [Integer]: How many seconds to wait before returning for another attempt
# @return [Array<Integer>]: The updated +retries+ and +rebootable_fails+ counters, in that order
# @raise [MuError] when +retries+ has reached +max_retries+
def handleWindowsFail(e, retries, rebootable_fails, max_retries: 30, reboot_on_problems: false, retry_interval: 45)
  msg = "WinRM connection to https://#{@mu_name}:5986/wsman: #{e.message}, waiting #{retry_interval}s (attempt #{retries}/#{max_retries})"

  # Authorization errors and timeouts are the failures a reboot might cure
  reboot_worthy = e.class.name == "WinRM::WinRMAuthorizationError" || e.message.match(/execution expired/)
  if reboot_worthy && reboot_on_problems
    if rebootable_fails > 0 && (rebootable_fails % 7) == 0
      MU.log "#{@mu_name} still misbehaving, forcing Stop and Start from API", MU::WARN
      reboot(true) # vicious API stop/start
      sleep retry_interval*3
      rebootable_fails = 0
    else
      if rebootable_fails == 5
        MU.log "#{@mu_name} misbehaving, attempting to reboot from API", MU::WARN
        reboot # graceful API restart
        sleep retry_interval*2
      end
      rebootable_fails = rebootable_fails + 1
    end
  end

  if retries >= max_retries
    raise MuError, "#{@mu_name}: #{e.inspect} trying to connect with WinRM, max_retries exceeded", e.backtrace
  end

  # Float division: with Integer operands the ratio is always 0 here, so the
  # WARN escalation for the second half of the attempts would never trigger.
  progress = retries.to_f / max_retries
  if retries == 1 || (progress <= 0.5 && (retries % 3) == 0 && retries != 0)
    MU.log msg, MU::NOTICE
  elsif progress > 0.5
    MU.log msg, MU::WARN, details: e.inspect
  end
  sleep retry_interval

  [retries + 1, rebootable_fails]
end
62
+
63
# Ask a Windows node whether it has a reboot pending, by running a
# Powershell probe that exits non-zero if any of the indicators it checks
# is set.
# @param shell [WinRM::Shells::Powershell]: An active Powershell session; if omitted, one is opened with getWinRMSession. The session is closed before returning either way.
# @return [Boolean]: True if the probe exits with a non-zero status
def windowsRebootPending?(shell = nil)
  shell = getWinRMSession(1, 30) if shell.nil?

#              if (Get-Item "HKLM:/SOFTWARE/Microsoft/Windows/CurrentVersion/WindowsUpdate/Auto Update/RebootRequired" -EA Ignore) { exit 1 }
  cmd = %Q{
    if (Get-ChildItem "HKLM:/Software/Microsoft/Windows/CurrentVersion/Component Based Servicing/RebootPending" -EA Ignore) {
      echo "Component Based Servicing/RebootPending is true"
      exit 1
    }
    if (Get-ItemProperty "HKLM:/SYSTEM/CurrentControlSet/Control/Session Manager" -Name PendingFileRenameOperations -EA Ignore) {
      echo "Control/Session Manager/PendingFileRenameOperations is true"
      exit 1
    }
    try {
      $util = [wmiclass]"\\\\.\\root\\ccm\\clientsdk:CCM_ClientUtilities"
      $status = $util.DetermineIfRebootPending()
      if(($status -ne $null) -and $status.RebootPending){
        echo "WMI says RebootPending is true"
        exit 1
      }
    } catch {
      exit 0
    }
    exit 0
  }

  begin
    shell.run(cmd).exitcode != 0
  ensure
    # Close the session even if the probe itself blew up
    shell.close
  end
end
94
+
95
# Basic setup tasks performed on a new node during its first WinRM
# connection. Most of this is terrible Windows glue: set the admin password
# if needed, install Cygwin with our deploy key authorized, and rename the
# host (forcing a reboot) if its name doesn't match ours.
# @param shell [WinRM::Shells::Powershell]: An active Powershell session to the new node.
def initialWinRMTasks(shell)
  retries = 0
  rebootable_fails = 0
  begin
    if !@config['use_cloud_provider_windows_password']
      pw = @groomer.getSecret(
        vault: @config['mu_name'],
        item: "windows_credentials",
        field: "password"
      )
      win_check_for_pw = %Q{Add-Type -AssemblyName System.DirectoryServices.AccountManagement; $Creds = (New-Object System.Management.Automation.PSCredential("#{@config["windows_admin_username"]}", (ConvertTo-SecureString "#{pw}" -AsPlainText -Force)));$DS = New-Object System.DirectoryServices.AccountManagement.PrincipalContext([System.DirectoryServices.AccountManagement.ContextType]::Machine); $DS.ValidateCredentials($Creds.GetNetworkCredential().UserName, $Creds.GetNetworkCredential().password); echo $Result}
      resp = shell.run(win_check_for_pw)
      if resp.stdout.chomp != "True"
        win_set_pw = %Q{(([adsi]('WinNT://./#{@config["windows_admin_username"]}, user')).psbase.invoke('SetPassword', '#{pw}'))}
        resp = shell.run(win_set_pw)
        MU.log "Resetting Windows host password", MU::NOTICE, details: resp.stdout
      end
    end

    # Install Cygwin here, because for some reason it breaks inside Chef
    # XXX would love to not do this here
    # NOTE(review): the installer and packages are fetched over plain HTTP.
    pkgs = ["bash", "mintty", "vim", "curl", "openssl", "wget", "lynx", "openssh"]
    admin_home = "c:/bin/cygwin/home/#{@config["windows_admin_username"]}"
    install_cygwin = %Q{
      If (!(Test-Path "c:/bin/cygwin/Cygwin.bat")){
        $WebClient = New-Object System.Net.WebClient
        $WebClient.DownloadFile("http://cygwin.com/setup-x86_64.exe","$env:Temp/setup-x86_64.exe")
        Start-Process -wait -FilePath $env:Temp/setup-x86_64.exe -ArgumentList "-q -n -l $env:Temp/cygwin -R c:/bin/cygwin -s http://mirror.cs.vt.edu/pub/cygwin/cygwin/ -P #{pkgs.join(',')}"
      }
      if(!(Test-Path #{admin_home})){
        New-Item -type directory -path #{admin_home}
      }
      if(!(Test-Path #{admin_home}/.ssh)){
        New-Item -type directory -path #{admin_home}/.ssh
      }
      if(!(Test-Path #{admin_home}/.ssh/authorized_keys)){
        New-Item #{admin_home}/.ssh/authorized_keys -type file -force -value "#{@deploy.ssh_public_key}"
      }
    }
    resp = shell.run(install_cygwin)
    if resp.exitcode != 0
      MU.log "Failed at installing Cygwin", MU::ERR, details: resp
    end

    # A domain controller may carry an explicit hostname; everything else
    # uses whatever Windows name we already have.
    hostname = @mu_windows_name
    ad_cfg = @config['active_directory']
    if !ad_cfg.nil? && ad_cfg['node_type'] == "domain_controller" && ad_cfg['domain_controller_hostname']
      hostname = ad_cfg['domain_controller_hostname']
      @mu_windows_name = hostname
    end

    # Without a target name there is nothing to rename to; attempting it
    # anyway would issue Rename-Computer with an empty name and force a
    # pointless reboot.
    if !hostname.nil? && !hostname.to_s.empty?
      resp = shell.run(%Q{hostname})

      if resp.stdout.chomp != hostname
        resp = shell.run(%Q{Rename-Computer -NewName '#{hostname}' -Force -PassThru -Restart; Restart-Computer -Force})
        MU.log "Renaming Windows host to #{hostname}; this will trigger a reboot", MU::NOTICE, details: resp.stdout
        reboot(true)
        sleep 30
      end
    end
  rescue WinRM::WinRMError, HTTPClient::ConnectTimeoutError => e
    retries, rebootable_fails = handleWindowsFail(e, retries, rebootable_fails, max_retries: 10, reboot_on_problems: true, retry_interval: 30)
    retry
  end
end
168
+
169
# Get a privileged Powershell session on the server in question, using
# SSL-encrypted WinRM. Attempts alternate between NTLM password
# authentication against the node's IP and certificate authentication
# against its Mu name.
# @param max_retries [Integer]: Passed to handleWindowsFail as the maximum number of attempts
# @param retry_interval [Integer]: Passed to handleWindowsFail as the number of seconds to wait between attempts
# @param timeout [Integer]: WinRM +operation_timeout+ for the connection
# @param winrm_retries [Integer]: WinRM +retry_limit+ for the connection
# @param reboot_on_problems [Boolean]: Passed to handleWindowsFail; whether a "stuck" machine may be rebooted
# @return [WinRM::Shells::Powershell]
def getWinRMSession(max_retries = 40, retry_interval = 60, timeout: 30, winrm_retries: 2, reboot_on_problems: false)
  _nat_ssh_key, _nat_ssh_user, _nat_ssh_host, canonical_ip, _ssh_user, _ssh_key_name = getSSHConfig
  @mu_name ||= @config['mu_name']

  shell = nil
  # and now, a thing I really don't want to do
  MU::Master.addInstanceToEtcHosts(canonical_ip, @mu_name)

  # catch exceptions that circumvent our regular call stack
  Thread.abort_on_exception = false
  Thread.handle_interrupt(WinRM::WinRMWSManFault => :never) {
    begin
      Thread.handle_interrupt(WinRM::WinRMWSManFault => :immediate) {
        MU.log "(Probably harmless) Caught a WinRM::WinRMWSManFault in #{Thread.current.inspect}", MU::DEBUG, details: Thread.current.backtrace
      }
    ensure
      # Reraise something useful
    end
  }

  retries = 0
  rebootable_fails = 0
  begin
    opts = {
      retry_limit: winrm_retries,
      no_ssl_peer_verification: true, # XXX this should not be necessary; we get 'hostname "foo" does not match the server certificate' even when it clearly does match
      ca_trust_path: "#{MU.mySSLDir}/Mu_CA.pem",
      transport: :ssl,
      operation_timeout: timeout,
    }
    if retries % 2 == 0 # NTLM password over https
      opts[:endpoint] = 'https://'+canonical_ip+':5986/wsman'
      opts[:user] = @config['windows_admin_username']
      opts[:password] = getWindowsAdminPassword
    else # certificate auth over https
      opts[:endpoint] = 'https://'+@mu_name+':5986/wsman'
      opts[:client_cert] = "#{MU.mySSLDir}/#{@mu_name}-winrm.crt"
      opts[:client_key] = "#{MU.mySSLDir}/#{@mu_name}-winrm.key"
    end

    # Log the options for *this* attempt, but never the admin password
    loglevel = retries > 4 ? MU::NOTICE : MU::DEBUG
    loggable_opts = opts.key?(:password) ? opts.merge(password: "[REDACTED]") : opts
    MU.log "Calling WinRM on #{@mu_name}", loglevel, details: loggable_opts

    conn = WinRM::Connection.new(opts)
    conn.logger.level = :debug if retries > 2
    # Log the endpoint rather than the connection object, whose inspect
    # output may carry the credentials it was built with
    MU.log "WinRM connection to #{@mu_name} created", MU::DEBUG, details: opts[:endpoint]
    shell = conn.shell(:powershell)
    shell.run('ipconfig') # verify that we can do something
  rescue Errno::EHOSTUNREACH, Errno::ECONNREFUSED, HTTPClient::ConnectTimeoutError, OpenSSL::SSL::SSLError, SocketError, WinRM::WinRMError, Timeout::Error => e
    retries, rebootable_fails = handleWindowsFail(e, retries, rebootable_fails, max_retries: max_retries, reboot_on_problems: reboot_on_problems, retry_interval: retry_interval)
    retry
  ensure
    MU::Master.removeInstanceFromEtcHosts(@mu_name)
  end

  shell
end
231
+
232
+ }
233
+ }
234
+
235
+ end
236
+
237
+ end