chef-winrm-fs 1.3.6

Source listing of lib/winrm-fs/core/file_transporter.rb as shipped in the chef-winrm-fs 1.3.6 gem.
@@ -0,0 +1,573 @@
1
+ # frozen_string_literal: false
2
+
3
+ #
4
+ # Author:: Fletcher (<fnichol@nichol.ca>)
5
+ #
6
+ # Copyright (C) 2015, Fletcher Nichol
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ # See the License for the specific language governing permissions and
18
+ # limitations under the License.
19
+
20
+ require 'benchmark' unless defined?(Benchmark)
21
+ require 'csv' unless defined?(CSV)
22
+ require 'digest' unless defined?(Digest)
23
+ require 'securerandom' unless defined?(SecureRandom)
24
+ require 'stringio' unless defined?(StringIO)
25
+
26
+ require 'winrm/exceptions'
27
+ require 'winrm-fs/core/tmp_zip'
28
+
29
+ module WinRM
30
+ module FS
31
+ module Core
32
+ # Wrapped exception for any internally raised WinRM-related errors.
33
+ #
34
+ # @author Fletcher Nichol <fnichol@nichol.ca>
35
+ class FileTransporterFailed < ::WinRM::WinRMError; end
36
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/ClassLength
37
+
38
+ # Exception for the case where upload source contains more than one
39
+ # StringIO object, or a combination of file/directory paths and StringIO object
40
+ class UploadSourceError < StandardError
41
+ def initialize(msg = 'Only a single StringIO object may be uploaded.')
42
+ super
43
+ end
44
+ end
45
+
46
+ # Object which can upload one or more files or directories to a remote
47
+ # host over WinRM using PowerShell scripts and CMD commands. Note that
48
+ # this form of file transfer is *not* ideal and extremely costly on both
49
+ # the local and remote sides. Great pains are made to minimize round
50
+ # trips to the remote host and to minimize the number of PowerShell
51
+ # sessions being invoked which can be 2 orders of magnitude more
52
+ # expensive than vanilla CMD commands.
53
+ #
54
+ # This object is supported by a `PowerShell` instance as it
55
+ # depends on the `#run` API contract.
56
+ #
57
+ # An optional logger can be supplied, assuming it can respond to the
58
+ # `#debug` and `#debug?` messages.
59
+ #
60
+ # @author Fletcher Nichol <fnichol@nichol.ca>
61
+ # @author Matt Wrock <matt@mattwrock.com>
62
+ class FileTransporter
63
+ # Creates a FileTransporter given a PowerShell object.
64
+ #
65
+ # @param shell [PowerShell] a winrm PowerShell object
66
+ def initialize(shell, opts = {})
67
+ @shell = shell
68
+ @logger = shell.logger
69
+ @id_generator = opts.fetch(:id_generator) { -> { SecureRandom.uuid } }
70
+ Zip.unicode_names = true
71
+ end
72
+
73
+ # Uploads a collection of files and/or directories to the remote host.
74
+ #
75
+ # **TODO Notes:**
76
+ # * options could specify zip mode, zip options, etc.
77
+ # * maybe option to set tmpfile base dir to override $env:PATH?
78
+ # * progress yields block like net-scp progress
79
+ # * final API: def upload(locals, remote, _options = {}, &_progress)
80
+ #
81
+ # @param locals [Array<String>,String,StringIO] one or more
82
+ # local file or directory paths, StringIO objects also accepted
83
+ # @param remote [String] the base destination path on the remote host
84
+ # @return [Hash] report hash, keyed by the local SHA1 digest
85
+ def upload(locals, remote)
86
+ files = nil
87
+ report = nil
88
+ remote = remote.to_s
89
+ elapsed1 = Benchmark.measure do
90
+ files = make_files_hash([locals].flatten, remote)
91
+ report = check_files(files)
92
+ merge_with_report!(files, report)
93
+ reconcile_destinations!(files)
94
+ end
95
+ total_size = total_base64_transfer_size(files)
96
+
97
+ elapsed2 = Benchmark.measure do
98
+ report = stream_upload_files(files) do |local_path, xfered|
99
+ yield xfered, total_size, local_path, remote if block_given?
100
+ end
101
+ merge_with_report!(files, report)
102
+ end
103
+
104
+ elapsed3 = Benchmark.measure do
105
+ report = extract_files(files)
106
+ merge_with_report!(files, report)
107
+ cleanup(files)
108
+ end
109
+
110
+ logger.debug(
111
+ "Uploaded #{files.keys.size} items " \
112
+ "dirty_check: #{duration(elapsed1.real)} " \
113
+ "stream_files: #{duration(elapsed2.real)} " \
114
+ "extract: #{duration(elapsed3.real)} " \
115
+ )
116
+
117
+ [total_size, files]
118
+ end
119
+
120
+ def close
121
+ shell.close
122
+ end
123
+
124
        private

        # @return [String] the Array#pack template for Base64-encoding a
        #   stream of data ('m0' encodes without inserting line breaks)
        # @api private
        BASE64_PACK = 'm0'.freeze

        # @return [String] the directory on the remote host where temporary
        #   upload artifacts (e.g. zipped folders) are persisted
        # @api private
        TEMP_UPLOAD_DIRECTORY = '$env:TEMP\\winrm-upload'.freeze

        # @return [#debug,#debug?] the logger (borrowed from the shell)
        # @api private
        attr_reader :logger

        # @return [Winrm::Shells::Powershell] a WinRM Powershell shell
        # @api private
        attr_reader :shell
143
+
144
+ # @return [Integer] the maximum number of bytes to send per request
145
+ # when streaming a file. This is optimized to send as much data
146
+ # as allowed in a single PSRP fragment
147
+ # @api private
148
+ def max_encoded_write
149
+ @max_encoded_write ||= begin
150
+ empty_command = WinRM::PSRP::MessageFactory.create_pipeline_message(
151
+ '00000000-0000-0000-0000-000000000000',
152
+ '00000000-0000-0000-0000-000000000000',
153
+ stream_command('')
154
+ )
155
+ shell.max_fragment_blob_size - empty_command.bytes.length
156
+ end
157
+ end
158
+
159
+ # Examines the files and corrects the file destination if it is
160
+ # targeting an existing folder. In this case, the destination path
161
+ # will have the base name of the source file appended. This only
162
+ # applies to file uploads and not to folder uploads.
163
+ #
164
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
165
+ # @return [Hash] a report hash, keyed by the local SHA1 digest
166
+ # @api private
167
+ def reconcile_destinations!(files)
168
+ files.each do |_, data|
169
+ data['dst'] = File.join(data['dst'], File.basename(data['src'])) if data['target_is_folder'] == 'True'
170
+ end
171
+ end
172
+
173
+ # Adds an entry to a files Hash (keyed by local SHA1 digest) for a
174
+ # directory. When a directory is added, a temporary Zip file is created
175
+ # containing the contents of the directory and any file-related data
176
+ # such as SHA1 digest, size, etc. will be referring to the Zip file.
177
+ #
178
+ # @param hash [Hash] hash to be mutated
179
+ # @param dir [String] directory path to be Zipped and added
180
+ # @param remote [String] path to destination on remote host
181
+ # @api private
182
+ def add_directory_hash!(hash, dir, remote)
183
+ logger.debug "creating hash for directory #{remote}"
184
+ zip_io = TmpZip.new(dir, logger)
185
+ zip_sha1 = sha1sum(zip_io.path)
186
+
187
+ hash[zip_sha1] = {
188
+ 'src' => dir,
189
+ 'src_zip' => zip_io.path.to_s,
190
+ 'zip_io' => zip_io,
191
+ 'tmpzip' => "#{TEMP_UPLOAD_DIRECTORY}\\tmpzip-#{zip_sha1}.zip",
192
+ 'dst' => "#{remote}\\#{File.basename(dir)}",
193
+ 'size' => File.size(zip_io.path)
194
+ }
195
+ end
196
+
197
+ # Adds an entry to a files Hash (keyed by local SHA1 digest) for a file.
198
+ #
199
+ # @param hash [Hash] hash to be mutated
200
+ # @param local [String, StringIO] file path or StringIO object
201
+ # @param remote [String] path to destination on remote host
202
+ # @api private
203
+ def add_file_hash!(hash, local, remote)
204
+ logger.debug "creating hash for file #{remote}"
205
+ hash[sha1sum(local)] = {
206
+ 'src' => local,
207
+ 'dst' => remote,
208
+ 'size' => local.is_a?(StringIO) ? local.size : File.size(local)
209
+ }
210
+ end
211
+
212
+ # Runs the check_files PowerShell script against a collection of
213
+ # destination path/SHA1 checksum pairs. The PowerShell script returns
214
+ # its results as a CSV-formatted report which is converted into a Ruby
215
+ # Hash.
216
+ #
217
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
218
+ # @return [Hash] a report hash, keyed by the local SHA1 digest
219
+ # @api private
220
+ def check_files(files)
221
+ logger.debug 'Running check_files.ps1'
222
+ hash_file = check_files_ps_hash(files)
223
+ script = WinRM::FS::Scripts.render('check_files', hash_file: hash_file)
224
+ parse_response(shell.run(script))
225
+ end
226
+
227
+ # Constructs a collection of destination path/SHA1 checksum pairs as a
228
+ # String representation of the contents of a PowerShell Hash Table.
229
+ #
230
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
231
+ # @return [String] the inner contents of a PowerShell Hash Table
232
+ # @api private
233
+ def check_files_ps_hash(files)
234
+ hash = files.map do |sha1, data|
235
+ [
236
+ sha1,
237
+ {
238
+ 'target' => data.fetch('tmpzip', data['dst']),
239
+ 'src_basename' => data['src'].is_a?(StringIO) ? data['dst'] : File.basename(data['src']),
240
+ 'dst' => data['dst']
241
+ }
242
+ ]
243
+ end
244
+ ps_hash(Hash[hash])
245
+ end
246
+
247
+ # Performs any final cleanup on the report Hash and removes any
248
+ # temporary files/resources used in the upload task.
249
+ #
250
+ # @param files [Hash] a files hash
251
+ # @api private
252
+ def cleanup(files)
253
+ files.select { |_, data| data.key?('zip_io') }.each do |sha1, data|
254
+ data.fetch('zip_io').unlink
255
+ files.fetch(sha1).delete('zip_io')
256
+ logger.debug "Cleaned up src_zip #{data['src_zip']}"
257
+ end
258
+ end
259
+
260
+ # Runs the extract_files PowerShell script against a collection of
261
+ # temporary file/destination path pairs. The PowerShell script returns
262
+ # its results as a CSV-formatted report which is converted into a Ruby
263
+ # Hash. The script will not be invoked if there are no zip files
264
+ # present in the incoming files Hash.
265
+ #
266
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
267
+ # @return [Hash] a report hash, keyed by the local SHA1 digest
268
+ # @api private
269
+ def extract_files(files)
270
+ extracted_files = extract_files_ps_hash(files)
271
+
272
+ if extracted_files == ps_hash({})
273
+ logger.debug 'No remote files to extract, skipping'
274
+ {}
275
+ else
276
+ logger.debug 'Running extract_files.ps1'
277
+ script = WinRM::FS::Scripts.render('extract_files', hash_file: extracted_files)
278
+
279
+ parse_response(shell.run(script))
280
+ end
281
+ end
282
+
283
+ # Constructs a collection of temporary file/destination path pairs for
284
+ # all zipped folders as a String representation of the contents of a
285
+ # PowerShell Hash Table.
286
+ #
287
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
288
+ # @return [String] the inner contents of a PowerShell Hash Table
289
+ # @api private
290
+ def extract_files_ps_hash(files)
291
+ file_data = files.select { |_, data| data.key?('tmpzip') }
292
+
293
+ result = file_data.map do |sha1, data|
294
+ val = { 'dst' => data['dst'] }
295
+ val['tmpzip'] = data['tmpzip'] if data['tmpzip']
296
+
297
+ [sha1, val]
298
+ end
299
+
300
+ ps_hash(Hash[result])
301
+ end
302
+
303
+ # Returns a formatted string representing a duration in seconds.
304
+ #
305
+ # @param total [Integer] the total number of seconds
306
+ # @return [String] a formatted string of the form (XmYY.00s)
307
+ def duration(total)
308
+ total = 0 if total.nil?
309
+ minutes = (total / 60).to_i
310
+ seconds = (total - (minutes * 60))
311
+ format('(%dm%.2fs)', minutes, seconds)
312
+ end
313
+
314
+ # Contructs a Hash of files or directories, keyed by the local SHA1
315
+ # digest. Each file entry has a source and destination set, at a
316
+ # minimum.
317
+ #
318
+ # @param locals [Array<String,StringIO>] a collection of local files,
319
+ # directories or StringIO objects
320
+ # @param remote [String] the base destination path on the remote host
321
+ # @return [Hash] files hash, keyed by the local SHA1 digest
322
+ # @api private
323
+ def make_files_hash(locals, remote)
324
+ hash = {}
325
+ check_locals_array(locals)
326
+ locals.each do |local|
327
+ if local.is_a?(StringIO)
328
+ add_file_hash!(hash, local, remote)
329
+ else
330
+ local = local.to_s
331
+ expanded = File.expand_path(local)
332
+ expanded += local[-1] if local.end_with?('/', '\\')
333
+ if File.file?(expanded)
334
+ add_file_hash!(hash, expanded, remote)
335
+ elsif File.directory?(expanded)
336
+ add_directory_hash!(hash, expanded, remote)
337
+ else
338
+ raise Errno::ENOENT, "No such file or directory #{expanded}"
339
+ end
340
+ end
341
+ end
342
+ hash
343
+ end
344
+
345
+ # Ensure that only a single StringIO object is uploaded at a time
346
+ # This is necessary because the contents of the buffer will be written
347
+ # to the destination.
348
+ # @param locals [Array<String,StringIO>] a collection of local files,
349
+ # directories or StringIO objects
350
+ # @api private
351
+ def check_locals_array(locals)
352
+ string_io = false
353
+ path = false
354
+ locals.each do |local|
355
+ raise UploadSourceError if string_io
356
+
357
+ if local.is_a?(StringIO)
358
+ string_io = true
359
+ else
360
+ path = true
361
+ end
362
+ raise UploadSourceError if string_io && path
363
+ end
364
+ end
365
+
366
+ # @return [String] the SHA1 digest of a local file or StringIO
367
+ # @api private
368
+ def sha1sum(local)
369
+ if local.is_a?(StringIO)
370
+ Digest::SHA1.hexdigest(local.string)
371
+ else
372
+ Digest::SHA1.file(local).hexdigest
373
+ end
374
+ end
375
+
376
+ # Destructively merges a report Hash into an existing files Hash.
377
+ # **Note:** this method mutates the files Hash.
378
+ #
379
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
380
+ # @param report [Hash] report hash, keyed by the local SHA1 digest
381
+ # @api private
382
+ def merge_with_report!(files, report)
383
+ files.merge!(report) { |_, oldval, newval| oldval.merge(newval) }
384
+ end
385
+
386
+ # @param depth [Integer] number of padding characters (default: `0`)
387
+ # @return [String] a whitespace padded string of the given length
388
+ # @api private
389
+ def pad(depth = 0)
390
+ ' ' * depth
391
+ end
392
+
393
+ # Parses response of a PowerShell script or CMD command which contains
394
+ # a CSV-formatted document in the standard output stream.
395
+ #
396
+ # @param output [WinRM::Output] output object with stdout, stderr, and
397
+ # exit code
398
+ # @return [Hash] report hash, keyed by the local SHA1 digest
399
+ # @api private
400
+ def parse_response(output)
401
+ exitcode = output.exitcode
402
+ stderr = output.stderr
403
+
404
+ if exitcode != 0
405
+ raise FileTransporterFailed, "[#{self.class}] Upload failed " \
406
+ "(exitcode: #{exitcode})\n#{stderr}"
407
+ elsif stderr != '\r\n' && stderr != ''
408
+ raise FileTransporterFailed, "[#{self.class}] Upload failed " \
409
+ "(exitcode: 0), but stderr present\n#{stderr}"
410
+ end
411
+
412
+ logger.debug 'Parsing CSV Response'
413
+ logger.debug output.stdout
414
+
415
+ array = CSV.parse(output.stdout, headers: true).map(&:to_hash)
416
+ array.each { |h| h.each { |key, value| h[key] = nil if value == '' } }
417
+ Hash[array.map { |entry| [entry.fetch('src_sha1'), entry] }]
418
+ end
419
+
420
+ # Converts a Ruby hash into a PowerShell hash table, represented in a
421
+ # String.
422
+ #
423
+ # @param obj [Object] source Hash or object when used in recursive
424
+ # calls
425
+ # @param depth [Integer] padding depth, used in recursive calls
426
+ # (default: `0`)
427
+ # @return [String] a PowerShell hash table
428
+ # @api private
429
+ def ps_hash(obj, depth = 0)
430
+ if obj.is_a?(Hash)
431
+ obj.map do |k, v|
432
+ %(#{pad(depth + 2)}#{ps_hash(k)} = #{ps_hash(v, depth + 2)})
433
+ end.join(";\n").insert(0, "@{\n").insert(-1, "\n#{pad(depth)}}")
434
+ else
435
+ %("#{obj}")
436
+ end
437
+ end
438
+
439
        # Uploads an IO stream to a Base64-encoded destination file.
        #
        # **Implementation Note:** Some of the code in this method may appear
        # slightly too dense and while adding additional variables would help,
        # the code is written very precisely to avoid unwanted allocations
        # which will bloat the Ruby VM's object space (and memory footprint).
        # The goal here is to stream potentially large files to a remote host
        # while not loading the entire file into memory first, then Base64
        # encoding it--duplicating the file in memory again.
        #
        # @param input_io [#read] a readable stream or object to be uploaded
        # @param dest [String] path to the destination file on the remote host
        # @return [Integer,Integer] the number of resulting upload chunks and
        #   the number of bytes transferred to the remote host
        # @api private
        def stream_upload(input_io, dest)
          # 4 Base64 output bytes encode 3 input bytes; dest is embedded in
          # every chunk script, so subtract its length to keep each chunk
          # within a single PSRP fragment (see #max_encoded_write).
          read_size = ((max_encoded_write - dest.length) / 4) * 3
          chunk = 1
          bytes = 0
          # Do not freeze this string -- input_io.read mutates it in place.
          buffer = ''
          # Open a remote FileStream once; subsequent stream_command calls
          # decode and append each Base64 chunk to it.
          shell.run(<<-PS
            $to = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("#{dest}")
            $parent = Split-Path $to
            if(!(Test-path $parent)) { mkdir $parent | Out-Null }
            $fileStream = New-Object -TypeName System.IO.FileStream -ArgumentList @(
                $to,
                [system.io.filemode]::Create,
                [System.io.FileAccess]::Write,
                [System.IO.FileShare]::ReadWrite
            )
            # Powershell caches ScrpitBlocks in a dictionary
            # keyed on the script block text. Thats just great
            # unless the script is super large and called a gillion
            # times like we might do. In such a case it will saturate the
            # Large Object Heap and lead to Out Of Memory exceptions
            # for large files or folders. So we call the internal method
            # ClearScriptBlockCache to clear it.
            $bindingFlags= [Reflection.BindingFlags] "NonPublic,Static"
            $method = [scriptblock].GetMethod("ClearScriptBlockCache", $bindingFlags)
          PS
          )

          while input_io.read(read_size, buffer)
            # bytes tracks Base64-encoded size, consistent with
            # #total_base64_transfer_size, so progress adds up to total_size.
            bytes += (buffer.bytesize / 3 * 4)
            shell.run(stream_command([buffer].pack(BASE64_PACK)))
            # Log only every 25th chunk to keep debug output manageable.
            logger.debug "Wrote chunk #{chunk} for #{dest}" if chunk % 25 == 0
            chunk += 1
            yield bytes if block_given?
          end
          shell.run('$fileStream.Dispose()')
          buffer = nil # rubocop:disable Lint/UselessAssignment

          [chunk - 1, bytes]
        end
494
+
495
        # Builds the PowerShell snippet that appends one Base64-encoded chunk
        # to the remote $fileStream opened by #stream_upload. It first clears
        # PowerShell's script block cache (via $method, when bound) to avoid
        # saturating the Large Object Heap on repeated calls.
        #
        # @param encoded_bytes [String] a Base64-encoded chunk of file data
        # @return [String] the PowerShell script that writes the decoded bytes
        # @api private
        def stream_command(encoded_bytes)
          <<-PS
            if($method) { $method.Invoke($Null, $Null) }
            $bytes=[Convert]::FromBase64String('#{encoded_bytes}')
            $fileStream.Write($bytes, 0, $bytes.length)
          PS
        end
502
+
503
+ # Uploads a local file.
504
+ #
505
+ # @param src [String, StringIO] path to a local file or StringIO object
506
+ # @param dest [String] path to the file on the remote host
507
+ # @return [Integer,Integer] the number of resulting upload chunks and
508
+ # the number of bytes transferred to the remote host
509
+ # @api private
510
+ def stream_upload_file(src, dest, &block)
511
+ logger.debug "Uploading #{src} to #{dest}"
512
+ chunks = 0
513
+ bytes = 0
514
+ elapsed = Benchmark.measure do
515
+ if src.is_a?(StringIO)
516
+ chunks, bytes = stream_upload(src, dest, &block)
517
+ else
518
+ File.open(src, 'rb') do |io|
519
+ chunks, bytes = stream_upload(io, dest, &block)
520
+ end
521
+ end
522
+ end
523
+ logger.debug(
524
+ "Finished uploading #{src} to #{dest} " \
525
+ "(#{bytes.to_f / 1000} KB over #{chunks} chunks) " \
526
+ "in #{duration(elapsed.real)}"
527
+ )
528
+
529
+ [chunks, bytes]
530
+ end
531
+
532
+ # Uploads a collection of "dirty" files to the remote host as
533
+ # Base64-encoded temporary files. A "dirty" file is one which has the
534
+ # `"chk_dirty"` option set to `"True"` in the incoming files Hash.
535
+ #
536
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
537
+ # @return [Hash] a report hash, keyed by the local SHA1 digest
538
+ # @api private
539
+ def stream_upload_files(files)
540
+ response = {}
541
+ files.each do |sha1, data|
542
+ src = data.fetch('src_zip', data['src'])
543
+ if data['chk_dirty'] == 'True'
544
+ response[sha1] = { 'dest' => data['tmpzip'] || data['dst'] }
545
+ chunks, bytes = stream_upload_file(src, data['tmpzip'] || data['dst']) do |xfered|
546
+ yield data['src'], xfered
547
+ end
548
+ response[sha1]['chunks'] = chunks
549
+ response[sha1]['xfered'] = bytes
550
+ else
551
+ logger.debug "File #{data['dst']} is up to date, skipping"
552
+ end
553
+ end
554
+ response
555
+ end
556
+
557
+ # Total by byte count to be transferred.
558
+ # Calculates count based on the sum of base64 encoded content size
559
+ # of all files base 64 that are dirty.
560
+ #
561
+ # @param files [Hash] files hash, keyed by the local SHA1 digest
562
+ # @return [Fixnum] total byte size
563
+ # @api private
564
+ def total_base64_transfer_size(files)
565
+ size = 0
566
+ files.values.each { |file| size += file['size'] if file['chk_dirty'] == 'True' }
567
+ size / 3 * 4
568
+ end
569
+ end
570
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize, Metrics/ClassLength
571
+ end
572
+ end
573
+ end