right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -21,275 +21,429 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # ancestor
25
+ require 'right_scraper/retrievers'
26
+
27
+ require 'fileutils'
28
+ require 'shellwords'
24
29
  require 'tmpdir'
30
+ require 'right_git'
31
+ require 'right_support'
25
32
 
26
- # TEAL FIX: figure out a way to do this monkey-patch without always rquiring the
27
- # blackwinter gem and/or create a rightscale-git fork with this fix.
28
- #
29
- # ADDENDUM: we can't unconditionally require the git gem because git is not
30
- # always available.
31
- begin
32
- require 'git'
33
- require 'git/lib'
34
-
35
- module Git
36
- class Lib
37
- # Monkey patch to prevent screw up any subsequent shell out to git
38
- def command_with_preserved_env(cmd, opts = [], chdir = true, redirect = '', &block)
39
- variables_to_preserve = ['GIT_DIR', 'GIT_INDEX_FILE', 'GIT_WORK_TREE']
40
- preserved_env = Hash[variables_to_preserve.map { |var| [var, ENV[var]] }]
41
- begin
42
- command_without_preserved_env(cmd, opts, chdir, redirect, &block)
43
- ensure
44
- preserved_env.each { |var, value| ENV[var] = value }
45
- end
46
- end
33
+ module RightScraper::Retrievers
47
34
 
48
- # Monkey patch to blackwinter-git that strips ANSI escape sequences
49
- # from command output to avoid confusing the parser.
50
- def run_command_with_color_stripping(git_cmd, &block)
51
- out = run_command_without_color_stripping(git_cmd, &block)
52
- out.gsub!(/\e\[[^m]*m/, '')
53
- out
54
- end
35
+ # Retriever for resources stored in a git repository.
36
+ class Git < ::RightScraper::Retrievers::CheckoutBase
55
37
 
56
- unless self.methods.include?('run_command_without_color_stripping')
57
- alias :run_command_without_color_stripping :run_command
58
- alias :run_command :run_command_with_color_stripping
59
- end
38
+ @@available = false
60
39
 
61
- unless self.methods.include?('command_without_preserved_env')
62
- alias :command_without_preserved_env :command
63
- alias :command :command_with_preserved_env
40
+ # Determines if downloader is available.
41
+ def available?
42
+ unless @@available
43
+ begin
44
+ cmd = "git --version"
45
+ `#{cmd}`
46
+ if $?.success?
47
+ @@available = true
48
+ else
49
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
50
+ end
51
+ rescue
52
+ @logger.note_error($!, :available, "git retriever is unavailable")
53
+ end
64
54
  end
55
+ @@available
65
56
  end
66
- end
67
- rescue ::Git::GitExecuteError
68
- # silently ignore git gem's failed attempt to execute git on load.
69
- end
70
57
 
71
- module RightScraper
72
- module Retrievers
73
- # Retriever for resources stored in a git repository.
74
- class Git < CheckoutBasedRetriever
75
- @@available = false
58
+ # Ignore .git directories.
59
+ def ignorable_paths
60
+ ['.git']
61
+ end
76
62
 
77
- # Determines if downloader is available.
78
- def available?
79
- unless @@available
80
- begin
81
- # note that require 'git' does the same version check on load but
82
- # we don't want to assume any particular implementation.
83
- #
84
- # FIX: we might want to parse the result and require a minimum git
85
- # client version.
86
- cmd = "git --version"
87
- `#{cmd}`
88
- if $?.success?
89
- @@available = true
90
- else
91
- raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
92
- end
93
- rescue
94
- @logger.note_error($!, :available, "git retriever is unavailable")
95
- end
63
+ # In addition to normal retriever initialization, if the
64
+ # underlying repository has a credential we need to initialize a
65
+ # fresh SSHAgent and add the credential to it.
66
+ def retrieve
67
+ raise RetrieverError.new("git retriever is unavailable") unless available?
68
+ private_key = @repository.first_credential
69
+ private_key = nil if private_key && private_key.empty?
70
+ if is_windows?
71
+ if private_key
72
+ with_private_key_windows(private_key) { super }
73
+ else
74
+ super
96
75
  end
97
- @@available
98
- end
99
-
100
- # In addition to normal retriever initialization, if the
101
- # underlying repository has a credential we need to initialize a
102
- # fresh SSHAgent and add the credential to it.
103
- def retrieve
104
- raise RetrieverError.new("git retriever is unavailable") unless available?
105
-
106
- start_time = nil
107
- end_time = nil
108
- RightScraper::Processes::SSHAgent.with do |agent|
109
- unless @repository.first_credential.nil? || @repository.first_credential.empty?
110
- agent.add_key(@repository.first_credential)
111
- end
112
- start_time = ::Time.now
76
+ else
77
+ # always start the ssh agent in Linux so we can disable strict host name
78
+ # checking, regardless of credentials.
79
+ ::RightScraper::Processes::SSHAgent.with do |agent|
80
+ agent.add_key(private_key) if private_key
113
81
  super
114
- end_time = ::Time.now
115
82
  end
83
+ end
84
+ true
85
+ end
116
86
 
117
- # TEAL FIX: the use of blackwinter-git has defeated the logic that
118
- # ensured the max bytes was not exceeded during checkout. we will need
119
- # to replace blackwinter-git in future but in the interim our only
120
- # solution is to warn the user after the checkout has completed that we
121
- # are going to restrict their repo size/time in an upcoming release.
122
- if size_limit_exceeded?
123
- message =
124
- "The size of the downloaded repository exceeded a soft limit of" +
125
- " #{@max_bytes / (1024 * 1024)} MB. This will become a hard limit" +
126
- " in an upcoming release. You may avoid retrieval failure by" +
127
- " moving some of your files to seperate repositories."
128
- @logger.note_warning(message)
129
- end
130
- if @max_seconds && (end_time >= start_time + @max_seconds)
131
- message =
132
- "The time to download the repository exceeded a soft limit of" +
133
- " #{@max_seconds} seconds. This will become a hard limit" +
134
- " in an upcoming release. You may avoid retrieval failure by" +
135
- " moving some of your files to seperate repositories."
136
- @logger.note_warning(message)
137
- end
138
- true
87
+ # Return true if a checkout exists. Currently tests for .git in
88
+ # the checkout.
89
+ #
90
+ # === Returns ===
91
+ # Boolean:: true if the checkout already exists (and thus
92
+ # incremental updating can occur).
93
+ def exists?
94
+ File.exists?(File.join(@repo_dir, '.git'))
95
+ end
96
+
97
+ # Determines if the remote SHA/tag/branch referenced by the repository
98
+ # differs from what appears on disk.
99
+ #
100
+ # @return [TrueClass|FalseClass] true if changed
101
+ def remote_differs?
102
+ remote_sha = nil
103
+ current_sha = nil
104
+ git_repo = git_repo_for(@repo_dir)
105
+ without_size_limit(git_repo) do
106
+ do_fetch(git_repo)
107
+ revision = resolve_revision
108
+ remote_name = validate_revision(git_repo, revision)
109
+ remote_sha = git_repo.sha_for(remote_name ? remote_name : revision)
110
+ current_sha = git_repo.sha_for(nil)
139
111
  end
112
+ current_sha != remote_sha
113
+ end
140
114
 
141
- # Determines if total size of files created by child process has exceeded
142
- # the limit specified, if any.
143
- #
144
- # === Return
145
- # @return [TrueClass|FalseClass] true if size limit exceeded
146
- def size_limit_exceeded?
147
- exceeded = false
148
- if @max_bytes
149
- globbie = ::File.join(@repo_dir, '**/*')
150
- size = 0
151
- ::Dir.glob(globbie) do |f|
152
- size += ::File.stat(f).size rescue 0 if ::File.file?(f)
153
- if size > @max_bytes
154
- exceeded = true
155
- break
156
- end
157
- end
115
+ # Implements CheckoutBase#do_checkout
116
+ def do_checkout
117
+ git_repo = @logger.operation(:cloning, "to #{@repo_dir}") do
118
+ without_host_key_checking do
119
+ ::RightGit::Git::Repository.clone_to(
120
+ @repository.url,
121
+ @repo_dir,
122
+ :logger => git_repo_logger,
123
+ :shell => git_repo_shell)
158
124
  end
159
- exceeded
160
125
  end
126
+ do_fetch(git_repo)
127
+ do_checkout_revision(git_repo)
128
+ internal_update_tag(git_repo)
129
+ true
130
+ end
161
131
 
162
- # Return true if a checkout exists. Currently tests for .git in
163
- # the checkout.
164
- #
165
- # === Returns ===
166
- # Boolean:: true if the checkout already exists (and thus
167
- # incremental updating can occur).
168
- def exists?
169
- File.exists?(File.join(@repo_dir, '.git'))
132
+ # Implements CheckoutBase#do_update
133
+ def do_update
134
+ # note that a recent fetch was performed by remote_differs? and even if
135
+ # remotes have changed again in the brief interim it would invalidate
136
+ # the decisions already made if we refetched now.
137
+ git_repo = git_repo_for(@repo_dir)
138
+ @logger.operation(:cleanup, "ensure no untracked files in #{@repo_dir}") do
139
+ git_repo.hard_reset_to(nil)
140
+ do_clean_all(git_repo)
170
141
  end
142
+ do_checkout_revision(git_repo)
143
+ do_clean_all(git_repo) # clean again once we are on requested revision
144
+ internal_update_tag(git_repo)
145
+ true
146
+ end
171
147
 
172
- def do_fetch(git)
173
- @logger.operation(:fetch) do
174
- git.tags.each {|tag| git.lib.tag(['-d', tag.name])}
175
- git.fetch(['--all', '--prune', '--tags'])
176
- end
148
+ # Implements CheckoutBase#do_update_tag
149
+ def do_update_tag
150
+ git_repo = git_repo_for(@repo_dir)
151
+ without_size_limit(git_repo) do
152
+ internal_update_tag(git_repo)
177
153
  end
154
+ true
155
+ end
178
156
 
179
- # Incrementally update the checkout. The operations are as follows:
180
- # * checkout #tag
181
- # * if #tag is the head of a branch:
182
- # * find that branch's remote
183
- # * fetch it
184
- # * merge changes
185
- # * update @repository#tag
186
- # Note that if #tag is a SHA revision or a tag that exists in the
187
- # current repository, no fetching is done.
188
- def do_update
189
- git = ::Git.open(@repo_dir)
190
- do_fetch(git)
191
- @logger.operation(:cleanup, "ensure no untracked files in #{@repo_dir}") do
192
- git.reset_hard
193
- Dir.chdir(@repo_dir) do
194
- # ignore outcome; there is no way to record 'warnings'
195
- system("git clean -f")
196
- end
197
- end
198
- do_checkout_revision(git)
199
- do_update_tag(git)
157
+ private
158
+
159
+ DEFAULT_BRANCH_NAME = 'master'
160
+
161
+ def git_repo_for(dir)
162
+ ::RightGit::Git::Repository.new(
163
+ dir,
164
+ :logger => git_repo_logger,
165
+ :shell => git_repo_shell)
166
+ end
167
+
168
+ def git_repo_logger
169
+ # note that info-level logging is normally suppressed by scraper so git
170
+ # repo won't log anything but warnings and errors unless logger is made
171
+ # verbose.
172
+ @logger
173
+ end
174
+
175
+ def git_repo_shell
176
+ @git_repo_shell ||= ::RightScraper::Processes::Shell.new(
177
+ :logger => git_repo_logger,
178
+ :initial_directory => repo_dir,
179
+ :max_bytes => max_bytes,
180
+ :max_seconds => max_seconds,
181
+ :watch_directory => repo_dir)
182
+ end
183
+
184
+ def internal_update_tag(git_repo)
185
+ @repository = @repository.clone
186
+ @repository.tag = git_repo.sha_for(nil)
187
+ true
188
+ end
189
+
190
+ def do_checkout_revision(git_repo)
191
+ @logger.operation(:checkout_revision) do
192
+ revision = resolve_revision
193
+ remote_name = validate_revision(git_repo, revision)
194
+ git_repo.checkout_to(revision, :force => true)
195
+ git_repo.hard_reset_to(remote_name) if remote_name
196
+
197
+ # initialize/update submodules based on current SHA.
198
+ #
199
+ # TEAL FIX: there is no support for checking-out same branch/tag in
200
+ # the submodule(s) but this could be an advanced feature.
201
+ git_repo.update_submodules(:recursive => true)
200
202
  end
203
+ true
204
+ end
201
205
 
202
- def do_update_tag(git)
203
- @repository = @repository.clone
204
- @repository.tag = git.gtree("HEAD").sha
206
+ def do_fetch(git_repo)
207
+ @logger.operation(:fetch) do
208
+ # delete local tags, which may or may not still exist on remote.
209
+ git_repo.tags.each do |tag|
210
+ git_args = ['tag', '-d', tag.name]
211
+ git_repo.spit_output(git_args)
212
+ end
213
+ git_repo.fetch_all(:prune => true)
205
214
  end
215
+ end
206
216
 
207
- # Clone the remote repository. The operations are as follows:
208
- # * clone repository to @repo_dir
209
- # * checkout #tag
210
- # * update @repository#tag
211
- def do_checkout
212
- super
213
- git = @logger.operation(:cloning, "to #{@repo_dir}") do
214
- without_host_key_checking do
215
- ::Git.clone(@repository.url, @repo_dir)
216
- end
217
+ # Cleans anything that is currently untracked in the repo directory and
218
+ # any submodules. the idea is to prevent untracked items interfering with
219
+ # the normal behavior that would result if checkout were always to a clean
220
+ # directory. just switching between branches and updating submodules can
221
+ # leave untracked artifacts that affect behavior.
222
+ def do_clean_all(git_repo)
223
+ old_initial_directory = git_repo.repo_dir
224
+ clean_all_options = {
225
+ :directories => true,
226
+ :gitignored => true,
227
+ :submodules => true
228
+ }
229
+ relative_paths = [
230
+ '.',
231
+ git_repo.submodule_paths(:recursive => true)
232
+ ].flatten
233
+ relative_paths.each do |relative_path|
234
+ subdir_path = ::File.expand_path(::File.join(@repo_dir, relative_path))
235
+ if ::File.directory?(subdir_path)
236
+ # reuse shell with any watch parameters already set but vary the
237
+ # initial directory for each submodule.
238
+ git_repo.shell.initial_directory = subdir_path
239
+ git_repo.clean_all(clean_all_options)
217
240
  end
218
- do_fetch(git)
219
- do_checkout_revision(git)
220
- do_update_tag git
221
241
  end
242
+ true
243
+ rescue ::RightGit::RightGitError => e
244
+ @logger.note_warning(e.message)
245
+ false
246
+ ensure
247
+ git_repo.shell.initial_directory = old_initial_directory
248
+ end
249
+
250
+ def resolve_revision
251
+ revision = @repository.tag.to_s.strip
252
+ revision.empty? ? DEFAULT_BRANCH_NAME : revision.shellescape
253
+ end
222
254
 
223
- def do_checkout_revision(git)
224
- @logger.operation(:checkout_revision) do
225
- case
226
- when tag?(git, repo_tag) && branch?(git, repo_tag) then
227
- raise "Ambiguous reference: '#{repo_tag}' denotes both a branch and a tag"
228
- when branch = find_remote_branch(git, repo_tag) then
229
- branch.checkout
230
- when branch = find_local_branch(git, repo_tag) then
231
- branch.checkout
255
+ # Validates the given revision string to ensure it is safe and sane before
256
+ # attempting to use it.
257
+ #
258
+ # @param [::RightGit::Git::Repository] git_repo for validation
259
+ # @param [String] revision for validation
260
+ #
261
+ # @return [String] remote_name (for branch reset) or nil
262
+ #
263
+ # @raise [RetrieverError] on validation failure
264
+ def validate_revision(git_repo, revision)
265
+ branches = git_repo.branches(:all => true)
266
+ local_branches = branches.local
267
+ remote_branches = branches.remote
268
+ by_name = lambda { |item| item.name == revision }
269
+
270
+ # determine if revision is a tag.
271
+ remote_name = nil
272
+ if git_repo.tags.any?(&by_name)
273
+ if remote_branches.any?(&by_name)
274
+ # note that git has some resolution scheme for ambiguous SHA, tag,
275
+ # branch names but we do not support ambiguity.
276
+ raise RetrieverError, "Ambiguous name is both a remote branch and a tag: #{revision.inspect}"
277
+ elsif local_branches.any?(&by_name)
278
+ # odd corner case of a name that once was a remote branch (now
279
+ # deleted) that has become a tag instead. the user is not exactly
280
+ # at fault here (aside from being indecisive) so let's attempt to
281
+ # clean up after him. try switching to another local branch
282
+ # (i.e. master) and then deleting the obsolete local branch.
283
+ error_message = "Ambiguous name is both a local branch and a tag: #{revision.inspect}"
284
+ if revision == DEFAULT_BRANCH_NAME
285
+ # Darwin Awards winner; scraping with a tag named 'master' :@
286
+ raise RetrieverError, error_message
232
287
  else
233
- git.checkout(repo_tag)
288
+ begin
289
+ # checkout master and delete obsolete local branch.
290
+ git_repo.checkout_to(DEFAULT_BRANCH_NAME, :force => true)
291
+ git_repo.spit_output("branch -D #{revision}")
292
+ rescue ::RightGit::RightGitError
293
+ # ignore failed attempt to recover; raise original error.
294
+ raise RetrieverError, error_message
295
+ end
234
296
  end
235
- end if repo_tag
297
+ end
298
+ else
299
+ # not a tag; SHA or branch.
300
+ #
301
+ # note that we could try to trivially determine if revision was a
302
+ # SHA by matching the SHA1 pattern except that:
303
+ # 1) git accepts partial SHAs so long as they uniquely distinguish
304
+ # a commit for checkout.
305
+ # 2) a branch or tag name could match the SHA pattern (i.e.
306
+ # 40 hexadecimal characters) with no warnings from git. git will
307
+ # even allow a user to use a SHA as a tag name when that SHA
308
+ # exists (and may represent a different commit).
309
+ # confusing tags with SHAs should be universally discouraged but we
310
+ # need to be flexible here.
311
+ #
312
+ # a local branch may no longer exist remotely or may be behind or
313
+ # have diverged from remote branch. handle all cases.
314
+ remotes = remote_branches.select(&by_name)
315
+ if remotes.size > 1
316
+ # multiple remote branches exist (from different origins); branch
317
+ # name is ambiguous.
318
+ raise RetrieverError, "Ambiguous remote branch name: #{revision.inspect}"
319
+ elsif remotes.size == 1
320
+ # a remote branch exists.
321
+ remote_name = remotes.first.fullname
322
+ elsif local_branches.any?(&by_name)
323
+ # local branch only; failure due to missing remote branch.
324
+ #
325
+ # note that obsolete local branches are not supported by retrieval
326
+ # only because it would give the user a false positive.
327
+ raise RetrieverError, "Missing remote branch: #{revision.inspect}."
328
+ end # else a full or partial SHA or unknown revision
236
329
  end
330
+ remote_name
331
+ end
237
332
 
238
- # Ignore .git directories.
239
- def ignorable_paths
240
- ['.git']
333
+ # Temporarily disables checking the size of the repo_dir against the
334
+ # configured size limit. This permits performing git queries against a repo
335
+ # on disk that would normally exceed the size limit if it hadn't already
336
+ # been fully checked out in the past. If a repo has been scraped in the past
337
+ # and does not have any new commits, then it is acceptable even if it
338
+ # would exceed the current size limit.
339
+ def without_size_limit(git_repo)
340
+ old_max_bytes = git_repo.shell.max_bytes
341
+ begin
342
+ git_repo.shell.max_bytes = nil
343
+ yield
344
+ ensure
345
+ git_repo.shell.max_bytes = old_max_bytes
241
346
  end
347
+ end
242
348
 
243
- def tag?(git, name)
244
- git.tags.find {|t| t.name == name}
349
+ # Temporarily disable SSH host-key checking for SSH clients invoked by Git, for the duration of the
350
+ # block that is passed to this method.
351
+ #
352
+ # @yield after disabling strict host key checking, yields to caller
353
+ def without_host_key_checking(&callback)
354
+ if is_windows?
355
+ without_host_key_checking_windows(&callback)
356
+ else
357
+ without_host_key_checking_linux(&callback)
245
358
  end
359
+ end
246
360
 
247
- def branch?(git, name)
248
- git.branches.find {|t| t.name == name}
249
- end
361
+ # Temporarily disable SSH host-key checking for SSH clients invoked by Git, for the duration of the
362
+ # block that is passed to this method.
363
+ #
364
+ # @yield after disabling strict host key checking, yields to caller
365
+ def without_host_key_checking_linux
366
+ tmpdir = ::Dir.mktmpdir
367
+ ssh_cmd = ::File.join(tmpdir, 'ssh')
250
368
 
251
- def repo_tag
252
- name = (@repository.tag || "master").chomp
253
- name = "master" if name.empty?
254
- name
369
+ ::File.open(ssh_cmd, 'w') do |cmd|
370
+ cmd.puts "#!/bin/bash"
371
+ cmd.puts "exec ssh -o StrictHostKeyChecking=no ${@}"
255
372
  end
373
+ ::FileUtils.chmod(0700, ssh_cmd)
256
374
 
257
- def find_branch(git, tag)
258
- find_local_branch(git, tag) || find_remote_branch(git, tag)
259
- end
375
+ old_env = ::ENV['GIT_SSH']
376
+ ::ENV['GIT_SSH'] = ssh_cmd
377
+ yield
378
+ ensure
379
+ ::FileUtils.rm_rf(tmpdir)
380
+ ::ENV['GIT_SSH'] = old_env
381
+ end
260
382
 
261
- def find_local_branch(git, name)
262
- git.branches.local.find {|b| b.name == name}
263
- end
383
+ # The "ssh.exe" that comes with msysgit doesn't appear to configure things
384
+ # properly under Windows (or it does for SYSTEM account but not user
385
+ # accounts) when disabling strict hostname checking. We can instead
386
+ # temporarily create/replace the "%USERPROFILE%\.ssh\config" file to disable
387
+ # checking for all hostnames.
388
+ #
389
+ # @yield after disabling strict host key checking, yields to caller
390
+ def without_host_key_checking_windows(&callback)
391
+ config_path = ::File.expand_path(::File.join(home_dir_windows, '.ssh', 'config'))
392
+ config_text = <<EOF
393
+ Host *
394
+ StrictHostKeyChecking no
395
+ EOF
396
+ with_replaced_file(config_path, config_text, &callback)
397
+ end
264
398
 
265
- def find_remote_branch(git, name)
266
- git.branches.remote.find {|b| b.name == name}
267
- end
399
+ def with_private_key_windows(private_key, &callback)
400
+ private_key_path = ::File.expand_path(::File.join(home_dir_windows, '.ssh', 'id_rsa'))
401
+ with_replaced_file(private_key_path, private_key, &callback)
402
+ end
268
403
 
269
- # Temporarily disable SSH host-key checking for SSH clients invoked by Git, for the duration of the
270
- # block that is passed to this method.
271
- #
272
- # @yield after disabling strict host key checking, yields to caller
273
- def without_host_key_checking
274
- tmpdir = Dir.mktmpdir
275
- ssh_cmd = File.join(tmpdir, 'ssh')
276
-
277
- File.open(ssh_cmd, 'w') do |cmd|
278
- cmd.puts "#!/bin/bash"
279
- cmd.puts "exec ssh -o StrictHostKeyChecking=no ${@}"
404
+ # Utility for replacing a file temporarily within a scope and ensuring it is
405
+ # restored afterward.
406
+ #
407
+ # @param [String] filepath to replace
408
+ # @param [String] contents to substitute
409
+ #
410
+ # @yield after replacing file
411
+ def with_replaced_file(filepath, contents)
412
+ ::Dir.mktmpdir do |temp_dir|
413
+ begin
414
+ temp_path = ::File.join(temp_dir, ::File.basename(filepath))
415
+ ::FileUtils.mkdir_p(::File.dirname(filepath))
416
+ if ::File.file?(filepath)
417
+ ::FileUtils.mv(filepath, temp_path, :force => true)
418
+ end
419
+ ::File.open(filepath, 'w') { |f| f.write(contents) }
420
+ yield
421
+ ensure
422
+ begin
423
+ if ::File.file?(temp_path)
424
+ ::FileUtils.mv(temp_path, filepath, :force => true)
425
+ elsif ::File.file?(filepath)
426
+ ::File.unlink(filepath)
427
+ end
428
+ rescue ::Exception => e
429
+ @logger.note_warning("Failed to restore #{filepath.inspect}: #{e.message}")
430
+ end
280
431
  end
281
- FileUtils.chmod(0700, ssh_cmd)
282
-
283
- old_env = ENV['GIT_SSH']
284
- ENV['GIT_SSH'] = ssh_cmd
285
-
286
- result = yield
287
- ensure
288
- FileUtils.rm_rf(tmpdir)
289
- ENV['GIT_SSH'] = old_env
432
+ end
433
+ end
290
434
 
291
- result
435
+ # @return default location for git-related configuration files.
436
+ def home_dir_windows
437
+ home_dir = ::ENV['USERPROFILE']
438
+ unless home_dir && ::File.directory?(home_dir)
439
+ raise RetrieverError, "Invalid USERPROFILE directory: #{home_dir.inspect}"
292
440
  end
441
+ home_dir
442
+ end
443
+
444
+ # @return [TrueClass|FalseClass] true if running on Windows
445
+ def is_windows?
446
+ !!(RUBY_PLATFORM =~ /mswin|win32|dos|mingw|cygwin/)
293
447
  end
294
448
  end
295
449
  end