right_scraper 3.2.6 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -21,275 +21,429 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # ancestor
25
+ require 'right_scraper/retrievers'
26
+
27
+ require 'fileutils'
28
+ require 'shellwords'
24
29
  require 'tmpdir'
30
+ require 'right_git'
31
+ require 'right_support'
25
32
 
26
- # TEAL FIX: figure out a way to do this monkey-patch without always rquiring the
27
- # blackwinter gem and/or create a rightscale-git fork with this fix.
28
- #
29
- # ADDENDUM: we can't unconditionally require the git gem because git is not
30
- # always available.
31
- begin
32
- require 'git'
33
- require 'git/lib'
34
-
35
- module Git
36
- class Lib
37
- # Monkey patch to prevent screw up any subsequent shell out to git
38
- def command_with_preserved_env(cmd, opts = [], chdir = true, redirect = '', &block)
39
- variables_to_preserve = ['GIT_DIR', 'GIT_INDEX_FILE', 'GIT_WORK_TREE']
40
- preserved_env = Hash[variables_to_preserve.map { |var| [var, ENV[var]] }]
41
- begin
42
- command_without_preserved_env(cmd, opts, chdir, redirect, &block)
43
- ensure
44
- preserved_env.each { |var, value| ENV[var] = value }
45
- end
46
- end
33
+ module RightScraper::Retrievers
47
34
 
48
- # Monkey patch to blackwinter-git that strips ANSI escape sequences
49
- # from command output to avoid confusing the parser.
50
- def run_command_with_color_stripping(git_cmd, &block)
51
- out = run_command_without_color_stripping(git_cmd, &block)
52
- out.gsub!(/\e\[[^m]*m/, '')
53
- out
54
- end
35
+ # Retriever for resources stored in a git repository.
36
+ class Git < ::RightScraper::Retrievers::CheckoutBase
55
37
 
56
- unless self.methods.include?('run_command_without_color_stripping')
57
- alias :run_command_without_color_stripping :run_command
58
- alias :run_command :run_command_with_color_stripping
59
- end
38
+ @@available = false
60
39
 
61
- unless self.methods.include?('command_without_preserved_env')
62
- alias :command_without_preserved_env :command
63
- alias :command :command_with_preserved_env
40
+ # Determines if downloader is available.
41
+ def available?
42
+ unless @@available
43
+ begin
44
+ cmd = "git --version"
45
+ `#{cmd}`
46
+ if $?.success?
47
+ @@available = true
48
+ else
49
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
50
+ end
51
+ rescue
52
+ @logger.note_error($!, :available, "git retriever is unavailable")
53
+ end
64
54
  end
55
+ @@available
65
56
  end
66
- end
67
- rescue ::Git::GitExecuteError
68
- # silently ignore git gem's failed attempt to execute git on load.
69
- end
70
57
 
71
- module RightScraper
72
- module Retrievers
73
- # Retriever for resources stored in a git repository.
74
- class Git < CheckoutBasedRetriever
75
- @@available = false
58
+ # Ignore .git directories.
59
+ def ignorable_paths
60
+ ['.git']
61
+ end
76
62
 
77
- # Determines if downloader is available.
78
- def available?
79
- unless @@available
80
- begin
81
- # note that require 'git' does the same version check on load but
82
- # we don't want to assume any particular implementation.
83
- #
84
- # FIX: we might want to parse the result and require a minimum git
85
- # client version.
86
- cmd = "git --version"
87
- `#{cmd}`
88
- if $?.success?
89
- @@available = true
90
- else
91
- raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
92
- end
93
- rescue
94
- @logger.note_error($!, :available, "git retriever is unavailable")
95
- end
63
+ # In addition to normal retriever initialization, if the
64
+ # underlying repository has a credential we need to initialize a
65
+ # fresh SSHAgent and add the credential to it.
66
+ def retrieve
67
+ raise RetrieverError.new("git retriever is unavailable") unless available?
68
+ private_key = @repository.first_credential
69
+ private_key = nil if private_key && private_key.empty?
70
+ if is_windows?
71
+ if private_key
72
+ with_private_key_windows(private_key) { super }
73
+ else
74
+ super
96
75
  end
97
- @@available
98
- end
99
-
100
- # In addition to normal retriever initialization, if the
101
- # underlying repository has a credential we need to initialize a
102
- # fresh SSHAgent and add the credential to it.
103
- def retrieve
104
- raise RetrieverError.new("git retriever is unavailable") unless available?
105
-
106
- start_time = nil
107
- end_time = nil
108
- RightScraper::Processes::SSHAgent.with do |agent|
109
- unless @repository.first_credential.nil? || @repository.first_credential.empty?
110
- agent.add_key(@repository.first_credential)
111
- end
112
- start_time = ::Time.now
76
+ else
77
+ # always start the ssh agent in Linux so we can disable strict host name
78
+ # checking, regardless of credentials.
79
+ ::RightScraper::Processes::SSHAgent.with do |agent|
80
+ agent.add_key(private_key) if private_key
113
81
  super
114
- end_time = ::Time.now
115
82
  end
83
+ end
84
+ true
85
+ end
116
86
 
117
- # TEAL FIX: the use of blackwinter-git has defeated the logic that
118
- # ensured the max bytes was not exceeded during checkout. we will need
119
- # to replace blackwinter-git in future but in the interim our only
120
- # solution is to warn the user after the checkout has completed that we
121
- # are going to restrict their repo size/time in an upcoming release.
122
- if size_limit_exceeded?
123
- message =
124
- "The size of the downloaded repository exceeded a soft limit of" +
125
- " #{@max_bytes / (1024 * 1024)} MB. This will become a hard limit" +
126
- " in an upcoming release. You may avoid retrieval failure by" +
127
- " moving some of your files to seperate repositories."
128
- @logger.note_warning(message)
129
- end
130
- if @max_seconds && (end_time >= start_time + @max_seconds)
131
- message =
132
- "The time to download the repository exceeded a soft limit of" +
133
- " #{@max_seconds} seconds. This will become a hard limit" +
134
- " in an upcoming release. You may avoid retrieval failure by" +
135
- " moving some of your files to seperate repositories."
136
- @logger.note_warning(message)
137
- end
138
- true
87
+ # Return true if a checkout exists. Currently tests for .git in
88
+ # the checkout.
89
+ #
90
+ # === Returns ===
91
+ # Boolean:: true if the checkout already exists (and thus
92
+ # incremental updating can occur).
93
+ def exists?
94
+ File.exists?(File.join(@repo_dir, '.git'))
95
+ end
96
+
97
+ # Determines if the remote SHA/tag/branch referenced by the repository
98
+ # differs from what appears on disk.
99
+ #
100
+ # @return [TrueClass|FalseClass] true if changed
101
+ def remote_differs?
102
+ remote_sha = nil
103
+ current_sha = nil
104
+ git_repo = git_repo_for(@repo_dir)
105
+ without_size_limit(git_repo) do
106
+ do_fetch(git_repo)
107
+ revision = resolve_revision
108
+ remote_name = validate_revision(git_repo, revision)
109
+ remote_sha = git_repo.sha_for(remote_name ? remote_name : revision)
110
+ current_sha = git_repo.sha_for(nil)
139
111
  end
112
+ current_sha != remote_sha
113
+ end
140
114
 
141
- # Determines if total size of files created by child process has exceeded
142
- # the limit specified, if any.
143
- #
144
- # === Return
145
- # @return [TrueClass|FalseClass] true if size limit exceeded
146
- def size_limit_exceeded?
147
- exceeded = false
148
- if @max_bytes
149
- globbie = ::File.join(@repo_dir, '**/*')
150
- size = 0
151
- ::Dir.glob(globbie) do |f|
152
- size += ::File.stat(f).size rescue 0 if ::File.file?(f)
153
- if size > @max_bytes
154
- exceeded = true
155
- break
156
- end
157
- end
115
+ # Implements CheckoutBase#do_checkout
116
+ def do_checkout
117
+ git_repo = @logger.operation(:cloning, "to #{@repo_dir}") do
118
+ without_host_key_checking do
119
+ ::RightGit::Git::Repository.clone_to(
120
+ @repository.url,
121
+ @repo_dir,
122
+ :logger => git_repo_logger,
123
+ :shell => git_repo_shell)
158
124
  end
159
- exceeded
160
125
  end
126
+ do_fetch(git_repo)
127
+ do_checkout_revision(git_repo)
128
+ internal_update_tag(git_repo)
129
+ true
130
+ end
161
131
 
162
- # Return true if a checkout exists. Currently tests for .git in
163
- # the checkout.
164
- #
165
- # === Returns ===
166
- # Boolean:: true if the checkout already exists (and thus
167
- # incremental updating can occur).
168
- def exists?
169
- File.exists?(File.join(@repo_dir, '.git'))
132
+ # Implements CheckoutBase#do_update
133
+ def do_update
134
+ # note that a recent fetch was performed by remote_differs? and even if
135
+ # remotes have changed again in the brief interim it would invalidate
136
+ # the decisions already made if we refetched now.
137
+ git_repo = git_repo_for(@repo_dir)
138
+ @logger.operation(:cleanup, "ensure no untracked files in #{@repo_dir}") do
139
+ git_repo.hard_reset_to(nil)
140
+ do_clean_all(git_repo)
170
141
  end
142
+ do_checkout_revision(git_repo)
143
+ do_clean_all(git_repo) # clean again once we are on requested revision
144
+ internal_update_tag(git_repo)
145
+ true
146
+ end
171
147
 
172
- def do_fetch(git)
173
- @logger.operation(:fetch) do
174
- git.tags.each {|tag| git.lib.tag(['-d', tag.name])}
175
- git.fetch(['--all', '--prune', '--tags'])
176
- end
148
+ # Implements CheckoutBase#do_update_tag
149
+ def do_update_tag
150
+ git_repo = git_repo_for(@repo_dir)
151
+ without_size_limit(git_repo) do
152
+ internal_update_tag(git_repo)
177
153
  end
154
+ true
155
+ end
178
156
 
179
- # Incrementally update the checkout. The operations are as follows:
180
- # * checkout #tag
181
- # * if #tag is the head of a branch:
182
- # * find that branch's remote
183
- # * fetch it
184
- # * merge changes
185
- # * update @repository#tag
186
- # Note that if #tag is a SHA revision or a tag that exists in the
187
- # current repository, no fetching is done.
188
- def do_update
189
- git = ::Git.open(@repo_dir)
190
- do_fetch(git)
191
- @logger.operation(:cleanup, "ensure no untracked files in #{@repo_dir}") do
192
- git.reset_hard
193
- Dir.chdir(@repo_dir) do
194
- # ignore outcome; there is no way to record 'warnings'
195
- system("git clean -f")
196
- end
197
- end
198
- do_checkout_revision(git)
199
- do_update_tag(git)
157
+ private
158
+
159
+ DEFAULT_BRANCH_NAME = 'master'
160
+
161
+ def git_repo_for(dir)
162
+ ::RightGit::Git::Repository.new(
163
+ dir,
164
+ :logger => git_repo_logger,
165
+ :shell => git_repo_shell)
166
+ end
167
+
168
+ def git_repo_logger
169
+ # note that info-level logging is normally suppressed by scraper so git
170
+ # repo won't log anything but warnings and errors unless logger is made
171
+ # verbose.
172
+ @logger
173
+ end
174
+
175
+ def git_repo_shell
176
+ @git_repo_shell ||= ::RightScraper::Processes::Shell.new(
177
+ :logger => git_repo_logger,
178
+ :initial_directory => repo_dir,
179
+ :max_bytes => max_bytes,
180
+ :max_seconds => max_seconds,
181
+ :watch_directory => repo_dir)
182
+ end
183
+
184
+ def internal_update_tag(git_repo)
185
+ @repository = @repository.clone
186
+ @repository.tag = git_repo.sha_for(nil)
187
+ true
188
+ end
189
+
190
+ def do_checkout_revision(git_repo)
191
+ @logger.operation(:checkout_revision) do
192
+ revision = resolve_revision
193
+ remote_name = validate_revision(git_repo, revision)
194
+ git_repo.checkout_to(revision, :force => true)
195
+ git_repo.hard_reset_to(remote_name) if remote_name
196
+
197
+ # initialize/update submodules based on current SHA.
198
+ #
199
+ # TEAL FIX: there is no support for checking-out same branch/tag in
200
+ # the submodule(s) but this could be an advanced feature.
201
+ git_repo.update_submodules(:recursive => true)
200
202
  end
203
+ true
204
+ end
201
205
 
202
- def do_update_tag(git)
203
- @repository = @repository.clone
204
- @repository.tag = git.gtree("HEAD").sha
206
+ def do_fetch(git_repo)
207
+ @logger.operation(:fetch) do
208
+ # delete local tags, which may or may not still exist on remote.
209
+ git_repo.tags.each do |tag|
210
+ git_args = ['tag', '-d', tag.name]
211
+ git_repo.spit_output(git_args)
212
+ end
213
+ git_repo.fetch_all(:prune => true)
205
214
  end
215
+ end
206
216
 
207
- # Clone the remote repository. The operations are as follows:
208
- # * clone repository to @repo_dir
209
- # * checkout #tag
210
- # * update @repository#tag
211
- def do_checkout
212
- super
213
- git = @logger.operation(:cloning, "to #{@repo_dir}") do
214
- without_host_key_checking do
215
- ::Git.clone(@repository.url, @repo_dir)
216
- end
217
+ # Cleans anything that is currently untracked in the repo directory and
218
+ # any submodules. the idea is to prevent untracked items interfering with
219
+ # the normal behavior that would result if checkout were always to a clean
220
+ # directory. just switching between branches and updating submodules can
221
+ # leave untracked artifacts that affect behavior.
222
+ def do_clean_all(git_repo)
223
+ old_initial_directory = git_repo.repo_dir
224
+ clean_all_options = {
225
+ :directories => true,
226
+ :gitignored => true,
227
+ :submodules => true
228
+ }
229
+ relative_paths = [
230
+ '.',
231
+ git_repo.submodule_paths(:recursive => true)
232
+ ].flatten
233
+ relative_paths.each do |relative_path|
234
+ subdir_path = ::File.expand_path(::File.join(@repo_dir, relative_path))
235
+ if ::File.directory?(subdir_path)
236
+ # reuse shell with any watch parameters already set but vary the
237
+ # initial directory for each submodule.
238
+ git_repo.shell.initial_directory = subdir_path
239
+ git_repo.clean_all(clean_all_options)
217
240
  end
218
- do_fetch(git)
219
- do_checkout_revision(git)
220
- do_update_tag git
221
241
  end
242
+ true
243
+ rescue ::RightGit::RightGitError => e
244
+ @logger.note_warning(e.message)
245
+ false
246
+ ensure
247
+ git_repo.shell.initial_directory = old_initial_directory
248
+ end
249
+
250
+ def resolve_revision
251
+ revision = @repository.tag.to_s.strip
252
+ revision.empty? ? DEFAULT_BRANCH_NAME : revision.shellescape
253
+ end
222
254
 
223
- def do_checkout_revision(git)
224
- @logger.operation(:checkout_revision) do
225
- case
226
- when tag?(git, repo_tag) && branch?(git, repo_tag) then
227
- raise "Ambiguous reference: '#{repo_tag}' denotes both a branch and a tag"
228
- when branch = find_remote_branch(git, repo_tag) then
229
- branch.checkout
230
- when branch = find_local_branch(git, repo_tag) then
231
- branch.checkout
255
+ # Validates the given revision string to ensure it is safe and sane before
256
+ # attempting to use it.
257
+ #
258
+ # @param [::RightGit::Git::Repository] git_repo for validation
259
+ # @param [String] revision for validation
260
+ #
261
+ # @return [String] remote_name (for branch reset) or nil
262
+ #
263
+ # @raise [RetrieverError] on validation failure
264
+ def validate_revision(git_repo, revision)
265
+ branches = git_repo.branches(:all => true)
266
+ local_branches = branches.local
267
+ remote_branches = branches.remote
268
+ by_name = lambda { |item| item.name == revision }
269
+
270
+ # determine if revision is a tag.
271
+ remote_name = nil
272
+ if git_repo.tags.any?(&by_name)
273
+ if remote_branches.any?(&by_name)
274
+ # note that git has some resolution scheme for ambiguous SHA, tag,
275
+ # branch names but we do not support ambiguity.
276
+ raise RetrieverError, "Ambiguous name is both a remote branch and a tag: #{revision.inspect}"
277
+ elsif local_branches.any?(&by_name)
278
+ # odd corner case of a name that once was a remote branch (now
279
+ # deleted) that has become a tag instead. the user is not exactly
280
+ # at fault here (aside from being indecisive) so let's attempt to
281
+ # clean up after him. try switching to another local branch
282
+ # (i.e. master) and then deleting the obsolete local branch.
283
+ error_message = "Ambiguous name is both a local branch and a tag: #{revision.inspect}"
284
+ if revision == DEFAULT_BRANCH_NAME
285
+ # Darwin Awards winner; scraping with a tag named 'master' :@
286
+ raise RetrieverError, error_message
232
287
  else
233
- git.checkout(repo_tag)
288
+ begin
289
+ # checkout master and delete obsolete local branch.
290
+ git_repo.checkout_to(DEFAULT_BRANCH_NAME, :force => true)
291
+ git_repo.spit_output("branch -D #{revision}")
292
+ rescue ::RightGit::RightGitError
293
+ # ignore failed attempt to recover; raise original error.
294
+ raise RetrieverError, error_message
295
+ end
234
296
  end
235
- end if repo_tag
297
+ end
298
+ else
299
+ # not a tag; SHA or branch.
300
+ #
301
+ # note that we could try to trivially determine if revision was a
302
+ # SHA by matching the SHA1 pattern except that:
303
+ # 1) git accepts partial SHAs so long as they uniquely distinguish
304
+ # a commit for checkout.
305
+ # 2) a branch or tag name could match the SHA pattern (i.e.
306
+ # 40 hexadecimal characters) with no warnings from git. git will
307
+ # even allow a user to use a SHA as a tag name when that SHA
308
+ # exists (and may represent a different commit).
309
+ # confusing tags with SHAs should be universally discouraged but we
310
+ # need to be flexible here.
311
+ #
312
+ # a local branch may no longer exist remotely or may be behind or
313
+ # have diverged from remote branch. handle all cases.
314
+ remotes = remote_branches.select(&by_name)
315
+ if remotes.size > 1
316
+ # multiple remote branches exist (from different origins); branch
317
+ # name is ambiguous.
318
+ raise RetrieverError, "Ambiguous remote branch name: #{revision.inspect}"
319
+ elsif remotes.size == 1
320
+ # a remote branch exists.
321
+ remote_name = remotes.first.fullname
322
+ elsif local_branches.any?(&by_name)
323
+ # local branch only; failure due to missing remote branch.
324
+ #
325
+ # note that obsolete local branches are not supported by retrieval
326
+ # only because it would give the user a false positive.
327
+ raise RetrieverError, "Missing remote branch: #{revision.inspect}."
328
+ end # else a full or partial SHA or unknown revision
236
329
  end
330
+ remote_name
331
+ end
237
332
 
238
- # Ignore .git directories.
239
- def ignorable_paths
240
- ['.git']
333
+ # Temporarily disables checking the size of the repo_dir against the
334
+ # configured size limit. This permits performing git queries against a repo
335
+ # on disk that would normally exceed the size limit if it hadn't already
336
+ # been fully checked out in the past. If a repo has been scraped in the past
337
+ # and does not have any new commits, then it is acceptable even if it
338
+ # would exceed the current size limit.
339
+ def without_size_limit(git_repo)
340
+ old_max_bytes = git_repo.shell.max_bytes
341
+ begin
342
+ git_repo.shell.max_bytes = nil
343
+ yield
344
+ ensure
345
+ git_repo.shell.max_bytes = old_max_bytes
241
346
  end
347
+ end
242
348
 
243
- def tag?(git, name)
244
- git.tags.find {|t| t.name == name}
349
+ # Temporarily disable SSH host-key checking for SSH clients invoked by Git, for the duration of the
350
+ # block that is passed to this method.
351
+ #
352
+ # @yield after disabling strict host key checking, yields to caller
353
+ def without_host_key_checking(&callback)
354
+ if is_windows?
355
+ without_host_key_checking_windows(&callback)
356
+ else
357
+ without_host_key_checking_linux(&callback)
245
358
  end
359
+ end
246
360
 
247
- def branch?(git, name)
248
- git.branches.find {|t| t.name == name}
249
- end
361
+ # Temporarily disable SSH host-key checking for SSH clients invoked by Git, for the duration of the
362
+ # block that is passed to this method.
363
+ #
364
+ # @yield after disabling strict host key checking, yields to caller
365
+ def without_host_key_checking_linux
366
+ tmpdir = ::Dir.mktmpdir
367
+ ssh_cmd = ::File.join(tmpdir, 'ssh')
250
368
 
251
- def repo_tag
252
- name = (@repository.tag || "master").chomp
253
- name = "master" if name.empty?
254
- name
369
+ ::File.open(ssh_cmd, 'w') do |cmd|
370
+ cmd.puts "#!/bin/bash"
371
+ cmd.puts "exec ssh -o StrictHostKeyChecking=no ${@}"
255
372
  end
373
+ ::FileUtils.chmod(0700, ssh_cmd)
256
374
 
257
- def find_branch(git, tag)
258
- find_local_branch(git, tag) || find_remote_branch(git, tag)
259
- end
375
+ old_env = ::ENV['GIT_SSH']
376
+ ::ENV['GIT_SSH'] = ssh_cmd
377
+ yield
378
+ ensure
379
+ ::FileUtils.rm_rf(tmpdir)
380
+ ::ENV['GIT_SSH'] = old_env
381
+ end
260
382
 
261
- def find_local_branch(git, name)
262
- git.branches.local.find {|b| b.name == name}
263
- end
383
+ # The "ssh.exe" that comes with msysgit doesn't appear to configure things
384
+ # properly under Windows (or it does for SYSTEM account but not user
385
+ # accounts) when disabling strict hostname checking. We can instead
386
+ # temporarily create/replace the "%USERPROFILE%\.ssh\config" file to disable
387
+ # checking for all hostnames.
388
+ #
389
+ # @yield after disabling strict host key checking, yields to caller
390
+ def without_host_key_checking_windows(&callback)
391
+ config_path = ::File.expand_path(::File.join(home_dir_windows, '.ssh', 'config'))
392
+ config_text = <<EOF
393
+ Host *
394
+ StrictHostKeyChecking no
395
+ EOF
396
+ with_replaced_file(config_path, config_text, &callback)
397
+ end
264
398
 
265
- def find_remote_branch(git, name)
266
- git.branches.remote.find {|b| b.name == name}
267
- end
399
+ def with_private_key_windows(private_key, &callback)
400
+ private_key_path = ::File.expand_path(::File.join(home_dir_windows, '.ssh', 'id_rsa'))
401
+ with_replaced_file(private_key_path, private_key, &callback)
402
+ end
268
403
 
269
- # Temporarily disable SSH host-key checking for SSH clients invoked by Git, for the duration of the
270
- # block that is passed to this method.
271
- #
272
- # @yield after disabling strict host key checking, yields to caller
273
- def without_host_key_checking
274
- tmpdir = Dir.mktmpdir
275
- ssh_cmd = File.join(tmpdir, 'ssh')
276
-
277
- File.open(ssh_cmd, 'w') do |cmd|
278
- cmd.puts "#!/bin/bash"
279
- cmd.puts "exec ssh -o StrictHostKeyChecking=no ${@}"
404
+ # Utility for replacing a file temporarily within a scope and ensuring it is
405
+ # restored afterward.
406
+ #
407
+ # @param [String] filepath to replace
408
+ # @param [String] contents to substitute
409
+ #
410
+ # @yield after replacing file
411
+ def with_replaced_file(filepath, contents)
412
+ ::Dir.mktmpdir do |temp_dir|
413
+ begin
414
+ temp_path = ::File.join(temp_dir, ::File.basename(filepath))
415
+ ::FileUtils.mkdir_p(::File.dirname(filepath))
416
+ if ::File.file?(filepath)
417
+ ::FileUtils.mv(filepath, temp_path, :force => true)
418
+ end
419
+ ::File.open(filepath, 'w') { |f| f.write(contents) }
420
+ yield
421
+ ensure
422
+ begin
423
+ if ::File.file?(temp_path)
424
+ ::FileUtils.mv(temp_path, filepath, :force => true)
425
+ elsif ::File.file?(filepath)
426
+ ::File.unlink(filepath)
427
+ end
428
+ rescue ::Exception => e
429
+ @logger.note_warning("Failed to restore #{filepath.inspect}: #{e.message}")
430
+ end
280
431
  end
281
- FileUtils.chmod(0700, ssh_cmd)
282
-
283
- old_env = ENV['GIT_SSH']
284
- ENV['GIT_SSH'] = ssh_cmd
285
-
286
- result = yield
287
- ensure
288
- FileUtils.rm_rf(tmpdir)
289
- ENV['GIT_SSH'] = old_env
432
+ end
433
+ end
290
434
 
291
- result
435
+ # @return default location for git-related configuration files.
436
+ def home_dir_windows
437
+ home_dir = ::ENV['USERPROFILE']
438
+ unless home_dir && ::File.directory?(home_dir)
439
+ raise RetrieverError, "Invalid USERPROFILE directory: #{home_dir.inspect}"
292
440
  end
441
+ home_dir
442
+ end
443
+
444
+ # @return [TrueClass|FalseClass] true if running on Windows
445
+ def is_windows?
446
+ !!(RUBY_PLATFORM =~ /mswin|win32|dos|mingw|cygwin/)
293
447
  end
294
448
  end
295
449
  end