right_scraper 1.0.26 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. data/Gemfile +16 -0
  2. data/README.rdoc +9 -28
  3. data/Rakefile +51 -39
  4. data/lib/right_scraper/builders/base.rb +64 -0
  5. data/lib/right_scraper/builders/filesystem.rb +96 -0
  6. data/lib/right_scraper/builders/union.rb +57 -0
  7. data/lib/right_scraper/logger.rb +102 -0
  8. data/lib/right_scraper/loggers/noisy.rb +85 -0
  9. data/lib/right_scraper/processes/ssh.rb +188 -0
  10. data/lib/right_scraper/repositories/base.rb +299 -0
  11. data/lib/right_scraper/repositories/download.rb +90 -0
  12. data/lib/right_scraper/repositories/git.rb +92 -0
  13. data/lib/right_scraper/repositories/mock.rb +70 -0
  14. data/lib/right_scraper/repositories/svn.rb +96 -0
  15. data/lib/right_scraper/resources/base.rb +70 -0
  16. data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
  17. data/lib/right_scraper/resources/workflow.rb +55 -0
  18. data/lib/right_scraper/retrievers/base.rb +114 -0
  19. data/lib/right_scraper/retrievers/checkout.rb +79 -0
  20. data/lib/right_scraper/retrievers/download.rb +97 -0
  21. data/lib/right_scraper/retrievers/git.rb +140 -0
  22. data/lib/right_scraper/retrievers/svn.rb +87 -0
  23. data/lib/right_scraper/scanners/base.rb +111 -0
  24. data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
  25. data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
  26. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
  27. data/lib/right_scraper/scanners/union.rb +89 -0
  28. data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
  29. data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
  30. data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
  31. data/lib/right_scraper/scraper.rb +81 -57
  32. data/lib/right_scraper/scraper_logger.rb +61 -0
  33. data/lib/right_scraper/scrapers/base.rb +262 -0
  34. data/lib/right_scraper/scrapers/cookbook.rb +73 -0
  35. data/lib/right_scraper/scrapers/workflow.rb +88 -0
  36. data/lib/right_scraper/svn_client.rb +101 -0
  37. data/lib/right_scraper/version.rb +28 -0
  38. data/lib/right_scraper.rb +35 -11
  39. data/right_scraper.gemspec +26 -13
  40. data/right_scraper.rconf +13 -0
  41. data/spec/builder_spec.rb +50 -0
  42. data/spec/cookbook_helper.rb +73 -0
  43. data/spec/cookbook_manifest_spec.rb +55 -0
  44. data/spec/cookbook_s3_upload_spec.rb +152 -0
  45. data/spec/download/download_retriever_spec.rb +118 -0
  46. data/spec/download/download_retriever_spec_helper.rb +72 -0
  47. data/spec/download/download_spec.rb +130 -0
  48. data/spec/download/multi_dir_spec.rb +106 -0
  49. data/spec/download/multi_dir_spec_helper.rb +40 -0
  50. data/spec/git/cookbook_spec.rb +166 -0
  51. data/spec/git/demokey +27 -0
  52. data/spec/git/demokey.pub +1 -0
  53. data/spec/git/password_key +30 -0
  54. data/spec/git/password_key.pub +1 -0
  55. data/spec/git/repository_spec.rb +110 -0
  56. data/spec/git/retriever_spec.rb +505 -0
  57. data/spec/git/retriever_spec_helper.rb +112 -0
  58. data/spec/git/scraper_spec.rb +136 -0
  59. data/spec/git/ssh_spec.rb +170 -0
  60. data/spec/git/url_spec.rb +103 -0
  61. data/spec/logger_spec.rb +185 -0
  62. data/spec/repository_spec.rb +89 -23
  63. data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
  64. data/spec/scanner_spec.rb +61 -0
  65. data/spec/scraper_helper.rb +96 -0
  66. data/spec/scraper_spec.rb +123 -45
  67. data/spec/spec_helper.rb +87 -14
  68. data/spec/svn/cookbook_spec.rb +97 -0
  69. data/spec/svn/multi_svn_spec.rb +64 -0
  70. data/spec/svn/multi_svn_spec_helper.rb +40 -0
  71. data/spec/svn/repository_spec.rb +72 -0
  72. data/spec/svn/retriever_spec.rb +261 -0
  73. data/spec/svn/scraper_spec.rb +90 -0
  74. data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
  75. data/spec/svn/url_spec.rb +47 -0
  76. data/spec/url_spec.rb +164 -0
  77. metadata +203 -31
  78. data/lib/right_scraper/linux/process_monitor.rb +0 -84
  79. data/lib/right_scraper/repository.rb +0 -78
  80. data/lib/right_scraper/scraper_base.rb +0 -175
  81. data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
  82. data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
  83. data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
  84. data/lib/right_scraper/watcher.rb +0 -158
  85. data/lib/right_scraper/win32/process_monitor.rb +0 -98
  86. data/spec/download/download_scraper_spec.rb +0 -94
  87. data/spec/git/git_scraper_spec.rb +0 -165
  88. data/spec/git/git_scraper_spec_helper.rb +0 -72
  89. data/spec/rcov.opts +0 -1
  90. data/spec/spec.opts +0 -2
  91. data/spec/svn/svn_scraper_spec.rb +0 -148
  92. data/spec/watcher_spec.rb +0 -74
@@ -1,84 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- # *nix specific watcher implementation
27
- class ProcessMonitor
28
- # Spawn given process and callback given block with output and exit code. This method
29
- # accepts a variable number of parameters; the first param is always the command to
30
- # run; successive parameters are command-line arguments for the process.
31
- #
32
- # === Parameters
33
- # cmd(String):: Name of the command to run
34
- # arg1(String):: Optional, first command-line argument
35
- # arg2(String):: Optional, second command-line argument
36
- # ...
37
- # argN(String):: Optional, Nth command-line argument
38
- #
39
- # === Block
40
- # Given block should take one argument which is a hash which may contain
41
- # the keys :output and :exit_code. The value associated with :output is a chunk
42
- # of output while the value associated with :exit_code is the process exit code
43
- # This block won't be called anymore once the :exit_code key has associated value
44
- #
45
- # === Return
46
- # pid(Integer):: Spawned process pid
47
- def spawn(cmd, *args)
48
- args = args.map { |a| a.to_s } #exec only likes string arguments
49
-
50
- #Run subprocess; capture its output using a pipe
51
- pr, pw = IO::pipe
52
- @pid = fork do
53
- pr.close
54
- STDIN.reopen(File.open('/dev/null', 'r'))
55
- STDOUT.reopen(pw)
56
- STDERR.reopen(pw)
57
- exec(cmd, *args)
58
- end
59
-
60
- #Monitor subprocess output and status in a dedicated thread
61
- pw.close
62
- @io = pr
63
- @reader = Thread.new do
64
- until @io.eof?
65
- yield(:output => @io.read)
66
- end
67
- Process.wait(@pid)
68
- yield(:exit_code => $?.exitstatus)
69
- end
70
-
71
- return @pid
72
- end
73
-
74
- # Close io and join reader thread
75
- #
76
- # === Return
77
- # true:: Always return true
78
- def cleanup
79
- @reader.join
80
- @io.close
81
- end
82
-
83
- end
84
- end
@@ -1,78 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- # Description of remote repository that needs to be scraped.
27
- class Repository
28
-
29
- # (String) Human readable repository name used for progress reports
30
- attr_accessor :display_name
31
-
32
- # (String) One of 'git', 'svn' or 'download'
33
- attr_accessor :repo_type
34
-
35
- # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
36
- attr_accessor :url
37
-
38
- # (String) Optional, tag or branch of repository that should be downloaded
39
- # Not used for 'download' repositories
40
- attr_accessor :tag
41
-
42
- # (Array) List of directories containing cookbooks in repository
43
- # Root directory is used if this is nil or empty
44
- attr_accessor :cookbooks_path
45
-
46
- # (String) Optional, SVN username or git private SSH key content
47
- attr_accessor :first_credential
48
-
49
- # (String) Optional, SVN password
50
- attr_accessor :second_credential
51
-
52
- # Initialize repository from given hash
53
- # Hash keys should correspond to attributes of this class
54
- #
55
- # === Parameters
56
- # opts(Hash):: Hash to be converted into a RightScale::Repository instance
57
- #
58
- # === Return
59
- # repo(RightScale::Repository):: Resulting repository instance
60
- def self.from_hash(opts)
61
- repo = RightScale::Repository.new
62
- opts.each do |k, v|
63
- repo.__send__("#{k.to_s}=".to_sym, v)
64
- end
65
- repo
66
- end
67
-
68
- # Unique representation for this repo, should resolve to the same string
69
- # for repos that should be cloned in same directory
70
- #
71
- # === Returns
72
- # res(String):: Unique representation for this repo
73
- def to_s
74
- res = "#{repo_type} #{url}:#{tag}"
75
- end
76
- end
77
-
78
- end
@@ -1,175 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- require 'digest/md5'
25
-
26
- module RightScale
27
-
28
- # Base class for all scrapers.
29
- # Actual scraper implementation should override scrape_imp and optionally incremental_update?
30
- class ScraperBase
31
-
32
- # (String) Path to directory containing all scraped repositories
33
- attr_accessor :root_dir
34
-
35
- # (RightScale::Repository) Last scraped repository
36
- attr_reader :repo
37
-
38
- # (Array) Error messages if any
39
- attr_reader :errors
40
-
41
- # (String) Path to local directory where repository was downloaded
42
- attr_reader :current_repo_dir
43
-
44
- # Set path to directory containing all scraped repos as well as space and time upperbounds
45
- #
46
- # === Parameters
47
- # root_dir(String):: Path to scraped repos parent directory
48
- # max_bytes(Integer):: Maximum size allowed for repos, -1 for no limit (default)
49
- # max_seconds(Integer):: Maximum number of seconds a single scrape operation should take, -1 for no limit (default)
50
- def initialize(root_dir, max_bytes, max_seconds)
51
- @root_dir = root_dir
52
- @watcher = Watcher.new(max_bytes, max_seconds)
53
- end
54
-
55
- # Common implementation of scrape method for all repository types.
56
- # Each scraper implementation should override scrape_imp which is called
57
- # after this method initializes all the scraper attributes properly.
58
- # See RightScale::Scraper#scrape
59
- def scrape(repo, incremental=true, &callback)
60
- @repo = repo
61
- @callback = callback
62
- @current_repo_dir = ScraperBase.repo_dir(root_dir, repo)
63
- @scrape_dir_path = File.expand_path(File.join(@current_repo_dir, '..'))
64
- @incremental = incremental && incremental_update?
65
- @errors = []
66
- FileUtils.rm_rf(@current_repo_dir) unless @incremental
67
- scrape_imp
68
- true
69
- end
70
-
71
- # Path to directory where given repo should be or was downloaded
72
- #
73
- # === Parameters
74
- # root_dir(String):: Path to directory containing all scraped repositories
75
- # repo(Hash|RightScale::Repository):: Remote repository corresponding to local directory
76
- #
77
- # === Return
78
- # repo_dir(String):: Path to local directory that corresponds to given repository
79
- def self.repo_dir(root_dir, repo)
80
- repo = Repository.from_hash(repo) if repo.is_a?(Hash)
81
- dir_name = Digest::MD5.hexdigest(repo.to_s)
82
- dir_path = File.join(root_dir, dir_name)
83
- repo_dir = "#{dir_path}/repo"
84
- end
85
-
86
- # Was last call to scrape successful?
87
- # Call errors to get error messages if false
88
- #
89
- # === Return
90
- # succeeded(TrueClass|FalseClass):: true if scrape finished with no error, false otherwise.
91
- def succeeded?
92
- succeeded = @errors.nil? || @errors.size == 0
93
- end
94
-
95
- protected
96
-
97
- # Check whether it is possible to perform an incremental update of the repo
98
- #
99
- # === Return
100
- # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
101
- # incremental updates
102
- # false:: Otherwise
103
- def incremental_update?
104
- false # Incremental updates not supported by default
105
- end
106
-
107
- # Override this method with scraper specific implementation in descendants
108
- #
109
- # === Return
110
- # true:: Always return true
111
- def scrape_imp
112
- raise "Method not implemented"
113
- end
114
-
115
- # Update state of scraper according to status returned by watcher
116
- #
117
- # === Parameters
118
- # res(RightScale::WatchResult):: Watcher status to be analyzed
119
- # msg_title(String):: Error message title in case of failure
120
- # ok_codes:: Successful process return codes, only 0 by default
121
- #
122
- # === Return
123
- # true:: Always return true
124
- def handle_watcher_result(res, msg_title, ok_codes=[0])
125
- if res.status == :timeout
126
- @errors << "#{msg_title} is taking more time than #{@watcher.max_seconds / 60} minutes, aborting..."
127
- FileUtils.rm_rf(@current_repo_dir)
128
- elsif res.status == :size_exceeded
129
- @errors << "#{msg_title} is taking more space than #{@watcher.max_bytes / 1048576} MB, aborting..."
130
- FileUtils.rm_rf(@current_repo_dir)
131
- elsif !ok_codes.include?(res.exit_code)
132
- if @incremental
133
- @callback.call("#{msg_title} failed: #{res.output}, reverting to non incremental update", is_step=false) if @callback
134
- FileUtils.rm_rf(@current_repo_dir)
135
- @incremental = false
136
- else
137
- @errors << "#{msg_title} failed: #{res.output}"
138
- end
139
- end
140
- true
141
- end
142
-
143
- # Spawn given process, wait for it to complete, and return its output. The exit status
144
- # of the process is available in the $? global. Functions similarly to the backtick
145
- # operator, only it avoids invoking the command interpreter under operating systems
146
- # that support fork-and-exec.
147
- #
148
- # This method accepts a variable number of parameters; the first param is always the
149
- # command to run; successive parameters are command-line arguments for the process.
150
- #
151
- # === Parameters
152
- # cmd(String):: Name of the command to run
153
- # arg1(String):: Optional, first command-line argument
154
- # arg2(String):: Optional, second command-line argument
155
- # ...
156
- # argN(String):: Optional, Nth command-line argument
157
- #
158
- # === Return
159
- # output(String):: The process' output
160
- def run(cmd, *args)
161
- pm = ProcessMonitor.new
162
- output = StringIO.new
163
-
164
- pm.spawn(cmd, *args) do |options|
165
- output << options[:output] if options[:output]
166
- end
167
-
168
- pm.cleanup
169
- output.close
170
- output = output.string
171
- return output
172
- end
173
-
174
- end
175
- end
@@ -1,67 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- class DownloadScraper < ScraperBase
27
-
28
- # Download and expand remote repository, see RightScale::ScraperBase#scrape
29
- #
30
- # === Return
31
- # true:: Always return true
32
- def scrape_imp
33
- msg = "Downloading repository '#{@repo.display_name}'"
34
- @callback.call(msg, is_step=true) if @callback
35
- filename = @repo.url.split('/').last
36
-
37
- if @repo.first_credential && @repo.second_credential &&
38
- !@repo.first_credential.strip.empty? && !@repo.second_credential.strip.empty?
39
- user_opt = ['--user', "#{@repo.first_credential}:#{@repo.second_credential}"]
40
- else
41
- user_opt = []
42
- end
43
-
44
- args = ['--fail', '--silent', '--show-error', '--insecure', '--location']
45
- args += user_opt
46
- args += ['--output', "#{@current_repo_dir}/#{filename}", @repo.url]
47
-
48
- FileUtils.mkdir_p(@current_repo_dir)
49
- res = @watcher.launch_and_watch('curl', args, @current_repo_dir)
50
- handle_watcher_result(res, 'Download')
51
- if succeeded?
52
- unzip_opt = case @repo.url[/\.(.*)$/]
53
- when 'bzip', 'bzip2', 'bz2' then 'j'
54
- when 'tgz', 'gzip', 'gz' then 'z'
55
- else ''
56
- end
57
- Dir.chdir(@current_repo_dir) do
58
- res = run('tar', "x#{unzip_opt}f", filename)
59
- @errors << res unless $?.success?
60
- File.delete(filename)
61
- end
62
- end
63
- true
64
- end
65
-
66
- end
67
- end
@@ -1,283 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- class GitScraper < ScraperBase
27
-
28
- # Check whether it is possible to perform an incremental update of the repo
29
- #
30
- # === Return
31
- # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
32
- # incremental updates
33
- # false:: Otherwise
34
- def incremental_update?
35
- # FIX: current version of msysgit crashes attempting "git pull" on 64-bit
36
- # servers. we will avoid incremental for now in hopes of getting a fix for
37
- # msysgit or else a native Windows implementation such as Git#
38
- return false if (is_windows? || !File.directory?(@current_repo_dir))
39
- Dir.chdir(@current_repo_dir) do
40
- remote_url = run('git', 'config', '--get', 'remote.origin.url').chomp
41
- $?.success? && remote_url == @repo.url
42
- end
43
- end
44
-
45
- # Scrape git repository, see RightScale::ScraperBase#scrape
46
- #
47
- # === Return
48
- # true:: Always return true
49
- def scrape_imp
50
- msg = @incremental ? 'Pulling ' : 'Cloning '
51
- msg += "git repository '#{@repo.display_name}'"
52
- @callback.call(msg, is_step=true) if @callback
53
- @ssh_cmd = ssh_command
54
- is_tag = is_branch = on_branch = nil
55
- has_tag = !@repo.tag.nil? && !@repo.tag.empty?
56
-
57
- if @incremental
58
- checkout = false
59
- Dir.chdir(@current_repo_dir) do
60
- if has_tag
61
- analysis = analyze_repo_tag
62
- if succeeded?
63
- is_tag = analysis[:tag]
64
- is_branch = analysis[:branch]
65
- on_branch = analysis[:on_branch]
66
- checkout = is_tag && !is_branch
67
- if is_tag && is_branch
68
- @errors << 'Repository tag ambiguous: could be git tag or git branch'
69
- elsif !is_tag && !is_branch
70
- current_sha = run('git', 'rev-parse', 'HEAD').chomp
71
- if current_sha == @repo.tag
72
- @callback.call("Nothing to update: already using #{@repo.tag}", is_step=false) if @callback
73
- return true
74
- else
75
- # Probably a SHA, retrieve all commits
76
- git_fetch(:depth => 2**31 - 1)
77
- checkout = true
78
- end
79
- end
80
- if succeeded?
81
- if checkout
82
- git_checkout(@repo.tag)
83
- else
84
- git_checkout(@repo.tag) if is_branch && !on_branch
85
- git_fetch(:depth => 1, :merge => true, :remote_tag => @repo.tag)
86
- end
87
- end
88
- end
89
- else
90
- git_fetch(:depth => 1, :merge => true)
91
- end
92
- end
93
- end
94
-
95
- if !@incremental && succeeded?
96
- args = ['clone', '--quiet', '--depth', '1', @repo.url, @current_repo_dir]
97
- ENV['GIT_SSH'] = @ssh_cmd
98
- res = @watcher.launch_and_watch('git', args, @current_repo_dir)
99
- ENV['GIT_SSH'] = nil
100
- handle_watcher_result(res, 'git clone')
101
- if has_tag && succeeded?
102
- Dir.chdir(@current_repo_dir) do
103
- if is_tag.nil?
104
- analysis = analyze_repo_tag
105
- is_tag = analysis[:tag]
106
- is_branch = analysis[:branch]
107
- on_branch = analysis[:on_branch]
108
- end
109
- if succeeded?
110
- if is_tag && is_branch
111
- @errors << 'Repository tag ambiguous: could be git tag or git branch'
112
- elsif is_branch
113
- if !on_branch
114
- output = run('git', 'branch', @repo.tag, "origin/#{repo.tag}")
115
- @errors << output unless $?.success?
116
- end
117
- elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
118
- git_fetch(:depth => 2**31 -1)
119
- end
120
- if succeeded? && !on_branch
121
- git_checkout(@repo.tag)
122
- end
123
- end
124
- end
125
- end
126
- end
127
- true
128
- end
129
-
130
- # Default SSH options used with git
131
- DEFAULT_SSH_OPTIONS = { :PasswordAuthentication => 'no',
132
- :HostbasedAuthentication => 'no',
133
- :StrictHostKeyChecking => 'no',
134
- :IdentitiesOnly => 'yes' }
135
-
136
- # SSH options command line built from default options and given custom options
137
- #
138
- # === Parameters
139
- # opts(Hash):: Custom options
140
- #
141
- # === Return
142
- # options(String):: SSH command line options
143
- def ssh_options(opts={})
144
- opts = DEFAULT_SSH_OPTIONS.merge(opts || {})
145
- options = opts.inject('') { |o, (k, v)| o << "#{k.to_s}=#{v}\n" }
146
- end
147
-
148
- # Store private SSH key into temporary folder and create temporary script
149
- # that wraps SSH and uses this key.
150
- #
151
- # === Return
152
- # ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
153
- def ssh_command
154
- return win32_ssh_command if is_windows?
155
- ssh_dir = File.join(@scrape_dir_path, '.ssh')
156
- FileUtils.mkdir_p(ssh_dir)
157
- key_content = @repo.first_credential
158
- if key_content.nil?
159
- # Explicitly disable public key authentication so we don't end up using the system's key
160
- options = { :PubkeyAuthentication => 'no' }
161
- else
162
- ssh_key_path = File.join(ssh_dir, 'id_rsa')
163
- File.open(ssh_key_path, 'w') { |f| f.puts(key_content) }
164
- File.chmod(0600, ssh_key_path)
165
- options = { :IdentityFile => ssh_key_path }
166
- end
167
- ssh_config = File.join(ssh_dir, 'ssh_config')
168
- File.open(ssh_config, 'w') { |f| f.puts(ssh_options(options)) }
169
- ssh = File.join(ssh_dir, 'ssh')
170
- File.open(ssh, 'w') { |f| f.puts("ssh -F #{ssh_config} $*") }
171
- File.chmod(0755, ssh)
172
-
173
- return ssh
174
- end
175
-
176
- # Prepare SSH for git on Windows
177
- # The GIT_SSH trick doesn't seem to work on Windows, instead actually
178
- # save the private key in the user ssh folder.
179
- # Note: This will override any pre-existing SSH key that was on the system
180
- #
181
- # === Return
182
- # '':: Always return an empty string
183
- #
184
- # === Raise
185
- # Exception:: If the USERPROFILE environment variable is not set
186
- def win32_ssh_command
187
- key_content = @repo.first_credential
188
- unless key_content.nil?
189
- # resolve key file path.
190
- raise 'Environment variable USERPROFILE is missing' unless ENV['USERPROFILE']
191
- user_profile_dir_path = ENV['USERPROFILE']
192
- ssh_keys_dir = File.join(user_profile_dir_path, '.ssh')
193
- FileUtils.mkdir_p(ssh_keys_dir) unless File.directory?(ssh_keys_dir)
194
- ssh_key_file_path = File.join(ssh_keys_dir, 'id_rsa')
195
-
196
- # (re)create key file. must overwrite any existing credentials in case
197
- # we are switching repositories and have different credentials for each.
198
- File.open(ssh_key_file_path, 'w') { |f| f.puts(key_content) }
199
-
200
- # we need to create the "known_hosts" file or else the process will
201
- # halt in windows waiting for a yes/no response to the unknown
202
- # git host. this is normally handled by specifying
203
- # "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
204
- # still a mystery why this doesn't work properly in windows.
205
- # so make a ssh call which creates the proper "known_hosts" file.
206
- run('ssh', '-o', 'StrictHostKeyChecking=no', repo.url.split(':').first)
207
- end
208
- return ''
209
- end
210
-
211
- # Fetch remote commits using given depth
212
- # Check size of repo and time it takes to retrieve commits
213
- # Update errors collection upon failure (check for succeeded? after call)
214
- # Note: Assume that current working directory is a git directory
215
- #
216
- # === Parameters
217
- # opts[:depth](Integer):: Git fetch depth argument, full fetch if not set
218
- # opts[:merge]:: Do a pull if set
219
- # opts[:remote_tag]:: Remote ref to use, use default if not specified
220
- #
221
- # === Return
222
- # true:: Always return true
223
- def git_fetch(opts={})
224
- depth = opts[:depth] || 2**31 - 1 # Specify max to override depth of already cloned repo
225
- remote = opts[:remote_tag]
226
- remote = 'master' if remote.nil? || remote.rstrip.empty?
227
- action = (opts[:merge] ? 'pull' : 'fetch')
228
- args = [action, '--tags', '--depth', depth, 'origin', remote]
229
- ENV['GIT_SSH'] = @ssh_cmd
230
- res = @watcher.launch_and_watch('git', args, @current_repo_dir)
231
- ENV['GIT_SSH'] = nil
232
- handle_watcher_result(res, "git #{action}")
233
- end
234
-
235
- # Does a git checkout to given tag
236
- # Update errors collection upon failure (check for succeeded? after call)
237
- # Note: Assume that current working directory is a git directory
238
- #
239
- # === Parameters
240
- # tag(String):: Tag to checkout
241
- #
242
- # === Return
243
- # output(String):: Output of git command
244
- def git_checkout(tag)
245
- output = run('git', 'checkout', tag)
246
- @errors << output unless $?.success?
247
- output
248
- end
249
-
250
- # Analyze repository tag to detect whether it's a branch, a tag or neither (i.e. SHA ref)
251
- # Also detect whether the branch is already checked out
252
- # Update errors collection upon failure (check for succeeded? after call)
253
- # Note: Assume that current working directory is a git directory
254
- #
255
- # === Return
256
- # res(Hash)::
257
- # - res[:tag]:: true if git repo has a tag with a name corresponding to the repository tag
258
- # - res[:branch]:: true if git repo has a branch with a name corresponding to the repository tag
259
- # - res[:on_branch]:: true if branch is already checked out
260
- def analyze_repo_tag
261
- is_tag = is_branch = on_branch = nil
262
- begin
263
- is_tag = run('git', 'tag').split("\n").include?(@repo.tag)
264
- is_branch = run('git', 'branch', '-r').split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
265
- on_branch = is_branch && !!run('git', 'branch').split("\n").include?("* #{@repo.tag}")
266
- rescue Exception => e
267
- @errors << "Analysis of repository tag failed with: #{e.message}"
268
- end
269
- res = { :tag => is_tag, :branch => is_branch, :on_branch => on_branch }
270
- end
271
-
272
- private
273
-
274
- # Check for windows.
275
- #
276
- # === Return
277
- #
278
- def is_windows?
279
- return !!(RUBY_PLATFORM =~ /mswin/)
280
- end
281
-
282
- end
283
- end