right_scraper 1.0.26 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. data/Gemfile +16 -0
  2. data/README.rdoc +9 -28
  3. data/Rakefile +51 -39
  4. data/lib/right_scraper/builders/base.rb +64 -0
  5. data/lib/right_scraper/builders/filesystem.rb +96 -0
  6. data/lib/right_scraper/builders/union.rb +57 -0
  7. data/lib/right_scraper/logger.rb +102 -0
  8. data/lib/right_scraper/loggers/noisy.rb +85 -0
  9. data/lib/right_scraper/processes/ssh.rb +188 -0
  10. data/lib/right_scraper/repositories/base.rb +299 -0
  11. data/lib/right_scraper/repositories/download.rb +90 -0
  12. data/lib/right_scraper/repositories/git.rb +92 -0
  13. data/lib/right_scraper/repositories/mock.rb +70 -0
  14. data/lib/right_scraper/repositories/svn.rb +96 -0
  15. data/lib/right_scraper/resources/base.rb +70 -0
  16. data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
  17. data/lib/right_scraper/resources/workflow.rb +55 -0
  18. data/lib/right_scraper/retrievers/base.rb +114 -0
  19. data/lib/right_scraper/retrievers/checkout.rb +79 -0
  20. data/lib/right_scraper/retrievers/download.rb +97 -0
  21. data/lib/right_scraper/retrievers/git.rb +140 -0
  22. data/lib/right_scraper/retrievers/svn.rb +87 -0
  23. data/lib/right_scraper/scanners/base.rb +111 -0
  24. data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
  25. data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
  26. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
  27. data/lib/right_scraper/scanners/union.rb +89 -0
  28. data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
  29. data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
  30. data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
  31. data/lib/right_scraper/scraper.rb +81 -57
  32. data/lib/right_scraper/scraper_logger.rb +61 -0
  33. data/lib/right_scraper/scrapers/base.rb +262 -0
  34. data/lib/right_scraper/scrapers/cookbook.rb +73 -0
  35. data/lib/right_scraper/scrapers/workflow.rb +88 -0
  36. data/lib/right_scraper/svn_client.rb +101 -0
  37. data/lib/right_scraper/version.rb +28 -0
  38. data/lib/right_scraper.rb +35 -11
  39. data/right_scraper.gemspec +26 -13
  40. data/right_scraper.rconf +13 -0
  41. data/spec/builder_spec.rb +50 -0
  42. data/spec/cookbook_helper.rb +73 -0
  43. data/spec/cookbook_manifest_spec.rb +55 -0
  44. data/spec/cookbook_s3_upload_spec.rb +152 -0
  45. data/spec/download/download_retriever_spec.rb +118 -0
  46. data/spec/download/download_retriever_spec_helper.rb +72 -0
  47. data/spec/download/download_spec.rb +130 -0
  48. data/spec/download/multi_dir_spec.rb +106 -0
  49. data/spec/download/multi_dir_spec_helper.rb +40 -0
  50. data/spec/git/cookbook_spec.rb +166 -0
  51. data/spec/git/demokey +27 -0
  52. data/spec/git/demokey.pub +1 -0
  53. data/spec/git/password_key +30 -0
  54. data/spec/git/password_key.pub +1 -0
  55. data/spec/git/repository_spec.rb +110 -0
  56. data/spec/git/retriever_spec.rb +505 -0
  57. data/spec/git/retriever_spec_helper.rb +112 -0
  58. data/spec/git/scraper_spec.rb +136 -0
  59. data/spec/git/ssh_spec.rb +170 -0
  60. data/spec/git/url_spec.rb +103 -0
  61. data/spec/logger_spec.rb +185 -0
  62. data/spec/repository_spec.rb +89 -23
  63. data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
  64. data/spec/scanner_spec.rb +61 -0
  65. data/spec/scraper_helper.rb +96 -0
  66. data/spec/scraper_spec.rb +123 -45
  67. data/spec/spec_helper.rb +87 -14
  68. data/spec/svn/cookbook_spec.rb +97 -0
  69. data/spec/svn/multi_svn_spec.rb +64 -0
  70. data/spec/svn/multi_svn_spec_helper.rb +40 -0
  71. data/spec/svn/repository_spec.rb +72 -0
  72. data/spec/svn/retriever_spec.rb +261 -0
  73. data/spec/svn/scraper_spec.rb +90 -0
  74. data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
  75. data/spec/svn/url_spec.rb +47 -0
  76. data/spec/url_spec.rb +164 -0
  77. metadata +203 -31
  78. data/lib/right_scraper/linux/process_monitor.rb +0 -84
  79. data/lib/right_scraper/repository.rb +0 -78
  80. data/lib/right_scraper/scraper_base.rb +0 -175
  81. data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
  82. data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
  83. data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
  84. data/lib/right_scraper/watcher.rb +0 -158
  85. data/lib/right_scraper/win32/process_monitor.rb +0 -98
  86. data/spec/download/download_scraper_spec.rb +0 -94
  87. data/spec/git/git_scraper_spec.rb +0 -165
  88. data/spec/git/git_scraper_spec_helper.rb +0 -72
  89. data/spec/rcov.opts +0 -1
  90. data/spec/spec.opts +0 -2
  91. data/spec/svn/svn_scraper_spec.rb +0 -148
  92. data/spec/watcher_spec.rb +0 -74
@@ -1,84 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- # *nix specific watcher implementation
27
- class ProcessMonitor
28
- # Spawn given process and callback given block with output and exit code. This method
29
- # accepts a variable number of parameters; the first param is always the command to
30
- # run; successive parameters are command-line arguments for the process.
31
- #
32
- # === Parameters
33
- # cmd(String):: Name of the command to run
34
- # arg1(String):: Optional, first command-line argument
35
- # arg2(String):: Optional, second command-line argument
36
- # ...
37
- # argN(String):: Optional, Nth command-line argument
38
- #
39
- # === Block
40
- # Given block should take one argument which is a hash which may contain
41
- # the keys :output and :exit_code. The value associated with :output is a chunk
42
- # of output while the value associated with :exit_code is the process exit code
43
- # This block won't be called anymore once the :exit_code key has associated value
44
- #
45
- # === Return
46
- # pid(Integer):: Spawned process pid
47
- def spawn(cmd, *args)
48
- args = args.map { |a| a.to_s } #exec only likes string arguments
49
-
50
- #Run subprocess; capture its output using a pipe
51
- pr, pw = IO::pipe
52
- @pid = fork do
53
- pr.close
54
- STDIN.reopen(File.open('/dev/null', 'r'))
55
- STDOUT.reopen(pw)
56
- STDERR.reopen(pw)
57
- exec(cmd, *args)
58
- end
59
-
60
- #Monitor subprocess output and status in a dedicated thread
61
- pw.close
62
- @io = pr
63
- @reader = Thread.new do
64
- until @io.eof?
65
- yield(:output => @io.read)
66
- end
67
- Process.wait(@pid)
68
- yield(:exit_code => $?.exitstatus)
69
- end
70
-
71
- return @pid
72
- end
73
-
74
- # Close io and join reader thread
75
- #
76
- # === Return
77
- # true:: Always return true
78
- def cleanup
79
- @reader.join
80
- @io.close
81
- end
82
-
83
- end
84
- end
@@ -1,78 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- # Description of remote repository that needs to be scraped.
27
- class Repository
28
-
29
- # (String) Human readable repository name used for progress reports
30
- attr_accessor :display_name
31
-
32
- # (String) One of 'git', 'svn' or 'download'
33
- attr_accessor :repo_type
34
-
35
- # (String) URL to repository (e.g 'git://github.com/rightscale/right_scraper.git')
36
- attr_accessor :url
37
-
38
- # (String) Optional, tag or branch of repository that should be downloaded
39
- # Not used for 'download' repositories
40
- attr_accessor :tag
41
-
42
- # (Array) List of directories containing cookbooks in repository
43
- # Root directory is used if this is nil or empty
44
- attr_accessor :cookbooks_path
45
-
46
- # (String) Optional, SVN username or git private SSH key content
47
- attr_accessor :first_credential
48
-
49
- # (String) Optional, SVN password
50
- attr_accessor :second_credential
51
-
52
- # Initialize repository from given hash
53
- # Hash keys should correspond to attributes of this class
54
- #
55
- # === Parameters
56
- # opts(Hash):: Hash to be converted into a RightScale::Repository instance
57
- #
58
- # === Return
59
- # repo(RightScale::Repository):: Resulting repository instance
60
- def self.from_hash(opts)
61
- repo = RightScale::Repository.new
62
- opts.each do |k, v|
63
- repo.__send__("#{k.to_s}=".to_sym, v)
64
- end
65
- repo
66
- end
67
-
68
- # Unique representation for this repo, should resolve to the same string
69
- # for repos that should be cloned in same directory
70
- #
71
- # === Returns
72
- # res(String):: Unique representation for this repo
73
- def to_s
74
- res = "#{repo_type} #{url}:#{tag}"
75
- end
76
- end
77
-
78
- end
@@ -1,175 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- require 'digest/md5'
25
-
26
- module RightScale
27
-
28
- # Base class for all scrapers.
29
- # Actual scraper implementation should override scrape_imp and optionally incremental_update?
30
- class ScraperBase
31
-
32
- # (String) Path to directory containing all scraped repositories
33
- attr_accessor :root_dir
34
-
35
- # (RightScale::Repository) Last scraped repository
36
- attr_reader :repo
37
-
38
- # (Array) Error messages if any
39
- attr_reader :errors
40
-
41
- # (String) Path to local directory where repository was downloaded
42
- attr_reader :current_repo_dir
43
-
44
- # Set path to directory containing all scraped repos as well as space and time upperbounds
45
- #
46
- # === Parameters
47
- # root_dir(String):: Path to scraped repos parent directory
48
- # max_bytes(Integer):: Maximum size allowed for repos, -1 for no limit (default)
49
- # max_seconds(Integer):: Maximum number of seconds a single scrape operation should take, -1 for no limit (default)
50
- def initialize(root_dir, max_bytes, max_seconds)
51
- @root_dir = root_dir
52
- @watcher = Watcher.new(max_bytes, max_seconds)
53
- end
54
-
55
- # Common implementation of scrape method for all repository types.
56
- # Each scraper implementation should override scrape_imp which is called
57
- # after this method initializes all the scraper attributes properly.
58
- # See RightScale::Scraper#scrape
59
- def scrape(repo, incremental=true, &callback)
60
- @repo = repo
61
- @callback = callback
62
- @current_repo_dir = ScraperBase.repo_dir(root_dir, repo)
63
- @scrape_dir_path = File.expand_path(File.join(@current_repo_dir, '..'))
64
- @incremental = incremental && incremental_update?
65
- @errors = []
66
- FileUtils.rm_rf(@current_repo_dir) unless @incremental
67
- scrape_imp
68
- true
69
- end
70
-
71
- # Path to directory where given repo should be or was downloaded
72
- #
73
- # === Parameters
74
- # root_dir(String):: Path to directory containing all scraped repositories
75
- # repo(Hash|RightScale::Repository):: Remote repository corresponding to local directory
76
- #
77
- # === Return
78
- # repo_dir(String):: Path to local directory that corresponds to given repository
79
- def self.repo_dir(root_dir, repo)
80
- repo = Repository.from_hash(repo) if repo.is_a?(Hash)
81
- dir_name = Digest::MD5.hexdigest(repo.to_s)
82
- dir_path = File.join(root_dir, dir_name)
83
- repo_dir = "#{dir_path}/repo"
84
- end
85
-
86
- # Was last call to scrape successful?
87
- # Call errors to get error messages if false
88
- #
89
- # === Return
90
- # succeeded(TrueClass|FalseClass):: true if scrape finished with no error, false otherwise.
91
- def succeeded?
92
- succeeded = @errors.nil? || @errors.size == 0
93
- end
94
-
95
- protected
96
-
97
- # Check whether it is possible to perform an incremental update of the repo
98
- #
99
- # === Return
100
- # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
101
- # incremental updates
102
- # false:: Otherwise
103
- def incremental_update?
104
- false # Incremental updates not supported by default
105
- end
106
-
107
- # Override this method with scraper specific implementation in descendants
108
- #
109
- # === Return
110
- # true:: Always return true
111
- def scrape_imp
112
- raise "Method not implemented"
113
- end
114
-
115
- # Update state of scraper according to status returned by watcher
116
- #
117
- # === Parameters
118
- # res(RightScale::WatchResult):: Watcher status to be analyzed
119
- # msg_title(String):: Error message title in case of failure
120
- # ok_codes:: Successful process return codes, only 0 by default
121
- #
122
- # === Return
123
- # true:: Always return true
124
- def handle_watcher_result(res, msg_title, ok_codes=[0])
125
- if res.status == :timeout
126
- @errors << "#{msg_title} is taking more time than #{@watcher.max_seconds / 60} minutes, aborting..."
127
- FileUtils.rm_rf(@current_repo_dir)
128
- elsif res.status == :size_exceeded
129
- @errors << "#{msg_title} is taking more space than #{@watcher.max_bytes / 1048576} MB, aborting..."
130
- FileUtils.rm_rf(@current_repo_dir)
131
- elsif !ok_codes.include?(res.exit_code)
132
- if @incremental
133
- @callback.call("#{msg_title} failed: #{res.output}, reverting to non incremental update", is_step=false) if @callback
134
- FileUtils.rm_rf(@current_repo_dir)
135
- @incremental = false
136
- else
137
- @errors << "#{msg_title} failed: #{res.output}"
138
- end
139
- end
140
- true
141
- end
142
-
143
- # Spawn given process, wait for it to complete, and return its output. The exit status
144
- # of the process is available in the $? global. Functions similarly to the backtick
145
- # operator, only it avoids invoking the command interpreter under operating systems
146
- # that support fork-and-exec.
147
- #
148
- # This method accepts a variable number of parameters; the first param is always the
149
- # command to run; successive parameters are command-line arguments for the process.
150
- #
151
- # === Parameters
152
- # cmd(String):: Name of the command to run
153
- # arg1(String):: Optional, first command-line argument
154
- # arg2(String):: Optional, second command-line argument
155
- # ...
156
- # argN(String):: Optional, Nth command-line argument
157
- #
158
- # === Return
159
- # output(String):: The process' output
160
- def run(cmd, *args)
161
- pm = ProcessMonitor.new
162
- output = StringIO.new
163
-
164
- pm.spawn(cmd, *args) do |options|
165
- output << options[:output] if options[:output]
166
- end
167
-
168
- pm.cleanup
169
- output.close
170
- output = output.string
171
- return output
172
- end
173
-
174
- end
175
- end
@@ -1,67 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- class DownloadScraper < ScraperBase
27
-
28
- # Download and expand remote repository, see RightScale::ScraperBase#scrape
29
- #
30
- # === Return
31
- # true:: Always return true
32
- def scrape_imp
33
- msg = "Downloading repository '#{@repo.display_name}'"
34
- @callback.call(msg, is_step=true) if @callback
35
- filename = @repo.url.split('/').last
36
-
37
- if @repo.first_credential && @repo.second_credential &&
38
- !@repo.first_credential.strip.empty? && !@repo.second_credential.strip.empty?
39
- user_opt = ['--user', "#{@repo.first_credential}:#{@repo.second_credential}"]
40
- else
41
- user_opt = []
42
- end
43
-
44
- args = ['--fail', '--silent', '--show-error', '--insecure', '--location']
45
- args += user_opt
46
- args += ['--output', "#{@current_repo_dir}/#{filename}", @repo.url]
47
-
48
- FileUtils.mkdir_p(@current_repo_dir)
49
- res = @watcher.launch_and_watch('curl', args, @current_repo_dir)
50
- handle_watcher_result(res, 'Download')
51
- if succeeded?
52
- unzip_opt = case @repo.url[/\.(.*)$/]
53
- when 'bzip', 'bzip2', 'bz2' then 'j'
54
- when 'tgz', 'gzip', 'gz' then 'z'
55
- else ''
56
- end
57
- Dir.chdir(@current_repo_dir) do
58
- res = run('tar', "x#{unzip_opt}f", filename)
59
- @errors << res unless $?.success?
60
- File.delete(filename)
61
- end
62
- end
63
- true
64
- end
65
-
66
- end
67
- end
@@ -1,283 +0,0 @@
1
- #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
3
- #
4
- # Permission is hereby granted, free of charge, to any person obtaining
5
- # a copy of this software and associated documentation files (the
6
- # 'Software'), to deal in the Software without restriction, including
7
- # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to
10
- # the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be
13
- # included in all copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
- #++
23
-
24
- module RightScale
25
-
26
- class GitScraper < ScraperBase
27
-
28
- # Check whether it is possible to perform an incremental update of the repo
29
- #
30
- # === Return
31
- # true:: Scrape directory contains files belonging to the scraped repo and protocol supports
32
- # incremental updates
33
- # false:: Otherwise
34
- def incremental_update?
35
- # FIX: current version of msysgit crashes attempting "git pull" on 64-bit
36
- # servers. we will avoid incremental for now in hopes of getting a fix for
37
- # msysgit or else a native Windows implementation such as Git#
38
- return false if (is_windows? || !File.directory?(@current_repo_dir))
39
- Dir.chdir(@current_repo_dir) do
40
- remote_url = run('git', 'config', '--get', 'remote.origin.url').chomp
41
- $?.success? && remote_url == @repo.url
42
- end
43
- end
44
-
45
- # Scrape git repository, see RightScale::ScraperBase#scrape
46
- #
47
- # === Return
48
- # true:: Always return true
49
- def scrape_imp
50
- msg = @incremental ? 'Pulling ' : 'Cloning '
51
- msg += "git repository '#{@repo.display_name}'"
52
- @callback.call(msg, is_step=true) if @callback
53
- @ssh_cmd = ssh_command
54
- is_tag = is_branch = on_branch = nil
55
- has_tag = !@repo.tag.nil? && !@repo.tag.empty?
56
-
57
- if @incremental
58
- checkout = false
59
- Dir.chdir(@current_repo_dir) do
60
- if has_tag
61
- analysis = analyze_repo_tag
62
- if succeeded?
63
- is_tag = analysis[:tag]
64
- is_branch = analysis[:branch]
65
- on_branch = analysis[:on_branch]
66
- checkout = is_tag && !is_branch
67
- if is_tag && is_branch
68
- @errors << 'Repository tag ambiguous: could be git tag or git branch'
69
- elsif !is_tag && !is_branch
70
- current_sha = run('git', 'rev-parse', 'HEAD').chomp
71
- if current_sha == @repo.tag
72
- @callback.call("Nothing to update: already using #{@repo.tag}", is_step=false) if @callback
73
- return true
74
- else
75
- # Probably a SHA, retrieve all commits
76
- git_fetch(:depth => 2**31 - 1)
77
- checkout = true
78
- end
79
- end
80
- if succeeded?
81
- if checkout
82
- git_checkout(@repo.tag)
83
- else
84
- git_checkout(@repo.tag) if is_branch && !on_branch
85
- git_fetch(:depth => 1, :merge => true, :remote_tag => @repo.tag)
86
- end
87
- end
88
- end
89
- else
90
- git_fetch(:depth => 1, :merge => true)
91
- end
92
- end
93
- end
94
-
95
- if !@incremental && succeeded?
96
- args = ['clone', '--quiet', '--depth', '1', @repo.url, @current_repo_dir]
97
- ENV['GIT_SSH'] = @ssh_cmd
98
- res = @watcher.launch_and_watch('git', args, @current_repo_dir)
99
- ENV['GIT_SSH'] = nil
100
- handle_watcher_result(res, 'git clone')
101
- if has_tag && succeeded?
102
- Dir.chdir(@current_repo_dir) do
103
- if is_tag.nil?
104
- analysis = analyze_repo_tag
105
- is_tag = analysis[:tag]
106
- is_branch = analysis[:branch]
107
- on_branch = analysis[:on_branch]
108
- end
109
- if succeeded?
110
- if is_tag && is_branch
111
- @errors << 'Repository tag ambiguous: could be git tag or git branch'
112
- elsif is_branch
113
- if !on_branch
114
- output = run('git', 'branch', @repo.tag, "origin/#{repo.tag}")
115
- @errors << output unless $?.success?
116
- end
117
- elsif !is_tag # Not a branch nor a tag, SHA ref? fetch everything so we have all SHAs
118
- git_fetch(:depth => 2**31 -1)
119
- end
120
- if succeeded? && !on_branch
121
- git_checkout(@repo.tag)
122
- end
123
- end
124
- end
125
- end
126
- end
127
- true
128
- end
129
-
130
- # Default SSH options used with git
131
- DEFAULT_SSH_OPTIONS = { :PasswordAuthentication => 'no',
132
- :HostbasedAuthentication => 'no',
133
- :StrictHostKeyChecking => 'no',
134
- :IdentitiesOnly => 'yes' }
135
-
136
- # SSH options command line built from default options and given custom options
137
- #
138
- # === Parameters
139
- # opts(Hash):: Custom options
140
- #
141
- # === Return
142
- # options(String):: SSH command line options
143
- def ssh_options(opts={})
144
- opts = DEFAULT_SSH_OPTIONS.merge(opts || {})
145
- options = opts.inject('') { |o, (k, v)| o << "#{k.to_s}=#{v}\n" }
146
- end
147
-
148
- # Store private SSH key into temporary folder and create temporary script
149
- # that wraps SSH and uses this key.
150
- #
151
- # === Return
152
- # ssh(String):: Code to initialize GIT_SSH environment variable with path to SSH wrapper script
153
- def ssh_command
154
- return win32_ssh_command if is_windows?
155
- ssh_dir = File.join(@scrape_dir_path, '.ssh')
156
- FileUtils.mkdir_p(ssh_dir)
157
- key_content = @repo.first_credential
158
- if key_content.nil?
159
- # Explicitly disable public key authentication so we don't end up using the system's key
160
- options = { :PubkeyAuthentication => 'no' }
161
- else
162
- ssh_key_path = File.join(ssh_dir, 'id_rsa')
163
- File.open(ssh_key_path, 'w') { |f| f.puts(key_content) }
164
- File.chmod(0600, ssh_key_path)
165
- options = { :IdentityFile => ssh_key_path }
166
- end
167
- ssh_config = File.join(ssh_dir, 'ssh_config')
168
- File.open(ssh_config, 'w') { |f| f.puts(ssh_options(options)) }
169
- ssh = File.join(ssh_dir, 'ssh')
170
- File.open(ssh, 'w') { |f| f.puts("ssh -F #{ssh_config} $*") }
171
- File.chmod(0755, ssh)
172
-
173
- return ssh
174
- end
175
-
176
- # Prepare SSH for git on Windows
177
- # The GIT_SSH trick doesn't seem to work on Windows, instead actually
178
- # save the private key in the user ssh folder.
179
- # Note: This will override any pre-existing SSH key that was on the system
180
- #
181
- # === Return
182
- # '':: Always return an empty string
183
- #
184
- # === Raise
185
- # Exception:: If the USERPROFILE environment variable is not set
186
- def win32_ssh_command
187
- key_content = @repo.first_credential
188
- unless key_content.nil?
189
- # resolve key file path.
190
- raise 'Environment variable USERPROFILE is missing' unless ENV['USERPROFILE']
191
- user_profile_dir_path = ENV['USERPROFILE']
192
- ssh_keys_dir = File.join(user_profile_dir_path, '.ssh')
193
- FileUtils.mkdir_p(ssh_keys_dir) unless File.directory?(ssh_keys_dir)
194
- ssh_key_file_path = File.join(ssh_keys_dir, 'id_rsa')
195
-
196
- # (re)create key file. must overwrite any existing credentials in case
197
- # we are switching repositories and have different credentials for each.
198
- File.open(ssh_key_file_path, 'w') { |f| f.puts(key_content) }
199
-
200
- # we need to create the "known_hosts" file or else the process will
201
- # halt in windows waiting for a yes/no response to the unknown
202
- # git host. this is normally handled by specifying
203
- # "-o StrictHostKeyChecking=no" in the GIT_SSH executable, but it is
204
- # still a mystery why this doesn't work properly in windows.
205
- # so make a ssh call which creates the proper "known_hosts" file.
206
- run('ssh', '-o', 'StrictHostKeyChecking=no', repo.url.split(':').first)
207
- end
208
- return ''
209
- end
210
-
211
- # Fetch remote commits using given depth
212
- # Check size of repo and time it takes to retrieve commits
213
- # Update errors collection upon failure (check for succeeded? after call)
214
- # Note: Assume that current working directory is a git directory
215
- #
216
- # === Parameters
217
- # opts[:depth](Integer):: Git fetch depth argument, full fetch if not set
218
- # opts[:merge]:: Do a pull if set
219
- # opts[:remote_tag]:: Remote ref to use, use default if not specified
220
- #
221
- # === Return
222
- # true:: Always return true
223
- def git_fetch(opts={})
224
- depth = opts[:depth] || 2**31 - 1 # Specify max to override depth of already cloned repo
225
- remote = opts[:remote_tag]
226
- remote = 'master' if remote.nil? || remote.rstrip.empty?
227
- action = (opts[:merge] ? 'pull' : 'fetch')
228
- args = [action, '--tags', '--depth', depth, 'origin', remote]
229
- ENV['GIT_SSH'] = @ssh_cmd
230
- res = @watcher.launch_and_watch('git', args, @current_repo_dir)
231
- ENV['GIT_SSH'] = nil
232
- handle_watcher_result(res, "git #{action}")
233
- end
234
-
235
- # Does a git checkout to given tag
236
- # Update errors collection upon failure (check for succeeded? after call)
237
- # Note: Assume that current working directory is a git directory
238
- #
239
- # === Parameters
240
- # tag(String):: Tag to checkout
241
- #
242
- # === Return
243
- # output(String):: Output of git command
244
- def git_checkout(tag)
245
- output = run('git', 'checkout', tag)
246
- @errors << output unless $?.success?
247
- output
248
- end
249
-
250
- # Analyze repository tag to detect whether it's a branch, a tag or neither (i.e. SHA ref)
251
- # Also detect whether the branch is already checked out
252
- # Update errors collection upon failure (check for succeeded? after call)
253
- # Note: Assume that current working directory is a git directory
254
- #
255
- # === Return
256
- # res(Hash)::
257
- # - res[:tag]:: true if git repo has a tag with a name corresponding to the repository tag
258
- # - res[:branch]:: true if git repo has a branch with a name corresponding to the repository tag
259
- # - res[:on_branch]:: true if branch is already checked out
260
- def analyze_repo_tag
261
- is_tag = is_branch = on_branch = nil
262
- begin
263
- is_tag = run('git', 'tag').split("\n").include?(@repo.tag)
264
- is_branch = run('git', 'branch', '-r').split("\n").map { |t| t.strip }.include?("origin/#{@repo.tag}")
265
- on_branch = is_branch && !!run('git', 'branch').split("\n").include?("* #{@repo.tag}")
266
- rescue Exception => e
267
- @errors << "Analysis of repository tag failed with: #{e.message}"
268
- end
269
- res = { :tag => is_tag, :branch => is_branch, :on_branch => on_branch }
270
- end
271
-
272
- private
273
-
274
- # Check for windows.
275
- #
276
- # === Return
277
- #
278
- def is_windows?
279
- return !!(RUBY_PLATFORM =~ /mswin/)
280
- end
281
-
282
- end
283
- end