right_scraper 3.2.6 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -0,0 +1,178 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ # ancestor
25
+ require 'right_scraper/retrievers'
26
+
27
+ require 'fileutils'
28
+
29
+ module RightScraper::Retrievers
30
+
31
+ # Base class for retrievers that want to do version control operations
32
+ # (CVS, SVN, etc.). Subclasses can get away with implementing only
33
+ # Retrievers::Base#available? and #do_checkout but to support incremental
34
+ # operation need to implement #exists? and #do_update, in addition to
35
+ # Retrievers::Base#ignorable_paths.
36
+ class CheckoutBase < ::RightScraper::Retrievers::Base
37
+
38
+ # Attempts to update and then resorts to clean checkout for repository.
39
+ def retrieve
40
+ raise RetrieverError.new("retriever is unavailable") unless available?
41
+ updated = false
42
+ explanation = ''
43
+ if exists?
44
+ @logger.operation(:updating) do
45
+ # a retriever may be able to determine that the repo directory is
46
+ # already pointing to the same commit as the revision. in that case
47
+ # we can return quickly.
48
+ if remote_differs?
49
+ # there is no point in updating and failing the size check when the
50
+ # directory on disk already exceeds size limit; fall back to a clean
51
+ # checkout in hopes that the latest revision corrects the issue.
52
+ if size_limit_exceeded?
53
+ explanation = 'switching to checkout due to existing directory exceeding size limimt'
54
+ else
55
+ # attempt update.
56
+ begin
57
+ do_update
58
+ updated = true
59
+ rescue ::RightScraper::Processes::Shell::LimitError
60
+ # update exceeded a limitation; requires user intervention
61
+ raise
62
+ rescue Exception => e
63
+ # retry with clean checkout after discarding repo dir.
64
+ explanation = 'switching to checkout after unsuccessful update'
65
+ end
66
+ end
67
+ else
68
+ # no retrieval needed but warn exactly why we didn't do full
69
+ # checkout to avoid being challenged about it.
70
+ repo_ref = @repository.tag
71
+ do_update_tag
72
+ full_head_ref = @repository.tag
73
+ abbreviated_head_ref = full_head_ref[0..6]
74
+ if repo_ref == full_head_ref || repo_ref == abbreviated_head_ref
75
+ detail = abbreviated_head_ref
76
+ else
77
+ detail = "#{repo_ref} = #{abbreviated_head_ref}"
78
+ end
79
+ message =
80
+ "Skipped updating local directory due to the HEAD commit SHA " +
81
+ "on local matching the remote repository reference (#{detail})."
82
+ @logger.note_warning(message)
83
+ return false
84
+ end
85
+ end
86
+ end
87
+
88
+ # Clean checkout only if not updated.
89
+ unless updated
90
+ @logger.operation(:checkout, explanation) do
91
+ # remove any full or partial directory before attempting a clean
92
+ # checkout in case repo_dir is in a bad state.
93
+ if exists?
94
+ ::FileUtils.remove_entry_secure(@repo_dir)
95
+ end
96
+ ::FileUtils.mkdir_p(@repo_dir)
97
+ begin
98
+ do_checkout
99
+ rescue Exception
100
+ # clean checkout failed; repo directory is in an undetermined
101
+ # state and must be deleted to prevent a future update attempt.
102
+ if exists?
103
+ ::FileUtils.remove_entry_secure(@repo_dir) rescue nil
104
+ end
105
+ raise
106
+ end
107
+ end
108
+ end
109
+ true
110
+ end
111
+
112
+ # Return true if a checkout exists.
113
+ #
114
+ # === Returns
115
+ # Boolean:: true if the checkout already exists (and thus
116
+ # incremental updating can occur).
117
+ def exists?
118
+ false
119
+ end
120
+
121
+ # Determines if the remote SHA/tag/branch referenced by the repository
122
+ # differs from what appears on disk, if possible. Not all retrievers will
123
+ # have this capability. If not, the retriever should default to returning
124
+ # true to indicate that the remote is changed.
125
+ #
126
+ # @return [TrueClass|FalseClass] true if changed
127
+ def remote_differs?
128
+ true
129
+ end
130
+
131
+ # Determines if total size of files in repo_dir has exceeded size limit.
132
+ #
133
+ # === Return
134
+ # @return [TrueClass|FalseClass] true if size limit exceeded
135
+ def size_limit_exceeded?
136
+ if @max_bytes
137
+ # note that Dir.glob ignores hidden directories (e.g. ".git") so the
138
+ # size total correctly excludes those hidden contents that are not to
139
+ # be uploaded after scrape. this may cause the on-disk directory size
140
+ # to far exceed the upload size.
141
+ globbie = ::File.join(@repo_dir, '**/*')
142
+ size = 0
143
+ ::Dir.glob(globbie) do |f|
144
+ size += ::File.stat(f).size rescue 0 if ::File.file?(f)
145
+ break if size > @max_bytes
146
+ end
147
+ size > @max_bytes
148
+ else
149
+ false
150
+ end
151
+ end
152
+
153
+ # Perform a de novo full checkout of the repository. Subclasses
154
+ # must override this to do anything useful.
155
+ #
156
+ # @return [TrueClass] always true
157
+ def do_checkout
158
+ raise NotImplementedError
159
+ end
160
+
161
+ # Perform an incremental update of the checkout. Subclasses that
162
+ # want to handle incremental updating need to override this.
163
+ #
164
+ # @return [TrueClass] always true
165
+ def do_update
166
+ raise NotImplementedError
167
+ end
168
+
169
+ # Updates the tag of the repository associated with this retriever to refer
170
+ # to the HEAD commit (SHA) on disk after retrieval.
171
+ #
172
+ # @return [TrueClass] always true
173
+ def do_update_tag
174
+ raise NotImplementedError
175
+ end
176
+
177
+ end
178
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,68 +21,110 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # ancestor
25
+ require 'right_scraper/retrievers'
26
+
27
+ require 'fileutils'
24
28
  require 'tempfile'
25
29
  require 'digest/sha1'
26
30
  require 'right_popen'
27
31
  require 'right_popen/safe_output_buffer'
28
32
 
29
- module RightScraper
30
- module Retrievers
31
- # A retriever for resources stored in archives on a web server
32
- # somewhere. Uses command line curl and command line tar.
33
- class Download < Base
33
+ module RightScraper::Retrievers
34
34
 
35
- class DownloadError < Exception; end
35
+ # A retriever for resources stored in archives on a web server
36
+ # somewhere. Uses command line curl and command line tar.
37
+ class Download < ::RightScraper::Retrievers::Base
36
38
 
37
- @@available = false
39
+ class DownloadError < Exception; end
38
40
 
39
- # Determines if downloader is available.
40
- def available?
41
- unless @@available
42
- begin
43
- # FIX: we might want to parse the result and require a minimum curl
44
- # version.
45
- cmd = "curl --version"
46
- `#{cmd}`
47
- if $?.success?
48
- @@available = true
49
- else
50
- raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
51
- end
52
- rescue
53
- @logger.note_error($!, :available, "download retriever is unavailable")
41
+ @@available = false
42
+
43
+ # Determines if downloader is available.
44
+ def available?
45
+ unless @@available
46
+ begin
47
+ # FIX: we might want to parse the result and require a minimum curl
48
+ # version.
49
+ cmd = "curl --version"
50
+ `#{cmd}`
51
+ if $?.success?
52
+ @@available = true
53
+ else
54
+ raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
54
55
  end
56
+ rescue
57
+ @logger.note_error($!, :available, "download retriever is unavailable")
55
58
  end
56
- @@available
57
59
  end
60
+ @@available
61
+ end
62
+
63
+ # Directory used to download tarballs
64
+ def workdir
65
+ @workdir ||= ::File.join(::File.dirname(@repo_dir), 'download')
66
+ end
58
67
 
59
- # Directory used to download tarballs
60
- def workdir
61
- @workdir ||= ::File.join(::File.dirname(@repo_dir), 'download')
68
+ # Download tarball and unpack it
69
+ def retrieve
70
+ raise RetrieverError.new("download retriever is unavailable") unless available?
71
+ ::FileUtils.remove_entry_secure @repo_dir if File.exists?(@repo_dir)
72
+ ::FileUtils.remove_entry_secure workdir if File.exists?(workdir)
73
+ ::FileUtils.mkdir_p @repo_dir
74
+ ::FileUtils.mkdir_p workdir
75
+ file = ::File.join(workdir, "package")
76
+
77
+ # TEAL FIX: we have to always-download the tarball before we can
78
+ # determine if contents have changed, but afterward we can compare the
79
+ # previous download against the latest downloaded and short-circuit the
80
+ # remaining flow for the no-difference case.
81
+ @logger.operation(:downloading) do
82
+ credential_command = if @repository.first_credential && @repository.second_credential
83
+ ['-u', "#{@repository.first_credential}:#{@repository.second_credential}"]
84
+ else
85
+ []
86
+ end
87
+ @output = ::RightScale::RightPopen::SafeOutputBuffer.new
88
+ @cmd = [
89
+ 'curl',
90
+ '--silent', '--show-error', '--location', '--fail',
91
+ '--location-trusted', '-o', file, credential_command,
92
+ @repository.url
93
+ ].flatten
94
+ begin
95
+ ::RightScale::RightPopen.popen3_sync(
96
+ @cmd,
97
+ :target => self,
98
+ :pid_handler => :pid_download,
99
+ :timeout_handler => :timeout_download,
100
+ :size_limit_handler => :size_limit_download,
101
+ :exit_handler => :exit_download,
102
+ :stderr_handler => :output_download,
103
+ :stdout_handler => :output_download,
104
+ :inherit_io => true, # avoid killing any rails connection
105
+ :watch_directory => workdir,
106
+ :size_limit_bytes => @max_bytes,
107
+ :timeout_seconds => @max_seconds)
108
+ rescue Exception => e
109
+ @logger.note_phase(:abort, :running_command, 'curl', e)
110
+ raise
111
+ end
62
112
  end
63
113
 
64
- # Download tarball and unpack it
65
- def retrieve
66
- raise RetrieverError.new("download retriever is unavailable") unless available?
67
- FileUtils.remove_entry_secure @repo_dir if File.exists?(@repo_dir)
68
- FileUtils.remove_entry_secure workdir if File.exists?(workdir)
69
- FileUtils.mkdir_p @repo_dir
70
- FileUtils.mkdir_p workdir
71
- file = File.join(workdir, "package")
72
-
73
- @logger.operation(:downloading) do
74
- credential_command = if @repository.first_credential && @repository.second_credential
75
- ['-u', "#{@repository.first_credential}:#{@repository.second_credential}"]
76
- else
77
- []
78
- end
114
+ note_tag(file)
115
+
116
+ @logger.operation(:unpacking) do
117
+ path = @repository.to_url.path
118
+ if path =~ /\.gz$/
119
+ extraction = "xzf"
120
+ elsif path =~ /\.bz2$/
121
+ extraction = "xjf"
122
+ else
123
+ extraction = "xf"
124
+ end
125
+ Dir.chdir(@repo_dir) do
79
126
  @output = ::RightScale::RightPopen::SafeOutputBuffer.new
80
- @cmd = [
81
- 'curl',
82
- '--silent', '--show-error', '--location', '--fail',
83
- '--location-trusted', '-o', file, credential_command,
84
- @repository.url
85
- ].flatten
127
+ @cmd = ['tar', extraction, file]
86
128
  begin
87
129
  ::RightScale::RightPopen.popen3_sync(
88
130
  @cmd,
@@ -94,90 +136,56 @@ module RightScraper
94
136
  :stderr_handler => :output_download,
95
137
  :stdout_handler => :output_download,
96
138
  :inherit_io => true, # avoid killing any rails connection
97
- :watch_directory => workdir,
139
+ :watch_directory => @repo_dir,
98
140
  :size_limit_bytes => @max_bytes,
99
141
  :timeout_seconds => @max_seconds)
100
142
  rescue Exception => e
101
- @logger.note_phase(:abort, :running_command, 'curl', e)
143
+ @logger.note_phase(:abort, :running_command, @cmd.first, e)
102
144
  raise
103
145
  end
104
146
  end
105
-
106
- note_tag(file)
107
-
108
- @logger.operation(:unpacking) do
109
- path = @repository.to_url.path
110
- if path =~ /\.gz$/
111
- extraction = "xzf"
112
- elsif path =~ /\.bz2$/
113
- extraction = "xjf"
114
- else
115
- extraction = "xf"
116
- end
117
- Dir.chdir(@repo_dir) do
118
- @output = ::RightScale::RightPopen::SafeOutputBuffer.new
119
- @cmd = ['tar', extraction, file]
120
- begin
121
- ::RightScale::RightPopen.popen3_sync(
122
- @cmd,
123
- :target => self,
124
- :pid_handler => :pid_download,
125
- :timeout_handler => :timeout_download,
126
- :size_limit_handler => :size_limit_download,
127
- :exit_handler => :exit_download,
128
- :stderr_handler => :output_download,
129
- :stdout_handler => :output_download,
130
- :inherit_io => true, # avoid killing any rails connection
131
- :watch_directory => @repo_dir,
132
- :size_limit_bytes => @max_bytes,
133
- :timeout_seconds => @max_seconds)
134
- rescue Exception => e
135
- @logger.note_phase(:abort, :running_command, @cmd.first, e)
136
- raise
137
- end
138
- end
139
- end
140
147
  end
148
+ true
149
+ end
141
150
 
142
- def pid_download(pid)
143
- @logger.note_phase(:begin, :running_command, @cmd.first)
144
- true
145
- end
151
+ def pid_download(pid)
152
+ @logger.note_phase(:begin, :running_command, @cmd.first)
153
+ true
154
+ end
146
155
 
147
- def output_download(data)
148
- @output.safe_buffer_data(data)
149
- end
156
+ def output_download(data)
157
+ @output.safe_buffer_data(data)
158
+ end
150
159
 
151
- def timeout_download
152
- raise DownloadError, "Downloader timed out"
153
- end
160
+ def timeout_download
161
+ raise DownloadError, "Downloader timed out"
162
+ end
154
163
 
155
- def size_limit_download
156
- raise DownloadError, "Downloader exceeded size limit"
157
- end
164
+ def size_limit_download
165
+ raise DownloadError, "Downloader exceeded size limit"
166
+ end
158
167
 
159
- def exit_download(status)
160
- unless status.success?
161
- @output.safe_buffer_data("Exit code = #{status.exitstatus}")
162
- raise DownloadError, "Downloader failed: #{@output.display_text}"
163
- end
164
- @logger.note_phase(:commit, :running_command, @cmd.first)
165
- true
168
+ def exit_download(status)
169
+ unless status.success?
170
+ @output.safe_buffer_data("Exit code = #{status.exitstatus}")
171
+ raise DownloadError, "Downloader failed: #{@output.display_text}"
166
172
  end
173
+ @logger.note_phase(:commit, :running_command, @cmd.first)
174
+ true
175
+ end
167
176
 
168
177
 
169
- # Amend @repository with the tag information from the downloaded
170
- # file.
171
- #
172
- # === Parameters
173
- # file(String):: file that was downloaded
174
- def note_tag(file)
175
- digest = Digest::SHA1.new
176
- File.open(file) {|f| digest << f.read(4096) }
177
- repo = @repository.clone
178
- repo.tag = digest.hexdigest
179
- @repository = repo
180
- end
178
+ # Amend @repository with the tag information from the downloaded
179
+ # file.
180
+ #
181
+ # === Parameters
182
+ # file(String):: file that was downloaded
183
+ def note_tag(file)
184
+ digest = Digest::SHA1.new
185
+ File.open(file) {|f| digest << f.read(4096) }
186
+ repo = @repository.clone
187
+ repo.tag = digest.hexdigest
188
+ @repository = repo
181
189
  end
182
190
  end
183
191
  end