right_scraper 5.0.1 → 5.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ac45d2b026fadbb61775b0b93c9739b163ff8fcb
4
- data.tar.gz: 14b1d686bdb7ca42b6026e4b40352e65990dac3b
3
+ metadata.gz: aeafef3534344761cfb4b2bc01dc8a8aec848390
4
+ data.tar.gz: ac21c7c9c6121c4470f8d90b6a8b16a994e963e0
5
5
  SHA512:
6
- metadata.gz: 45e307c5376f9fe07522f44540a49016ff1166747327b87f6b3a807408a42135a29250b560b6add6dfbfbfa4421dce95fd2dfad3a784b9e85609377cc294e455
7
- data.tar.gz: 60f022f6c81364948e14d49066b988266db2a23c97ead1fc8c3dc8a3bcd993086decfda1430c506bc0961e60661e569fff33304a139aa4b8505a064a21a73958
6
+ metadata.gz: 158ca23e31b79c3b9e57f09cce5f1abf95a28132412f6fd1ae5f91d03bc84874c2cc536399ceb465942180d2cc764dbebf6046362a211c9c1759c4a5cbda2752
7
+ data.tar.gz: 318501b488f55fe9f4ac2701f0d3bad1ee2d26b7e2ab7645818d38f8d1eadd66720436c3a80b80c204b09014d75c4ff140718e2fb813680455822c3c90e2dc21
data/README.rdoc CHANGED
@@ -10,7 +10,7 @@ may specify only the functionality (and required libraries and gems) you require
10
10
  This gem depends on all available RightScraper modules, enabling full support at the
11
11
  cost of requiring some systems administration work external to Ruby.
12
12
 
13
- Maintained by the RightScale Teal Team
13
+ Maintained by the RightScale Sapphire Team
14
14
 
15
15
  == USAGE
16
16
 
@@ -42,7 +42,7 @@ The build can be tested using the RSpec gem.
42
42
 
43
43
  <b>RightScraper</b>
44
44
 
45
- Copyright:: Copyright (c) 2010 RightScale, Inc.
45
+ Copyright:: Copyright (c) 2010-2016 RightScale, Inc.
46
46
 
47
47
  Permission is hereby granted, free of charge, to any person obtaining
48
48
  a copy of this software and associated documentation files (the
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright: Copyright (c) 2010-2016 RightScale, Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining
6
+ # a copy of this software and associated documentation files (the
7
+ # 'Software'), to deal in the Software without restriction, including
8
+ # without limitation the rights to use, copy, modify, merge, publish,
9
+ # distribute, sublicense, and/or sell copies of the Software, and to
10
+ # permit persons to whom the Software is furnished to do so, subject to
11
+ # the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be
14
+ # included in all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+ #++
24
+
25
+ def warn(*args)
26
+ # eliminate ruby/gem warnings from output
27
+ end
28
+
29
+ require 'json'
30
+ require 'right_git'
31
+ require File.expand_path('../../lib/right_scraper', __FILE__)
32
+
33
+ if ::ARGV.size != 2
34
+ $stderr.puts "Usage: #{::File.basename(__FILE__)} <options.json> <repository.json>"
35
+ exit 1
36
+ end
37
+
38
+ main = nil
39
+ begin
40
+ options = ::JSON.load(::File.read(::ARGV.shift))
41
+ repository = ::JSON.load(::File.read(::ARGV.shift))
42
+ main = ::RightScraper::Main.new(options)
43
+ if retrieved = main.retrieve(repository)
44
+ # remove any credentials from repository after retrieval; not needed in
45
+ # order to run scanners subsequently.
46
+ %w(first_credential second_credential).each { |k| repository.delete(k) }
47
+ retrieved[:repository] = repository
48
+ retrieved[:warnings] = main.warnings unless main.warnings.empty?
49
+ $stdout.puts(::JSON.generate(retrieved))
50
+ else
51
+ result = { errors: main.errors }
52
+ $stderr.puts(::JSON.generate(result))
53
+ exit 2
54
+ end
55
+ exit 0
56
+ rescue ::RightScraper::Error, ::RightGit::Shell::ShellError => e
57
+ # all explicitly raised scraper errors derive from these errors.
58
+ result = { errors: [e.message] }
59
+ $stderr.puts(::JSON.generate(result))
60
+ exit 3
61
+ rescue ::SystemExit => e
62
+ exit e.status
63
+ rescue ::Exception => e
64
+ result = {
65
+ errors: ['internal error'],
66
+ unhandled_exception: {
67
+ class: e.class,
68
+ message: e.message,
69
+ backtrace: (e.backtrace || []).join("\n")
70
+ }
71
+ }
72
+ $stderr.puts(::JSON.generate(result))
73
+ exit 4
74
+ ensure
75
+ main.cleanup rescue nil if main
76
+ end
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright: Copyright (c) 2010-2016 RightScale, Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining
6
+ # a copy of this software and associated documentation files (the
7
+ # 'Software'), to deal in the Software without restriction, including
8
+ # without limitation the rights to use, copy, modify, merge, publish,
9
+ # distribute, sublicense, and/or sell copies of the Software, and to
10
+ # permit persons to whom the Software is furnished to do so, subject to
11
+ # the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be
14
+ # included in all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+ #++
24
+
25
+ def warn(*args)
26
+ # eliminate ruby/gem warnings from output
27
+ end
28
+
29
+ require 'json'
30
+ require 'right_git'
31
+ require File.expand_path('../../lib/right_scraper', __FILE__)
32
+
33
+ if ::ARGV.size != 2
34
+ $stderr.puts "Usage: #{::File.basename(__FILE__)} <options.json> <retrieved.json>"
35
+ exit 1
36
+ end
37
+
38
+ main = nil
39
+ begin
40
+ options = ::JSON.load(::File.read(::ARGV.shift))
41
+ retrieved = ::JSON.load(::File.read(::ARGV.shift))
42
+ main = ::RightScraper::Main.new(options)
43
+
44
+ # cleanup any leftover errors/warnings in retrieved hash.
45
+ retrieved.delete('errors')
46
+ retrieved.delete('warnings')
47
+ if main.scan(retrieved)
48
+ retrieved[:warnings] = main.warnings unless main.warnings.empty?
49
+ retrieved[:resources] = main.resources.map do |r|
50
+ # remove repository from each returned cookbook due to redundancy.
51
+ h = r.to_hash
52
+ h.delete(:repository)
53
+ h
54
+ end
55
+ $stdout.puts(::JSON.generate(retrieved))
56
+ else
57
+ result = { errors: main.errors }
58
+ $stderr.puts(::JSON.generate(result))
59
+ exit 1
60
+ end
61
+ exit 0
62
+ rescue ::RightScraper::Error, ::RightGit::Shell::ShellError => e
63
+ # all explicitly raised scraper errors derive from these errors.
64
+ result = { errors: [e.message] }
65
+ $stderr.puts(::JSON.generate(result))
66
+ exit 3
67
+ rescue ::SystemExit => e
68
+ exit e.status
69
+ rescue ::Exception => e
70
+ result = {
71
+ errors: ['internal error'],
72
+ unhandled_exception: {
73
+ class: e.class,
74
+ message: e.message,
75
+ backtrace: (e.backtrace || []).join("\n")
76
+ }
77
+ }
78
+ $stderr.puts(::JSON.generate(result))
79
+ exit 4
80
+ ensure
81
+ main.cleanup rescue nil if main
82
+ end
data/lib/right_scraper.rb CHANGED
@@ -25,6 +25,9 @@ require 'right_scraper/version'
25
25
 
26
26
  # Autoload everything possible
27
27
  module RightScraper
28
+ # base error class
29
+ class Error < ::StandardError; end
30
+
28
31
  autoload :Builders, 'right_scraper/builders'
29
32
  autoload :Loggers, 'right_scraper/loggers'
30
33
  autoload :Main, 'right_scraper/main'
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2016 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -24,6 +24,7 @@
24
24
  # ancestor
25
25
  require 'right_scraper'
26
26
 
27
+ require 'right_support'
27
28
  require 'fileutils'
28
29
 
29
30
  module RightScraper
@@ -48,32 +49,43 @@ module RightScraper
48
49
  # <tt>:max_bytes</tt>:: Maximum number of bytes to read from remote repo, unlimited if nil
49
50
  # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading from remote repo, unlimited if nil
50
51
  def initialize(options={})
51
- options = {
52
+ options = ::RightSupport::Data::Mash.new(
52
53
  :kind => nil,
53
54
  :basedir => nil,
54
55
  :max_bytes => nil,
55
56
  :max_seconds => nil,
56
- :callback => nil,
57
57
  :logger => nil,
58
58
  :s3_key => nil,
59
59
  :s3_secret => nil,
60
60
  :s3_bucket => nil,
61
- :errors => nil,
62
- :warnings => nil,
63
61
  :scanners => nil,
64
62
  :builders => nil,
65
- }.merge(options)
63
+ ).merge(options)
64
+ @old_logger_callback = nil
66
65
  @temporary = !options.has_key?(:basedir)
67
66
  options[:basedir] ||= Dir.mktmpdir
68
67
  options[:logger] ||= ::RightScraper::Loggers::Default.new
69
68
  @logger = options[:logger]
70
69
  @resources = []
70
+ options[:errors] = @logger.errors
71
+ options[:warnings] = @logger.warnings
72
+
73
+ # load classes from scanners and builders options, if necessary.
74
+ [:scanners, :builders].each do |k|
75
+ list = options[k] || []
76
+ list.each_with_index do |clazz, index|
77
+ unless clazz.kind_of?(::Class)
78
+ list[index] = ::Object.const_get(clazz)
79
+ end
80
+ end
81
+ end
71
82
  @options = options
72
83
  end
73
84
 
74
- # Scrape given repository, depositing files into the scrape
75
- # directory. Update content of unique directory incrementally
76
- # when possible with further calls.
85
+ # Scrapes and scans a given repository.
86
+ #
87
+ # @deprecated the newer methodology will perform these operations in stages
88
+ # controlled externally instead of calling this all-in-one method.
77
89
  #
78
90
  # === Parameters
79
91
  # repo(Hash|RightScraper::Repositories::Base):: Repository to be scraped
@@ -98,59 +110,89 @@ module RightScraper
98
110
  # === Raise
99
111
  # 'Invalid repository type':: If repository type is not known
100
112
  def scrape(repo, incremental=true, &callback)
101
- errorlen = errors.size
102
- repo = RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
113
+ @old_logger_callback = @logger.callback
103
114
  @logger.callback = callback
115
+ errorlen = errors.size
104
116
  begin
105
- # 1. Retrieve the files
106
- retriever = nil
107
- repo_dir_changed = false
108
- @logger.operation(:retrieving, "from #{repo}") do
109
- # note that the retriever type may be unavailable but allow the
110
- # retrieve method to raise any such error.
111
- retriever = repo.retriever(@options)
112
- repo_dir_changed = retriever.retrieve
113
- end
114
-
115
- # TEAL FIX: Note that retrieve will now return true iff there has been
116
- # a change to the last scraped repository directory for efficiency
117
- # reasons and only for retriever types that support this behavior.
118
- #
119
- # Even if the retrieval is skipped due to already having the data on
120
- # disk we still need to scrape its resources only because of the case
121
- # of the metadata scraper daemon, which updates multiple repositories
122
- # of similar criteria.
123
- #
124
- # The issue is that a new repo can appear later with the same criteria
125
- # as an already-scraped repo and will need its own copy of the
126
- # scraped resources. The easiest (but not most efficient) way to
127
- # deliver these is to rescrape the already-seen resources. This
128
- # becomes more expensive as we rely on generating "metadata.json" from
129
- # "metadata.rb" for cookbooks but is likely not expensive enough to
130
- # need to improve this logic.
131
-
132
-
133
- # 2. Now scrape if there is a scraper in the options
134
- @logger.operation(:scraping, retriever.repo_dir) do
135
- if @options[:kind]
136
- options = @options.merge({:ignorable_paths => retriever.ignorable_paths,
137
- :repo_dir => retriever.repo_dir,
138
- :repository => retriever.repository})
139
- scraper = RightScraper::Scrapers::Base.scraper(options)
140
- @resources += scraper.scrape
141
- end
117
+ if retrieved = retrieve(repo, &callback)
118
+ scan(retrieved, &callback)
142
119
  end
143
120
  rescue Exception
144
- # logger handles communication with the end user and appending
145
- # to our error list, we just need to keep going.
121
+ # legacy logger handles communication with the end user and appending
122
+ # to our error list; we just need to keep going. the new methodology
123
+ # has no such guaranteed communication so the caller will decide how to
124
+ # handle errors, etc.
146
125
  ensure
147
- # ensure basedir is always removed if temporary (even with errors).
148
- ::FileUtils.remove_entry_secure(@options[:basedir]) rescue nil if @temporary
126
+ cleanup
149
127
  end
150
- @logger.callback = nil
151
128
  errors.size == errorlen
152
129
  end
153
130
 
131
+ # Retrieves the given repository. See #scrape for details.
132
+ def retrieve(repo)
133
+ errorlen = errors.size
134
+ unless repo.kind_of?(::RightScraper::Repositories::Base)
135
+ repo = RightScraper::Repositories::Base.from_hash(::RightSupport::Data::Mash.new(repo))
136
+ end
137
+ retriever = nil
138
+
139
+ # 1. Retrieve the files
140
+ @logger.operation(:retrieving, "from #{repo}") do
141
+ # note that the retriever type may be unavailable but allow the
142
+ # retrieve method to raise any such error.
143
+ retriever = repo.retriever(@options)
144
+ retriever.retrieve
145
+ end
146
+
147
+ if errors.size == errorlen
148
+ # create the freed directory with world-writable permission for
149
+ # subsequent scan output for less-privileged child processes.
150
+ freed_base_path = freed_dir(repo)
151
+ ::FileUtils.rm_rf(freed_base_path) if ::File.exist?(freed_base_path)
152
+ ::FileUtils.mkdir_p(freed_base_path)
153
+ ::File.chmod(0777, freed_base_path)
154
+
155
+ # the following hash is needed for running any subsequent scanners.
156
+ {
157
+ ignorable_paths: retriever.ignorable_paths,
158
+ repo_dir: retriever.repo_dir,
159
+ freed_dir: freed_base_path,
160
+ repository: retriever.repository
161
+ }
162
+ else
163
+ nil
164
+ end
165
+ end
166
+
167
+ # Scans a local directory. See #scrape for details.
168
+ def scan(retrieved)
169
+ errorlen = errors.size
170
+ old_callback = @logger.callback
171
+ options = ::RightSupport::Data::Mash.new(@options).merge(retrieved)
172
+ repo = options[:repository]
173
+ unless repo.kind_of?(::RightScraper::Repositories::Base)
174
+ repo = RightScraper::Repositories::Base.from_hash(::RightSupport::Data::Mash.new(repo))
175
+ options[:repository] = repo
176
+ end
177
+ @logger.operation(:scraping, options[:repo_dir]) do
178
+ scraper = ::RightScraper::Scrapers::Base.scraper(options)
179
+ @resources += scraper.scrape
180
+ end
181
+ errors.size == errorlen
182
+ end
183
+
184
+ # base directory for any file operations.
185
+ def base_dir
186
+ @options[:basedir]
187
+ end
188
+
189
+ # cleans up temporary files, etc.
190
+ def cleanup
191
+ @logger.callback = @old_logger_callback
192
+ @old_logger_callback = nil
193
+ ::FileUtils.remove_entry_secure(base_dir) rescue nil if @temporary
194
+ end
195
+
154
196
  # Path to directory where given repo should be or was downloaded
155
197
  #
156
198
  # === Parameters
@@ -159,7 +201,14 @@ module RightScraper
159
201
  # === Return
160
202
  # String:: Path to local directory that corresponds to given repository
161
203
  def repo_dir(repo)
162
- RightScraper::Retrievers::Base.repo_dir(@options[:basedir], repo)
204
+ RightScraper::Retrievers::Base.repo_dir(base_dir, repo)
205
+ end
206
+
207
+ # Path to directory where scanned artifacts can be copied out of containment
208
+ # due to lack of permissions to write to other directories. the freed files
209
+ # can then be reused by subsequent scanners, etc.
210
+ def freed_dir(repo)
211
+ ::File.expand_path('../freed', repo_dir(repo))
163
212
  end
164
213
 
165
214
  # (Array):: Error messages in case of failure
@@ -172,7 +221,7 @@ module RightScraper
172
221
  @logger.warnings
173
222
  end
174
223
 
175
- # Was scraping successful?
224
+ # Was scraping successful?
176
225
  # Call errors to get error messages if false
177
226
  #
178
227
  # === Return
@@ -28,6 +28,5 @@ module RightScraper
28
28
  autoload :Shell, 'right_scraper/processes/shell'
29
29
  autoload :SSHAgent, 'right_scraper/processes/ssh_agent'
30
30
  autoload :SvnClient, 'right_scraper/processes/svn_client'
31
- autoload :Warden, 'right_scraper/processes/warden'
32
31
  end
33
32
  end
@@ -37,7 +37,7 @@ module RightScraper
37
37
  include ::RightGit::Shell::Interface
38
38
 
39
39
  # exceptions.
40
- class LimitError < ::RightGit::Shell::ShellError; end
40
+ class LimitError < ::RightScraper::Error; end
41
41
 
42
42
  class SizeLimitError < LimitError; end
43
43
  class TimeLimitError < LimitError; end
@@ -149,7 +149,7 @@ module RightScraper
149
149
  @exit_code = status.exitstatus
150
150
  if @raise_on_failure && !status.success?
151
151
  @output.buffer << "Exit code = #{@exit_code}"
152
- raise ::RightGit::Shell::ShellError, "Execution failed: #{@output.display_text}"
152
+ raise ::RightScraper::Error, "Execution failed: #{@output.display_text}"
153
153
  end
154
154
  true
155
155
  end