right_scraper 3.2.6 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Scrapers
|
28
|
+
autoload :Base, 'right_scraper/scrapers/base'
|
29
|
+
autoload :Cookbook, 'right_scraper/scrapers/cookbook'
|
30
|
+
autoload :Workflow, 'right_scraper/scrapers/workflow'
|
31
|
+
end
|
32
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,242 +21,254 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/scrapers'
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
# <tt>:scanners</tt>:: List of Scanner classes to use, optional
|
43
|
-
# <tt>:builders</tt>:: List of Builder classes to use, optional
|
44
|
-
#
|
45
|
-
# === Return
|
46
|
-
# scraper(Scrapers::Base):: Corresponding scraper instance
|
47
|
-
def self.scraper(options)
|
48
|
-
scraper_kind = options.delete(:kind)
|
49
|
-
scraper_class = @@types[scraper_kind]
|
50
|
-
raise "Can't understand how to build scraper #{scraper_kind}" if scraper_class.nil?
|
51
|
-
scraper = scraper_class.new(options)
|
52
|
-
end
|
27
|
+
module RightScraper::Scrapers
|
28
|
+
|
29
|
+
class ScraperError < StandardError; end # StandardError (not Exception) so a bare `rescue` can catch scraper failures
|
30
|
+
|
31
|
+
# Base class for all scrapers. Subclasses should override
|
32
|
+
# #find_next which instantiates the resource from the file system.
|
33
|
+
class Base < ::RightScraper::RegisteredBase
|
34
|
+
|
35
|
+
# Scraped resources
|
36
|
+
attr_reader :resources
|
37
|
+
|
38
|
+
# @return [Module] module for registered scraper types
|
39
|
+
def self.registration_module
|
40
|
+
::RightScraper::Scrapers
|
41
|
+
end
|
53
42
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
43
|
+
# Initialize scraper
|
44
|
+
#
|
45
|
+
# === Options
|
46
|
+
# <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
|
47
|
+
# <tt>:repo_dir</tt>:: Required, path to directory containing files
|
48
|
+
# to be scraped
|
49
|
+
# <tt>:ignorable_paths</tt>:: List of directory names that should
|
50
|
+
# be ignored by scraper
|
51
|
+
# <tt>:scanners</tt>:: List of Scanner classes to use, optional
|
52
|
+
# <tt>:builders</tt>:: List of Builder classes to use, optional
|
53
|
+
#
|
54
|
+
# === Return
|
55
|
+
# scraper(Scrapers::Base):: Corresponding scraper instance
|
56
|
+
def self.scraper(options)
|
57
|
+
scraper_kind = options.delete(:kind)
|
58
|
+
scraper_class = query_registered_type(scraper_kind)
|
59
|
+
scraper_class.new(options)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Do the scrape!
|
63
|
+
# Extract all resources from directory
|
64
|
+
# Call this method or call 'next_resource' to retrieve
|
65
|
+
# resources one by one (you must then call 'close' yourself)
|
66
|
+
# Fill @resources
|
67
|
+
#
|
68
|
+
# === Return
|
69
|
+
# resources<Array>:: List of all scraped resources
|
70
|
+
def scrape
|
71
|
+
@resources = []
|
72
|
+
begin
|
73
|
+
resource = next_resource
|
74
|
+
until resource.nil?
|
75
|
+
@resources << resource
|
65
76
|
resource = next_resource
|
66
|
-
until resource.nil?
|
67
|
-
@resources << resource
|
68
|
-
resource = next_resource
|
69
|
-
end
|
70
|
-
ensure
|
71
|
-
close
|
72
77
|
end
|
73
|
-
|
78
|
+
ensure
|
79
|
+
close
|
74
80
|
end
|
81
|
+
@resources
|
82
|
+
end
|
75
83
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
84
|
+
# Return the next resource in the filesystem, or nil if none. As
|
85
|
+
# a part of building the resources, invokes the builders.
|
86
|
+
# A resource can be a cookbook, a workflow, a RightScript etc.
|
87
|
+
#
|
88
|
+
# === Returns
|
89
|
+
# Object:: next resource in filesystem, or nil if none.
|
90
|
+
def next_resource
|
91
|
+
@logger.operation(:next) do
|
92
|
+
next nil if @next.nil?
|
85
93
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
end
|
91
|
-
value
|
94
|
+
value = @next
|
95
|
+
@next = search_dirs
|
96
|
+
while @next.nil? && !@queue.empty?
|
97
|
+
pop_queue
|
92
98
|
end
|
99
|
+
value
|
93
100
|
end
|
101
|
+
end
|
94
102
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
end
|
105
|
-
true
|
103
|
+
# Close any opened file descriptor
|
104
|
+
#
|
105
|
+
# === Return
|
106
|
+
# true:: Always return true
|
107
|
+
def close
|
108
|
+
@builder.finish
|
109
|
+
if @stack && !@stack.empty?
|
110
|
+
@stack.each {|s| s.close}
|
111
|
+
@stack = []
|
106
112
|
end
|
113
|
+
true
|
114
|
+
end
|
107
115
|
|
108
|
-
|
109
|
-
|
110
|
-
# Directory containing files to be scraped
|
111
|
-
attr_reader :repo_dir
|
116
|
+
protected
|
112
117
|
|
113
|
-
|
114
|
-
|
115
|
-
# represents that scraper.
|
116
|
-
@@types = {} unless class_variable_defined?(:@@types)
|
118
|
+
# Directory containing files to be scraped
|
119
|
+
attr_reader :repo_dir
|
117
120
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
@ignorable_paths = options[:ignorable_paths]
|
140
|
-
@stack = []
|
141
|
-
@queue = (@repository.resources_path || [""]).reverse
|
142
|
-
@resources = []
|
143
|
-
scanners = options[:scanners] || default_scanners
|
144
|
-
@scanner = RightScraper::Scanners::Union.new(scanners, options)
|
145
|
-
builders = options[:builders] || default_builders
|
146
|
-
@builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
|
147
|
-
:scanner => @scanner,
|
148
|
-
:logger => @logger,
|
149
|
-
:max_bytes => @max_bytes,
|
150
|
-
:max_seconds => @max_seconds)
|
151
|
-
pop_queue # Initialize @next
|
121
|
+
# Initialize scraper
|
122
|
+
#
|
123
|
+
# === Options
|
124
|
+
# <tt>:repository</tt>:: Required, original repository containing scraped
|
125
|
+
# files
|
126
|
+
# <tt>:repo_dir</tt>:: Required, path to directory containing files
|
127
|
+
# to be scraped
|
128
|
+
# <tt>:ignorable_paths</tt>:: List of directory names that should
|
129
|
+
# be ignored by scraper
|
130
|
+
# <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
|
131
|
+
# to RightScraper::Scanners::ResourceManifest and
|
132
|
+
# RightScraper::Scanners::CookbookMetadata
|
133
|
+
# <tt>:builders</tt>:: List of Builder classes to use, defaulting to
|
134
|
+
# RightScraper::Builders::Filesystem
|
135
|
+
#
|
136
|
+
def initialize(options)
|
137
|
+
raise ScraperError.new("Repository required when initializing a scraper") unless options[:repository]
|
138
|
+
raise ScraperError.new("Repository directory required when initializing a scraper") unless options[:repo_dir]
|
139
|
+
@repository = options[:repository]
|
140
|
+
unless @logger = options[:logger]
|
141
|
+
raise ::ArgumentError, ':logger is required'
|
152
142
|
end
|
143
|
+
@repo_dir = options[:repo_dir]
|
144
|
+
@ignorable_paths = options[:ignorable_paths]
|
145
|
+
@stack = []
|
146
|
+
@queue = (@repository.resources_path || [""]).reverse
|
153
147
|
|
154
|
-
#
|
155
|
-
|
156
|
-
# === Return
|
157
|
-
# Array<Scanner>:: Default scanners
|
158
|
-
def default_scanners
|
159
|
-
end
|
148
|
+
# Make sure the requested cookbook resource path exists
|
149
|
+
missing_paths = @queue.select {|path| !File.directory?(File.join(repo_dir, path)) }.compact.sort
|
160
150
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
def default_brokers
|
166
|
-
end
|
151
|
+
raise ScraperError.new(
|
152
|
+
"Cookbook resource path#{'s' unless missing_paths.size < 2}: " +
|
153
|
+
"[#{missing_paths.join(', ')}] #{missing_paths.size < 2 ? "is" : "are"} " +
|
154
|
+
"non-existent for this repository and branch") unless missing_paths.empty?
|
167
155
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
156
|
+
@resources = []
|
157
|
+
scanners = options[:scanners] || default_scanners
|
158
|
+
@scanner = RightScraper::Scanners::Union.new(scanners, options)
|
159
|
+
builders = options[:builders] || default_builders
|
160
|
+
@builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
|
161
|
+
:scanner => @scanner,
|
162
|
+
:logger => @logger,
|
163
|
+
:max_bytes => @max_bytes,
|
164
|
+
:max_seconds => @max_seconds)
|
165
|
+
pop_queue # Initialize @next
|
166
|
+
end
|
176
167
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
alias_method :tell, :pos
|
168
|
+
# List of default scanners for this scraper
|
169
|
+
#
|
170
|
+
# === Return
|
171
|
+
# Array<Scanner>:: Default scanners
|
172
|
+
def default_scanners
|
173
|
+
end
|
184
174
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
175
|
+
# List of default builders for this scraper
|
176
|
+
#
|
177
|
+
# === Return
|
178
|
+
# Array<Builder>:: Default builders
|
179
|
+
def default_brokers
|
180
|
+
end
|
181
|
+
|
182
|
+
# Find the interesting item in given directory
|
183
|
+
# Override in actual scraper implementation
|
184
|
+
#
|
185
|
+
# === Parameters
|
186
|
+
# dir(Dir):: directory to begin search in
|
187
|
+
def find_next(dir)
|
188
|
+
raise NotImplementedError
|
189
|
+
end
|
190
|
+
|
191
|
+
# Return the position of the scraper. Here, the position is the
|
192
|
+
# path relative from the top of the temporary directory. Akin to
|
193
|
+
# IO#pos or IO#tell.
|
194
|
+
def pos
|
195
|
+
strip_repo_dir(@stack.last.path)
|
196
|
+
end
|
197
|
+
alias_method :tell, :pos
|
202
198
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
199
|
+
# Turn path from an absolute filesystem location to a relative
|
200
|
+
# file location from #repo_dir.
|
201
|
+
#
|
202
|
+
# === Parameters
|
203
|
+
# path(String):: absolute path to relativize
|
204
|
+
#
|
205
|
+
# === Returns
|
206
|
+
# res(String):: relative pathname for path
|
207
|
+
def strip_repo_dir(path)
|
208
|
+
res = path[repo_dir.length+1..-1]
|
209
|
+
if res == nil || res == ""
|
210
|
+
"."
|
211
|
+
else
|
212
|
+
res
|
213
213
|
end
|
214
|
+
end
|
215
|
+
private :strip_repo_dir
|
214
216
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
217
|
+
# Test if the entry given is ignorable. By default just uses
|
218
|
+
# #ignorable_paths
|
219
|
+
#
|
220
|
+
# === Parameters
|
221
|
+
# entry(String):: file name to check
|
222
|
+
#
|
223
|
+
# === Returns
|
224
|
+
# Boolean:: true if the entry should be ignored
|
225
|
+
def ignorable?(entry)
|
226
|
+
@ignorable_paths.include?(entry)
|
227
|
+
end
|
228
|
+
|
229
|
+
# Initialize @next with the next resource
|
230
|
+
#
|
231
|
+
# === Returns
|
232
|
+
# @next(Resources::Base):: Next resource
|
233
|
+
def pop_queue
|
234
|
+
until @queue.empty?
|
235
|
+
nextdir = File.join(repo_dir, @queue.pop)
|
236
|
+
if File.directory?(nextdir)
|
237
|
+
@next = find_next(Dir.new(nextdir))
|
238
|
+
return @next
|
239
|
+
else
|
240
|
+
@logger.warn("When processing in #{@repository}, no such path #{nextdir}")
|
228
241
|
end
|
229
|
-
@next = nil
|
230
242
|
end
|
243
|
+
@next = nil
|
244
|
+
end
|
231
245
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
246
|
+
# Search the directory stack looking for the next resource.
|
247
|
+
def search_dirs
|
248
|
+
@logger.operation(:searching) do
|
249
|
+
until @stack.empty?
|
250
|
+
dir = @stack.last
|
251
|
+
entry = dir.read
|
252
|
+
if entry == nil
|
253
|
+
dir.close
|
254
|
+
@stack.pop
|
255
|
+
next
|
256
|
+
end
|
243
257
|
|
244
|
-
|
245
|
-
|
258
|
+
next if entry == '.' || entry == '..'
|
259
|
+
next if ignorable?(entry)
|
246
260
|
|
247
|
-
|
261
|
+
fullpath = File.join(dir.path, entry)
|
248
262
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
end
|
263
|
+
if File.directory?(fullpath)
|
264
|
+
result = find_next(Dir.new(fullpath))
|
265
|
+
break
|
253
266
|
end
|
254
|
-
result
|
255
267
|
end
|
268
|
+
result
|
256
269
|
end
|
257
|
-
private :search_dirs
|
258
|
-
|
259
270
|
end
|
271
|
+
private :search_dirs
|
272
|
+
|
260
273
|
end
|
261
274
|
end
|
262
|
-
|