right_scraper 3.2.6 → 5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Scrapers
|
28
|
+
autoload :Base, 'right_scraper/scrapers/base'
|
29
|
+
autoload :Cookbook, 'right_scraper/scrapers/cookbook'
|
30
|
+
autoload :Workflow, 'right_scraper/scrapers/workflow'
|
31
|
+
end
|
32
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,242 +21,254 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/scrapers'
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
# <tt>:scanners</tt>:: List of Scanner classes to use, optional
|
43
|
-
# <tt>:builders</tt>:: List of Builder classes to use, optional
|
44
|
-
#
|
45
|
-
# === Return
|
46
|
-
# scraper(Scrapers::Base):: Corresponding scraper instance
|
47
|
-
def self.scraper(options)
|
48
|
-
scraper_kind = options.delete(:kind)
|
49
|
-
scraper_class = @@types[scraper_kind]
|
50
|
-
raise "Can't understand how to build scraper #{scraper_kind}" if scraper_class.nil?
|
51
|
-
scraper = scraper_class.new(options)
|
52
|
-
end
|
27
|
+
module RightScraper::Scrapers
|
28
|
+
|
29
|
+
class ScraperError < Exception; end
|
30
|
+
|
31
|
+
# Base class for all scrapers. Subclasses should override
|
32
|
+
# #find_next which instantiates the resource from the file system.
|
33
|
+
class Base < ::RightScraper::RegisteredBase
|
34
|
+
|
35
|
+
# Scraped resources
|
36
|
+
attr_reader :resources
|
37
|
+
|
38
|
+
# @return [Module] module for registered scraper types
|
39
|
+
def self.registration_module
|
40
|
+
::RightScraper::Scrapers
|
41
|
+
end
|
53
42
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
43
|
+
# Initialize scraper
|
44
|
+
#
|
45
|
+
# === Options
|
46
|
+
# <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
|
47
|
+
# <tt>:repo_dir</tt>:: Required, path to directory containing files
|
48
|
+
# to be scraped
|
49
|
+
# <tt>:ignorable_paths</tt>:: List of directory names that should
|
50
|
+
# be ignored by scraper
|
51
|
+
# <tt>:scanners</tt>:: List of Scanner classes to use, optional
|
52
|
+
# <tt>:builders</tt>:: List of Builder classes to use, optional
|
53
|
+
#
|
54
|
+
# === Return
|
55
|
+
# scraper(Scrapers::Base):: Corresponding scraper instance
|
56
|
+
def self.scraper(options)
|
57
|
+
scraper_kind = options.delete(:kind)
|
58
|
+
scraper_class = query_registered_type(scraper_kind)
|
59
|
+
scraper_class.new(options)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Do the scrape!
|
63
|
+
# Extract all resources from directory
|
64
|
+
# Call this method or call 'next_resource' to retrieve
|
65
|
+
# resources one by one (you must then call 'close' yourself)
|
66
|
+
# Fill @resources
|
67
|
+
#
|
68
|
+
# === Return
|
69
|
+
# resources<Array>:: List of all scraped resources
|
70
|
+
def scrape
|
71
|
+
@resources = []
|
72
|
+
begin
|
73
|
+
resource = next_resource
|
74
|
+
until resource.nil?
|
75
|
+
@resources << resource
|
65
76
|
resource = next_resource
|
66
|
-
until resource.nil?
|
67
|
-
@resources << resource
|
68
|
-
resource = next_resource
|
69
|
-
end
|
70
|
-
ensure
|
71
|
-
close
|
72
77
|
end
|
73
|
-
|
78
|
+
ensure
|
79
|
+
close
|
74
80
|
end
|
81
|
+
@resources
|
82
|
+
end
|
75
83
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
84
|
+
# Return the next resource in the filesystem, or nil if none. As
|
85
|
+
# a part of building the resources, invokes the builders.
|
86
|
+
# A resource can be a cookbook, a workflow, a RightScript etc.
|
87
|
+
#
|
88
|
+
# === Returns
|
89
|
+
# Object:: next resource in filesystem, or nil if none.
|
90
|
+
def next_resource
|
91
|
+
@logger.operation(:next) do
|
92
|
+
next nil if @next.nil?
|
85
93
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
end
|
91
|
-
value
|
94
|
+
value = @next
|
95
|
+
@next = search_dirs
|
96
|
+
while @next.nil? && !@queue.empty?
|
97
|
+
pop_queue
|
92
98
|
end
|
99
|
+
value
|
93
100
|
end
|
101
|
+
end
|
94
102
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
end
|
105
|
-
true
|
103
|
+
# Close any opened file descriptor
|
104
|
+
#
|
105
|
+
# === Return
|
106
|
+
# true:: Always return true
|
107
|
+
def close
|
108
|
+
@builder.finish
|
109
|
+
if @stack && !@stack.empty?
|
110
|
+
@stack.each {|s| s.close}
|
111
|
+
@stack = []
|
106
112
|
end
|
113
|
+
true
|
114
|
+
end
|
107
115
|
|
108
|
-
|
109
|
-
|
110
|
-
# Directory containing files to be scraped
|
111
|
-
attr_reader :repo_dir
|
116
|
+
protected
|
112
117
|
|
113
|
-
|
114
|
-
|
115
|
-
# represents that scraper.
|
116
|
-
@@types = {} unless class_variable_defined?(:@@types)
|
118
|
+
# Directory containing files to be scraped
|
119
|
+
attr_reader :repo_dir
|
117
120
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
@ignorable_paths = options[:ignorable_paths]
|
140
|
-
@stack = []
|
141
|
-
@queue = (@repository.resources_path || [""]).reverse
|
142
|
-
@resources = []
|
143
|
-
scanners = options[:scanners] || default_scanners
|
144
|
-
@scanner = RightScraper::Scanners::Union.new(scanners, options)
|
145
|
-
builders = options[:builders] || default_builders
|
146
|
-
@builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
|
147
|
-
:scanner => @scanner,
|
148
|
-
:logger => @logger,
|
149
|
-
:max_bytes => @max_bytes,
|
150
|
-
:max_seconds => @max_seconds)
|
151
|
-
pop_queue # Initialize @next
|
121
|
+
# Initialize scraper
|
122
|
+
#
|
123
|
+
# === Options
|
124
|
+
# <tt>:repository</tt>:: Required, original repository containing scraped
|
125
|
+
# files
|
126
|
+
# <tt>:repo_dir</tt>:: Required, path to directory containing files
|
127
|
+
# to be scraped
|
128
|
+
# <tt>:ignorable_paths</tt>:: List of directory names that should
|
129
|
+
# be ignored by scraper
|
130
|
+
# <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
|
131
|
+
# to RightScraper::Scanners::ResourceManifest and
|
132
|
+
# RightScraper::Scanners::CookbookMetadata
|
133
|
+
# <tt>:builders</tt>:: List of Builder classes to use, defaulting to
|
134
|
+
# RightScraper::Builders::Filesystem
|
135
|
+
#
|
136
|
+
def initialize(options)
|
137
|
+
raise ScraperError.new("Repository required when initializing a scraper") unless options[:repository]
|
138
|
+
raise ScraperError.new("Repository directory required when initializing a scraper") unless options[:repo_dir]
|
139
|
+
@repository = options[:repository]
|
140
|
+
unless @logger = options[:logger]
|
141
|
+
raise ::ArgumentError, ':logger is required'
|
152
142
|
end
|
143
|
+
@repo_dir = options[:repo_dir]
|
144
|
+
@ignorable_paths = options[:ignorable_paths]
|
145
|
+
@stack = []
|
146
|
+
@queue = (@repository.resources_path || [""]).reverse
|
153
147
|
|
154
|
-
#
|
155
|
-
|
156
|
-
# === Return
|
157
|
-
# Array<Scanner>:: Default scanners
|
158
|
-
def default_scanners
|
159
|
-
end
|
148
|
+
# Make sure the requested cookbook resource path exists
|
149
|
+
missing_paths = @queue.select {|path| !File.directory?(File.join(repo_dir, path)) }.compact.sort
|
160
150
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
def default_brokers
|
166
|
-
end
|
151
|
+
raise ScraperError.new(
|
152
|
+
"Cookbook resource path#{'s' unless missing_paths.size < 2}: " +
|
153
|
+
"[#{missing_paths.join(', ')}] #{missing_paths.size < 2 ? "is" : "are"} " +
|
154
|
+
"non-existent for this repository and branch") unless missing_paths.empty?
|
167
155
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
156
|
+
@resources = []
|
157
|
+
scanners = options[:scanners] || default_scanners
|
158
|
+
@scanner = RightScraper::Scanners::Union.new(scanners, options)
|
159
|
+
builders = options[:builders] || default_builders
|
160
|
+
@builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
|
161
|
+
:scanner => @scanner,
|
162
|
+
:logger => @logger,
|
163
|
+
:max_bytes => @max_bytes,
|
164
|
+
:max_seconds => @max_seconds)
|
165
|
+
pop_queue # Initialize @next
|
166
|
+
end
|
176
167
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
alias_method :tell, :pos
|
168
|
+
# List of default scanners for this scraper
|
169
|
+
#
|
170
|
+
# === Return
|
171
|
+
# Array<Scanner>:: Default scanners
|
172
|
+
def default_scanners
|
173
|
+
end
|
184
174
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
175
|
+
# List of default builders for this scraper
|
176
|
+
#
|
177
|
+
# === Return
|
178
|
+
# Array<Builder>:: Default builders
|
179
|
+
def default_brokers
|
180
|
+
end
|
181
|
+
|
182
|
+
# Find the interesting item in given directory
|
183
|
+
# Override in actual scraper implementation
|
184
|
+
#
|
185
|
+
# === Parameters
|
186
|
+
# dir(Dir):: directory to begin search in
|
187
|
+
def find_next(dir)
|
188
|
+
raise NotImplementedError
|
189
|
+
end
|
190
|
+
|
191
|
+
# Return the position of the scraper. Here, the position is the
|
192
|
+
# path relative from the top of the temporary directory. Akin to
|
193
|
+
# IO#pos or IO#tell.
|
194
|
+
def pos
|
195
|
+
strip_repo_dir(@stack.last.path)
|
196
|
+
end
|
197
|
+
alias_method :tell, :pos
|
202
198
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
199
|
+
# Turn path from an absolute filesystem location to a relative
|
200
|
+
# file location from #repo_dir.
|
201
|
+
#
|
202
|
+
# === Parameters
|
203
|
+
# path(String):: absolute path to relativize
|
204
|
+
#
|
205
|
+
# === Returns
|
206
|
+
# res(String):: relative pathname for path
|
207
|
+
def strip_repo_dir(path)
|
208
|
+
res = path[repo_dir.length+1..-1]
|
209
|
+
if res == nil || res == ""
|
210
|
+
"."
|
211
|
+
else
|
212
|
+
res
|
213
213
|
end
|
214
|
+
end
|
215
|
+
private :strip_repo_dir
|
214
216
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
217
|
+
# Test if the entry given is ignorable. By default just uses
|
218
|
+
# #ignorable_paths
|
219
|
+
#
|
220
|
+
# === Parameters
|
221
|
+
# entry(String):: file name to check
|
222
|
+
#
|
223
|
+
# === Returns
|
224
|
+
# Boolean:: true if the entry should be ignored
|
225
|
+
def ignorable?(entry)
|
226
|
+
@ignorable_paths.include?(entry)
|
227
|
+
end
|
228
|
+
|
229
|
+
# Initialize @next with the next resource
|
230
|
+
#
|
231
|
+
# === Returns
|
232
|
+
# @next(Resources::Base):: Next resource
|
233
|
+
def pop_queue
|
234
|
+
until @queue.empty?
|
235
|
+
nextdir = File.join(repo_dir, @queue.pop)
|
236
|
+
if File.directory?(nextdir)
|
237
|
+
@next = find_next(Dir.new(nextdir))
|
238
|
+
return @next
|
239
|
+
else
|
240
|
+
@logger.warn("When processing in #{@repository}, no such path #{nextdir}")
|
228
241
|
end
|
229
|
-
@next = nil
|
230
242
|
end
|
243
|
+
@next = nil
|
244
|
+
end
|
231
245
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
246
|
+
# Search the directory stack looking for the next resource.
|
247
|
+
def search_dirs
|
248
|
+
@logger.operation(:searching) do
|
249
|
+
until @stack.empty?
|
250
|
+
dir = @stack.last
|
251
|
+
entry = dir.read
|
252
|
+
if entry == nil
|
253
|
+
dir.close
|
254
|
+
@stack.pop
|
255
|
+
next
|
256
|
+
end
|
243
257
|
|
244
|
-
|
245
|
-
|
258
|
+
next if entry == '.' || entry == '..'
|
259
|
+
next if ignorable?(entry)
|
246
260
|
|
247
|
-
|
261
|
+
fullpath = File.join(dir.path, entry)
|
248
262
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
end
|
263
|
+
if File.directory?(fullpath)
|
264
|
+
result = find_next(Dir.new(fullpath))
|
265
|
+
break
|
253
266
|
end
|
254
|
-
result
|
255
267
|
end
|
268
|
+
result
|
256
269
|
end
|
257
|
-
private :search_dirs
|
258
|
-
|
259
270
|
end
|
271
|
+
private :search_dirs
|
272
|
+
|
260
273
|
end
|
261
274
|
end
|
262
|
-
|