right_scraper 1.0.26 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +16 -0
- data/README.rdoc +9 -28
- data/Rakefile +51 -39
- data/lib/right_scraper/builders/base.rb +64 -0
- data/lib/right_scraper/builders/filesystem.rb +96 -0
- data/lib/right_scraper/builders/union.rb +57 -0
- data/lib/right_scraper/logger.rb +102 -0
- data/lib/right_scraper/loggers/noisy.rb +85 -0
- data/lib/right_scraper/processes/ssh.rb +188 -0
- data/lib/right_scraper/repositories/base.rb +299 -0
- data/lib/right_scraper/repositories/download.rb +90 -0
- data/lib/right_scraper/repositories/git.rb +92 -0
- data/lib/right_scraper/repositories/mock.rb +70 -0
- data/lib/right_scraper/repositories/svn.rb +96 -0
- data/lib/right_scraper/resources/base.rb +70 -0
- data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
- data/lib/right_scraper/resources/workflow.rb +55 -0
- data/lib/right_scraper/retrievers/base.rb +114 -0
- data/lib/right_scraper/retrievers/checkout.rb +79 -0
- data/lib/right_scraper/retrievers/download.rb +97 -0
- data/lib/right_scraper/retrievers/git.rb +140 -0
- data/lib/right_scraper/retrievers/svn.rb +87 -0
- data/lib/right_scraper/scanners/base.rb +111 -0
- data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
- data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
- data/lib/right_scraper/scanners/union.rb +89 -0
- data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
- data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
- data/lib/right_scraper/scraper.rb +81 -57
- data/lib/right_scraper/scraper_logger.rb +61 -0
- data/lib/right_scraper/scrapers/base.rb +262 -0
- data/lib/right_scraper/scrapers/cookbook.rb +73 -0
- data/lib/right_scraper/scrapers/workflow.rb +88 -0
- data/lib/right_scraper/svn_client.rb +101 -0
- data/lib/right_scraper/version.rb +28 -0
- data/lib/right_scraper.rb +35 -11
- data/right_scraper.gemspec +26 -13
- data/right_scraper.rconf +13 -0
- data/spec/builder_spec.rb +50 -0
- data/spec/cookbook_helper.rb +73 -0
- data/spec/cookbook_manifest_spec.rb +55 -0
- data/spec/cookbook_s3_upload_spec.rb +152 -0
- data/spec/download/download_retriever_spec.rb +118 -0
- data/spec/download/download_retriever_spec_helper.rb +72 -0
- data/spec/download/download_spec.rb +130 -0
- data/spec/download/multi_dir_spec.rb +106 -0
- data/spec/download/multi_dir_spec_helper.rb +40 -0
- data/spec/git/cookbook_spec.rb +166 -0
- data/spec/git/demokey +27 -0
- data/spec/git/demokey.pub +1 -0
- data/spec/git/password_key +30 -0
- data/spec/git/password_key.pub +1 -0
- data/spec/git/repository_spec.rb +110 -0
- data/spec/git/retriever_spec.rb +505 -0
- data/spec/git/retriever_spec_helper.rb +112 -0
- data/spec/git/scraper_spec.rb +136 -0
- data/spec/git/ssh_spec.rb +170 -0
- data/spec/git/url_spec.rb +103 -0
- data/spec/logger_spec.rb +185 -0
- data/spec/repository_spec.rb +89 -23
- data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
- data/spec/scanner_spec.rb +61 -0
- data/spec/scraper_helper.rb +96 -0
- data/spec/scraper_spec.rb +123 -45
- data/spec/spec_helper.rb +87 -14
- data/spec/svn/cookbook_spec.rb +97 -0
- data/spec/svn/multi_svn_spec.rb +64 -0
- data/spec/svn/multi_svn_spec_helper.rb +40 -0
- data/spec/svn/repository_spec.rb +72 -0
- data/spec/svn/retriever_spec.rb +261 -0
- data/spec/svn/scraper_spec.rb +90 -0
- data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
- data/spec/svn/url_spec.rb +47 -0
- data/spec/url_spec.rb +164 -0
- metadata +203 -31
- data/lib/right_scraper/linux/process_monitor.rb +0 -84
- data/lib/right_scraper/repository.rb +0 -78
- data/lib/right_scraper/scraper_base.rb +0 -175
- data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
- data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
- data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
- data/lib/right_scraper/watcher.rb +0 -158
- data/lib/right_scraper/win32/process_monitor.rb +0 -98
- data/spec/download/download_scraper_spec.rb +0 -94
- data/spec/git/git_scraper_spec.rb +0 -165
- data/spec/git/git_scraper_spec_helper.rb +0 -72
- data/spec/rcov.opts +0 -1
- data/spec/spec.opts +0 -2
- data/spec/svn/svn_scraper_spec.rb +0 -148
- data/spec/watcher_spec.rb +0 -74
@@ -0,0 +1,86 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
|
25
|
+
require 'digest/sha1'
|
26
|
+
|
27
|
+
module RightScraper
  module Scanners
    # Build manifests from a filesystem.
    #
    # The manifest maps the relative position of the workflow metadata and
    # definition files to the SHA1 hex digest of their contents. It is
    # accumulated by +notice+ and handed to the resource in +end+.
    class WorkflowManifest < Base
      # Create a new manifest scanner. Does not accept any new arguments;
      # everything is forwarded untouched to the superclass.
      def initialize(*args)
        super
        @manifest = {}
      end

      # Retrieve relative workflow files positions
      #
      # === Parameters
      # workflow(Resources::Workflow):: Workflow whose manifest is being built
      def begin(workflow)
        @workflow = workflow
        @metadata_filename = File.basename(@workflow.metadata_path)
        @definition_filename = File.basename(@workflow.definition_path)
        # Start from a clean slate: if a previous scan was interrupted before
        # +end+ ran, its digests must not leak into this workflow's manifest.
        @manifest = {}
      end

      # Complete a scan for the given resource: hand it the manifest that
      # was collected and reset the scanner for the next resource.
      #
      # === Parameters
      # resource(RightScraper::Resources::Base):: resource to scan
      def end(resource)
        resource.manifest = @manifest
        @manifest = {}
      end

      # Notice a file during scanning. Only the metadata and definition
      # files are recorded; any other file is ignored without reading it.
      #
      # === Block
      # Return the data for this file. We use a block because it may
      # not always be necessary to read the data.
      #
      # === Parameters
      # relative_position(String):: relative pathname for file from root of resource
      def notice(relative_position)
        return unless [@metadata_filename, @definition_filename].include?(relative_position)
        @manifest[relative_position] = Digest::SHA1.hexdigest(yield)
      end

      # Notice a directory during scanning. Since the workflow definition and
      # metadata live in the root directory we don't need to recurse,
      # but we do need to go into the first directory (identified by
      # +relative_position+ being +nil+).
      #
      # === Parameters
      # relative_position(String):: relative pathname for the directory from root of workflow
      #
      # === Returns
      # Boolean:: should the scanning recurse into the directory
      def notice_dir(relative_position)
        relative_position.nil?
      end

    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'json'
|
25
|
+
|
26
|
+
module RightScraper
  module Scanners
    # Load workflow metadata from a filesystem.
    #
    # While a workflow is being scanned, the file whose name matches the
    # basename of the workflow's metadata path is parsed as JSON and stored
    # on the workflow.
    class WorkflowMetadata < Base
      # Begin a scan for the given workflow.
      #
      # === Parameters
      # workflow(RightScraper::Resources::Workflow):: workflow to scan
      def begin(workflow)
        @workflow = workflow
        @metadata_filename = File.basename(workflow.metadata_path)
      end

      # Notice a file during scanning. Files other than the metadata file
      # are skipped without reading their contents.
      #
      # === Block
      # Return the data for this file. We use a block because it may
      # not always be necessary to read the data.
      #
      # === Parameters
      # relative_position(String):: relative pathname for the file from root of workflow
      def notice(relative_position)
        return unless relative_position == @metadata_filename
        # Parsing runs inside a logger operation so it is reported as
        # :metadata_parsing. @logger is not set in this class; presumably
        # the Base scanner provides it -- confirm against Base.
        @logger.operation(:metadata_parsing) do
          @workflow.metadata = JSON.parse(yield)
        end
      end

      # Notice a directory during scanning. Since the workflow definition and
      # metadata live in the root directory we don't need to recurse,
      # but we do need to go into the first directory (identified by
      # +relative_position+ being +nil+).
      #
      # === Parameters
      # relative_position(String):: relative pathname for the directory from root of workflow
      #
      # === Returns
      # Boolean:: should the scanning recurse into the directory
      def notice_dir(relative_position)
        relative_position.nil?
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
require 'digest/sha1'
require 'json'
require 'right_aws'
|
25
|
+
|
26
|
+
module RightScraper
  module Scanners
    # Upload workflow definition and metadata to an S3 bucket.
    class WorkflowS3Upload < Base
      # Create a new S3Upload. In addition to the options recognized
      # by Scanner, this class recognizes <tt>:s3_key</tt>,
      # <tt>:s3_secret</tt>, and <tt>:s3_bucket</tt> and requires all
      # of those.
      #
      # === Options
      # <tt>:s3_key</tt>:: Required. S3 access key.
      # <tt>:s3_secret</tt>:: Required. S3 secret key.
      # <tt>:s3_bucket</tt>:: Required. Bucket to upload workflows to.
      #
      # === Parameters
      # options(Hash):: scanner options
      #
      # === Raise
      # A missing required option makes Hash#fetch raise; a nil bucket
      # raises a RuntimeError.
      def initialize(options={})
        super
        # Ruby has no "name=value" call syntax: the credentials are plain
        # positional arguments, followed by the parameter hash.
        s3 = RightAws::S3.new(options.fetch(:s3_key),
                              options.fetch(:s3_secret),
                              :logger => Logger.new)
        @bucket = s3.bucket(options.fetch(:s3_bucket))
        raise "Need an actual, existing S3 bucket!" if @bucket.nil?
      end

      # Upon ending a scan for a workflow, upload the workflow's metadata
      # and manifest to S3 as a single JSON document stored under
      # <tt>Workflows/<resource hash></tt>.
      #
      # === Parameters
      # workflow(RightScraper::Resources::Workflow):: Workflow that was scanned
      def end(workflow)
        @bucket.put(File.join('Workflows', workflow.resource_hash),
                    {
                      :metadata => workflow.metadata,
                      :manifest => workflow.manifest
                    }.to_json)
      end

      # Upload a file during scanning. The file is stored under
      # <tt>Files/<SHA1 of contents></tt> and is only sent when no key
      # with that name exists in the bucket yet.
      #
      # === Block
      # Return the data for this file. Every noticed file is read, because
      # its digest is needed to compute the S3 key.
      #
      # === Parameters
      # relative_position(String):: relative pathname for file from root of workflow
      def notice(relative_position)
        # TBD: Only upload definition and metadata, will there be more files?
        contents = yield
        path = File.join('Files', Digest::SHA1.hexdigest(contents))
        @bucket.put(path, contents) unless @bucket.key(path).exists?
      end
    end
  end
end
|
@@ -1,18 +1,18 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010 RightScale, Inc.
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
6
6
|
# 'Software'), to deal in the Software without restriction, including
|
7
7
|
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
9
|
# permit persons to whom the Software is furnished to do so, subject to
|
10
10
|
# the following conditions:
|
11
11
|
#
|
12
12
|
# The above copyright notice and this permission notice shall be
|
13
13
|
# included in all copies or substantial portions of the Software.
|
14
14
|
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
16
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
17
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
18
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
@@ -20,94 +20,118 @@
|
|
20
20
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'logger'))
|
23
24
|
|
24
|
-
module
|
25
|
-
|
26
|
-
# Hash of repository types associated with corresponding scraper class
|
27
|
-
SCRAPERS = { 'git' => RightScale::GitScraper,
|
28
|
-
'svn' => RightScale::SvnScraper,
|
29
|
-
'download' => RightScale::DownloadScraper }
|
25
|
+
module RightScraper

  # Library main entry point. Instantiate this class and call the scrape
  # method to download or update a remote repository to the local disk and
  # run a scraper on the resulting files.
  class Scraper

    # (Array):: Scraped resources
    attr_reader :resources

    # Initialize scrape destination directory
    #
    # === Options
    # <tt>:kind</tt>:: Type of scraper that will traverse directory for resources, one of :cookbook or :workflow
    # <tt>:basedir</tt>:: Local directory where files are retrieved and scraped, use temporary directory if nil
    # <tt>:max_bytes</tt>:: Maximum number of bytes to read from remote repo, unlimited if nil
    # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading from remote repo, unlimited if nil
    #
    # The given hash is never modified; the scraper works on its own copy.
    def initialize(options={})
      # A nil :basedir means "use a temporary directory", exactly like a
      # missing one, so look at the value rather than at the key.
      @temporary = !options[:basedir]
      @logger = ScraperLogger.new
      # merge builds a new hash, so the generated directory is not written
      # back into the caller's options.
      @options = options.merge({:basedir => options[:basedir] || Dir.mktmpdir,
                                :logger  => @logger})
      @resources = []
    end

    # Scrape given repository, depositing files into the scrape
    # directory. Update content of unique directory incrementally
    # when possible with further calls.
    #
    # === Parameters
    # repo(Hash|RightScraper::Repositories::Base):: Repository to be scraped
    #   Note: repo can either be a Hash or a RightScraper::Repositories::Base instance.
    #   See the RightScraper::Repositories::Base class for valid Hash keys.
    # incremental(FalseClass|TrueClass):: Accepted for backward compatibility;
    #   this method does not read it.
    #
    # === Block
    # If a block is given, it will be called back with progress information
    # the block should take four arguments:
    # - first argument is one of <tt>:begin</tt>, <tt>:commit</tt>,
    #   <tt>:abort</tt> which signifies what
    #   the scraper is trying to do and where it is when it does it
    # - second argument is a symbol describing the operation being performed
    #   in an easy-to-match way
    # - third argument is optional further explanation
    # - fourth argument is the exception pending (only relevant for <tt>:abort</tt>)
    #
    # === Return
    # true:: If scrape was successful
    # false:: If scrape failed, call errors for information on failure
    #
    # === Raise
    # Converting a Hash +repo+ happens before error trapping starts, so
    # whatever RightScraper::Repositories::Base.from_hash raises propagates
    # to the caller.
    def scrape(repo, incremental=true, &callback)
      errorlen = errors.size
      repo = RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
      @logger.callback = callback
      begin
        # 1. Retrieve the files
        retriever = nil
        @logger.operation(:retrieving, "from #{repo}") do
          retriever = repo.retriever(@options)
          retriever.retrieve
        end

        # 2. Now scrape if there is a scraper in the options
        @logger.operation(:scraping, retriever.repo_dir) do
          if @options[:kind]
            options = @options.merge({:ignorable_paths => retriever.ignorable_paths,
                                      :repo_dir        => retriever.repo_dir,
                                      :repository      => retriever.repository})
            scraper = RightScraper::Scrapers::Base.scraper(options)
            @resources += scraper.scrape
          end
        end
      rescue
        # logger handles communication with the end user and appending
        # to our error list, we just need to keep going.
      ensure
        # 3. Cleanup if temporary. Done here so a failed retrieval or scrape
        # does not leave the temporary directory behind.
        remove_temporary_basedir
        @logger.callback = nil
      end
      # Success means this call added nothing to the error list.
      errors.size == errorlen
    end

    # Path to directory where given repo should be or was downloaded
    #
    # === Parameters
    # repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
    #
    # === Return
    # String:: Path to local directory that corresponds to given repository
    def repo_dir(repo)
      RightScraper::Retrievers::Base.repo_dir(@options[:basedir], repo)
    end

    # (Array):: Error messages in case of failure
    def errors
      @logger.errors
    end

    # Was scraping successful?
    # Call errors to get error messages if false
    #
    # === Return
    # Boolean:: true if scrape finished with no error, false otherwise.
    def succeeded?
      errors.empty?
    end
    alias_method :successful?, :succeeded?

    private

    # Remove the base directory when the scraper created it itself.
    # Best effort: a cleanup problem is not reported as a scrape failure.
    def remove_temporary_basedir
      return unless @temporary
      basedir = @options[:basedir]
      FileUtils.remove_entry_secure(basedir) if File.exist?(basedir)
    rescue StandardError
      # Deliberately ignored, see above.
    end

  end
end
|
137
|
+
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# 'Software'), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
module RightScraper

  # Logger that keeps what it is told in memory instead of writing it out.
  #
  # Entries are appended to +errors+ as three-element arrays:
  # <tt>[exception, type, explanation]</tt> for +note_error+ and
  # <tt>[nil, :log, {:severity, :message, :progname}]</tt> for messages
  # logged through +add+. If a +callback+ is set it is invoked for every
  # phase reported through +note_phase+.
  class ScraperLogger < Logger
    # (Array):: Collected errors and log entries
    attr_accessor :errors
    # (Proc|nil):: Called with (phase, type, explanation, exception) on each phase
    attr_accessor :callback

    # Create a logger with an empty error list.
    #
    # NOTE(review): the superclass initializer is not invoked, which is
    # presumably why +add+ has to cope with an unset level -- confirm
    # against the Logger base class before changing.
    def initialize
      @errors = []
    end

    # Record a log message as an error entry when it is severe enough.
    # Messages below the logger level (WARN when no level is set) are dropped.
    #
    # === Parameters
    # severity(Integer|nil):: Logger severity constant, nil is treated as UNKNOWN
    # message(String|nil):: Message to record
    # progname(String|nil):: Program name; used as the message when +message+
    #   is nil and no block is given
    #
    # === Block
    # Evaluated to obtain the message when +message+ is nil.
    #
    # === Return
    # Array|nil:: The error list when an entry was recorded, nil otherwise
    def add(severity, message=nil, progname=nil)
      # Standard Logger#add treats a missing severity as UNKNOWN; without
      # this the comparison below raises NoMethodError on nil.
      severity ||= Logger::UNKNOWN
      return unless severity >= (self.level || Logger::WARN)
      if message.nil?
        if block_given?
          message = yield
        else
          # Same convention as Logger#add: a lone second argument is the
          # message, and the program name falls back to the logger's own.
          message = progname
          progname = self.progname
        end
      end
      @errors << [nil, :log,
                  {:severity => severity,
                   :message  => message,
                   :progname => progname}]
    end

    # Report a phase to the callback (if any), then forward the same
    # arguments to the superclass implementation.
    #
    # === Parameters
    # phase(Symbol):: Phase being reported
    # type(Symbol):: Operation being performed
    # explanation(String):: Further explanation
    # exception(Exception|nil):: Pending exception, if any
    def note_phase(phase, type, explanation, exception=nil)
      @callback.call(phase, type, explanation, exception) unless @callback.nil?
      super
    end

    # Append an error to the error list.
    #
    # === Parameters
    # exception(Exception):: Exception that was raised
    # type(Symbol):: Operation during which it happened
    # explanation(String):: Further explanation
    def note_error(exception, type, explanation="")
      @errors << [exception, type, explanation]
    end
  end

end
|