right_scraper 3.2.6 → 5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,12 +21,16 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
24
27
|
module RightScraper
|
25
28
|
module Repositories
|
29
|
+
|
26
30
|
# A repository that is just an archive file hanging off a
|
27
31
|
# web server somewhere. This version uses a command line curl to
|
28
32
|
# download the archive, and command line tar to extract it.
|
29
|
-
class Download < Base
|
33
|
+
class Download < ::RightScraper::Repositories::Base
|
30
34
|
# (Symbol) Type of the repository, here :download.
|
31
35
|
def repo_type
|
32
36
|
:download
|
@@ -81,10 +85,8 @@ module RightScraper
|
|
81
85
|
RightScraper::Retrievers::Download.new(self, options)
|
82
86
|
end
|
83
87
|
|
84
|
-
|
85
|
-
|
86
|
-
@@types[:download] = RightScraper::Repositories::Download
|
87
|
-
|
88
|
+
# self-register
|
89
|
+
register_self
|
88
90
|
end
|
89
91
|
end
|
90
92
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,10 +21,13 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
24
27
|
module RightScraper
|
25
28
|
module Repositories
|
26
29
|
# A Git repository.
|
27
|
-
class Git < Base
|
30
|
+
class Git < ::RightScraper::Repositories::Base
|
28
31
|
|
29
32
|
# (String) Optional, tag or branch of repository that should be downloaded
|
30
33
|
attr_accessor :tag
|
@@ -80,13 +83,9 @@ module RightScraper
|
|
80
83
|
RightScraper::Retrievers::Git.new(self, options)
|
81
84
|
end
|
82
85
|
|
83
|
-
#
|
84
|
-
|
85
|
-
|
86
|
-
# Add git URL schemas to the list of okay schemas.
|
87
|
-
@@okay_schemes << "git"
|
88
|
-
@@okay_schemes << "git+ssh"
|
89
|
-
@@okay_schemes << "ssh"
|
86
|
+
# self-register
|
87
|
+
register_self
|
88
|
+
register_url_schemas('git', 'git+ssh', 'ssh')
|
90
89
|
end
|
91
90
|
end
|
92
91
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,10 +21,13 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
24
27
|
module RightScraper
|
25
28
|
module Repositories
|
26
29
|
# A repository that is stored in a Subversion server.
|
27
|
-
class Svn < Base
|
30
|
+
class Svn < ::RightScraper::Repositories::Base
|
28
31
|
|
29
32
|
# (String) Optional, tag or branch of repository that should be downloaded
|
30
33
|
attr_accessor :tag
|
@@ -85,12 +88,9 @@ module RightScraper
|
|
85
88
|
RightScraper::Retrievers::Svn.new(self, options)
|
86
89
|
end
|
87
90
|
|
88
|
-
#
|
89
|
-
|
90
|
-
|
91
|
-
# Add git URL schemas to the list of okay schemas.
|
92
|
-
@@okay_schemes << "svn"
|
93
|
-
@@okay_schemes << "svn+ssh"
|
91
|
+
# self-register
|
92
|
+
register_self
|
93
|
+
register_url_schemas('svn', 'svn+ssh')
|
94
94
|
end
|
95
95
|
end
|
96
96
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Resources
|
28
|
+
autoload :Base, 'right_scraper/resources/base'
|
29
|
+
autoload :Cookbook, 'right_scraper/resources/cookbook'
|
30
|
+
autoload :Workflow, 'right_scraper/resources/workflow'
|
31
|
+
end
|
32
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -20,6 +20,10 @@
|
|
20
20
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
|
+
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/resources'
|
26
|
+
|
23
27
|
require 'digest/sha1'
|
24
28
|
require 'uri'
|
25
29
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,45 +21,52 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/resources'
|
26
|
+
|
24
27
|
require 'digest/md5'
|
25
28
|
require 'json'
|
26
29
|
require 'right_support'
|
27
30
|
|
28
|
-
module RightScraper
|
31
|
+
module RightScraper::Resources
|
32
|
+
class Cookbook < ::RightScraper::Resources::Base
|
29
33
|
|
30
|
-
|
34
|
+
EMPTY_MANIFEST_JSON = ::JSON.dump(:manifest => {}).freeze
|
31
35
|
|
32
|
-
|
36
|
+
# @return [String] repo_dir as local repo root dir (sans relative cookbook pos path)
|
37
|
+
attr_reader :repo_dir
|
33
38
|
|
34
|
-
|
39
|
+
def initialize(repo, pos, repo_dir)
|
40
|
+
super(repo, pos)
|
41
|
+
@repo_dir = repo_dir
|
42
|
+
end
|
35
43
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
def manifest=(value)
|
45
|
+
@manifest_json = nil
|
46
|
+
@resource_hash = nil
|
47
|
+
@manifest = value
|
48
|
+
end
|
41
49
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
50
|
+
def manifest_json
|
51
|
+
unless @manifest_json
|
52
|
+
if manifest && !manifest.empty?
|
53
|
+
# note that we are preserving the :manifest key at the root only to
|
54
|
+
# avoid having to change how the manifest is interpreted by Repose.
|
55
|
+
manifest_hash = { :manifest => manifest }
|
56
|
+
@manifest_json = ::RightSupport::Data::HashTools.deep_sorted_json(manifest_hash, pretty=true).freeze
|
57
|
+
else
|
58
|
+
@manifest_json = EMPTY_MANIFEST_JSON
|
52
59
|
end
|
53
|
-
@manifest_json
|
54
60
|
end
|
61
|
+
@manifest_json
|
62
|
+
end
|
55
63
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
end
|
60
|
-
@resource_hash
|
64
|
+
def resource_hash
|
65
|
+
unless @resource_hash
|
66
|
+
@resource_hash = ::Digest::MD5.hexdigest(manifest_json).freeze
|
61
67
|
end
|
62
|
-
|
68
|
+
@resource_hash
|
63
69
|
end
|
70
|
+
|
64
71
|
end
|
65
72
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,35 +21,34 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
module Resources
|
27
|
-
|
28
|
-
class Workflow < Base
|
29
|
-
|
30
|
-
METADATA_EXT = '.meta'
|
31
|
-
DEFINITION_EXT = '.def'
|
32
|
-
|
33
|
-
# Relative path to definition file
|
34
|
-
# @pos must be set before this can be called
|
35
|
-
#
|
36
|
-
# === Return
|
37
|
-
# path(String):: Path to definition file
|
38
|
-
def definition_path
|
39
|
-
path = @pos
|
40
|
-
end
|
41
|
-
|
42
|
-
# Relative path to metadata file
|
43
|
-
# @pos must be set before this can be called
|
44
|
-
#
|
45
|
-
# === Return
|
46
|
-
# path(String):: Path to metadata file
|
47
|
-
def metadata_path
|
48
|
-
path = @pos.chomp(File.extname(@pos)) + METADATA_EXT if @pos
|
49
|
-
end
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/resources'
|
50
26
|
|
27
|
+
module RightScraper::Resources
|
28
|
+
|
29
|
+
class Workflow < ::RightScraper::Resources::Base
|
30
|
+
|
31
|
+
METADATA_EXT = '.meta'
|
32
|
+
DEFINITION_EXT = '.def'
|
33
|
+
|
34
|
+
# Relative path to definition file
|
35
|
+
# @pos must be set before this can be called
|
36
|
+
#
|
37
|
+
# === Return
|
38
|
+
# path(String):: Path to definition file
|
39
|
+
def definition_path
|
40
|
+
path = @pos
|
41
|
+
end
|
42
|
+
|
43
|
+
# Relative path to metadata file
|
44
|
+
# @pos must be set before this can be called
|
45
|
+
#
|
46
|
+
# === Return
|
47
|
+
# path(String):: Path to metadata file
|
48
|
+
def metadata_path
|
49
|
+
path = @pos.chomp(File.extname(@pos)) + METADATA_EXT if @pos
|
51
50
|
end
|
52
51
|
|
53
52
|
end
|
54
|
-
end
|
55
53
|
|
54
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Retrievers
|
28
|
+
autoload :Base, 'right_scraper/retrievers/base'
|
29
|
+
autoload :CheckoutBase, 'right_scraper/retrievers/checkout_base'
|
30
|
+
autoload :Download, 'right_scraper/retrievers/download'
|
31
|
+
autoload :Git, 'right_scraper/retrievers/git'
|
32
|
+
autoload :Svn, 'right_scraper/retrievers/svn'
|
33
|
+
end
|
34
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,102 +21,98 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
# Base class for all retrievers.
|
27
|
-
#
|
28
|
-
# Retrievers fetch remote repositories into a given path
|
29
|
-
# They will attempt to fetch incrementally when possible (e.g. leveraging
|
30
|
-
# the underlying source control management system incremental capabilities)
|
31
|
-
class Base
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/retrievers'
|
32
26
|
|
33
|
-
|
34
|
-
attr_accessor :max_bytes
|
27
|
+
require 'fileutils'
|
35
28
|
|
36
|
-
|
37
|
-
# retrieve operation.
|
38
|
-
attr_accessor :max_seconds
|
29
|
+
module RightScraper::Retrievers
|
39
30
|
|
40
|
-
|
41
|
-
|
31
|
+
# Base class for all retrievers.
|
32
|
+
#
|
33
|
+
# Retrievers fetch remote repositories into a given path
|
34
|
+
# They will attempt to fetch incrementally when possible (e.g. leveraging
|
35
|
+
# the underlying source control management system incremental capabilities)
|
36
|
+
class Base
|
42
37
|
|
43
|
-
|
44
|
-
attr_reader :repo_dir
|
38
|
+
attr_accessor :max_bytes, :max_seconds
|
45
39
|
|
46
|
-
|
47
|
-
class RetrieverError < Exception; end
|
40
|
+
attr_reader :logger, :repository, :repo_dir
|
48
41
|
|
49
|
-
|
50
|
-
|
51
|
-
# additional options. Options may never be required.
|
52
|
-
#
|
53
|
-
# === Options
|
54
|
-
# <tt>:basedir</tt>:: Required, base directory where all files should be retrieved
|
55
|
-
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
56
|
-
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
57
|
-
# <tt>:logger</tt>:: Logger to use
|
58
|
-
#
|
59
|
-
# === Parameters
|
60
|
-
# repository(RightScraper::Repositories::Base):: repository to scrape
|
61
|
-
# options(Hash):: retriever options
|
62
|
-
#
|
63
|
-
# === Raise
|
64
|
-
# 'Missing base directory':: if :basedir option is missing
|
65
|
-
def initialize(repository, options={})
|
66
|
-
raise 'Missing base directory' unless options[:basedir]
|
67
|
-
@repository = repository
|
68
|
-
@max_bytes = options[:max_bytes] || nil
|
69
|
-
@max_seconds = options[:max_seconds] || nil
|
70
|
-
@basedir = options[:basedir]
|
71
|
-
@repo_dir = RightScraper::Retrievers::Base.repo_dir(@basedir, repository)
|
72
|
-
@logger = options[:logger] || RightScraper::Logger.new
|
73
|
-
@logger.repository = repository
|
74
|
-
@logger.operation(:initialize, "setting up in #{@repo_dir}") do
|
75
|
-
FileUtils.mkdir_p(@repo_dir)
|
76
|
-
end
|
77
|
-
end
|
42
|
+
# exceptions
|
43
|
+
class RetrieverError < Exception; end
|
78
44
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
45
|
+
# Create a new retriever for the given repository. This class
|
46
|
+
# recognizes several options, and subclasses may recognize
|
47
|
+
# additional options. Apart from :basedir and :logger, options are never required.
|
48
|
+
#
|
49
|
+
# === Options
|
50
|
+
# <tt>:basedir</tt>:: Required, base directory where all files should be retrieved
|
51
|
+
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
52
|
+
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
53
|
+
# <tt>:logger</tt>:: Required, logger to use
|
54
|
+
#
|
55
|
+
# === Parameters
|
56
|
+
# repository(RightScraper::Repositories::Base):: repository to scrape
|
57
|
+
# options(Hash):: retriever options
|
58
|
+
#
|
59
|
+
# === Raise
|
60
|
+
# 'Missing base directory':: if :basedir option is missing
|
61
|
+
def initialize(repository, options={})
|
62
|
+
raise 'Missing base directory' unless options[:basedir]
|
63
|
+
@repository = repository
|
64
|
+
@max_bytes = options[:max_bytes] || nil
|
65
|
+
@max_seconds = options[:max_seconds] || nil
|
66
|
+
@basedir = options[:basedir]
|
67
|
+
@repo_dir = RightScraper::Retrievers::Base.repo_dir(@basedir, repository)
|
68
|
+
unless @logger = options[:logger]
|
69
|
+
raise ::ArgumentError, ':logger is required'
|
91
70
|
end
|
92
|
-
|
93
|
-
|
94
|
-
def retrieve
|
95
|
-
raise NotImplementedError
|
71
|
+
@logger.operation(:initialize, "setting up in #{@repo_dir}") do
|
72
|
+
::FileUtils.mkdir_p(@repo_dir)
|
96
73
|
end
|
74
|
+
end
|
97
75
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
# repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
|
103
|
-
#
|
104
|
-
# === Return
|
105
|
-
# String:: Path to local directory that corresponds to given repository
|
106
|
-
def self.repo_dir(root_dir, repo)
|
107
|
-
repo = RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
|
108
|
-
dir_name = repo.repository_hash
|
109
|
-
dir_path = File.join(root_dir, dir_name)
|
110
|
-
"#{dir_path}/repo"
|
111
|
-
end
|
76
|
+
# Determines if retriever is available (has required CLI tools, etc.)
|
77
|
+
def available?
|
78
|
+
raise ::NotImplementedError
|
79
|
+
end
|
112
80
|
|
113
|
-
|
81
|
+
# Paths to ignore when traversing the filesystem. Mostly used for
|
82
|
+
# things like Git and Subversion version control directories.
|
83
|
+
#
|
84
|
+
# === Return
|
85
|
+
# list(Array):: list of filenames to ignore.
|
86
|
+
def ignorable_paths
|
87
|
+
[]
|
88
|
+
end
|
114
89
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
90
|
+
# Retrieve repository; must be overridden by subclasses
|
91
|
+
def retrieve
|
92
|
+
raise ::NotImplementedError
|
93
|
+
end
|
119
94
|
|
95
|
+
# Path to directory where given repo should be or was downloaded
|
96
|
+
#
|
97
|
+
# === Parameters
|
98
|
+
# root_dir(String):: Path to directory containing all scraped repositories
|
99
|
+
# repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
|
100
|
+
#
|
101
|
+
# === Return
|
102
|
+
# String:: Path to local directory that corresponds to given repository
|
103
|
+
def self.repo_dir(root_dir, repo)
|
104
|
+
repo = ::RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
|
105
|
+
dir_name = repo.repository_hash
|
106
|
+
dir_path = ::File.join(root_dir, dir_name)
|
107
|
+
"#{dir_path}/repo"
|
120
108
|
end
|
109
|
+
|
110
|
+
protected
|
111
|
+
|
112
|
+
# (Hash) Lookup table from textual description of scraper type
|
113
|
+
# ('cookbook' or 'workflow' currently) to the class that
|
114
|
+
# represents that scraper.
|
115
|
+
@@types = {} unless class_variable_defined?(:@@types)
|
116
|
+
|
121
117
|
end
|
122
118
|
end
|