right_scraper 3.2.6 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/right_scraper.rb +16 -34
- data/lib/right_scraper/builders.rb +32 -0
- data/lib/right_scraper/builders/base.rb +19 -20
- data/lib/right_scraper/builders/filesystem.rb +8 -6
- data/lib/right_scraper/builders/union.rb +4 -1
- data/lib/right_scraper/loggers.rb +31 -0
- data/lib/right_scraper/loggers/base.rb +113 -0
- data/lib/right_scraper/loggers/default.rb +98 -0
- data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
- data/lib/right_scraper/processes.rb +33 -0
- data/lib/right_scraper/processes/shell.rb +227 -0
- data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
- data/lib/right_scraper/processes/svn_client.rb +117 -0
- data/lib/right_scraper/processes/warden.rb +358 -0
- data/lib/right_scraper/registered_base.rb +154 -0
- data/lib/right_scraper/repositories.rb +33 -0
- data/lib/right_scraper/repositories/base.rb +271 -232
- data/lib/right_scraper/repositories/download.rb +8 -6
- data/lib/right_scraper/repositories/git.rb +8 -9
- data/lib/right_scraper/repositories/svn.rb +8 -8
- data/lib/right_scraper/resources.rb +32 -0
- data/lib/right_scraper/resources/base.rb +5 -1
- data/lib/right_scraper/resources/cookbook.rb +34 -27
- data/lib/right_scraper/resources/workflow.rb +27 -28
- data/lib/right_scraper/retrievers.rb +34 -0
- data/lib/right_scraper/retrievers/base.rb +80 -84
- data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
- data/lib/right_scraper/retrievers/download.rb +125 -117
- data/lib/right_scraper/retrievers/git.rb +377 -223
- data/lib/right_scraper/retrievers/svn.rb +102 -62
- data/lib/right_scraper/scanners.rb +37 -0
- data/lib/right_scraper/scanners/base.rb +77 -80
- data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
- data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
- data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
- data/lib/right_scraper/scanners/union.rb +61 -58
- data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
- data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
- data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
- data/lib/right_scraper/scrapers.rb +32 -0
- data/lib/right_scraper/scrapers/base.rb +217 -205
- data/lib/right_scraper/scrapers/cookbook.rb +42 -40
- data/lib/right_scraper/scrapers/workflow.rb +57 -58
- data/lib/right_scraper/version.rb +3 -0
- data/right_scraper.gemspec +12 -16
- metadata +57 -163
- data/Gemfile +0 -15
- data/Rakefile +0 -89
- data/lib/right_scraper/logger.rb +0 -107
- data/lib/right_scraper/loggers/noisy.rb +0 -85
- data/lib/right_scraper/repositories/mock.rb +0 -70
- data/lib/right_scraper/retrievers/checkout.rb +0 -79
- data/lib/right_scraper/scraper_logger.rb +0 -66
- data/lib/right_scraper/svn_client.rb +0 -164
- data/right_scraper.rconf +0 -13
- data/spec/builder_spec.rb +0 -50
- data/spec/cookbook_helper.rb +0 -73
- data/spec/cookbook_manifest_spec.rb +0 -93
- data/spec/cookbook_s3_upload_spec.rb +0 -159
- data/spec/download/download_retriever_spec.rb +0 -118
- data/spec/download/download_retriever_spec_helper.rb +0 -72
- data/spec/download/download_spec.rb +0 -128
- data/spec/download/multi_dir_spec.rb +0 -106
- data/spec/download/multi_dir_spec_helper.rb +0 -40
- data/spec/git/cookbook_spec.rb +0 -165
- data/spec/git/demokey +0 -27
- data/spec/git/demokey.pub +0 -1
- data/spec/git/password_key +0 -30
- data/spec/git/password_key.pub +0 -1
- data/spec/git/repository_spec.rb +0 -110
- data/spec/git/retriever_spec.rb +0 -553
- data/spec/git/retriever_spec_helper.rb +0 -112
- data/spec/git/scraper_spec.rb +0 -151
- data/spec/git/ssh_spec.rb +0 -174
- data/spec/git/url_spec.rb +0 -103
- data/spec/logger_spec.rb +0 -185
- data/spec/repository_spec.rb +0 -111
- data/spec/retriever_spec_helper.rb +0 -146
- data/spec/scanner_spec.rb +0 -61
- data/spec/scraper_helper.rb +0 -88
- data/spec/scraper_spec.rb +0 -147
- data/spec/spec_helper.rb +0 -185
- data/spec/svn/cookbook_spec.rb +0 -96
- data/spec/svn/multi_svn_spec.rb +0 -64
- data/spec/svn/multi_svn_spec_helper.rb +0 -40
- data/spec/svn/repository_spec.rb +0 -72
- data/spec/svn/retriever_spec.rb +0 -266
- data/spec/svn/scraper_spec.rb +0 -90
- data/spec/svn/svn_retriever_spec_helper.rb +0 -90
- data/spec/svn/url_spec.rb +0 -47
- data/spec/url_spec.rb +0 -164
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,12 +21,16 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
24
27
|
module RightScraper
|
25
28
|
module Repositories
|
29
|
+
|
26
30
|
# A repository that is just an archive file hanging off a
|
27
31
|
# web server somewhere. This version uses a command line curl to
|
28
32
|
# download the archive, and command line tar to extract it.
|
29
|
-
class Download < Base
|
33
|
+
class Download < ::RightScraper::Repositories::Base
|
30
34
|
# (String) Type of the repository, here 'download'.
|
31
35
|
def repo_type
|
32
36
|
:download
|
@@ -81,10 +85,8 @@ module RightScraper
|
|
81
85
|
RightScraper::Retrievers::Download.new(self, options)
|
82
86
|
end
|
83
87
|
|
84
|
-
|
85
|
-
|
86
|
-
@@types[:download] = RightScraper::Repositories::Download
|
87
|
-
|
88
|
+
# self-register
|
89
|
+
register_self
|
88
90
|
end
|
89
91
|
end
|
90
92
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,10 +21,13 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
24
27
|
module RightScraper
|
25
28
|
module Repositories
|
26
29
|
# A Git repository.
|
27
|
-
class Git < Base
|
30
|
+
class Git < ::RightScraper::Repositories::Base
|
28
31
|
|
29
32
|
# (String) Optional, tag or branch of repository that should be downloaded
|
30
33
|
attr_accessor :tag
|
@@ -80,13 +83,9 @@ module RightScraper
|
|
80
83
|
RightScraper::Retrievers::Git.new(self, options)
|
81
84
|
end
|
82
85
|
|
83
|
-
#
|
84
|
-
|
85
|
-
|
86
|
-
# Add git URL schemas to the list of okay schemas.
|
87
|
-
@@okay_schemes << "git"
|
88
|
-
@@okay_schemes << "git+ssh"
|
89
|
-
@@okay_schemes << "ssh"
|
86
|
+
# self-register
|
87
|
+
register_self
|
88
|
+
register_url_schemas('git', 'git+ssh', 'ssh')
|
90
89
|
end
|
91
90
|
end
|
92
91
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,10 +21,13 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/repositories'
|
26
|
+
|
24
27
|
module RightScraper
|
25
28
|
module Repositories
|
26
29
|
# A repository that is stored in a Subversion server.
|
27
|
-
class Svn < Base
|
30
|
+
class Svn < ::RightScraper::Repositories::Base
|
28
31
|
|
29
32
|
# (String) Optional, tag or branch of repository that should be downloaded
|
30
33
|
attr_accessor :tag
|
@@ -85,12 +88,9 @@ module RightScraper
|
|
85
88
|
RightScraper::Retrievers::Svn.new(self, options)
|
86
89
|
end
|
87
90
|
|
88
|
-
#
|
89
|
-
|
90
|
-
|
91
|
-
# Add git URL schemas to the list of okay schemas.
|
92
|
-
@@okay_schemes << "svn"
|
93
|
-
@@okay_schemes << "svn+ssh"
|
91
|
+
# self-register
|
92
|
+
register_self
|
93
|
+
register_url_schemas('svn', 'svn+ssh')
|
94
94
|
end
|
95
95
|
end
|
96
96
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Resources
|
28
|
+
autoload :Base, 'right_scraper/resources/base'
|
29
|
+
autoload :Cookbook, 'right_scraper/resources/cookbook'
|
30
|
+
autoload :Workflow, 'right_scraper/resources/workflow'
|
31
|
+
end
|
32
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -20,6 +20,10 @@
|
|
20
20
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
|
+
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/resources'
|
26
|
+
|
23
27
|
require 'digest/sha1'
|
24
28
|
require 'uri'
|
25
29
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,45 +21,52 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/resources'
|
26
|
+
|
24
27
|
require 'digest/md5'
|
25
28
|
require 'json'
|
26
29
|
require 'right_support'
|
27
30
|
|
28
|
-
module RightScraper
|
31
|
+
module RightScraper::Resources
|
32
|
+
class Cookbook < ::RightScraper::Resources::Base
|
29
33
|
|
30
|
-
|
34
|
+
EMPTY_MANIFEST_JSON = ::JSON.dump(:manifest => {}).freeze
|
31
35
|
|
32
|
-
|
36
|
+
# @return [String] repo_dir as local repo root dir (sans relative cookbook pos path)
|
37
|
+
attr_reader :repo_dir
|
33
38
|
|
34
|
-
|
39
|
+
def initialize(repo, pos, repo_dir)
|
40
|
+
super(repo, pos)
|
41
|
+
@repo_dir = repo_dir
|
42
|
+
end
|
35
43
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
44
|
+
def manifest=(value)
|
45
|
+
@manifest_json = nil
|
46
|
+
@resource_hash = nil
|
47
|
+
@manifest = value
|
48
|
+
end
|
41
49
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
50
|
+
def manifest_json
|
51
|
+
unless @manifest_json
|
52
|
+
if manifest && !manifest.empty?
|
53
|
+
# note that we are preserving the :manifest key at the root only to
|
54
|
+
# avoid having to change how the manifest is interpreted by Repose.
|
55
|
+
manifest_hash = { :manifest => manifest }
|
56
|
+
@manifest_json = ::RightSupport::Data::HashTools.deep_sorted_json(manifest_hash, pretty=true).freeze
|
57
|
+
else
|
58
|
+
@manifest_json = EMPTY_MANIFEST_JSON
|
52
59
|
end
|
53
|
-
@manifest_json
|
54
60
|
end
|
61
|
+
@manifest_json
|
62
|
+
end
|
55
63
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
end
|
60
|
-
@resource_hash
|
64
|
+
def resource_hash
|
65
|
+
unless @resource_hash
|
66
|
+
@resource_hash = ::Digest::MD5.hexdigest(manifest_json).freeze
|
61
67
|
end
|
62
|
-
|
68
|
+
@resource_hash
|
63
69
|
end
|
70
|
+
|
64
71
|
end
|
65
72
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,35 +21,34 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
module Resources
|
27
|
-
|
28
|
-
class Workflow < Base
|
29
|
-
|
30
|
-
METADATA_EXT = '.meta'
|
31
|
-
DEFINITION_EXT = '.def'
|
32
|
-
|
33
|
-
# Relative path to definition file
|
34
|
-
# @pos must be set before this can be called
|
35
|
-
#
|
36
|
-
# === Return
|
37
|
-
# path(String):: Path to definition file
|
38
|
-
def definition_path
|
39
|
-
path = @pos
|
40
|
-
end
|
41
|
-
|
42
|
-
# Relative path to metadata file
|
43
|
-
# @pos must be set before this can be called
|
44
|
-
#
|
45
|
-
# === Return
|
46
|
-
# path(String):: Path to metadata file
|
47
|
-
def metadata_path
|
48
|
-
path = @pos.chomp(File.extname(@pos)) + METADATA_EXT if @pos
|
49
|
-
end
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/resources'
|
50
26
|
|
27
|
+
module RightScraper::Resources
|
28
|
+
|
29
|
+
class Workflow < ::RightScraper::Resources::Base
|
30
|
+
|
31
|
+
METADATA_EXT = '.meta'
|
32
|
+
DEFINITION_EXT = '.def'
|
33
|
+
|
34
|
+
# Relative path to definition file
|
35
|
+
# @pos must be set before this can be called
|
36
|
+
#
|
37
|
+
# === Return
|
38
|
+
# path(String):: Path to definition file
|
39
|
+
def definition_path
|
40
|
+
path = @pos
|
41
|
+
end
|
42
|
+
|
43
|
+
# Relative path to metadata file
|
44
|
+
# @pos must be set before this can be called
|
45
|
+
#
|
46
|
+
# === Return
|
47
|
+
# path(String):: Path to metadata file
|
48
|
+
def metadata_path
|
49
|
+
path = @pos.chomp(File.extname(@pos)) + METADATA_EXT if @pos
|
51
50
|
end
|
52
51
|
|
53
52
|
end
|
54
|
-
end
|
55
53
|
|
54
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2013 RightScale Inc
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
# ancestor
|
24
|
+
require 'right_scraper'
|
25
|
+
|
26
|
+
module RightScraper
|
27
|
+
module Retrievers
|
28
|
+
autoload :Base, 'right_scraper/retrievers/base'
|
29
|
+
autoload :CheckoutBase, 'right_scraper/retrievers/checkout_base'
|
30
|
+
autoload :Download, 'right_scraper/retrievers/download'
|
31
|
+
autoload :Git, 'right_scraper/retrievers/git'
|
32
|
+
autoload :Svn, 'right_scraper/retrievers/svn'
|
33
|
+
end
|
34
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright: Copyright (c) 2010-
|
2
|
+
# Copyright: Copyright (c) 2010-2013 RightScale, Inc.
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
5
5
|
# a copy of this software and associated documentation files (the
|
@@ -21,102 +21,98 @@
|
|
21
21
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
# Base class for all retrievers.
|
27
|
-
#
|
28
|
-
# Retrievers fetch remote repositories into a given path
|
29
|
-
# They will attempt to fetch incrementally when possible (e.g. leveraging
|
30
|
-
# the underlying source control management system incremental capabilities)
|
31
|
-
class Base
|
24
|
+
# ancestor
|
25
|
+
require 'right_scraper/retrievers'
|
32
26
|
|
33
|
-
|
34
|
-
attr_accessor :max_bytes
|
27
|
+
require 'fileutils'
|
35
28
|
|
36
|
-
|
37
|
-
# retrieve operation.
|
38
|
-
attr_accessor :max_seconds
|
29
|
+
module RightScraper::Retrievers
|
39
30
|
|
40
|
-
|
41
|
-
|
31
|
+
# Base class for all retrievers.
|
32
|
+
#
|
33
|
+
# Retrievers fetch remote repositories into a given path
|
34
|
+
# They will attempt to fetch incrementally when possible (e.g. leveraging
|
35
|
+
# the underlying source control management system incremental capabilities)
|
36
|
+
class Base
|
42
37
|
|
43
|
-
|
44
|
-
attr_reader :repo_dir
|
38
|
+
attr_accessor :max_bytes, :max_seconds
|
45
39
|
|
46
|
-
|
47
|
-
class RetrieverError < Exception; end
|
40
|
+
attr_reader :logger, :repository, :repo_dir
|
48
41
|
|
49
|
-
|
50
|
-
|
51
|
-
# additional options. Options may never be required.
|
52
|
-
#
|
53
|
-
# === Options
|
54
|
-
# <tt>:basedir</tt>:: Required, base directory where all files should be retrieved
|
55
|
-
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
56
|
-
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
57
|
-
# <tt>:logger</tt>:: Logger to use
|
58
|
-
#
|
59
|
-
# === Parameters
|
60
|
-
# repository(RightScraper::Repositories::Base):: repository to scrape
|
61
|
-
# options(Hash):: retriever options
|
62
|
-
#
|
63
|
-
# === Raise
|
64
|
-
# 'Missing base directory':: if :basedir option is missing
|
65
|
-
def initialize(repository, options={})
|
66
|
-
raise 'Missing base directory' unless options[:basedir]
|
67
|
-
@repository = repository
|
68
|
-
@max_bytes = options[:max_bytes] || nil
|
69
|
-
@max_seconds = options[:max_seconds] || nil
|
70
|
-
@basedir = options[:basedir]
|
71
|
-
@repo_dir = RightScraper::Retrievers::Base.repo_dir(@basedir, repository)
|
72
|
-
@logger = options[:logger] || RightScraper::Logger.new
|
73
|
-
@logger.repository = repository
|
74
|
-
@logger.operation(:initialize, "setting up in #{@repo_dir}") do
|
75
|
-
FileUtils.mkdir_p(@repo_dir)
|
76
|
-
end
|
77
|
-
end
|
42
|
+
# exceptions
|
43
|
+
class RetrieverError < Exception; end
|
78
44
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
45
|
+
# Create a new retriever for the given repository. This class
|
46
|
+
# recognizes several options, and subclasses may recognize
|
47
|
+
# additional options. Options may never be required.
|
48
|
+
#
|
49
|
+
# === Options
|
50
|
+
# <tt>:basedir</tt>:: Required, base directory where all files should be retrieved
|
51
|
+
# <tt>:max_bytes</tt>:: Maximum number of bytes to read
|
52
|
+
# <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
|
53
|
+
# <tt>:logger</tt>:: Logger to use
|
54
|
+
#
|
55
|
+
# === Parameters
|
56
|
+
# repository(RightScraper::Repositories::Base):: repository to scrape
|
57
|
+
# options(Hash):: retriever options
|
58
|
+
#
|
59
|
+
# === Raise
|
60
|
+
# 'Missing base directory':: if :basedir option is missing
|
61
|
+
def initialize(repository, options={})
|
62
|
+
raise 'Missing base directory' unless options[:basedir]
|
63
|
+
@repository = repository
|
64
|
+
@max_bytes = options[:max_bytes] || nil
|
65
|
+
@max_seconds = options[:max_seconds] || nil
|
66
|
+
@basedir = options[:basedir]
|
67
|
+
@repo_dir = RightScraper::Retrievers::Base.repo_dir(@basedir, repository)
|
68
|
+
unless @logger = options[:logger]
|
69
|
+
raise ::ArgumentError, ':logger is required'
|
91
70
|
end
|
92
|
-
|
93
|
-
|
94
|
-
def retrieve
|
95
|
-
raise NotImplementedError
|
71
|
+
@logger.operation(:initialize, "setting up in #{@repo_dir}") do
|
72
|
+
::FileUtils.mkdir_p(@repo_dir)
|
96
73
|
end
|
74
|
+
end
|
97
75
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
# repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
|
103
|
-
#
|
104
|
-
# === Return
|
105
|
-
# String:: Path to local directory that corresponds to given repository
|
106
|
-
def self.repo_dir(root_dir, repo)
|
107
|
-
repo = RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
|
108
|
-
dir_name = repo.repository_hash
|
109
|
-
dir_path = File.join(root_dir, dir_name)
|
110
|
-
"#{dir_path}/repo"
|
111
|
-
end
|
76
|
+
# Determines if retriever is available (has required CLI tools, etc.)
|
77
|
+
def available?
|
78
|
+
raise ::NotImplementedError
|
79
|
+
end
|
112
80
|
|
113
|
-
|
81
|
+
# Paths to ignore when traversing the filesystem. Mostly used for
|
82
|
+
# things like Git and Subversion version control directories.
|
83
|
+
#
|
84
|
+
# === Return
|
85
|
+
# list(Array):: list of filenames to ignore.
|
86
|
+
def ignorable_paths
|
87
|
+
[]
|
88
|
+
end
|
114
89
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
90
|
+
# Retrieve repository, overridden in heirs
|
91
|
+
def retrieve
|
92
|
+
raise ::NotImplementedError
|
93
|
+
end
|
119
94
|
|
95
|
+
# Path to directory where given repo should be or was downloaded
|
96
|
+
#
|
97
|
+
# === Parameters
|
98
|
+
# root_dir(String):: Path to directory containing all scraped repositories
|
99
|
+
# repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
|
100
|
+
#
|
101
|
+
# === Return
|
102
|
+
# String:: Path to local directory that corresponds to given repository
|
103
|
+
def self.repo_dir(root_dir, repo)
|
104
|
+
repo = ::RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
|
105
|
+
dir_name = repo.repository_hash
|
106
|
+
dir_path = ::File.join(root_dir, dir_name)
|
107
|
+
"#{dir_path}/repo"
|
120
108
|
end
|
109
|
+
|
110
|
+
protected
|
111
|
+
|
112
|
+
# (Hash) Lookup table from textual description of scraper type
|
113
|
+
# ('cookbook' or 'workflow' currently) to the class that
|
114
|
+
# represents that scraper.
|
115
|
+
@@types = {} unless class_variable_defined?(:@@types)
|
116
|
+
|
121
117
|
end
|
122
118
|
end
|