right_scraper 1.0.26 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. data/Gemfile +16 -0
  2. data/README.rdoc +9 -28
  3. data/Rakefile +51 -39
  4. data/lib/right_scraper/builders/base.rb +64 -0
  5. data/lib/right_scraper/builders/filesystem.rb +96 -0
  6. data/lib/right_scraper/builders/union.rb +57 -0
  7. data/lib/right_scraper/logger.rb +102 -0
  8. data/lib/right_scraper/loggers/noisy.rb +85 -0
  9. data/lib/right_scraper/processes/ssh.rb +188 -0
  10. data/lib/right_scraper/repositories/base.rb +299 -0
  11. data/lib/right_scraper/repositories/download.rb +90 -0
  12. data/lib/right_scraper/repositories/git.rb +92 -0
  13. data/lib/right_scraper/repositories/mock.rb +70 -0
  14. data/lib/right_scraper/repositories/svn.rb +96 -0
  15. data/lib/right_scraper/resources/base.rb +70 -0
  16. data/{spec/scraper_base_spec.rb → lib/right_scraper/resources/cookbook.rb} +9 -23
  17. data/lib/right_scraper/resources/workflow.rb +55 -0
  18. data/lib/right_scraper/retrievers/base.rb +114 -0
  19. data/lib/right_scraper/retrievers/checkout.rb +79 -0
  20. data/lib/right_scraper/retrievers/download.rb +97 -0
  21. data/lib/right_scraper/retrievers/git.rb +140 -0
  22. data/lib/right_scraper/retrievers/svn.rb +87 -0
  23. data/lib/right_scraper/scanners/base.rb +111 -0
  24. data/lib/right_scraper/scanners/cookbook_manifest.rb +59 -0
  25. data/lib/right_scraper/scanners/cookbook_metadata.rb +69 -0
  26. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +84 -0
  27. data/lib/right_scraper/scanners/union.rb +89 -0
  28. data/lib/right_scraper/scanners/workflow_manifest.rb +86 -0
  29. data/lib/right_scraper/scanners/workflow_metadata.rb +70 -0
  30. data/lib/right_scraper/scanners/workflow_s3_upload.rb +85 -0
  31. data/lib/right_scraper/scraper.rb +81 -57
  32. data/lib/right_scraper/scraper_logger.rb +61 -0
  33. data/lib/right_scraper/scrapers/base.rb +262 -0
  34. data/lib/right_scraper/scrapers/cookbook.rb +73 -0
  35. data/lib/right_scraper/scrapers/workflow.rb +88 -0
  36. data/lib/right_scraper/svn_client.rb +101 -0
  37. data/lib/right_scraper/version.rb +28 -0
  38. data/lib/right_scraper.rb +35 -11
  39. data/right_scraper.gemspec +26 -13
  40. data/right_scraper.rconf +13 -0
  41. data/spec/builder_spec.rb +50 -0
  42. data/spec/cookbook_helper.rb +73 -0
  43. data/spec/cookbook_manifest_spec.rb +55 -0
  44. data/spec/cookbook_s3_upload_spec.rb +152 -0
  45. data/spec/download/download_retriever_spec.rb +118 -0
  46. data/spec/download/download_retriever_spec_helper.rb +72 -0
  47. data/spec/download/download_spec.rb +130 -0
  48. data/spec/download/multi_dir_spec.rb +106 -0
  49. data/spec/download/multi_dir_spec_helper.rb +40 -0
  50. data/spec/git/cookbook_spec.rb +166 -0
  51. data/spec/git/demokey +27 -0
  52. data/spec/git/demokey.pub +1 -0
  53. data/spec/git/password_key +30 -0
  54. data/spec/git/password_key.pub +1 -0
  55. data/spec/git/repository_spec.rb +110 -0
  56. data/spec/git/retriever_spec.rb +505 -0
  57. data/spec/git/retriever_spec_helper.rb +112 -0
  58. data/spec/git/scraper_spec.rb +136 -0
  59. data/spec/git/ssh_spec.rb +170 -0
  60. data/spec/git/url_spec.rb +103 -0
  61. data/spec/logger_spec.rb +185 -0
  62. data/spec/repository_spec.rb +89 -23
  63. data/spec/{scraper_spec_helper_base.rb → retriever_spec_helper.rb} +41 -27
  64. data/spec/scanner_spec.rb +61 -0
  65. data/spec/scraper_helper.rb +96 -0
  66. data/spec/scraper_spec.rb +123 -45
  67. data/spec/spec_helper.rb +87 -14
  68. data/spec/svn/cookbook_spec.rb +97 -0
  69. data/spec/svn/multi_svn_spec.rb +64 -0
  70. data/spec/svn/multi_svn_spec_helper.rb +40 -0
  71. data/spec/svn/repository_spec.rb +72 -0
  72. data/spec/svn/retriever_spec.rb +261 -0
  73. data/spec/svn/scraper_spec.rb +90 -0
  74. data/spec/svn/{svn_scraper_spec_helper.rb → svn_retriever_spec_helper.rb} +46 -27
  75. data/spec/svn/url_spec.rb +47 -0
  76. data/spec/url_spec.rb +164 -0
  77. metadata +203 -31
  78. data/lib/right_scraper/linux/process_monitor.rb +0 -84
  79. data/lib/right_scraper/repository.rb +0 -78
  80. data/lib/right_scraper/scraper_base.rb +0 -175
  81. data/lib/right_scraper/scrapers/download_scraper.rb +0 -67
  82. data/lib/right_scraper/scrapers/git_scraper.rb +0 -283
  83. data/lib/right_scraper/scrapers/svn_scraper.rb +0 -119
  84. data/lib/right_scraper/watcher.rb +0 -158
  85. data/lib/right_scraper/win32/process_monitor.rb +0 -98
  86. data/spec/download/download_scraper_spec.rb +0 -94
  87. data/spec/git/git_scraper_spec.rb +0 -165
  88. data/spec/git/git_scraper_spec_helper.rb +0 -72
  89. data/spec/rcov.opts +0 -1
  90. data/spec/spec.opts +0 -2
  91. data/spec/svn/svn_scraper_spec.rb +0 -148
  92. data/spec/watcher_spec.rb +0 -74
@@ -0,0 +1,70 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+ require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
24
+
25
+ module RightScraper
26
+ module Repositories
27
+ # A "repository" that is just there for testing. This class is not
28
+ # loaded by default.
29
+ class Mock < Base
30
+ # Create a new mock repository.
31
+ def initialize
32
+ @repo_type = :mock
33
+ end
34
+ # (Symbol) Type of the repository, here :mock.
35
+ attr_accessor :repo_type
36
+
37
+ # (String) Optional, tag or branch of repository that should be downloaded
38
+ attr_accessor :tag
39
+
40
+ # (String) Optional, username
41
+ attr_accessor :first_credential
42
+
43
+ # (String) Optional, password
44
+ attr_accessor :second_credential
45
+
46
+ # Unique representation for this repo, should resolve to the same string
47
+ # for repos that should be cloned in the same directory
48
+ #
49
+ # === Returns
50
+ # res(String):: Unique representation for this repo
51
+ def to_s
52
+ res = "mock #{url}:#{tag}"
53
+ end
54
+
55
+ # (Base class) Appropriate class for scraping this sort of
56
+ # repository.
57
+ def scraper
58
+ @@scraper || raise("Scraper for mocks isn't defined yet")
59
+ end
60
+
61
+ # Set the correct sort of scraper to use for mock repositories.
62
+ def self.scraper=(scraper)
63
+ @@scraper = scraper
64
+ end
65
+
66
+ # Add this repository to the list of available types.
67
+ @@types[:mock] = RightScraper::Repositories::Mock
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,96 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScraper
25
+ module Repositories
26
+ # A repository that is stored in a Subversion server.
27
+ class Svn < Base
28
+
29
+ # (String) Optional, tag or branch of repository that should be downloaded
30
+ attr_accessor :tag
31
+ alias_method :revision, :tag
32
+
33
+ # (String) Optional, SVN username
34
+ attr_accessor :first_credential
35
+ alias_method :username, :first_credential
36
+
37
+ # (String) Optional, SVN password
38
+ attr_accessor :second_credential
39
+ alias_method :password, :second_credential
40
+
41
+ # Create a new SvnRepository. If the tag is not specified,
42
+ # defaults to HEAD.
43
+ def initialize(*args)
44
+ super
45
+ @tag = "HEAD" if @tag.nil?
46
+ end
47
+
48
+ # (Symbol) Type of the repository, here :svn.
49
+ def repo_type
50
+ :svn
51
+ end
52
+
53
+ # Return a unique identifier for this revision in this repository.
54
+ #
55
+ # === Returns
56
+ # String:: opaque unique ID for this revision in this repository
57
+ def checkout_hash
58
+ digest("#{PROTOCOL_VERSION}\000#{repo_type}\000#{url}\000#{tag}")
59
+ end
60
+
61
+ # Convert this repository to a URL in the style of resource URLs.
62
+ #
63
+ # === Returns
64
+ # URI:: URL representing this repository
65
+ def to_url
66
+ if first_credential
67
+ uri = add_users_to(url, first_credential, second_credential)
68
+ else
69
+ uri = URI.parse(url)
70
+ end
71
+ uri
72
+ end
73
+
74
+ # Instantiate retriever for this kind of repository
75
+ #
76
+ # === Options
77
+ # <tt>:max_bytes</tt>:: Maximum number of bytes to read
78
+ # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
79
+ # <tt>:basedir</tt>:: Destination directory, use temp dir if not specified
80
+ # <tt>:logger</tt>:: Logger to use
81
+ #
82
+ # === Return
83
+ # retriever(Retrievers::Svn):: Retriever for this repository
84
+ def retriever(options)
85
+ RightScraper::Retrievers::Svn.new(self, options)
86
+ end
87
+
88
+ # Add this repository to the list of available types.
89
+ @@types[:svn] = RightScraper::Repositories::Svn
90
+
91
+ # Add svn URL schemes to the list of okay schemes.
92
+ @@okay_schemes << "svn"
93
+ @@okay_schemes << "svn+ssh"
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,70 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+ require 'digest/sha1'
24
+ require 'uri'
25
+
26
+ module RightScraper
27
+
28
+ module Resources
29
+
30
+ # Localized representation of a resource. Contains the resource
31
+ # contents, and the metadata as a hash. A resource at its core is any
32
+ # abstraction that is statically represented by a set of files and
33
+ # directories and metadata.
34
+ #
35
+ # The JSON metadata for the resource is in #metadata, and the
36
+ # manifest is in #manifest.
37
+ class Base
38
+
39
+ # (Repositories::Base) Repository the resource was fetched from.
40
+ attr_reader :repository
41
+
42
+ # (Hash) Metadata from the resource.
43
+ attr_accessor :metadata
44
+
45
+ # (Hash) Manifest for resource. A hash of path => SHA-1 digests.
46
+ attr_accessor :manifest
47
+
48
+ # (String) Position in the repository.
49
+ attr_accessor :pos
50
+
51
+ # Create a new resource from the given parameters.
52
+ #
53
+ # === Parameters
54
+ # repo(Repositories::Base):: Repository containing this resource
55
+ def initialize(repo, pos)
56
+ @repository = repo
57
+ @pos = pos
58
+ end
59
+
60
+ # Resource hash
61
+ #
62
+ # === Return
63
+ # hash(String):: Hexadecimal value that uniquely identifies this resource
64
+ def resource_hash
65
+ Digest::SHA1.hexdigest("#{PROTOCOL_VERSION}\000#{@repository.checkout_hash}\000#{@pos}")
66
+ end
67
+
68
+ end
69
+ end
70
+ end
@@ -1,18 +1,18 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
6
6
  # 'Software'), to deal in the Software without restriction, including
7
7
  # without limitation the rights to use, copy, modify, merge, publish,
8
- # distribute, sublicense, and/or sell copies of the Software, and to
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
9
  # permit persons to whom the Software is furnished to do so, subject to
10
10
  # the following conditions:
11
11
  #
12
12
  # The above copyright notice and this permission notice shall be
13
13
  # included in all copies or substantial portions of the Software.
14
14
  #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
16
  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
17
  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
18
  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
@@ -21,26 +21,12 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- require File.join(File.dirname(__FILE__), 'spec_helper')
25
- require 'scraper_base'
24
+ module RightScraper
26
25
 
27
- describe RightScale::ScraperBase do
26
+ module Resources
27
+
28
+ class Cookbook < Base
29
+ end
28
30
 
29
- before(:each) do
30
- @base = RightScale::ScraperBase.new('/tmp', max_bytes=1024**2, max_seconds=20)
31
- end
32
-
33
- it 'should initialize the scrape directory' do
34
- @base.root_dir.should == '/tmp'
35
- end
36
-
37
- it 'should default to non incremental updates' do
38
- @base.send(:incremental_update?).should be_false
39
- end
40
-
41
- it 'should allow retrieving the download directory path' do
42
- repo_dir = RightScale::ScraperBase.repo_dir('root_dir', { :repo_type => :git, :url => 'git://github.com/rightscale/right_scraper.git' })
43
- repo_dir.should =~ /^root_dir\//
44
31
  end
45
-
46
- end
32
+ end
@@ -0,0 +1,55 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScraper
25
+
26
+ module Resources
27
+
28
+ class Workflow < Base
29
+
30
+ METADATA_EXT = '.meta'
31
+ DEFINITION_EXT = '.def'
32
+
33
+ # Relative path to definition file
34
+ # @pos must be set before this can be called
35
+ #
36
+ # === Return
37
+ # path(String):: Path to definition file
38
+ def definition_path
39
+ path = @pos
40
+ end
41
+
42
+ # Relative path to metadata file
43
+ # @pos must be set before this can be called
44
+ #
45
+ # === Return
46
+ # path(String):: Path to metadata file
47
+ def metadata_path
48
+ path = @pos.chomp(File.extname(@pos)) + METADATA_EXT if @pos
49
+ end
50
+
51
+ end
52
+
53
+ end
54
+ end
55
+
@@ -0,0 +1,114 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScraper
25
+ module Retrievers
26
+ # Base class for all retrievers.
27
+ #
28
+ # Retrievers fetch remote repositories into a given path
29
+ # They will attempt to fetch incrementally when possible (e.g. leveraging
30
+ # the underlying source control management system incremental capabilities)
31
+ class Base
32
+
33
+ # Integer:: optional maximum size permitted for repositories
34
+ attr_accessor :max_bytes
35
+
36
+ # Integer:: optional maximum number of seconds for any single
37
+ # retrieve operation.
38
+ attr_accessor :max_seconds
39
+
40
+ # RightScraper::Repositories::Base:: repository currently being retrieved
41
+ attr_reader :repository
42
+
43
+ # String:: Path to directory where files are retrieved
44
+ attr_reader :repo_dir
45
+
46
+ # Create a new retriever for the given repository. This class
47
+ # recognizes several options, and subclasses may recognize
48
+ # additional options. Only :basedir is required.
49
+ #
50
+ # === Options
51
+ # <tt>:basedir</tt>:: Required, base directory where all files should be retrieved
52
+ # <tt>:max_bytes</tt>:: Maximum number of bytes to read
53
+ # <tt>:max_seconds</tt>:: Maximum number of seconds to spend reading
54
+ # <tt>:logger</tt>:: Logger to use
55
+ #
56
+ # === Parameters
57
+ # repository(RightScraper::Repositories::Base):: repository to scrape
58
+ # options(Hash):: retriever options
59
+ #
60
+ # === Raise
61
+ # 'Missing base directory':: if :basedir option is missing
62
+ def initialize(repository, options={})
63
+ raise 'Missing base directory' unless options[:basedir]
64
+ @repository = repository
65
+ @max_bytes = options[:max_bytes] || nil
66
+ @max_seconds = options[:max_seconds] || nil
67
+ @basedir = options[:basedir]
68
+ @repo_dir = RightScraper::Retrievers::Base.repo_dir(@basedir, repository)
69
+ @logger = options[:logger] || RightScraper::Logger.new
70
+ @logger.repository = repository
71
+ @logger.operation(:initialize, "setting up in #{@repo_dir}") do
72
+ FileUtils.mkdir_p(@repo_dir)
73
+ end
74
+ end
75
+
76
+ # Paths to ignore when traversing the filesystem. Mostly used for
77
+ # things like Git and Subversion version control directories.
78
+ #
79
+ # === Return
80
+ # list(Array):: list of filenames to ignore.
81
+ def ignorable_paths
82
+ []
83
+ end
84
+
85
+ # Retrieve repository; must be overridden in subclasses
86
+ def retrieve
87
+ raise NotImplementedError
88
+ end
89
+
90
+ # Path to directory where given repo should be or was downloaded
91
+ #
92
+ # === Parameters
93
+ # root_dir(String):: Path to directory containing all scraped repositories
94
+ # repo(Hash|RightScraper::Repositories::Base):: Remote repository corresponding to local directory
95
+ #
96
+ # === Return
97
+ # String:: Path to local directory that corresponds to given repository
98
+ def self.repo_dir(root_dir, repo)
99
+ repo = RightScraper::Repositories::Base.from_hash(repo) if repo.is_a?(Hash)
100
+ dir_name = repo.repository_hash
101
+ dir_path = File.join(root_dir, dir_name)
102
+ "#{dir_path}/repo"
103
+ end
104
+
105
+ protected
106
+
107
+ # (Hash) Lookup table from textual description of scraper type
108
+ # ('cookbook' or 'workflow' currently) to the class that
109
+ # represents that scraper.
110
+ @@types = {} unless class_variable_defined?(:@@types)
111
+
112
+ end
113
+ end
114
+ end
@@ -0,0 +1,79 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ module RightScraper
25
+ module Retrievers
26
+
27
+ # Base class for retrievers that want to do version control
28
+ # operations (CVS, SVN, etc.). Subclasses can get away with
29
+ # implementing only #do_checkout but to support incremental
30
+ # operation need to implement #exists? and #do_update, in addition
31
+ # to Retrievers::Base#ignorable_paths.
32
+ class CheckoutBasedRetriever < Base
33
+
34
+ # Check out repository into the directory. Occurs between
35
+ # variable initialization and beginning scraping.
36
+ def retrieve
37
+ if exists?
38
+ begin
39
+ @logger.operation(:updating) do
40
+ do_update
41
+ end
42
+ rescue
43
+ @logger.note_error($!, :updating, "switching to using checkout")
44
+ FileUtils.remove_entry_secure @repo_dir
45
+ @logger.operation(:checkout) do
46
+ do_checkout
47
+ end
48
+ end
49
+ else
50
+ @logger.operation(:checkout) do
51
+ do_checkout
52
+ end
53
+ end
54
+ end
55
+
56
+ # Return true if a checkout exists.
57
+ #
58
+ # === Returns
59
+ # Boolean:: true if the checkout already exists (and thus
60
+ # incremental updating can occur).
61
+ def exists?
62
+ false
63
+ end
64
+
65
+ # Perform an incremental update of the checkout. Subclasses that
66
+ # want to handle incremental updating need to override this.
67
+ def do_update
68
+ do_checkout
69
+ end
70
+
71
+ # Perform a de novo full checkout of the repository. Subclasses
72
+ # must override this to do anything useful.
73
+ def do_checkout
74
+ FileUtils.mkdir_p(@repo_dir)
75
+ end
76
+
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,97 @@
1
+ #--
2
+ # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # 'Software'), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+ require 'process_watcher'
24
+ require 'tempfile'
25
+ require 'digest/sha1'
26
+
27
+ module RightScraper
28
+ module Retrievers
29
+ # A retriever for resources stored in archives on a web server
30
+ # somewhere. Uses command line curl and command line tar.
31
+ class Download < Base
32
+
33
+ # Directory used to download tarballs
34
+ def workdir
35
+ File.join(@basedir, @repository.repository_hash)
36
+ end
37
+
38
+ # Path to directory where files are retrieved
39
+ def repo_dir
40
+ File.join(workdir, "archive")
41
+ end
42
+
43
+ # Download tarball and unpack it
44
+ def retrieve
45
+ FileUtils.remove_entry_secure workdir if File.exists?(workdir)
46
+ FileUtils.mkdir_p repo_dir
47
+ file = File.join(workdir, "package")
48
+
49
+ @logger.operation(:downloading) do
50
+ credential_command = if @repository.first_credential && @repository.second_credential
51
+ ["-u", "#{@repository.first_credential}:#{@repository.second_credential}"]
52
+ else
53
+ []
54
+ end
55
+ ProcessWatcher.watch("curl", ["--silent", "--show-error", "--location", "--fail",
56
+ "--location-trusted", "-o", file,
57
+ credential_command, @repository.url].flatten,
58
+ workdir, @max_bytes || -1, @max_seconds || -1) do |phase, command, exception|
59
+ @logger.note_phase(phase, :running_command, command, exception)
60
+ end
61
+ end
62
+
63
+ note_tag(file)
64
+
65
+ @logger.operation(:unpacking) do
66
+ path = @repository.to_url.path
67
+ if path =~ /\.gz$/
68
+ extraction = "xzf"
69
+ elsif path =~ /\.bz2$/
70
+ extraction = "xjf"
71
+ else
72
+ extraction = "xf"
73
+ end
74
+ Dir.chdir(repo_dir) do
75
+ ProcessWatcher.watch("tar", [extraction, file], repo_dir,
76
+ @max_bytes || -1, @max_seconds || -1) do |phase, command, exception|
77
+ @logger.note_phase(phase, :running_command, command, exception)
78
+ end
79
+ end
80
+ end
81
+ end
82
+
83
+ # Amend @repository with the tag information from the downloaded
84
+ # file.
85
+ #
86
+ # === Parameters
87
+ # file(String):: file that was downloaded
88
+ def note_tag(file)
89
+ digest = Digest::SHA1.new
90
+ File.open(file) {|f| digest << f.read(4096) }
91
+ repo = @repository.clone
92
+ repo.tag = digest.hexdigest
93
+ @repository = repo
94
+ end
95
+ end
96
+ end
97
+ end