right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -20,81 +20,121 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
- require File.join(File.dirname(__FILE__), '..', 'svn_client')
24
-
25
- module RightScraper
26
- module Retrievers
27
- # Retriever for svn repositories
28
- class Svn < CheckoutBasedRetriever
29
-
30
- include RightScraper::SvnClient
31
-
32
- @@available = false
33
-
34
- # Determines if svn is available.
35
- def available?
36
- unless @@available
37
- begin
38
- calculate_version
39
- @@available = true
40
- rescue SvnClientError => e
41
- @logger.note_error(e, :available, "svn retriever is unavailable")
42
- end
23
+
24
+ # ancestor
25
+ require 'right_scraper/retrievers'
26
+
27
+ module RightScraper::Retrievers
28
+
29
+ # Retriever for svn repositories
30
+ class Svn < ::RightScraper::Retrievers::CheckoutBase
31
+
32
+ SVN_CLIENT = ::RightScraper::Processes::SvnClient
33
+
34
+ @@available = false
35
+
36
+ # Determines if svn is available.
37
+ def available?
38
+ unless @@available
39
+ begin
40
+ SVN_CLIENT.calculate_version
41
+ @@available = true
42
+ rescue SVN_CLIENT::SvnClientError => e
43
+ @logger.note_error(e, :available, 'svn retriever is unavailable')
43
44
  end
44
- @@available
45
45
  end
46
+ @@available
47
+ end
46
48
 
47
- # Return true if a checkout exists. Currently tests for .svn in
48
- # the checkout.
49
- #
50
- # === Returns
51
- # Boolean:: true if the checkout already exists (and thus
52
- # incremental updating can occur).
53
- def exists?
54
- File.exists?(File.join(@repo_dir, '.svn'))
55
- end
49
+ # Return true if a checkout exists. Currently tests for .svn in
50
+ # the checkout.
51
+ #
52
+ # === Returns
53
+ # Boolean:: true if the checkout already exists (and thus
54
+ # incremental updating can occur).
55
+ def exists?
56
+ ::File.exists?(::File.join(@repo_dir, '.svn'))
57
+ end
56
58
 
57
- # Incrementally update the checkout. The operations are as follows:
58
- # * update to #tag
59
- # In theory if #tag is a revision number that already exists no
60
- # update is necessary. It's not clear if the SVN client libraries
61
- # are bright enough to notice this.
62
- def do_update
63
- @logger.operation(:update) do
64
- run_svn("update", get_tag_argument)
65
- end
59
+ # Ignore .svn directories.
60
+ def ignorable_paths
61
+ ['.svn']
62
+ end
63
+
64
+ # Implements CheckoutBase#do_checkout
65
+ def do_checkout
66
+ @logger.operation(:checkout_revision) do
67
+ revision = resolve_revision
68
+ svn_args = ['checkout', @repository.url, @repo_dir]
69
+ svn_args += ['--revision', revision] if revision
70
+ svn_args << '--force'
71
+ svn_client.execute(svn_args)
66
72
  do_update_tag
67
73
  end
74
+ end
68
75
 
69
- # Update our idea of what the head of the repository is. We
70
- # would like to use svn info, but that doesn't do the right
71
- # thing all the time; the right thing to do is to run log and
72
- # pick out the first tag.
73
- def do_update_tag
74
- @repository = @repository.clone
75
- lines = run_svn_with_buffered_output("log", "-r", 'HEAD')
76
- lines.each do |line|
77
- if line =~ /^r(\d+)/
78
- @repository.tag = $1
79
- break
80
- end
81
- end
76
+ # Implements CheckoutBase#do_update
77
+ def do_update
78
+ @logger.operation(:update) do
79
+ revision = resolve_revision
80
+ svn_client.execute('update', '--revision', revision, '--force')
81
+ do_update_tag
82
82
  end
83
+ end
83
84
 
84
- # Check out the remote repository. The operations are as follows:
85
- # * checkout repository at #tag to @repo_dir
86
- def do_checkout
87
- super
88
- @logger.operation(:checkout_revision) do
89
- run_svn_no_chdir("checkout", @repository.url, @repo_dir, get_tag_argument)
85
+ # Implements CheckoutBase#do_update_tag
86
+ def do_update_tag
87
+ # query latest count=1 log entry for latest revision; don't attempt to
88
+ # specify revision on the assumption that the requested revision is
89
+ # already checked out. the --revision argument appears to expect a
90
+ # revision from-to range or else a start date or date range or else a
91
+ # specific revision number. it prints nothing when HEAD is specified by
92
+ # itself.
93
+ @repository = @repository.clone
94
+ svn_args = ['log', '--limit', '1']
95
+ svn_client.output_for(svn_args).lines.each do |line|
96
+ if matched = SVN_LOG_REGEX.match(line)
97
+ @repository.tag = matched[1]
98
+ break
90
99
  end
91
- do_update_tag
92
100
  end
101
+ end
93
102
 
94
- # Ignore .svn directories.
95
- def ignorable_paths
96
- ['.svn']
103
+ private
104
+
105
+ # http://svnbook.red-bean.com/en/1.7/svn.tour.revs.specifiers.html#svn.tour.revs.keywords
106
+ #
107
+ # Example: HEAD | <revision number> | {<datetime>}
108
+ #
109
+ # {2010-12-06T19:11:25} === {2010-12-06 19:11:25 +0000}
110
+ SVN_REVISION_REGEX = /^(HEAD|\d+|\{[0-9: T+\-]+\})$/
111
+
112
+ # Example:
113
+ # r12 | ira | 2006-11-27 12:31:51 -0600 (Mon, 27 Nov 2006) | 6 lines
114
+ SVN_LOG_REGEX = /^r(\d+)/ # ignoring additional info after revision
115
+
116
+ def resolve_revision
117
+ revision = @repository.tag.to_s.strip
118
+ if revision.empty?
119
+ revision = nil
120
+ elsif (revision =~ SVN_REVISION_REGEX).nil?
121
+ raise RetrieverError, "Revision reference contained illegal characters: #{revision.inspect}"
97
122
  end
123
+ # timestamps can contain spaces; surround them with double quotes.
124
+ revision = revision.inspect if revision.index(' ')
125
+ revision
98
126
  end
127
+
128
+ def svn_client
129
+ @svn_client ||= SVN_CLIENT.new(
130
+ @repository,
131
+ @logger,
132
+ ::RightScraper::Processes::Shell.new(
133
+ :initial_directory => self.repo_dir,
134
+ :max_bytes => self.max_bytes,
135
+ :max_seconds => self.max_seconds,
136
+ :watch_directory => self.repo_dir))
137
+ end
138
+
99
139
  end
100
140
  end
@@ -0,0 +1,37 @@
1
+ #
2
+ # Copyright (c) 2013 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # ancestor
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Scanners
28
+ autoload :Base, 'right_scraper/scanners/base'
29
+ autoload :CookbookManifest, 'right_scraper/scanners/cookbook_manifest'
30
+ autoload :CookbookMetadata, 'right_scraper/scanners/cookbook_metadata'
31
+ autoload :CookbookS3Upload, 'right_scraper/scanners/cookbook_s3_upload'
32
+ autoload :Union, 'right_scraper/scanners/union'
33
+ autoload :WorkflowManifest, 'right_scraper/scanners/workflow_manifest'
34
+ autoload :WorkflowMetadata, 'right_scraper/scanners/workflow_metadata'
35
+ autoload :WorkflowS3Upload, 'right_scraper/scanners/workflow_s3_upload'
36
+ end
37
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,91 +21,88 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module RightScraper
25
- module Scanners
26
- # Base class for scanning filesystems. Subclasses should override
27
- # #notice and may override #new, #begin, #end and
28
- # #notice_dir.
29
- #
30
- # Overriding #new is useful for getting
31
- # additional arguments. Overriding #begin allows you to do
32
- # processing before the scan of a given resource begins;
33
- # overriding #end allows you to do processing after it completes.
34
- #
35
- # Most processing will occur in #notice, which notifies you that a
36
- # file has been detected, and in #notice_dir. In #notice you are
37
- # handed the relative position of the file from the start of the
38
- # resource; so if you were scanning <tt>/a/resource</tt> and
39
- # noticed a file <tt>b/c</tt>, #notice would be called with
40
- # <tt>"b/c"</tt>, even though the full pathname is
41
- # <tt>/a/resource/b/c</tt>. If you decide you need the actual
42
- # data, #notice takes a block which will return that data to you
43
- # if you +yield+.
44
- #
45
- # In #notice_dir you are handed the relative position of a
46
- # directory. The return value determines whether you find the
47
- # directory worth recursing into, or not--as an example, when
48
- # looking for the <tt>metadata.json</tt> file it is never
49
- # necessary to descend past the topmost directory of the resource,
50
- # but the same is not true when building a manifest.
51
- class Base
52
- # Create a new Scanner. Recognizes options as given. Some
53
- # options may be required, others optional. This class recognizes
54
- # only _:logger_.
55
- #
56
- # === Options ===
57
- # _:logger_:: Optional. Logger currently being used
58
- #
59
- # === Parameters ===
60
- # options(Hash):: scanner options
61
- def initialize(options={})
62
- @logger = options.fetch(:logger, RightScraper::Logger.new)
63
- end
24
+ # ancestor
25
+ require 'right_scraper/scanners'
64
26
 
65
- # Notification that all scans for this repository have
66
- # completed.
67
- def finish
68
- end
27
+ module RightScraper::Scanners
69
28
 
70
- # Begin a scan for the given resource.
71
- #
72
- # === Parameters ===
73
- # resource(RightScraper::Resource::Base):: resource to scan
74
- def begin(resource)
75
- end
29
+ # Base class for scanning filesystems. Subclasses should override
30
+ # #notice and may override #new, #begin, #end and
31
+ # #notice_dir.
32
+ #
33
+ # Overriding #new is useful for getting
34
+ # additional arguments. Overriding #begin allows you to do
35
+ # processing before the scan of a given resource begins;
36
+ # overriding #end allows you to do processing after it completes.
37
+ #
38
+ # Most processing will occur in #notice, which notifies you that a
39
+ # file has been detected, and in #notice_dir. In #notice you are
40
+ # handed the relative position of the file from the start of the
41
+ # resource; so if you were scanning <tt>/a/resource</tt> and
42
+ # noticed a file <tt>b/c</tt>, #notice would be called with
43
+ # <tt>"b/c"</tt>, even though the full pathname is
44
+ # <tt>/a/resource/b/c</tt>. If you decide you need the actual
45
+ # data, #notice takes a block which will return that data to you
46
+ # if you +yield+.
47
+ #
48
+ # In #notice_dir you are handed the relative position of a
49
+ # directory. The return value determines whether you find the
50
+ # directory worth recursing into, or not--as an example, when
51
+ # looking for the <tt>metadata.json</tt> file it is never
52
+ # necessary to descend past the topmost directory of the resource,
53
+ # but the same is not true when building a manifest.
54
+ class Base
76
55
 
77
- # Finish a scan for the given resource.
78
- #
79
- # === Parameters ===
80
- # resource(RightScraper::Resource::Base):: resource that just finished
81
- # scanning
82
- def end(resource)
56
+ # @param [Hash] options for scanner
57
+ def initialize(options={})
58
+ unless @logger = options[:logger]
59
+ raise ::ArgumentError, ':logger is required'
83
60
  end
61
+ end
84
62
 
85
- # Notice a file during scanning.
86
- #
87
- # === Block ===
88
- # Return the data for this file. We use a block because it may
89
- # not always be necessary to read the data.
90
- #
91
- # === Parameters ===
92
- # relative_position(String):: relative pathname for _pathname_
93
- # from root of resource
94
- def notice(relative_position)
95
- end
63
+ # Notification that all scans for this repository have
64
+ # completed.
65
+ def finish
66
+ end
96
67
 
97
- # Notice a directory during scanning. Returns true if the scanner
98
- # should recurse into the directory (the default behavior)
99
- #
100
- # === Parameters ===
101
- # relative_position(String):: relative pathname for the directory
102
- # from root of resource
103
- #
104
- # === Returns ===
105
- # Boolean:: should the scanning recurse into the directory
106
- def notice_dir(relative_position)
107
- true
108
- end
68
+ # Begin a scan for the given resource.
69
+ #
70
+ # === Parameters ===
71
+ # resource(RightScraper::Resource::Base):: resource to scan
72
+ def begin(resource)
73
+ end
74
+
75
+ # Finish a scan for the given resource.
76
+ #
77
+ # === Parameters ===
78
+ # resource(RightScraper::Resource::Base):: resource that just finished
79
+ # scanning
80
+ def end(resource)
81
+ end
82
+
83
+ # Notice a file during scanning.
84
+ #
85
+ # === Block ===
86
+ # Return the data for this file. We use a block because it may
87
+ # not always be necessary to read the data.
88
+ #
89
+ # === Parameters ===
90
+ # relative_position(String):: relative pathname for _pathname_
91
+ # from root of resource
92
+ def notice(relative_position)
93
+ end
94
+
95
+ # Notice a directory during scanning. Returns true if the scanner
96
+ # should recurse into the directory (the default behavior)
97
+ #
98
+ # === Parameters ===
99
+ # relative_position(String):: relative pathname for the directory
100
+ # from root of resource
101
+ #
102
+ # === Returns ===
103
+ # Boolean:: should the scanning recurse into the directory
104
+ def notice_dir(relative_position)
105
+ true
109
106
  end
110
107
  end
111
108
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,39 +21,40 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
25
27
  require 'digest/md5'
26
28
 
27
- module RightScraper
28
- module Scanners
29
- # Build manifests from a filesystem.
30
- class CookbookManifest < Base
31
- # Create a new manifest scanner. Does not accept any new arguments.
32
- def initialize(*args)
33
- super
34
- @manifest = {}
35
- end
29
+ module RightScraper::Scanners
30
+
31
+ # Build manifests from a filesystem.
32
+ class CookbookManifest < ::RightScraper::Scanners::Base
33
+ # Create a new manifest scanner. Does not accept any new arguments.
34
+ def initialize(*args)
35
+ super
36
+ @manifest = {}
37
+ end
36
38
 
37
- # Complete a scan for the given resource.
38
- #
39
- # === Parameters ===
40
- # resource(RightScraper::Resources::Base):: resource to scan
41
- def end(resource)
42
- resource.manifest = @manifest
43
- @manifest = {}
44
- end
39
+ # Complete a scan for the given resource.
40
+ #
41
+ # === Parameters ===
42
+ # resource(RightScraper::Resources::Base):: resource to scan
43
+ def end(resource)
44
+ resource.manifest = @manifest
45
+ @manifest = {}
46
+ end
45
47
 
46
- # Notice a file during scanning.
47
- #
48
- # === Block ===
49
- # Return the data for this file. We use a block because it may
50
- # not always be necessary to read the data.
51
- #
52
- # === Parameters ===
53
- # relative_position(String):: relative pathname for file from root of resource
54
- def notice(relative_position)
55
- @manifest[relative_position] = Digest::MD5.hexdigest(yield)
56
- end
48
+ # Notice a file during scanning.
49
+ #
50
+ # === Block ===
51
+ # Return the data for this file. We use a block because it may
52
+ # not always be necessary to read the data.
53
+ #
54
+ # === Parameters ===
55
+ # relative_position(String):: relative pathname for file from root of resource
56
+ def notice(relative_position)
57
+ @manifest[relative_position] = Digest::MD5.hexdigest(yield)
57
58
  end
58
59
  end
59
60
  end