right_scraper 3.2.6 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -20,81 +20,121 @@
20
20
  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
- require File.join(File.dirname(__FILE__), '..', 'svn_client')
24
-
25
- module RightScraper
26
- module Retrievers
27
- # Retriever for svn repositories
28
- class Svn < CheckoutBasedRetriever
29
-
30
- include RightScraper::SvnClient
31
-
32
- @@available = false
33
-
34
- # Determines if svn is available.
35
- def available?
36
- unless @@available
37
- begin
38
- calculate_version
39
- @@available = true
40
- rescue SvnClientError => e
41
- @logger.note_error(e, :available, "svn retriever is unavailable")
42
- end
23
+
24
+ # ancestor
25
+ require 'right_scraper/retrievers'
26
+
27
+ module RightScraper::Retrievers
28
+
29
+ # Retriever for svn repositories
30
+ class Svn < ::RightScraper::Retrievers::CheckoutBase
31
+
32
+ SVN_CLIENT = ::RightScraper::Processes::SvnClient
33
+
34
+ @@available = false
35
+
36
+ # Determines if svn is available.
37
+ def available?
38
+ unless @@available
39
+ begin
40
+ SVN_CLIENT.calculate_version
41
+ @@available = true
42
+ rescue SVN_CLIENT::SvnClientError => e
43
+ @logger.note_error(e, :available, 'svn retriever is unavailable')
43
44
  end
44
- @@available
45
45
  end
46
+ @@available
47
+ end
46
48
 
47
- # Return true if a checkout exists. Currently tests for .svn in
48
- # the checkout.
49
- #
50
- # === Returns
51
- # Boolean:: true if the checkout already exists (and thus
52
- # incremental updating can occur).
53
- def exists?
54
- File.exists?(File.join(@repo_dir, '.svn'))
55
- end
49
+ # Return true if a checkout exists. Currently tests for .svn in
50
+ # the checkout.
51
+ #
52
+ # === Returns
53
+ # Boolean:: true if the checkout already exists (and thus
54
+ # incremental updating can occur).
55
+ def exists?
56
+ ::File.exists?(::File.join(@repo_dir, '.svn'))
57
+ end
56
58
 
57
- # Incrementally update the checkout. The operations are as follows:
58
- # * update to #tag
59
- # In theory if #tag is a revision number that already exists no
60
- # update is necessary. It's not clear if the SVN client libraries
61
- # are bright enough to notice this.
62
- def do_update
63
- @logger.operation(:update) do
64
- run_svn("update", get_tag_argument)
65
- end
59
+ # Ignore .svn directories.
60
+ def ignorable_paths
61
+ ['.svn']
62
+ end
63
+
64
+ # Implements CheckoutBase#do_checkout
65
+ def do_checkout
66
+ @logger.operation(:checkout_revision) do
67
+ revision = resolve_revision
68
+ svn_args = ['checkout', @repository.url, @repo_dir]
69
+ svn_args += ['--revision', revision] if revision
70
+ svn_args << '--force'
71
+ svn_client.execute(svn_args)
66
72
  do_update_tag
67
73
  end
74
+ end
68
75
 
69
- # Update our idea of what the head of the repository is. We
70
- # would like to use svn info, but that doesn't do the right
71
- # thing all the time; the right thing to do is to run log and
72
- # pick out the first tag.
73
- def do_update_tag
74
- @repository = @repository.clone
75
- lines = run_svn_with_buffered_output("log", "-r", 'HEAD')
76
- lines.each do |line|
77
- if line =~ /^r(\d+)/
78
- @repository.tag = $1
79
- break
80
- end
81
- end
76
+ # Implements CheckoutBase#do_update
77
+ def do_update
78
+ @logger.operation(:update) do
79
+ revision = resolve_revision
80
+ svn_client.execute('update', '--revision', revision, '--force')
81
+ do_update_tag
82
82
  end
83
+ end
83
84
 
84
- # Check out the remote repository. The operations are as follows:
85
- # * checkout repository at #tag to @repo_dir
86
- def do_checkout
87
- super
88
- @logger.operation(:checkout_revision) do
89
- run_svn_no_chdir("checkout", @repository.url, @repo_dir, get_tag_argument)
85
+ # Implements CheckoutBase#do_update_tag
86
+ def do_update_tag
87
+ # query latest count=1 log entry for latest revision; don't attempt to
88
+ # specify revision on the assumption that the requested revision is
89
+ # already checked out. the --revision argument appears to expect a
90
+ # revision from-to range or else a start date or date range or else a
91
+ # specific revision number. it prints nothing when HEAD is specified by
92
+ # itself.
93
+ @repository = @repository.clone
94
+ svn_args = ['log', '--limit', '1']
95
+ svn_client.output_for(svn_args).lines.each do |line|
96
+ if matched = SVN_LOG_REGEX.match(line)
97
+ @repository.tag = matched[1]
98
+ break
90
99
  end
91
- do_update_tag
92
100
  end
101
+ end
93
102
 
94
- # Ignore .svn directories.
95
- def ignorable_paths
96
- ['.svn']
103
+ private
104
+
105
+ # http://svnbook.red-bean.com/en/1.7/svn.tour.revs.specifiers.html#svn.tour.revs.keywords
106
+ #
107
+ # Example: HEAD | <revision number> | {<datetime>}
108
+ #
109
+ # {2010-12-06T19:11:25} === {2010-12-06 19:11:25 +0000}
110
+ SVN_REVISION_REGEX = /^(HEAD|\d+|\{[0-9: T+\-]+\})$/
111
+
112
+ # Example:
113
+ # r12 | ira | 2006-11-27 12:31:51 -0600 (Mon, 27 Nov 2006) | 6 lines
114
+ SVN_LOG_REGEX = /^r(\d+)/ # ignoring additional info after revision
115
+
116
+ def resolve_revision
117
+ revision = @repository.tag.to_s.strip
118
+ if revision.empty?
119
+ revision = nil
120
+ elsif (revision =~ SVN_REVISION_REGEX).nil?
121
+ raise RetrieverError, "Revision reference contained illegal characters: #{revision.inspect}"
97
122
  end
123
+ # timestamps can contain spaces; surround them with double quotes.
124
+ revision = revision.inspect if revision.index(' ')
125
+ revision
98
126
  end
127
+
128
+ def svn_client
129
+ @svn_client ||= SVN_CLIENT.new(
130
+ @repository,
131
+ @logger,
132
+ ::RightScraper::Processes::Shell.new(
133
+ :initial_directory => self.repo_dir,
134
+ :max_bytes => self.max_bytes,
135
+ :max_seconds => self.max_seconds,
136
+ :watch_directory => self.repo_dir))
137
+ end
138
+
99
139
  end
100
140
  end
@@ -0,0 +1,37 @@
1
+ #
2
+ # Copyright (c) 2013 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # ancestor
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Scanners
28
+ autoload :Base, 'right_scraper/scanners/base'
29
+ autoload :CookbookManifest, 'right_scraper/scanners/cookbook_manifest'
30
+ autoload :CookbookMetadata, 'right_scraper/scanners/cookbook_metadata'
31
+ autoload :CookbookS3Upload, 'right_scraper/scanners/cookbook_s3_upload'
32
+ autoload :Union, 'right_scraper/scanners/union'
33
+ autoload :WorkflowManifest, 'right_scraper/scanners/workflow_manifest'
34
+ autoload :WorkflowMetadata, 'right_scraper/scanners/workflow_metadata'
35
+ autoload :WorkflowS3Upload, 'right_scraper/scanners/workflow_s3_upload'
36
+ end
37
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,91 +21,88 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module RightScraper
25
- module Scanners
26
- # Base class for scanning filesystems. Subclasses should override
27
- # #notice and may override #new, #begin, #end and
28
- # #notice_dir.
29
- #
30
- # Overriding #new is useful for getting
31
- # additional arguments. Overriding #begin allows you to do
32
- # processing before the scan of a given resource begins;
33
- # overriding #end allows you to do processing after it completes.
34
- #
35
- # Most processing will occur in #notice, which notifies you that a
36
- # file has been detected, and in #notice_dir. In #notice you are
37
- # handed the relative position of the file from the start of the
38
- # resource; so if you were scanning <tt>/a/resource</tt> and
39
- # noticed a file <tt>b/c</tt>, #notice would be called with
40
- # <tt>"b/c"</tt>, even though the full pathname is
41
- # <tt>/a/resource/b/c</tt>. If you decide you need the actual
42
- # data, #notice takes a block which will return that data to you
43
- # if you +yield+.
44
- #
45
- # In #notice_dir you are handed the relative position of a
46
- # directory. The return value determines whether you find the
47
- # directory worth recursing into, or not--as an example, when
48
- # looking for the <tt>metadata.json</tt> file it is never
49
- # necessary to descend past the topmost directory of the resource,
50
- # but the same is not true when building a manifest.
51
- class Base
52
- # Create a new Scanner. Recognizes options as given. Some
53
- # options may be required, others optional. This class recognizes
54
- # only _:logger_.
55
- #
56
- # === Options ===
57
- # _:logger_:: Optional. Logger currently being used
58
- #
59
- # === Parameters ===
60
- # options(Hash):: scanner options
61
- def initialize(options={})
62
- @logger = options.fetch(:logger, RightScraper::Logger.new)
63
- end
24
+ # ancestor
25
+ require 'right_scraper/scanners'
64
26
 
65
- # Notification that all scans for this repository have
66
- # completed.
67
- def finish
68
- end
27
+ module RightScraper::Scanners
69
28
 
70
- # Begin a scan for the given resource.
71
- #
72
- # === Parameters ===
73
- # resource(RightScraper::Resource::Base):: resource to scan
74
- def begin(resource)
75
- end
29
+ # Base class for scanning filesystems. Subclasses should override
30
+ # #notice and may override #new, #begin, #end and
31
+ # #notice_dir.
32
+ #
33
+ # Overriding #new is useful for getting
34
+ # additional arguments. Overriding #begin allows you to do
35
+ # processing before the scan of a given resource begins;
36
+ # overriding #end allows you to do processing after it completes.
37
+ #
38
+ # Most processing will occur in #notice, which notifies you that a
39
+ # file has been detected, and in #notice_dir. In #notice you are
40
+ # handed the relative position of the file from the start of the
41
+ # resource; so if you were scanning <tt>/a/resource</tt> and
42
+ # noticed a file <tt>b/c</tt>, #notice would be called with
43
+ # <tt>"b/c"</tt>, even though the full pathname is
44
+ # <tt>/a/resource/b/c</tt>. If you decide you need the actual
45
+ # data, #notice takes a block which will return that data to you
46
+ # if you +yield+.
47
+ #
48
+ # In #notice_dir you are handed the relative position of a
49
+ # directory. The return value determines whether you find the
50
+ # directory worth recursing into, or not--as an example, when
51
+ # looking for the <tt>metadata.json</tt> file it is never
52
+ # necessary to descend past the topmost directory of the resource,
53
+ # but the same is not true when building a manifest.
54
+ class Base
76
55
 
77
- # Finish a scan for the given resource.
78
- #
79
- # === Parameters ===
80
- # resource(RightScraper::Resource::Base):: resource that just finished
81
- # scanning
82
- def end(resource)
56
+ # @param [Hash] options for scanner
57
+ def initialize(options={})
58
+ unless @logger = options[:logger]
59
+ raise ::ArgumentError, ':logger is required'
83
60
  end
61
+ end
84
62
 
85
- # Notice a file during scanning.
86
- #
87
- # === Block ===
88
- # Return the data for this file. We use a block because it may
89
- # not always be necessary to read the data.
90
- #
91
- # === Parameters ===
92
- # relative_position(String):: relative pathname for _pathname_
93
- # from root of resource
94
- def notice(relative_position)
95
- end
63
+ # Notification that all scans for this repository have
64
+ # completed.
65
+ def finish
66
+ end
96
67
 
97
- # Notice a directory during scanning. Returns true if the scanner
98
- # should recurse into the directory (the default behavior)
99
- #
100
- # === Parameters ===
101
- # relative_position(String):: relative pathname for the directory
102
- # from root of resource
103
- #
104
- # === Returns ===
105
- # Boolean:: should the scanning recurse into the directory
106
- def notice_dir(relative_position)
107
- true
108
- end
68
+ # Begin a scan for the given resource.
69
+ #
70
+ # === Parameters ===
71
+ # resource(RightScraper::Resource::Base):: resource to scan
72
+ def begin(resource)
73
+ end
74
+
75
+ # Finish a scan for the given resource.
76
+ #
77
+ # === Parameters ===
78
+ # resource(RightScraper::Resource::Base):: resource that just finished
79
+ # scanning
80
+ def end(resource)
81
+ end
82
+
83
+ # Notice a file during scanning.
84
+ #
85
+ # === Block ===
86
+ # Return the data for this file. We use a block because it may
87
+ # not always be necessary to read the data.
88
+ #
89
+ # === Parameters ===
90
+ # relative_position(String):: relative pathname for _pathname_
91
+ # from root of resource
92
+ def notice(relative_position)
93
+ end
94
+
95
+ # Notice a directory during scanning. Returns true if the scanner
96
+ # should recurse into the directory (the default behavior)
97
+ #
98
+ # === Parameters ===
99
+ # relative_position(String):: relative pathname for the directory
100
+ # from root of resource
101
+ #
102
+ # === Returns ===
103
+ # Boolean:: should the scanning recurse into the directory
104
+ def notice_dir(relative_position)
105
+ true
109
106
  end
110
107
  end
111
108
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,39 +21,40 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- require File.expand_path(File.join(File.dirname(__FILE__), 'base'))
24
+ # ancestor
25
+ require 'right_scraper/scanners'
26
+
25
27
  require 'digest/md5'
26
28
 
27
- module RightScraper
28
- module Scanners
29
- # Build manifests from a filesystem.
30
- class CookbookManifest < Base
31
- # Create a new manifest scanner. Does not accept any new arguments.
32
- def initialize(*args)
33
- super
34
- @manifest = {}
35
- end
29
+ module RightScraper::Scanners
30
+
31
+ # Build manifests from a filesystem.
32
+ class CookbookManifest < ::RightScraper::Scanners::Base
33
+ # Create a new manifest scanner. Does not accept any new arguments.
34
+ def initialize(*args)
35
+ super
36
+ @manifest = {}
37
+ end
36
38
 
37
- # Complete a scan for the given resource.
38
- #
39
- # === Parameters ===
40
- # resource(RightScraper::Resources::Base):: resource to scan
41
- def end(resource)
42
- resource.manifest = @manifest
43
- @manifest = {}
44
- end
39
+ # Complete a scan for the given resource.
40
+ #
41
+ # === Parameters ===
42
+ # resource(RightScraper::Resources::Base):: resource to scan
43
+ def end(resource)
44
+ resource.manifest = @manifest
45
+ @manifest = {}
46
+ end
45
47
 
46
- # Notice a file during scanning.
47
- #
48
- # === Block ===
49
- # Return the data for this file. We use a block because it may
50
- # not always be necessary to read the data.
51
- #
52
- # === Parameters ===
53
- # relative_position(String):: relative pathname for file from root of resource
54
- def notice(relative_position)
55
- @manifest[relative_position] = Digest::MD5.hexdigest(yield)
56
- end
48
+ # Notice a file during scanning.
49
+ #
50
+ # === Block ===
51
+ # Return the data for this file. We use a block because it may
52
+ # not always be necessary to read the data.
53
+ #
54
+ # === Parameters ===
55
+ # relative_position(String):: relative pathname for file from root of resource
56
+ def notice(relative_position)
57
+ @manifest[relative_position] = Digest::MD5.hexdigest(yield)
57
58
  end
58
59
  end
59
60
  end