right_scraper 3.2.6 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -0,0 +1,32 @@
1
+ #
2
+ # Copyright (c) 2013 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # ancestor
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Scrapers
28
+ autoload :Base, 'right_scraper/scrapers/base'
29
+ autoload :Cookbook, 'right_scraper/scrapers/cookbook'
30
+ autoload :Workflow, 'right_scraper/scrapers/workflow'
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,242 +21,254 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module RightScraper
25
- module Scrapers
24
+ # ancestor
25
+ require 'right_scraper/scrapers'
26
26
 
27
- # Base class for all scrapers. Subclasses should override
28
- # #find_next which instantiates the resource from the file system.
29
- class Base
30
-
31
- # Scraped resources
32
- attr_reader :resources
33
-
34
- # Initialize scraper
35
- #
36
- # === Options
37
- # <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
38
- # <tt>:repo_dir</tt>:: Required, path to directory containing files
39
- # to be scraped
40
- # <tt>:ignorable_paths</tt>:: List of directory names that should
41
- # be ignored by scraper
42
- # <tt>:scanners</tt>:: List of Scanner classes to use, optional
43
- # <tt>:builders</tt>:: List of Builder classes to use, optional
44
- #
45
- # === Return
46
- # scraper(Scrapers::Base):: Corresponding scraper instance
47
- def self.scraper(options)
48
- scraper_kind = options.delete(:kind)
49
- scraper_class = @@types[scraper_kind]
50
- raise "Can't understand how to build scraper #{scraper_kind}" if scraper_class.nil?
51
- scraper = scraper_class.new(options)
52
- end
27
+ module RightScraper::Scrapers
28
+
29
+ class ScraperError < Exception; end
30
+
31
+ # Base class for all scrapers. Subclasses should override
32
+ # #find_next which instantiates the resource from the file system.
33
+ class Base < ::RightScraper::RegisteredBase
34
+
35
+ # Scraped resources
36
+ attr_reader :resources
37
+
38
+ # @return [Module] module for registered repository types
39
+ def self.registration_module
40
+ ::RightScraper::Scrapers
41
+ end
53
42
 
54
- # Do the scrape!
55
- # Extract all resources from directory
56
- # Call this method or call 'next_resource' to retrieve
57
- # resources one by one (you must then call 'close' yourself)
58
- # Fill @resources
59
- #
60
- # === Return
61
- # resources<Array>:: List of all scraped resources
62
- def scrape
63
- @resources = []
64
- begin
43
+ # Initialize scraper
44
+ #
45
+ # === Options
46
+ # <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
47
+ # <tt>:repo_dir</tt>:: Required, path to directory containing files
48
+ # to be scraped
49
+ # <tt>:ignorable_paths</tt>:: List of directory names that should
50
+ # be ignored by scraper
51
+ # <tt>:scanners</tt>:: List of Scanner classes to use, optional
52
+ # <tt>:builders</tt>:: List of Builder classes to use, optional
53
+ #
54
+ # === Return
55
+ # scraper(Scrapers::Base):: Corresponding scraper instance
56
+ def self.scraper(options)
57
+ scraper_kind = options.delete(:kind)
58
+ scraper_class = query_registered_type(scraper_kind)
59
+ scraper_class.new(options)
60
+ end
61
+
62
+ # Do the scrape!
63
+ # Extract all resources from directory
64
+ # Call this method or call 'next_resource' to retrieve
65
+ # resources one by one (you must then call 'close' yourself)
66
+ # Fill @resources
67
+ #
68
+ # === Return
69
+ # resources<Array>:: List of all scraped resources
70
+ def scrape
71
+ @resources = []
72
+ begin
73
+ resource = next_resource
74
+ until resource.nil?
75
+ @resources << resource
65
76
  resource = next_resource
66
- until resource.nil?
67
- @resources << resource
68
- resource = next_resource
69
- end
70
- ensure
71
- close
72
77
  end
73
- @resources
78
+ ensure
79
+ close
74
80
  end
81
+ @resources
82
+ end
75
83
 
76
- # Return the next resource in the filesystem, or nil if none. As
77
- # a part of building the resources, invokes the builders.
78
- # A resource can be a cookbook, a workflow, a RightScript etc.
79
- #
80
- # === Returns
81
- # Object:: next resource in filesystem, or nil if none.
82
- def next_resource
83
- @logger.operation(:next) do
84
- next nil if @next.nil?
84
+ # Return the next resource in the filesystem, or nil if none. As
85
+ # a part of building the resources, invokes the builders.
86
+ # A resource can be a cookbook, a workflow, a RightScript etc.
87
+ #
88
+ # === Returns
89
+ # Object:: next resource in filesystem, or nil if none.
90
+ def next_resource
91
+ @logger.operation(:next) do
92
+ next nil if @next.nil?
85
93
 
86
- value = @next
87
- @next = search_dirs
88
- while @next.nil? && !@queue.empty?
89
- pop_queue
90
- end
91
- value
94
+ value = @next
95
+ @next = search_dirs
96
+ while @next.nil? && !@queue.empty?
97
+ pop_queue
92
98
  end
99
+ value
93
100
  end
101
+ end
94
102
 
95
- # Close any opened file descriptor
96
- #
97
- # === Return
98
- # true:: Always return true
99
- def close
100
- @builder.finish
101
- if @stack && !@stack.empty?
102
- @stack.each {|s| s.close}
103
- @stack = []
104
- end
105
- true
103
+ # Close any opened file descriptor
104
+ #
105
+ # === Return
106
+ # true:: Always return true
107
+ def close
108
+ @builder.finish
109
+ if @stack && !@stack.empty?
110
+ @stack.each {|s| s.close}
111
+ @stack = []
106
112
  end
113
+ true
114
+ end
107
115
 
108
- protected
109
-
110
- # Directory containing files to be scraped
111
- attr_reader :repo_dir
116
+ protected
112
117
 
113
- # (Hash) Lookup table from textual description of scraper type
114
- # ('cookbook' or 'workflow' currently) to the class that
115
- # represents that scraper.
116
- @@types = {} unless class_variable_defined?(:@@types)
118
+ # Directory containing files to be scraped
119
+ attr_reader :repo_dir
117
120
 
118
- # Initialize scraper
119
- #
120
- # === Options
121
- # <tt>:repository</tt>:: Required, original repository containing scraped
122
- # files
123
- # <tt>:repo_dir</tt>:: Required, path to directory containing files
124
- # to be scraped
125
- # <tt>:ignorable_paths</tt>:: List of directory names that should
126
- # be ignored by scraper
127
- # <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
128
- # to RightScraper::Scanners::ResourceManifest and
129
- # RightScraper::Scanners::CookbookMetadata
130
- # <tt>:builders</tt>:: List of Builder classes to use, defaulting to
131
- # RightScaper::Builders::Filesystem
132
- #
133
- def initialize(options)
134
- raise "Repository required when initializing a scraper" unless options[:repository]
135
- raise "Repository directory required when initializing a scraper" unless options[:repo_dir]
136
- @repository = options[:repository]
137
- @logger = options[:logger] || ScraperLogger.new
138
- @repo_dir = options[:repo_dir]
139
- @ignorable_paths = options[:ignorable_paths]
140
- @stack = []
141
- @queue = (@repository.resources_path || [""]).reverse
142
- @resources = []
143
- scanners = options[:scanners] || default_scanners
144
- @scanner = RightScraper::Scanners::Union.new(scanners, options)
145
- builders = options[:builders] || default_builders
146
- @builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
147
- :scanner => @scanner,
148
- :logger => @logger,
149
- :max_bytes => @max_bytes,
150
- :max_seconds => @max_seconds)
151
- pop_queue # Initialize @next
121
+ # Initialize scraper
122
+ #
123
+ # === Options
124
+ # <tt>:repository</tt>:: Required, original repository containing scraped
125
+ # files
126
+ # <tt>:repo_dir</tt>:: Required, path to directory containing files
127
+ # to be scraped
128
+ # <tt>:ignorable_paths</tt>:: List of directory names that should
129
+ # be ignored by scraper
130
+ # <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
131
+ # to RightScraper::Scanners::ResourceManifest and
132
+ # RightScraper::Scanners::CookbookMetadata
133
+ # <tt>:builders</tt>:: List of Builder classes to use, defaulting to
134
+ # RightScaper::Builders::Filesystem
135
+ #
136
+ def initialize(options)
137
+ raise ScraperError.new("Repository required when initializing a scraper") unless options[:repository]
138
+ raise ScraperError.new("Repository directory required when initializing a scraper") unless options[:repo_dir]
139
+ @repository = options[:repository]
140
+ unless @logger = options[:logger]
141
+ raise ::ArgumentError, ':logger is required'
152
142
  end
143
+ @repo_dir = options[:repo_dir]
144
+ @ignorable_paths = options[:ignorable_paths]
145
+ @stack = []
146
+ @queue = (@repository.resources_path || [""]).reverse
153
147
 
154
- # List of default scanners for this scaper
155
- #
156
- # === Return
157
- # Array<Scanner>:: Default scanners
158
- def default_scanners
159
- end
148
+ # Make sure the requested cookbook resource path exists
149
+ missing_paths = @queue.select {|path| !File.directory?(File.join(repo_dir, path)) }.compact.sort
160
150
 
161
- # List of default builders for this scaper
162
- #
163
- # === Return
164
- # Array<Builder>:: Default builders
165
- def default_brokers
166
- end
151
+ raise ScraperError.new(
152
+ "Cookbook resource path#{'s' unless missing_paths.size < 2}: " +
153
+ "[#{missing_paths.join(', ')}] #{missing_paths.size < 2 ? "is" : "are"} " +
154
+ "non-existent for this repository and branch") unless missing_paths.empty?
167
155
 
168
- # Find the interesting item in given directory
169
- # Override in actual scraper implementation
170
- #
171
- # === Parameters
172
- # dir(Dir):: directory to begin search in
173
- def find_next(dir)
174
- raise NotImplementedError
175
- end
156
+ @resources = []
157
+ scanners = options[:scanners] || default_scanners
158
+ @scanner = RightScraper::Scanners::Union.new(scanners, options)
159
+ builders = options[:builders] || default_builders
160
+ @builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
161
+ :scanner => @scanner,
162
+ :logger => @logger,
163
+ :max_bytes => @max_bytes,
164
+ :max_seconds => @max_seconds)
165
+ pop_queue # Initialize @next
166
+ end
176
167
 
177
- # Return the position of the scraper. Here, the position is the
178
- # path relative from the top of the temporary directory. Akin to
179
- # IO#pos or IO#tell.
180
- def pos
181
- strip_repo_dir(@stack.last.path)
182
- end
183
- alias_method :tell, :pos
168
+ # List of default scanners for this scaper
169
+ #
170
+ # === Return
171
+ # Array<Scanner>:: Default scanners
172
+ def default_scanners
173
+ end
184
174
 
185
- # Turn path from an absolute filesystem location to a relative
186
- # file location from #repo_dir.
187
- #
188
- # === Parameters
189
- # path(String):: absolute path to relativize
190
- #
191
- # === Returns
192
- # res(String):: relative pathname for path
193
- def strip_repo_dir(path)
194
- res = path[repo_dir.length+1..-1]
195
- if res == nil || res == ""
196
- "."
197
- else
198
- res
199
- end
200
- end
201
- private :strip_repo_dir
175
+ # List of default builders for this scaper
176
+ #
177
+ # === Return
178
+ # Array<Builder>:: Default builders
179
+ def default_brokers
180
+ end
181
+
182
+ # Find the interesting item in given directory
183
+ # Override in actual scraper implementation
184
+ #
185
+ # === Parameters
186
+ # dir(Dir):: directory to begin search in
187
+ def find_next(dir)
188
+ raise NotImplementedError
189
+ end
190
+
191
+ # Return the position of the scraper. Here, the position is the
192
+ # path relative from the top of the temporary directory. Akin to
193
+ # IO#pos or IO#tell.
194
+ def pos
195
+ strip_repo_dir(@stack.last.path)
196
+ end
197
+ alias_method :tell, :pos
202
198
 
203
- # Test if the entry given is ignorable. By default just uses
204
- # #ignorable_paths
205
- #
206
- # === Parameters
207
- # entry(String):: file name to check
208
- #
209
- # === Returns
210
- # Boolean:: true if the entry should be ignored
211
- def ignorable?(entry)
212
- @ignorable_paths.include?(entry)
199
+ # Turn path from an absolute filesystem location to a relative
200
+ # file location from #repo_dir.
201
+ #
202
+ # === Parameters
203
+ # path(String):: absolute path to relativize
204
+ #
205
+ # === Returns
206
+ # res(String):: relative pathname for path
207
+ def strip_repo_dir(path)
208
+ res = path[repo_dir.length+1..-1]
209
+ if res == nil || res == ""
210
+ "."
211
+ else
212
+ res
213
213
  end
214
+ end
215
+ private :strip_repo_dir
214
216
 
215
- # Initialize @next with the next resource
216
- #
217
- # === Returns
218
- # @next(Resources::Base):: Next resource
219
- def pop_queue
220
- until @queue.empty?
221
- nextdir = @queue.pop
222
- if File.directory?(File.join(repo_dir, nextdir))
223
- @next = find_next(Dir.new(File.join(repo_dir, nextdir)))
224
- return @next
225
- else
226
- @logger.warn("When processing in #{@repository}, no such path #{nextdir}")
227
- end
217
+ # Test if the entry given is ignorable. By default just uses
218
+ # #ignorable_paths
219
+ #
220
+ # === Parameters
221
+ # entry(String):: file name to check
222
+ #
223
+ # === Returns
224
+ # Boolean:: true if the entry should be ignored
225
+ def ignorable?(entry)
226
+ @ignorable_paths.include?(entry)
227
+ end
228
+
229
+ # Initialize @next with the next resource
230
+ #
231
+ # === Returns
232
+ # @next(Resources::Base):: Next resource
233
+ def pop_queue
234
+ until @queue.empty?
235
+ nextdir = File.join(repo_dir, @queue.pop)
236
+ if File.directory?(nextdir)
237
+ @next = find_next(Dir.new(nextdir))
238
+ return @next
239
+ else
240
+ @logger.warn("When processing in #{@repository}, no such path #{nextdir}")
228
241
  end
229
- @next = nil
230
242
  end
243
+ @next = nil
244
+ end
231
245
 
232
- # Search the directory stack looking for the next resource.
233
- def search_dirs
234
- @logger.operation(:searching) do
235
- until @stack.empty?
236
- dir = @stack.last
237
- entry = dir.read
238
- if entry == nil
239
- dir.close
240
- @stack.pop
241
- next
242
- end
246
+ # Search the directory stack looking for the next resource.
247
+ def search_dirs
248
+ @logger.operation(:searching) do
249
+ until @stack.empty?
250
+ dir = @stack.last
251
+ entry = dir.read
252
+ if entry == nil
253
+ dir.close
254
+ @stack.pop
255
+ next
256
+ end
243
257
 
244
- next if entry == '.' || entry == '..'
245
- next if ignorable?(entry)
258
+ next if entry == '.' || entry == '..'
259
+ next if ignorable?(entry)
246
260
 
247
- fullpath = File.join(dir.path, entry)
261
+ fullpath = File.join(dir.path, entry)
248
262
 
249
- if File.directory?(fullpath)
250
- result = find_next(Dir.new(fullpath))
251
- break
252
- end
263
+ if File.directory?(fullpath)
264
+ result = find_next(Dir.new(fullpath))
265
+ break
253
266
  end
254
- result
255
267
  end
268
+ result
256
269
  end
257
- private :search_dirs
258
-
259
270
  end
271
+ private :search_dirs
272
+
260
273
  end
261
274
  end
262
-