right_scraper 3.2.6 → 5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92)
  1. checksums.yaml +7 -0
  2. data/lib/right_scraper.rb +16 -34
  3. data/lib/right_scraper/builders.rb +32 -0
  4. data/lib/right_scraper/builders/base.rb +19 -20
  5. data/lib/right_scraper/builders/filesystem.rb +8 -6
  6. data/lib/right_scraper/builders/union.rb +4 -1
  7. data/lib/right_scraper/loggers.rb +31 -0
  8. data/lib/right_scraper/loggers/base.rb +113 -0
  9. data/lib/right_scraper/loggers/default.rb +98 -0
  10. data/lib/right_scraper/{scraper.rb → main.rb} +53 -9
  11. data/lib/right_scraper/processes.rb +33 -0
  12. data/lib/right_scraper/processes/shell.rb +227 -0
  13. data/lib/right_scraper/processes/{ssh.rb → ssh_agent.rb} +4 -0
  14. data/lib/right_scraper/processes/svn_client.rb +117 -0
  15. data/lib/right_scraper/processes/warden.rb +358 -0
  16. data/lib/right_scraper/registered_base.rb +154 -0
  17. data/lib/right_scraper/repositories.rb +33 -0
  18. data/lib/right_scraper/repositories/base.rb +271 -232
  19. data/lib/right_scraper/repositories/download.rb +8 -6
  20. data/lib/right_scraper/repositories/git.rb +8 -9
  21. data/lib/right_scraper/repositories/svn.rb +8 -8
  22. data/lib/right_scraper/resources.rb +32 -0
  23. data/lib/right_scraper/resources/base.rb +5 -1
  24. data/lib/right_scraper/resources/cookbook.rb +34 -27
  25. data/lib/right_scraper/resources/workflow.rb +27 -28
  26. data/lib/right_scraper/retrievers.rb +34 -0
  27. data/lib/right_scraper/retrievers/base.rb +80 -84
  28. data/lib/right_scraper/retrievers/checkout_base.rb +178 -0
  29. data/lib/right_scraper/retrievers/download.rb +125 -117
  30. data/lib/right_scraper/retrievers/git.rb +377 -223
  31. data/lib/right_scraper/retrievers/svn.rb +102 -62
  32. data/lib/right_scraper/scanners.rb +37 -0
  33. data/lib/right_scraper/scanners/base.rb +77 -80
  34. data/lib/right_scraper/scanners/cookbook_manifest.rb +31 -30
  35. data/lib/right_scraper/scanners/cookbook_metadata.rb +380 -35
  36. data/lib/right_scraper/scanners/cookbook_s3_upload.rb +56 -53
  37. data/lib/right_scraper/scanners/union.rb +61 -58
  38. data/lib/right_scraper/scanners/workflow_manifest.rb +55 -54
  39. data/lib/right_scraper/scanners/workflow_metadata.rb +41 -39
  40. data/lib/right_scraper/scanners/workflow_s3_upload.rb +59 -55
  41. data/lib/right_scraper/scrapers.rb +32 -0
  42. data/lib/right_scraper/scrapers/base.rb +217 -205
  43. data/lib/right_scraper/scrapers/cookbook.rb +42 -40
  44. data/lib/right_scraper/scrapers/workflow.rb +57 -58
  45. data/lib/right_scraper/version.rb +3 -0
  46. data/right_scraper.gemspec +12 -16
  47. metadata +57 -163
  48. data/Gemfile +0 -15
  49. data/Rakefile +0 -89
  50. data/lib/right_scraper/logger.rb +0 -107
  51. data/lib/right_scraper/loggers/noisy.rb +0 -85
  52. data/lib/right_scraper/repositories/mock.rb +0 -70
  53. data/lib/right_scraper/retrievers/checkout.rb +0 -79
  54. data/lib/right_scraper/scraper_logger.rb +0 -66
  55. data/lib/right_scraper/svn_client.rb +0 -164
  56. data/right_scraper.rconf +0 -13
  57. data/spec/builder_spec.rb +0 -50
  58. data/spec/cookbook_helper.rb +0 -73
  59. data/spec/cookbook_manifest_spec.rb +0 -93
  60. data/spec/cookbook_s3_upload_spec.rb +0 -159
  61. data/spec/download/download_retriever_spec.rb +0 -118
  62. data/spec/download/download_retriever_spec_helper.rb +0 -72
  63. data/spec/download/download_spec.rb +0 -128
  64. data/spec/download/multi_dir_spec.rb +0 -106
  65. data/spec/download/multi_dir_spec_helper.rb +0 -40
  66. data/spec/git/cookbook_spec.rb +0 -165
  67. data/spec/git/demokey +0 -27
  68. data/spec/git/demokey.pub +0 -1
  69. data/spec/git/password_key +0 -30
  70. data/spec/git/password_key.pub +0 -1
  71. data/spec/git/repository_spec.rb +0 -110
  72. data/spec/git/retriever_spec.rb +0 -553
  73. data/spec/git/retriever_spec_helper.rb +0 -112
  74. data/spec/git/scraper_spec.rb +0 -151
  75. data/spec/git/ssh_spec.rb +0 -174
  76. data/spec/git/url_spec.rb +0 -103
  77. data/spec/logger_spec.rb +0 -185
  78. data/spec/repository_spec.rb +0 -111
  79. data/spec/retriever_spec_helper.rb +0 -146
  80. data/spec/scanner_spec.rb +0 -61
  81. data/spec/scraper_helper.rb +0 -88
  82. data/spec/scraper_spec.rb +0 -147
  83. data/spec/spec_helper.rb +0 -185
  84. data/spec/svn/cookbook_spec.rb +0 -96
  85. data/spec/svn/multi_svn_spec.rb +0 -64
  86. data/spec/svn/multi_svn_spec_helper.rb +0 -40
  87. data/spec/svn/repository_spec.rb +0 -72
  88. data/spec/svn/retriever_spec.rb +0 -266
  89. data/spec/svn/scraper_spec.rb +0 -90
  90. data/spec/svn/svn_retriever_spec_helper.rb +0 -90
  91. data/spec/svn/url_spec.rb +0 -47
  92. data/spec/url_spec.rb +0 -164
@@ -0,0 +1,32 @@
1
+ #
2
+ # Copyright (c) 2013 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # ancestor
24
+ require 'right_scraper'
25
+
26
+ module RightScraper
27
+ module Scrapers
28
+ autoload :Base, 'right_scraper/scrapers/base'
29
+ autoload :Cookbook, 'right_scraper/scrapers/cookbook'
30
+ autoload :Workflow, 'right_scraper/scrapers/workflow'
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright: Copyright (c) 2010-2011 RightScale, Inc.
2
+ # Copyright: Copyright (c) 2010-2013 RightScale, Inc.
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -21,242 +21,254 @@
21
21
  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module RightScraper
25
- module Scrapers
24
+ # ancestor
25
+ require 'right_scraper/scrapers'
26
26
 
27
- # Base class for all scrapers. Subclasses should override
28
- # #find_next which instantiates the resource from the file system.
29
- class Base
30
-
31
- # Scraped resources
32
- attr_reader :resources
33
-
34
- # Initialize scraper
35
- #
36
- # === Options
37
- # <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
38
- # <tt>:repo_dir</tt>:: Required, path to directory containing files
39
- # to be scraped
40
- # <tt>:ignorable_paths</tt>:: List of directory names that should
41
- # be ignored by scraper
42
- # <tt>:scanners</tt>:: List of Scanner classes to use, optional
43
- # <tt>:builders</tt>:: List of Builder classes to use, optional
44
- #
45
- # === Return
46
- # scraper(Scrapers::Base):: Corresponding scraper instance
47
- def self.scraper(options)
48
- scraper_kind = options.delete(:kind)
49
- scraper_class = @@types[scraper_kind]
50
- raise "Can't understand how to build scraper #{scraper_kind}" if scraper_class.nil?
51
- scraper = scraper_class.new(options)
52
- end
27
+ module RightScraper::Scrapers
28
+
29
+ class ScraperError < Exception; end
30
+
31
+ # Base class for all scrapers. Subclasses should override
32
+ # #find_next which instantiates the resource from the file system.
33
+ class Base < ::RightScraper::RegisteredBase
34
+
35
+ # Scraped resources
36
+ attr_reader :resources
37
+
38
+ # @return [Module] module for registered scraper types
39
+ def self.registration_module
40
+ ::RightScraper::Scrapers
41
+ end
53
42
 
54
- # Do the scrape!
55
- # Extract all resources from directory
56
- # Call this method or call 'next_resource' to retrieve
57
- # resources one by one (you must then call 'close' yourself)
58
- # Fill @resources
59
- #
60
- # === Return
61
- # resources<Array>:: List of all scraped resources
62
- def scrape
63
- @resources = []
64
- begin
43
+ # Initialize scraper
44
+ #
45
+ # === Options
46
+ # <tt>:kind</tt>:: Scraper type, one of :cookbook or :workflow
47
+ # <tt>:repo_dir</tt>:: Required, path to directory containing files
48
+ # to be scraped
49
+ # <tt>:ignorable_paths</tt>:: List of directory names that should
50
+ # be ignored by scraper
51
+ # <tt>:scanners</tt>:: List of Scanner classes to use, optional
52
+ # <tt>:builders</tt>:: List of Builder classes to use, optional
53
+ #
54
+ # === Return
55
+ # scraper(Scrapers::Base):: Corresponding scraper instance
56
+ def self.scraper(options)
57
+ scraper_kind = options.delete(:kind)
58
+ scraper_class = query_registered_type(scraper_kind)
59
+ scraper_class.new(options)
60
+ end
61
+
62
+ # Do the scrape!
63
+ # Extract all resources from directory
64
+ # Call this method or call 'next_resource' to retrieve
65
+ # resources one by one (you must then call 'close' yourself)
66
+ # Fill @resources
67
+ #
68
+ # === Return
69
+ # resources<Array>:: List of all scraped resources
70
+ def scrape
71
+ @resources = []
72
+ begin
73
+ resource = next_resource
74
+ until resource.nil?
75
+ @resources << resource
65
76
  resource = next_resource
66
- until resource.nil?
67
- @resources << resource
68
- resource = next_resource
69
- end
70
- ensure
71
- close
72
77
  end
73
- @resources
78
+ ensure
79
+ close
74
80
  end
81
+ @resources
82
+ end
75
83
 
76
- # Return the next resource in the filesystem, or nil if none. As
77
- # a part of building the resources, invokes the builders.
78
- # A resource can be a cookbook, a workflow, a RightScript etc.
79
- #
80
- # === Returns
81
- # Object:: next resource in filesystem, or nil if none.
82
- def next_resource
83
- @logger.operation(:next) do
84
- next nil if @next.nil?
84
+ # Return the next resource in the filesystem, or nil if none. As
85
+ # a part of building the resources, invokes the builders.
86
+ # A resource can be a cookbook, a workflow, a RightScript etc.
87
+ #
88
+ # === Returns
89
+ # Object:: next resource in filesystem, or nil if none.
90
+ def next_resource
91
+ @logger.operation(:next) do
92
+ next nil if @next.nil?
85
93
 
86
- value = @next
87
- @next = search_dirs
88
- while @next.nil? && !@queue.empty?
89
- pop_queue
90
- end
91
- value
94
+ value = @next
95
+ @next = search_dirs
96
+ while @next.nil? && !@queue.empty?
97
+ pop_queue
92
98
  end
99
+ value
93
100
  end
101
+ end
94
102
 
95
- # Close any opened file descriptor
96
- #
97
- # === Return
98
- # true:: Always return true
99
- def close
100
- @builder.finish
101
- if @stack && !@stack.empty?
102
- @stack.each {|s| s.close}
103
- @stack = []
104
- end
105
- true
103
+ # Close any opened file descriptor
104
+ #
105
+ # === Return
106
+ # true:: Always return true
107
+ def close
108
+ @builder.finish
109
+ if @stack && !@stack.empty?
110
+ @stack.each {|s| s.close}
111
+ @stack = []
106
112
  end
113
+ true
114
+ end
107
115
 
108
- protected
109
-
110
- # Directory containing files to be scraped
111
- attr_reader :repo_dir
116
+ protected
112
117
 
113
- # (Hash) Lookup table from textual description of scraper type
114
- # ('cookbook' or 'workflow' currently) to the class that
115
- # represents that scraper.
116
- @@types = {} unless class_variable_defined?(:@@types)
118
+ # Directory containing files to be scraped
119
+ attr_reader :repo_dir
117
120
 
118
- # Initialize scraper
119
- #
120
- # === Options
121
- # <tt>:repository</tt>:: Required, original repository containing scraped
122
- # files
123
- # <tt>:repo_dir</tt>:: Required, path to directory containing files
124
- # to be scraped
125
- # <tt>:ignorable_paths</tt>:: List of directory names that should
126
- # be ignored by scraper
127
- # <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
128
- # to RightScraper::Scanners::ResourceManifest and
129
- # RightScraper::Scanners::CookbookMetadata
130
- # <tt>:builders</tt>:: List of Builder classes to use, defaulting to
131
- # RightScaper::Builders::Filesystem
132
- #
133
- def initialize(options)
134
- raise "Repository required when initializing a scraper" unless options[:repository]
135
- raise "Repository directory required when initializing a scraper" unless options[:repo_dir]
136
- @repository = options[:repository]
137
- @logger = options[:logger] || ScraperLogger.new
138
- @repo_dir = options[:repo_dir]
139
- @ignorable_paths = options[:ignorable_paths]
140
- @stack = []
141
- @queue = (@repository.resources_path || [""]).reverse
142
- @resources = []
143
- scanners = options[:scanners] || default_scanners
144
- @scanner = RightScraper::Scanners::Union.new(scanners, options)
145
- builders = options[:builders] || default_builders
146
- @builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
147
- :scanner => @scanner,
148
- :logger => @logger,
149
- :max_bytes => @max_bytes,
150
- :max_seconds => @max_seconds)
151
- pop_queue # Initialize @next
121
+ # Initialize scraper
122
+ #
123
+ # === Options
124
+ # <tt>:repository</tt>:: Required, original repository containing scraped
125
+ # files
126
+ # <tt>:repo_dir</tt>:: Required, path to directory containing files
127
+ # to be scraped
128
+ # <tt>:ignorable_paths</tt>:: List of directory names that should
129
+ # be ignored by scraper
130
+ # <tt>:scanners</tt>:: List of Scanner classes to use, defaulting
131
+ # to RightScraper::Scanners::ResourceManifest and
132
+ # RightScraper::Scanners::CookbookMetadata
133
+ # <tt>:builders</tt>:: List of Builder classes to use, defaulting to
134
+ # RightScraper::Builders::Filesystem
135
+ #
136
+ def initialize(options)
137
+ raise ScraperError.new("Repository required when initializing a scraper") unless options[:repository]
138
+ raise ScraperError.new("Repository directory required when initializing a scraper") unless options[:repo_dir]
139
+ @repository = options[:repository]
140
+ unless @logger = options[:logger]
141
+ raise ::ArgumentError, ':logger is required'
152
142
  end
143
+ @repo_dir = options[:repo_dir]
144
+ @ignorable_paths = options[:ignorable_paths]
145
+ @stack = []
146
+ @queue = (@repository.resources_path || [""]).reverse
153
147
 
154
- # List of default scanners for this scaper
155
- #
156
- # === Return
157
- # Array<Scanner>:: Default scanners
158
- def default_scanners
159
- end
148
+ # Make sure the requested cookbook resource path exists
149
+ missing_paths = @queue.select {|path| !File.directory?(File.join(repo_dir, path)) }.compact.sort
160
150
 
161
- # List of default builders for this scaper
162
- #
163
- # === Return
164
- # Array<Builder>:: Default builders
165
- def default_brokers
166
- end
151
+ raise ScraperError.new(
152
+ "Cookbook resource path#{'s' unless missing_paths.size < 2}: " +
153
+ "[#{missing_paths.join(', ')}] #{missing_paths.size < 2 ? "is" : "are"} " +
154
+ "non-existent for this repository and branch") unless missing_paths.empty?
167
155
 
168
- # Find the interesting item in given directory
169
- # Override in actual scraper implementation
170
- #
171
- # === Parameters
172
- # dir(Dir):: directory to begin search in
173
- def find_next(dir)
174
- raise NotImplementedError
175
- end
156
+ @resources = []
157
+ scanners = options[:scanners] || default_scanners
158
+ @scanner = RightScraper::Scanners::Union.new(scanners, options)
159
+ builders = options[:builders] || default_builders
160
+ @builder = RightScraper::Builders::Union.new(builders, :ignorable_paths => @ignorable_paths,
161
+ :scanner => @scanner,
162
+ :logger => @logger,
163
+ :max_bytes => @max_bytes,
164
+ :max_seconds => @max_seconds)
165
+ pop_queue # Initialize @next
166
+ end
176
167
 
177
- # Return the position of the scraper. Here, the position is the
178
- # path relative from the top of the temporary directory. Akin to
179
- # IO#pos or IO#tell.
180
- def pos
181
- strip_repo_dir(@stack.last.path)
182
- end
183
- alias_method :tell, :pos
168
+ # List of default scanners for this scraper
169
+ #
170
+ # === Return
171
+ # Array<Scanner>:: Default scanners
172
+ def default_scanners
173
+ end
184
174
 
185
- # Turn path from an absolute filesystem location to a relative
186
- # file location from #repo_dir.
187
- #
188
- # === Parameters
189
- # path(String):: absolute path to relativize
190
- #
191
- # === Returns
192
- # res(String):: relative pathname for path
193
- def strip_repo_dir(path)
194
- res = path[repo_dir.length+1..-1]
195
- if res == nil || res == ""
196
- "."
197
- else
198
- res
199
- end
200
- end
201
- private :strip_repo_dir
175
+ # List of default builders for this scraper
176
+ #
177
+ # === Return
178
+ # Array<Builder>:: Default builders
179
+ def default_brokers
180
+ end
181
+
182
+ # Find the interesting item in given directory
183
+ # Override in actual scraper implementation
184
+ #
185
+ # === Parameters
186
+ # dir(Dir):: directory to begin search in
187
+ def find_next(dir)
188
+ raise NotImplementedError
189
+ end
190
+
191
+ # Return the position of the scraper. Here, the position is the
192
+ # path relative from the top of the temporary directory. Akin to
193
+ # IO#pos or IO#tell.
194
+ def pos
195
+ strip_repo_dir(@stack.last.path)
196
+ end
197
+ alias_method :tell, :pos
202
198
 
203
- # Test if the entry given is ignorable. By default just uses
204
- # #ignorable_paths
205
- #
206
- # === Parameters
207
- # entry(String):: file name to check
208
- #
209
- # === Returns
210
- # Boolean:: true if the entry should be ignored
211
- def ignorable?(entry)
212
- @ignorable_paths.include?(entry)
199
+ # Turn path from an absolute filesystem location to a relative
200
+ # file location from #repo_dir.
201
+ #
202
+ # === Parameters
203
+ # path(String):: absolute path to relativize
204
+ #
205
+ # === Returns
206
+ # res(String):: relative pathname for path
207
+ def strip_repo_dir(path)
208
+ res = path[repo_dir.length+1..-1]
209
+ if res == nil || res == ""
210
+ "."
211
+ else
212
+ res
213
213
  end
214
+ end
215
+ private :strip_repo_dir
214
216
 
215
- # Initialize @next with the next resource
216
- #
217
- # === Returns
218
- # @next(Resources::Base):: Next resource
219
- def pop_queue
220
- until @queue.empty?
221
- nextdir = @queue.pop
222
- if File.directory?(File.join(repo_dir, nextdir))
223
- @next = find_next(Dir.new(File.join(repo_dir, nextdir)))
224
- return @next
225
- else
226
- @logger.warn("When processing in #{@repository}, no such path #{nextdir}")
227
- end
217
+ # Test if the entry given is ignorable. By default just uses
218
+ # #ignorable_paths
219
+ #
220
+ # === Parameters
221
+ # entry(String):: file name to check
222
+ #
223
+ # === Returns
224
+ # Boolean:: true if the entry should be ignored
225
+ def ignorable?(entry)
226
+ @ignorable_paths.include?(entry)
227
+ end
228
+
229
+ # Initialize @next with the next resource
230
+ #
231
+ # === Returns
232
+ # @next(Resources::Base):: Next resource
233
+ def pop_queue
234
+ until @queue.empty?
235
+ nextdir = File.join(repo_dir, @queue.pop)
236
+ if File.directory?(nextdir)
237
+ @next = find_next(Dir.new(nextdir))
238
+ return @next
239
+ else
240
+ @logger.warn("When processing in #{@repository}, no such path #{nextdir}")
228
241
  end
229
- @next = nil
230
242
  end
243
+ @next = nil
244
+ end
231
245
 
232
- # Search the directory stack looking for the next resource.
233
- def search_dirs
234
- @logger.operation(:searching) do
235
- until @stack.empty?
236
- dir = @stack.last
237
- entry = dir.read
238
- if entry == nil
239
- dir.close
240
- @stack.pop
241
- next
242
- end
246
+ # Search the directory stack looking for the next resource.
247
+ def search_dirs
248
+ @logger.operation(:searching) do
249
+ until @stack.empty?
250
+ dir = @stack.last
251
+ entry = dir.read
252
+ if entry == nil
253
+ dir.close
254
+ @stack.pop
255
+ next
256
+ end
243
257
 
244
- next if entry == '.' || entry == '..'
245
- next if ignorable?(entry)
258
+ next if entry == '.' || entry == '..'
259
+ next if ignorable?(entry)
246
260
 
247
- fullpath = File.join(dir.path, entry)
261
+ fullpath = File.join(dir.path, entry)
248
262
 
249
- if File.directory?(fullpath)
250
- result = find_next(Dir.new(fullpath))
251
- break
252
- end
263
+ if File.directory?(fullpath)
264
+ result = find_next(Dir.new(fullpath))
265
+ break
253
266
  end
254
- result
255
267
  end
268
+ result
256
269
  end
257
- private :search_dirs
258
-
259
270
  end
271
+ private :search_dirs
272
+
260
273
  end
261
274
  end
262
-