downspout 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +21 -0
- data/README.rdoc +89 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/downspout/base.rb +131 -0
- data/lib/downspout/config.rb +77 -0
- data/lib/downspout/credential.rb +20 -0
- data/lib/downspout/downloader.rb +344 -0
- data/lib/downspout/logger.rb +11 -0
- data/lib/downspout/tmp_file.rb +101 -0
- data/lib/downspout.rb +23 -0
- data/test/downspout_test.rb +127 -0
- data/test/fixtures/ruby.png +0 -0
- data/test/servlet.rb +32 -0
- data/test/test_helper.rb +31 -0
- data/test/test_logger.rb +28 -0
- data/test/test_servlet.rb +42 -0
- data/test/unit/base_test.rb +124 -0
- data/test/unit/config_test.rb +101 -0
- data/test/unit/credential_test.rb +31 -0
- data/test/unit/downloader_test.rb +211 -0
- data/test/unit/tmp_file_test.rb +32 -0
- data/test/watchr.rb +17 -0
- metadata +143 -0
@@ -0,0 +1,344 @@
|
|
1
|
+
module Downspout
|
2
|
+
# The object returned by a call to fetch_url() or download_url_to_disk().
|
3
|
+
class Downloader < Base
|
4
|
+
|
5
|
+
# returns the path to the downloaded file
|
6
|
+
attr_accessor :path
|
7
|
+
|
8
|
+
# returns the remote response as the appropriate Net::HTTPResponse
|
9
|
+
attr_reader :response
|
10
|
+
|
11
|
+
# returns the headers parsed from the remote response
|
12
|
+
attr_reader :response_headers
|
13
|
+
|
14
|
+
# returns the URI parsed from the URL
|
15
|
+
attr_reader :uri
|
16
|
+
|
17
|
+
# returns the URL initially given
|
18
|
+
attr_accessor :url
|
19
|
+
|
20
|
+
# Builds a downloader from an optional hash of attributes.
#
# options - a Hash (anything responding to #keys) whose keys name writable
#           attributes of this object, e.g. :url or :path. Each pair is
#           assigned through the matching "key=" writer. Any other value
#           (including nil) is ignored.
#
# When a URL was supplied it is parsed into @uri.
def initialize( options=nil ) #:nodoc:
  @basename = nil
  @curb_enabled = Downspout::Config.use_curb?
  @path = nil
  @response = nil
  @response_headers = {}
  @started_at = nil
  @finished_at = nil
  @uri = nil
  @url = nil

  if options.respond_to?(:keys)
    options.each { |key, value| send("#{key}=", value) }
  end

  @uri = URI.parse( @url ) unless @url.nil?
end
|
35
|
+
|
36
|
+
# String form of the download: the local file path, or an empty string
# when no path has been assigned yet (so a String is always returned).
def to_s #:nodoc:
  @path.to_s
end
|
39
|
+
|
40
|
+
# returns the protocol or 'scheme' of the URL
|
41
|
+
# Returns the scheme of the parsed URI (e.g. "http"), or nil when no URI
# has been parsed.
def scheme
  @uri.nil? ? nil : @uri.scheme
end
|
45
|
+
|
46
|
+
# returns the time taken to download the file
|
47
|
+
# Returns the difference between the recorded finish and start times
# (seconds, as produced by Time subtraction), or nil when either
# timestamp is missing.
def duration
  return nil unless @started_at && @finished_at

  @finished_at - @started_at
end
|
53
|
+
|
54
|
+
# Extracts the file name from the URL or uses a default name based on the content-type header
|
55
|
+
# Returns (and memoizes) the file name for this download.
#
# Precedence: an already-memoized name, then the basename of the local
# path, then the basename of the URI path. When the URI is missing or its
# path is nil, empty or "/", the placeholder 'file.downspout' is used.
def basename
  return @basename unless @basename.nil?

  # a missing URI is treated the same as an unusable URI path
  uri_path = @uri.nil? ? nil : @uri.path

  if !@path.nil?
    @basename = File.basename( @path )
  elsif uri_path.nil? || uri_path.empty? || uri_path == '/'
    $logger.debug("downspout | downloader | basename | Bad URI path")
    @basename = 'file.downspout'
  else
    @basename = File.basename( uri_path )
  end

  $logger.debug("downspout | downloader | basename | #{@basename} ")

  @basename
end
|
73
|
+
|
74
|
+
# will this download use the Curb library?
|
75
|
+
# Returns the per-download Curb flag (@curb_enabled) as stored.
def use_curb?
  @curb_enabled
end
|
78
|
+
|
79
|
+
# will this download use the default Net/HTTP library?
|
80
|
+
# Returns true exactly when Curb is not enabled for this download.
def use_net_http?
  !use_curb?
end
|
84
|
+
|
85
|
+
# configure this download to use the Curb library (will fail if Curb is unavailable.)
|
86
|
+
# Switches this download to Curb, but only when the configuration reports
# Curb as available. Returns the resulting flag, which is left unchanged
# when Curb is unavailable.
def enable_curb!
  @curb_enabled = true if Downspout::Config.curb_available?

  @curb_enabled
end
|
91
|
+
|
92
|
+
# configure this download NOT to use the Curb library
|
93
|
+
# Turns Curb off for this download. Returns false (the new flag value).
def disable_curb!
  @curb_enabled = false
end
|
96
|
+
|
97
|
+
# Performs the download and returns true when a file exists at @path
# afterwards.
#
# Steps: validate the URI scheme, allocate a Tmpfile path when no target
# path was given, remove any file already at the target, dispatch to the
# FTP / Curb / Net::HTTP transfer, then (for placeholder-named temp files
# only) rename the result using generate_file_name.
#
# Raises UnsupportedScheme when there is no parsed URI or the scheme is
# not supported; transfer errors raised by the helpers propagate.
def download! #:nodoc:
  $logger.debug("downspout | downloader | download! | URL : #{@url} ")
  @started_at = Time.now

  raise UnsupportedScheme if @uri.nil?
  raise UnsupportedScheme unless Downspout.supported_protocol?( @uri.scheme )

  tmp_path = nil
  if @path.nil?
    tf = Downspout::Tmpfile.new( :name => self.basename )
    tmp_path = tf.path
    # only the path is needed; release the handle instead of leaking it
    tf.close
    @path = tmp_path
  end
  $logger.debug("downspout | downloader | download! | Path : #{@path} ")

  remove_file_at_target_path

  if Downspout::Config.network_enabled?
    case self.scheme
    when /ftp/
      net_ftp_download
    when /http[s]?/
      if use_curb?
        curb_http_download
      else
        net_http_download
      end
    else
      $logger.error("downspout | downloader | download! | Unknown URL Scheme : '#{self.scheme}'")
      raise UnsupportedScheme
    end
  else
    $logger.warn("downspout | downloader | download! | >>>>> Networking Disabled <<<<<")
  end

  downloaded = File.exist?( @path )

  $logger.debug("downspout | downloader | download! | #{self.basename} downloaded? : #{downloaded} ")
  @finished_at = Time.now

  # Rename only a temp file that still carries the placeholder name, and
  # only when the file actually exists; moving a missing file would raise.
  if downloaded && tmp_path && @basename == 'file.downspout'
    new_name = generate_file_name
    unless new_name.nil?
      $logger.debug("downspout | downloader | download! | Renaming #{@basename} to #{new_name} ...")
      new_path = File.join( File.dirname( tmp_path ), new_name )
      FileUtils.mv( tmp_path, new_path )
      @path = new_path
    end
  end

  $logger.debug("downspout | downloader | download! | Started: #{@started_at.utc}, Finished: #{@finished_at.utc}, Duration: #{duration}")

  downloaded
end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
# Deletes the file at @path when one exists. Does nothing when no path is
# set or nothing is there.
def remove_file_at_target_path
  return if @path.nil?
  return unless File.exist?( @path )

  $logger.debug("downspout | downloader | remove_file_at_target_path | Removing #{@path} ... ")
  FileUtils.rm( @path )
end
|
159
|
+
|
160
|
+
# Fetches @uri over FTP into @path and returns true when the local file
# exists afterwards.
#
# Credentials are looked up in Downspout::Config.credentials by scheme
# ('ftp') and host; without a match an anonymous login is attempted.
# Any error raised during the transfer is logged and re-raised.
def net_ftp_download
  $logger.debug("downspout | downloader | net_ftp_download | Downloading #{@url} ...")

  # look up the credentials for this host
  cred = Downspout::Config.credentials.find { |c| c.scheme == 'ftp' && c.host == @uri.host }
  if cred.nil?
    $logger.warn("downspout | downloader | net_ftp_download | No credentials found for '#{@uri.host}'.")
    # proceed anyway - the server may accept anonymous access
  else
    $logger.debug("downspout | downloader | net_ftp_download | Loaded credentials for #{cred.host} ...")
  end

  # The remote name must come from the URL: basename() prefers the local
  # target path, which may be named differently from the remote file.
  remote_name = File.basename( @uri.path )

  begin
    Net::FTP.open( @uri.host ) do |ftp|
      if cred.nil?
        ftp.login
      else
        ftp.login( cred.user_name, cred.pass_word )
      end
      # assignment is required; a bare `ftp.passive` only reads the flag
      ftp.passive = true
      ftp.chdir( File.dirname( @uri.path ) )

      $logger.debug("downspout | downloader | net_ftp_download | Local Path : #{@path} ...")
      ftp.getbinaryfile( remote_name, @path )
    end
  rescue StandardError => e
    $logger.error("downspout | downloader | net_ftp_download | Exception : #{e}")
    raise
  end

  done = File.exist?( @path )
  $logger.debug("downspout | downloader | net_ftp_download | #{basename} downloaded? : #{done}.")

  done
end
|
191
|
+
|
192
|
+
# Downloads @url with Net::HTTP into @path.
#
# Returns true on success. Returns false when the response code is not
# 200/202 (the written file is removed) or when no file exists at @path
# afterwards. A SocketError is logged, the target file is removed, and
# the error is re-raised. @response_headers is populated from the
# response.
def net_http_download
  $logger.debug("downspout | downloader | net_http_download | Downloading #{@url} ...")

  begin
    response = net_http_fetch( @url , 1)

    # File.open, not Kernel#open: a path must never be treated as a pipe
    File.open( @path, "wb" ) { |file| file.write( response.body ) }

    $logger.debug("downspout | downloader | net_http_download | Response Body : #{response.body.to_s[0..5].strip}")
  rescue SocketError => dns_err
    $logger.error("downspout | downloader | net_http_download | Net/HTTP DNS Error | #{@uri.host} | #{dns_err.inspect}")
    remove_file_at_target_path
    raise dns_err
  end

  $logger.debug("downspout | downloader | net_http_download | Response Code : #{response.code}")

  # populate the response headers from net/http headers...
  header_str = "HTTP/1.1 #{response.code} #{response.message}\r\n"
  response.each_header { |k, v| header_str << "#{k}: #{v}\r\n" }
  @response_headers = parse_headers_from_string( header_str )

  unless [200, 202].include?( response.code.to_i )
    # missing file, failed download - delete the response body [if downloaded]
    remove_file_at_target_path
    return false
  end

  $logger.debug("downspout | downloader | net_http_download | Headers : #{response.header}")

  unless File.exist?( @path )
    $logger.error("downspout | downloader | net_http_download | Missing File at download path : #{@path}")
    return false
  end

  $logger.debug("downspout | downloader | net_http_download | Successful.")
  true
end
|
236
|
+
|
237
|
+
# Issues a GET for url_str with Net::HTTP, following redirects, and
# stores/returns the final response in @response.
#
# url_str - the URL to request (String).
# limit   - remaining request budget; each redirect consumes one, and a
#           budget of 0 raises.
#
# Raises Downspout::BadURL when the URL is nil or is not an HTTP(S) URL,
# Downspout::ExcessiveRedirects when the budget is exhausted, and whatever
# Net::HTTPResponse#error! raises for non-success, non-redirect responses.
def net_http_fetch( url_str, limit = 10 )
  $logger.debug("downspout | downloader | net_http_fetch | URL: #{url_str}, Redirects: #{limit}.")
  raise Downspout::BadURL, 'URL is missing' if url_str.nil?
  raise Downspout::ExcessiveRedirects, 'HTTP redirect too deep' if limit == 0

  u = URI.parse( url_str )
  raise Downspout::BadURL, "Not an HTTP URL : #{url_str}" unless u.respond_to?( :request_uri )

  # request_uri yields "/" for an empty path and omits a dangling "?"
  my_request = Net::HTTP::Get.new( u.request_uri )

  # TODO : implement credentials for downloads via net_http_fetch
  # FIXME(review): these placeholder credentials are sent to every host;
  # kept unchanged here because existing servers/tests may rely on them.
  my_request.basic_auth 'account', 'p4ssw0rd'

  $logger.debug("downspout | downloader | net_http_fetch | Firing...")
  connection = Net::HTTP.new( u.host, u.port )
  # without TLS an https URL would be spoken to in plain HTTP
  connection.use_ssl = true if u.scheme == 'https'
  @response = connection.start { |http| http.request( my_request ) }

  $logger.debug("downspout | downloader | net_http_fetch | Response : #{@response}")

  case @response
  when Net::HTTPSuccess
    @response
  when Net::HTTPRedirection
    location = @response['location']
    # resolve relative Location headers against the URL just requested
    next_url = location.nil? ? nil : URI.join( url_str, location ).to_s
    net_http_fetch( next_url, limit - 1 )
  else
    @response.error!
  end
end
|
265
|
+
|
266
|
+
# Downloads @url to @path with Curb (following at most one redirect).
#
# Populates @response_headers from Curb's header string and builds a
# Net::HTTPResponse stand-in in @response. Returns true when a file exists
# at @path afterwards; returns false on a host-resolution error or when
# the file is missing. A response code other than 200/202 removes the
# downloaded file.
def curb_http_download
  $logger.debug("downspout | downloader | curb_http_download | Downloading #{@url} ...")

  begin
    curb = Curl::Easy.download( @url, @path ) do |c|
      c.follow_location = true
      c.max_redirects = 1
    end
  rescue Curl::Err::HostResolutionError
    $logger.error("downspout | downloader | curb_http_download | Curb/Curl DNS Error | #{@uri.host}")
    return false
  end

  $logger.debug("downspout | downloader | curb_http_download | Response Code : #{curb.response_code}")

  unless [200, 202].include?( curb.response_code )
    # missing file, failed download - delete the response body [if downloaded]
    remove_file_at_target_path
  end

  $logger.debug("downspout | downloader | curb_http_download | Headers : #{curb.header_str}")

  # populate the response headers from curb header string
  @response_headers = parse_headers_from_string( curb.header_str )

  # populate a 'proxy' HTTPResponse object with the Curb data...
  hr_klass = Net::HTTPResponse.send('response_class', curb.response_code.to_s)
  $logger.debug("downspout | downloader | curb_http_download | Response Type : #{hr_klass.name}")

  status = @response_headers["HTTP"]
  @response = hr_klass.new( status[:version], curb.response_code, status[:message] )

  $logger.debug("downspout | downloader | curb_http_download | Response : #{@response.inspect}")

  unless File.exist?( @path )
    $logger.error("downspout | downloader | curb_http_download | Missing File at download path : #{@path}")
    return false
  end

  $logger.debug("downspout | downloader | curb_http_download | Successful.")
  true
end
|
306
|
+
|
307
|
+
# Parses a raw CRLF-delimited HTTP header string into a Hash.
#
# The result maps each header name to its value, plus the key "HTTP"
# holding { :header, :version, :code, :message } taken from the status
# line. When the string holds several header blocks separated by a blank
# line (e.g. a redirect followed by the final response), only the last
# block is used. Lines that are not "Name: value" pairs are skipped.
# For an empty string the "HTTP" entry has nil fields and a code of 0.
def parse_headers_from_string( header_str )
  $logger.debug("downspout | downloader | parse_headers_from_string | String : #{header_str}")

  blocks = header_str.to_s.split( /\r\n\r\n/ ).reject { |block| block.strip.empty? }
  lines = ( blocks.last || "" ).split( "\r\n" )

  status_line = lines.first
  # limit the split so a multi-word reason phrase stays intact
  status_parts = status_line.to_s.split( " ", 3 )
  version_match = status_parts[0].to_s.match( /HTTP\/([0-9\.]+)/ )

  http_hash = {}
  http_hash[:header] = status_line
  http_hash[:version] = version_match && version_match[1]
  http_hash[:code] = status_parts[1].to_i
  http_hash[:message] = status_parts[2]

  $logger.debug("downspout | downloader | parse_headers_from_string | Response : #{http_hash[:version]}, #{http_hash[:code]}, #{http_hash[:message]}")

  header_hash = { "HTTP" => http_hash }

  Array( lines[1..-1] ).each do |line|
    pair = line.match( /([\w\-\s]+)\:\s?(.*)/ )
    next if pair.nil?
    header_hash[pair[1]] = pair[2]
  end

  header_hash
end
|
329
|
+
|
330
|
+
# Suggests a file name from the response's content type.
#
# Returns "default.html" when the Content-Type header contains text/html,
# otherwise nil. The header name is matched case-insensitively because the
# Net::HTTP path stores lower-cased header names.
def generate_file_name
  headers = @response_headers || {}
  entry = headers.find { |name, _value| name.to_s.downcase == 'content-type' }
  file_type = entry && entry.last
  return nil unless file_type

  return "default.html" if file_type =~ /text\/html/
  # TODO : smarter file name generation

  nil
end
|
340
|
+
|
341
|
+
end
|
342
|
+
|
343
|
+
|
344
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Sets up the global $logger unless one is already in place: reuse
# RAILS_DEFAULT_LOGGER when that constant is defined, otherwise log to
# STDERR at INFO level. A $logger that is nil counts as "not set".
if $logger.nil?
  if defined?( RAILS_DEFAULT_LOGGER )
    $logger = RAILS_DEFAULT_LOGGER
  else
    require 'logger'
    $logger = Logger.new( STDERR )
    $logger.level = Logger::INFO
  end
end

$logger.debug "initialized logging facility..."
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module Downspout
|
5
|
+
|
6
|
+
class Tmpfile < File
|
7
|
+
|
8
|
+
# accepts an options hash which can include either or both of :name and :prefix
|
9
|
+
# then creates a Tempfile with the optionally given name in a unique sub-folder
|
10
|
+
# of the configured directory, optionally named with the prefix string.
|
11
|
+
# The unique folder name includes the prefix, a sortable date, the PID of
|
12
|
+
# the download process, and a randomly generated sequence of characters.
|
13
|
+
#
|
14
|
+
# => "/tmp/downloads/my-app-20110203-59488-1run8k2-0/desired-file-name.txt"
|
15
|
+
#
|
16
|
+
# call-seq:
|
17
|
+
# Downspout::Tmpfile.new( :name => 'desired-file-name.txt', :prefix => 'my-app' )
|
18
|
+
#
|
19
|
+
# Creates the file inside a freshly named sub-directory of the configured
# temp directory.
#
# options - optional Hash with :name (file name, default
#           "downloaded_file.tmp") and/or :prefix (directory prefix,
#           default 'downspout'). Entries whose value is nil are ignored
#           so they cannot displace a default.
#
# Raises "MakeDir Error" when the unique directory does not exist after
# attempting to create it.
def initialize( options = nil )
  # make sure the configured directory exists
  FileUtils.mkdir_p( Downspout::Config.tmp_dir )

  settings = { :prefix => 'downspout', :name => "downloaded_file.tmp" }

  # overwrite defaults with the given (non-nil) options
  unless options.nil?
    options.each { |key, value| settings[key] = value unless value.nil? }
  end

  # create a unique file path from the given options
  unique_path = File.join( Downspout::Config.tmp_dir, tmp_dir_name( settings[:prefix] ), settings[:name] )
  unique_dir = File.dirname( unique_path )

  # make sure the unique directory exists
  $logger.debug("downspout | tmpfile | initialize | Creating unique directory : #{unique_dir}")
  FileUtils.mkdir_p( unique_dir )
  raise "MakeDir Error" unless File.exist?( unique_dir )

  super( unique_path, File::CREAT, 0644 )
end
|
39
|
+
|
40
|
+
# Recursively removes entries of dir_path whose modification time is more
# than `delay` minutes in the past.
#
# dir_path - directory to sweep.
# delay    - age threshold in minutes; anything that is not an Integer
#            falls back to 30.
#
# Returns false when dir_path does not exist or when some stale
# sub-directory could not be removed (for example because it still holds
# newer files); otherwise true. A directory that cannot be removed no
# longer stops the sweep of the remaining entries.
def self.clean_dir( dir_path, delay=30 ) #:nodoc:
  delay = 30 unless delay.is_a?( Integer )
  cutoff = Time.now - ( delay * 60 )

  return false unless File.exist?( dir_path )

  $logger.debug( "downspout | tmpfile | clean_dir | start | Entries : #{Dir.entries( dir_path ).size}" )

  all_removed = true

  ( Dir.entries( dir_path ) - ['.', '..'] ).each do |item|
    $logger.debug( "downspout | tmpfile | clean_dir | sub item : #{item}" )

    item_path = File.join( dir_path, item )

    # skip entries modified within the last `delay` minutes
    next unless File.mtime( item_path ) < cutoff

    if File.directory?( item_path )
      $logger.debug( "downspout | tmpfile | clean_dir | Removing Directory : #{item}/*" )

      clean_dir( item_path, delay )

      begin
        FileUtils.rmdir( item_path )
      rescue SystemCallError => e
        $logger.debug( "downspout | tmpfile | clean_dir | Exception : #{e}" )
        all_removed = false
      end
    else
      $logger.debug( "downspout | tmpfile | clean_dir | Removing Item : #{item}" )

      FileUtils.rm( item_path )
    end
  end

  $logger.debug( "downspout | tmpfile | clean_download_dir | finish | Entries : #{Dir.entries( dir_path ).size}" )
  all_removed
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
# Builds a directory name of the form
# "<prefix>-<YYYYMMDD>-<pid>-<random base-36 token>-<n>".
#
# prefix - leading label for the directory name.
# n      - trailing number; defaults to a random integer below 9.
def tmp_dir_name( prefix, n=rand(9) )
  stamp = Time.now.strftime("%Y%m%d")
  token = rand(0x100000000).to_s(36)
  "#{prefix}-#{stamp}-#{$$}-#{token}-#{n}"
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# Utility method for periodically removing download files from the configured directory.
|
96
|
+
# Expects an integer as the number of minutes 'old' a file should be before removal.
|
97
|
+
# Sweeps the configured temp directory via Tmpfile.clean_dir.
#
# delay - age in minutes a file must exceed before removal (default 30).
#
# Returns whatever Tmpfile.clean_dir returns.
def self.clean_download_dir( delay=30 )
  Tmpfile.clean_dir( Downspout::Config.tmp_dir, delay )
end
|
100
|
+
|
101
|
+
end
|
data/lib/downspout.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
# common dependencies
|
4
|
+
require 'fileutils'
|
5
|
+
require 'uri'
|
6
|
+
require 'net/http'
|
7
|
+
require 'net/ftp'
|
8
|
+
|
9
|
+
# unusual dependencies
|
10
|
+
require 'curb'
|
11
|
+
|
12
|
+
# add this directory to the path...
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
|
15
|
+
# customized logger
|
16
|
+
require 'downspout/logger'
|
17
|
+
|
18
|
+
# required components
|
19
|
+
require 'downspout/base'
|
20
|
+
require 'downspout/config'
|
21
|
+
require 'downspout/credential'
|
22
|
+
require 'downspout/tmp_file'
|
23
|
+
require 'downspout/downloader'
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'test_servlet'
|
3
|
+
|
4
|
+
# Integration tests that exercise Downspout against a local WEBrick server
# on port 8899 (plus one test that reaches out to www.w3.org).
class DownspoutTest < Test::Unit::TestCase

  # Prepares the shared test URLs and the shared WEBrick server. The server
  # object and its thread live in globals so they are created only once;
  # the mounts are re-applied on every run of setup.
  def setup
    $test_ws_root ||= Test::App.root
    $test_read_me_url = "http://127.0.0.1:8899/READ_ME.rdoc"
    $test_image_url = "http://127.0.0.1:8899/images/ruby.png"

    # TODO : Use Log/BasicLog from WEBrick to reduce spam in tests
    $test_ws ||= WEBrick::HTTPServer.new(:Port => 8899,
                                         :DocumentRoot => $test_ws_root,
                                         :Logger => $logger,
                                         :AccessLog => [])

    $test_ws.mount(TestServlet.path, TestServlet)

    two_deep_proc = Proc.new { |req, resp|
      resp.body = "2-deep redirector proc mounted on #{req.script_name}"
      resp.set_redirect( HTTPStatus::MovedPermanently, '/one/deep')
    }

    $test_ws.mount('/two/deep/', HTTPServlet::ProcHandler.new(two_deep_proc) )

    redir_proc = Proc.new { |req, resp|
      resp.body = "redirector proc mounted on #{req.script_name}"
      resp.set_redirect( HTTPStatus::MovedPermanently, '/READ_ME.rdoc')
    }

    $test_ws.mount('/one/deep/', HTTPServlet::ProcHandler.new(redir_proc) )

    $test_ws.mount("/images", HTTPServlet::FileHandler,
      File.join( Test::App.root, "test", "fixtures"), {:FancyIndexing => true} )

    # start the server once; a second start on the same server would fail
    $test_ws_thread ||= Thread.new { $test_ws.start }
  end

  def test_download_rdoc_from_servlet
    some_url = $test_read_me_url

    dl = Downspout.fetch_url( some_url )

    assert_not_nil dl

    assert File.exist?( dl.path )
  end

  # NOTE: requires outbound network access to www.w3.org
  def test_by_fetching_image_from_w3c
    some_url = "http://www.w3.org/wiki/images/2/2e/Ruby01.png"

    dl = Downspout.fetch_url( some_url )

    assert_not_nil dl

    assert File.exist?( dl.path )
  end

  context "Downspout" do
    should "define the test URL" do
      assert_not_nil $test_read_me_url
    end

    context "for HTTP URLs" do
      should "download file via HTTP from local TestServlet" do
        assert Downspout.fetch_url( $test_read_me_url )
      end
    end

    should "fail with Curb error in case of excessive redirects" do
      two_deep_url = "http://127.0.0.1:8899/two/deep?curby"

      assert_raise Curl::Err::TooManyRedirectsError do
        Downspout.fetch_url( two_deep_url )
      end
    end

    context "with Curb disabled" do
      setup do
        Downspout::Config.disable_curb!
      end

      should "fail with Downspout error in case of excessive redirects" do
        two_deep_url = "http://127.0.0.1:8899/two/deep?no-curb"

        assert_raise Downspout::ExcessiveRedirects do
          Downspout.fetch_url( two_deep_url )
        end
      end

      teardown do
        Downspout::Config.enable_curb!
      end
    end

    teardown do
      Downspout.clean_download_dir( 0 )
    end

  end

  should "fail due to excessive redirects" do
    @obj = Downspout::Downloader.new( :url => "http://127.0.0.1:8899/two/deep?over-draft" )

    assert_raise Downspout::ExcessiveRedirects do
      @obj.send('net_http_fetch', @obj.url, 1 ) # uses send to bypass Curb and force Net::HTTP
    end
  end

  should "fail due to DNS error" do
    @obj = Downspout::Downloader.new( :url => "http://fu.man.chu/deep/nested/resource?over-draft" )

    assert_raise SocketError do
      @obj.send('net_http_fetch', @obj.url) # uses send to bypass Curb and force Net::HTTP
    end
  end

  should "download to custom path" do
    gfx_path = File.join( Test::App.root, "tmp", "download-test", "image.png" )
    FileUtils.mkdir_p( File.dirname( gfx_path ) )
    # drop any leftover from an earlier run so the assertion is meaningful
    FileUtils.rm_f( gfx_path )

    Downspout.download_url_to_path( $test_image_url, gfx_path )

    assert File.exist?( gfx_path )
  end

end
|
Binary file
|
data/test/servlet.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'test_servlet'
|
2
|
+
|
3
|
+
# Stand-alone launcher for the test web server on port 8899: mounts the
# TestServlet and two redirecting handlers (/two/deep -> /one/deep ->
# /READ_ME.rdoc), starts the server in a background thread and prints the
# URL to request.
ws_root = File.expand_path( File.dirname( File.dirname( __FILE__ ) ) )

ws_app = WEBrick::HTTPServer.new(:Port => 8899,
                                 :DocumentRoot => ws_root,
                                 :Logger => Log.new(nil, BasicLog::WARN), # Log/BasicLog from WEBrick - reduces spam in tests
                                 # :Logger => $logger,
                                 :AccessLog => [])

ws_app.mount(TestServlet.path, TestServlet)

two_deep_proc = Proc.new { |req, resp|
  resp.body = "2-deep redirector proc mounted on #{req.script_name}"
  resp.set_redirect( HTTPStatus::MovedPermanently, '/one/deep')
}

ws_app.mount('/two/deep/', HTTPServlet::ProcHandler.new(two_deep_proc) )

redir_proc = Proc.new { |req, resp|
  resp.body = "redirector proc mounted on #{req.script_name}"
  resp.set_redirect( HTTPStatus::MovedPermanently, '/READ_ME.rdoc')
}

ws_app.mount('/one/deep/', HTTPServlet::ProcHandler.new(redir_proc) )

# NOTE(review): the thread is not joined here, so the server only lives as
# long as the process that loaded this script - confirm that is intended.
ws_thread = Thread.new { ws_app.start }

# NOTE(review): the redirect above targets READ_ME.rdoc while this URL ends
# in READ_ME.doc - confirm which name is meant.
read_me_url = "#{TestServlet.url}/READ_ME.doc"

puts "Request #{read_me_url}"
|
32
|
+
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
require 'mocha'
|
5
|
+
|
6
|
+
begin
|
7
|
+
require 'redgreen'
|
8
|
+
rescue LoadError
|
9
|
+
# nice to have, aesthetic, not functional
|
10
|
+
end
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
|
15
|
+
class Test::Unit::TestCase
|
16
|
+
end
|
17
|
+
|
18
|
+
module Test
  module App
    # Absolute path of the directory one level above the directory that
    # contains this file. Resolved from the expanded file path, so it is
    # also correct when __FILE__ is a bare relative file name.
    def self.root
      File.expand_path( '../..', __FILE__ )
    end
  end
end
|
25
|
+
|
26
|
+
# test_logger must be loaded before downspout
|
27
|
+
require 'test_logger'
|
28
|
+
require 'test_servlet'
|
29
|
+
|
30
|
+
# The object of our affections
|
31
|
+
require 'downspout'
|
data/test/test_logger.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
# Chooses the global $logger for the test run and sets it to DEBUG level.
# Outside Rails, log to tmp/log/test.log under the project directory,
# falling back to STDERR when the log file cannot be prepared. Under Rails
# an existing $logger is kept, otherwise RAILS_DEFAULT_LOGGER is used.
if defined?( RAILS_DEFAULT_LOGGER )
  $logger ||= RAILS_DEFAULT_LOGGER
else
  root_dir = File.dirname( File.dirname(__FILE__) )
  test_log_path = File.join( root_dir, 'tmp', 'log', 'test.log')
  log_dir = File.dirname( test_log_path )

  begin
    require 'fileutils'
    # creates the log directory if possible
    FileUtils.mkdir_p( log_dir )

    # touching the file ensures it is writable
    FileUtils.touch( test_log_path )

    $logger = File.exist?( log_dir ) ? Logger.new( test_log_path ) : Logger.new( STDERR )
  rescue StandardError => e
    # ignore the error and try to carry on...
    $logger = Logger.new( STDERR )
    $logger.warn "Failed to create log file due to exception : #{e}"
  end
end

$logger.level = Logger::DEBUG
|