fetcher 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +1 -0
- data/README.markdown +9 -1
- data/lib/fetcher.rb +8 -1
- data/lib/fetcher/runner.rb +91 -3
- data/lib/fetcher/worker.rb +96 -0
- metadata +5 -4
data/Manifest.txt
CHANGED
data/README.markdown
CHANGED
@@ -10,7 +10,15 @@ TBD
|
|
10
10
|
|
11
11
|
require 'fetcher'
|
12
12
|
|
13
|
-
Fetcher.
|
13
|
+
Fetcher.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
|
14
|
+
|
15
|
+
or
|
16
|
+
|
17
|
+
logger = Logger.new( STDOUT )
|
18
|
+
worker = Fetcher::Worker.new( logger )
|
19
|
+
worker.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
|
20
|
+
|
21
|
+
|
14
22
|
|
15
23
|
## Install
|
16
24
|
|
data/lib/fetcher.rb
CHANGED
@@ -11,16 +11,23 @@ require 'pp'
|
|
11
11
|
require 'logger'
|
12
12
|
require 'optparse'
|
13
13
|
require 'fileutils'
|
14
|
+
require 'uri'
|
15
|
+
require 'net/http'
|
16
|
+
require 'net/https'
|
17
|
+
require 'ostruct'
|
18
|
+
require 'date'
|
19
|
+
require 'cgi'
|
14
20
|
|
15
21
|
|
16
22
|
# our own code
|
17
23
|
|
18
24
|
require 'fetcher/runner'
|
25
|
+
require 'fetcher/worker'
|
19
26
|
|
20
27
|
|
21
28
|
module Fetcher
|
22
29
|
|
23
|
-
VERSION = '0.0
|
30
|
+
VERSION = '0.1.0'
|
24
31
|
|
25
32
|
# version string for generator meta tag (includes ruby version)
|
26
33
|
def self.banner
|
data/lib/fetcher/runner.rb
CHANGED
@@ -1,13 +1,101 @@
|
|
1
1
|
|
2
2
|
module Fetcher
|
3
3
|
|
4
|
+
# Convenience entry point: fetches +src+ (a URL string) and writes the
# response body to the file path +dest+.
#
# Builds a fresh Logger on STDOUT, wraps it in a Worker and delegates to
# Worker#copy; whatever Worker#copy returns is returned unchanged.
def self.copy( src, dest )
  Worker.new( Logger.new(STDOUT) ).copy( src, dest )
end
|
7
|
+
|
8
|
+
|
9
|
+
# Small key/value store for command line options.
# Keys are normalized to strings, so symbols and strings are interchangeable.
class Opts

  # Starts with no options set.
  def initialize
    @hash = {}
  end

  # Stores +value+ under +key+ (converted with to_s); returns +value+.
  def put( key, value )
    @hash[ key.to_s ] = value
  end

  # Returns the value stored under 'output', or '.' when none was set.
  def output_path
    @hash.fetch( 'output', '.' )
  end

end # class Opts
|
24
|
+
|
25
|
+
|
4
26
|
# Command line front end: parses the options, then downloads every
# remaining argument (a URI) through a Worker.
class Runner

  attr_reader :logger
  attr_reader :opts

  # Sets up an INFO-level logger on STDOUT and an empty option store.
  def initialize
    @logger = Logger.new(STDOUT)
    @logger.level = Logger::INFO
    @opts = Opts.new
  end

  # Parses +args+ (option switches are removed from the array in place by
  # parse!), prints the banner, and copies each remaining argument to a
  # file below the output path (default '.'), printing "Done." at the end.
  #
  # -o/--output PATH  directory the downloads are written to
  # -v/--verbose      switch the logger to DEBUG
  # -h/--help         print usage and exit
  def run( args )
    opt = OptionParser.new do |cmd|

      cmd.banner = "Usage: fetch [options] uri"

      cmd.on( '-o', '--output PATH', 'Output Path' ) { |s| opts.put( 'output', s ) }

      # todo: find different letter for debug trace switch (use v for version?)
      cmd.on( "-v", "--verbose", "Show debug trace" ) do
        # hours:minutes:seconds (was "%H:%H:%S", which printed the hour twice)
        logger.datetime_format = "%H:%M:%S"
        logger.level = Logger::DEBUG
      end

      # note: cmd.help is interpolated here, i.e. before -h/--help is
      # registered below, so the usage text lists only the switches above
      usage =<<EOS

fetch - Lets you fetch text documents or binary blobs via HTTP, HTTPS.

#{cmd.help}

Examples:
  fetch http://geraldb.github.com/rubybook/hoe.html
  fetch -o downloads http://geraldb.github.com/rubybook/hoe.html

Further information:
  http://geraldb.github.com/fetcher

EOS

      cmd.on_tail( "-h", "--help", "Show this message" ) do
        puts usage
        exit
      end
    end

    opt.parse!( args )

    puts Fetcher.banner

    args.each do |arg|
      uri = URI.parse( arg )

      logger.debug "uri.host=<#{uri.host}>, uri.path=<#{uri.path}>"

      # honor -o/--output: make sure the directory exists and write below it
      # (with the default '.' this is the current directory, as before)
      FileUtils.mkdir_p( opts.output_path )
      dest = File.join( opts.output_path, dest_name( uri ) )

      Worker.new( logger ).copy( arg, dest )
    end # each arg

    puts "Done."
  end # method run

private

  # Derives a flat file name from +uri+: just the host when the path is
  # empty or '/', otherwise "host@path" with spaces and dashes in the path
  # turned into '_' and slashes/backslashes turned into '-'.
  def dest_name( uri )
    if uri.path == '/' || uri.path == ''
      "#{uri.host}"
    else
      "#{uri.host}@#{uri.path.gsub( /[ \-]/, '_').gsub( /[\/\\]/, '-')}"
    end
  end

end # class Runner
|
11
99
|
|
12
100
|
end # module Fetcher
|
13
101
|
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Fetcher
|
2
|
+
|
3
|
+
# Downloads a single HTTP/HTTPS resource and stores it in a local file.
class Worker

  attr_reader :logger

  # logger:: object used for the debug trace (debug is the only level called)
  def initialize( logger )
    @logger = logger
  end


  ## todo: add file protocol

  # Fetches +src+ (URL string) with GET, following redirects, and writes
  # the response body to the file +dest+.
  #
  # Returns the result of the final write on success. For a response that
  # is neither 200 nor a followed redirect, an "*** error: ..." line is
  # printed to stdout and nil is returned without touching +dest+.
  #
  # Raises ArgumentError ('HTTP redirect too deep') once four requests have
  # been made without reaching a 200 response.
  def copy( src, dest )
    logger.debug "fetch( src: #{src}, dest: #{dest} )"

    uri        = URI.parse( src )
    http_class = http_class_for_env_proxy

    response = get_following_redirects( http_class, uri )
    return if response.nil?   # error already reported  # todo: throw StandardException?

    logger.debug "  content_type: #{response.content_type}, content_length: #{response.content_length}"

    File.open( dest, write_flags( response.content_type ) ) do |f|
      f.write( response.body )
    end
  end # method copy

private

  # Returns the class used to open connections: a Net::HTTP proxy class
  # built from the HTTP_PROXY (or lower-case http_proxy) environment
  # variable, or plain Net::HTTP when neither is set.
  def http_class_for_env_proxy
    # upper-case name first, lower-case spelling as fallback
    proxy = ENV['HTTP_PROXY'] || ENV['http_proxy']

    if proxy
      proxy = URI.parse( proxy )
      logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
      if proxy.user && proxy.password
        logger.debug "  using credentials: proxy.user=#{proxy.user}, proxy.password=****"
      else
        logger.debug "  using no credentials"
      end
      Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
    else
      logger.debug "using direct net http access; no proxy configured"
      Net::HTTP
    end
  end

  # Issues GET requests starting at +uri+ and follows 301/302/303/307/308
  # redirects. Returns the 200 response, or nil (after printing an error
  # line) for any other status code.
  def get_following_redirects( http_class, uri )
    redirect_limit = 4   # total number of requests allowed, redirects included

    loop do
      raise ArgumentError, 'HTTP redirect too deep' if redirect_limit == 0
      redirect_limit -= 1

      http = http_class.new( uri.host, uri.port )

      logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"

      # NOTE(review): the user agent reads 'slideshow', presumably carried
      #  over from another project - confirm before changing
      request = Net::HTTP::Get.new( uri.request_uri, { 'User-Agent'=> 'slideshow'} )
      if uri.instance_of? URI::HTTPS
        http.use_ssl = true
        # NOTE(review): VERIFY_NONE turns off certificate verification;
        #  kept as is because switching to VERIFY_PEER may break existing
        #  setups - decide deliberately
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      response = http.request( request )

      case response.code
      when '200'
        logger.debug "#{response.code} #{response.message}"
        return response
      when '301', '302', '303', '307', '308'
        # 301 = moved permanently
        # 302 = found
        # 303 = see other
        # 307 = temporary redirect
        # 308 = permanent redirect
        location = response['location']
        logger.debug "#{response.code} #{response.message} location=#{location}"
        newuri = URI.parse( location )
        if newuri.relative?
          logger.debug "url relative; try to make it absolute"
          newuri = uri + location
        end
        uri = newuri
      else
        msg = "#{response.code} #{response.message}"
        puts "*** error: #{msg}"
        return nil
      end
    end
  end

  # Picks the File.open mode for +content_type+: text mode ('w') for text/*
  # or an unknown (nil) type, binary ('wb') for everything else, so that
  # non-image blobs are no longer written in text mode.
  def write_flags( content_type )
    if content_type.nil? || content_type =~ /\Atext\//
      'w'
    else
      logger.debug '  switching to binary'
      'wb'
    end
  end

end # class Worker
|
95
|
+
|
96
|
+
end # module Fetcher
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gerald Bauer
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-06-
|
18
|
+
date: 2012-06-03 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rdoc
|
@@ -63,6 +63,7 @@ files:
|
|
63
63
|
- bin/fetch
|
64
64
|
- lib/fetcher.rb
|
65
65
|
- lib/fetcher/runner.rb
|
66
|
+
- lib/fetcher/worker.rb
|
66
67
|
homepage: http://geraldb.github.com/fetcher
|
67
68
|
licenses: []
|
68
69
|
|