fetcher 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +1 -0
- data/README.markdown +9 -1
- data/lib/fetcher.rb +8 -1
- data/lib/fetcher/runner.rb +91 -3
- data/lib/fetcher/worker.rb +96 -0
- metadata +5 -4
data/Manifest.txt
CHANGED
data/README.markdown
CHANGED
@@ -10,7 +10,15 @@ TBD
|
|
10
10
|
|
11
11
|
require 'fetcher'
|
12
12
|
|
13
|
-
Fetcher.
|
13
|
+
Fetcher.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
|
14
|
+
|
15
|
+
or
|
16
|
+
|
17
|
+
logger = Logger.new( STDOUT )
|
18
|
+
worker = Fetcher::Worker.new( logger )
|
19
|
+
worker.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
|
20
|
+
|
21
|
+
|
14
22
|
|
15
23
|
## Install
|
16
24
|
|
data/lib/fetcher.rb
CHANGED
@@ -11,16 +11,23 @@ require 'pp'
|
|
11
11
|
require 'logger'
|
12
12
|
require 'optparse'
|
13
13
|
require 'fileutils'
|
14
|
+
require 'uri'
|
15
|
+
require 'net/http'
|
16
|
+
require 'net/https'
|
17
|
+
require 'ostruct'
|
18
|
+
require 'date'
|
19
|
+
require 'cgi'
|
14
20
|
|
15
21
|
|
16
22
|
# our own code
|
17
23
|
|
18
24
|
require 'fetcher/runner'
|
25
|
+
require 'fetcher/worker'
|
19
26
|
|
20
27
|
|
21
28
|
module Fetcher
|
22
29
|
|
23
|
-
VERSION = '0.0
|
30
|
+
VERSION = '0.1.0'
|
24
31
|
|
25
32
|
# version string for generator meta tag (includes ruby version)
|
26
33
|
def self.banner
|
data/lib/fetcher/runner.rb
CHANGED
@@ -1,13 +1,101 @@
|
|
1
1
|
|
2
2
|
module Fetcher
|
3
3
|
|
4
|
+
# Convenience shortcut: downloads +src+ and stores it at +dest+ using a
# throw-away Worker that logs to STDOUT.
#
# src  - source url (String)
# dest - path of the file to write (String)
#
# Returns whatever Worker#copy returns.
def self.copy( src, dest )
  stdout_logger = Logger.new( STDOUT )
  worker        = Worker.new( stdout_logger )
  worker.copy( src, dest )
end
|
7
|
+
|
8
|
+
|
9
|
+
# Holds the command line options collected by the option parser.
# Keys are normalized to strings, so symbols and strings are interchangeable.
class Opts

  def initialize
    @hash = Hash.new
  end

  # Stores +value+ under the string form of +key+.
  def put( key, value )
    @hash.store( key.to_s, value )
  end

  # Returns the value stored under 'output', or '.' when no
  # output option was stored.
  def output_path
    @hash.key?( 'output' ) ? @hash[ 'output' ] : '.'
  end

end # class Opts
|
24
|
+
|
25
|
+
|
4
26
|
# Command line front end: parses the options, derives a destination file
# name for every uri argument and hands the download to a Worker.
class Runner

  attr_reader :logger
  attr_reader :opts

  def initialize
    @logger = Logger.new(STDOUT)
    @logger.level = Logger::INFO
    @opts = Opts.new
  end

  # Runs the command line tool.
  #
  # args - array of command line arguments; recognized switches are removed
  #        from it in place (OptionParser#parse!), the remaining entries are
  #        treated as source uris.
  #
  # Each uri is saved inside the directory given with -o/--output
  # (default '.'); the directory is created when it does not exist yet.
  def run( args )
    opt = OptionParser.new do |cmd|

      cmd.banner = "Usage: fetch [options] uri"

      cmd.on( '-o', '--output PATH', 'Output Path' ) { |s| opts.put( 'output', s ) }

      # todo: find different letter for debug trace switch (use v for version?)
      cmd.on( "-v", "--verbose", "Show debug trace" ) do
        logger.datetime_format = "%H:%M:%S"   # hours:minutes:seconds
        logger.level = Logger::DEBUG
      end

      cmd.on_tail( "-h", "--help", "Show this message" ) do
        # rendered lazily so that the option summary also lists -h/--help
        puts usage_text( cmd )
        exit
      end
    end

    opt.parse!( args )

    puts Fetcher.banner

    args.each do |arg|

      src = arg
      uri = URI.parse( src )

      logger.debug "uri.host=<#{uri.host}>, uri.path=<#{uri.path}>"

      # honor the output path option (defaults to the current directory)
      FileUtils.mkdir_p( opts.output_path )
      dest = File.join( opts.output_path, dest_name( uri ) )

      Worker.new( logger ).copy( src, dest )

    end # each arg

    puts "Done."

  end # method run

private

  # Builds a flat file name for +uri+: the host alone for the root path,
  # otherwise "host@path" with blanks and dashes in the path replaced by
  # underscores and (back)slashes replaced by dashes.
  def dest_name( uri )
    if uri.path == '/' || uri.path == ''
      "#{uri.host}"
    else
      "#{uri.host}@#{uri.path.gsub( /[ \-]/, '_').gsub( /[\/\\]/, '-')}"
    end
  end

  # Returns the help screen for the option parser +cmd+.
  def usage_text( cmd )
    <<EOS

fetch - Lets you fetch text documents or binary blobs via HTTP, HTTPS.

#{cmd.help}

Examples:
fetch http://geraldb.github.com/rubybook/hoe.html
fetch -o downloads http://geraldb.github.com/rubybook/hoe.html

Further information:
http://geraldb.github.com/fetcher

EOS
  end

end # class Runner
|
11
99
|
|
12
100
|
end # module Fetcher
|
13
101
|
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Fetcher
|
2
|
+
|
3
|
+
# Downloads a document via HTTP or HTTPS (following redirects and honoring
# the HTTP_PROXY / http_proxy environment variable) and saves it to a file.
class Worker

  # maximum number of requests issued for a single copy (first request
  # plus redirects)
  REDIRECT_LIMIT = 4

  attr_reader :logger

  # logger - Logger used for the debug trace
  def initialize( logger )
    @logger = logger
  end


  ## todo: add file protocol

  # Fetches +src+ and writes the response body to the file +dest+.
  #
  # src  - http or https url (String)
  # dest - path of the file to write (String)
  #
  # Returns the result of the file write, or nil when the server answered
  # with an error status (the error is printed to stdout).
  # Raises ArgumentError for non http(s) sources and when more than
  # REDIRECT_LIMIT requests would be needed.
  def copy( src, dest )
    logger.debug "fetch( src: #{src}, dest: #{dest} )"

    uri = URI.parse( src )
    unless http_uri?( uri )
      raise ArgumentError, "unsupported source <#{src}>; only http and https urls are supported"
    end

    response = fetch_response( uri, proxy_class )
    return if response.nil?

    logger.debug "  content_type: #{response.content_type}, content_length: #{response.content_length}"

    File.open( dest, write_flags( response.content_type ) ) do |f|
      f.write( response.body )
    end
  end # method copy

private

  # True for URI::HTTP / URI::HTTPS uris that carry a host.
  def http_uri?( uri )
    uri.is_a?( URI::HTTP ) && !uri.host.nil?   # URI::HTTPS is a subclass of URI::HTTP
  end

  # Returns the Net::HTTP (proxy) class to connect with, configured from
  # the env variable HTTP_PROXY (or lower case http_proxy on *nix systems).
  def proxy_class
    proxy_url = [ENV['HTTP_PROXY'], ENV['http_proxy']].find { |value| value && !value.empty? }

    if proxy_url.nil?
      logger.debug "using direct net http access; no proxy configured"
      return Net::HTTP::Proxy( nil, nil, nil, nil )
    end

    proxy = URI.parse( proxy_url )
    logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
    if proxy.user && proxy.password
      logger.debug "  using credentials: proxy.user=#{proxy.user}, proxy.password=****"
    else
      logger.debug "  using no credentials"
    end

    Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
  end

  # Issues GET requests starting at +uri+ and follows 301/302/303/307
  # redirects. Returns the 200 response, or nil after printing the error
  # for any other outcome.
  def fetch_response( uri, http_class )
    redirect_limit = REDIRECT_LIMIT

    loop do
      raise ArgumentError, 'HTTP redirect too deep' if redirect_limit == 0
      redirect_limit -= 1

      http = http_class.new( uri.host, uri.port )
      if uri.scheme == 'https'
        http.use_ssl = true
        # NOTE(review): certificate verification is switched off, i.e. the
        # server identity is not checked; kept as is to preserve behavior.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"

      request  = Net::HTTP::Get.new( uri.request_uri, { 'User-Agent'=> 'slideshow'} )
      response = http.request( request )

      case response.code
      when '200'
        logger.debug "#{response.code} #{response.message}"
        return response
      when '301', '302', '303', '307'
        # 301 = moved permanently
        # 302 = found
        # 303 = see other
        # 307 = temporary redirect
        location = response['location']
        logger.debug "#{response.code} #{response.message} location=#{location}"

        if location.nil? || location.empty?
          puts "*** error: #{response.code} #{response.message} (redirect without location)"
          return nil
        end

        newuri = URI.parse( location )
        if newuri.relative?
          logger.debug "url relative; try to make it absolute"
          newuri = uri + location
        end

        unless http_uri?( newuri )
          raise ArgumentError, "unsupported redirect target <#{location}>; only http and https urls are supported"
        end
        uri = newuri
      else
        puts "*** error: #{response.code} #{response.message}"
        return nil    # todo: throw StandardException?
      end
    end
  end

  # Returns the File.open mode for a body of the given content type:
  # text mode for text/* documents, binary mode for everything else so
  # that blobs (images, archives, pdfs, ...) are stored byte for byte.
  def write_flags( content_type )
    return 'w' if content_type =~ /\Atext\//

    logger.debug '  switching to binary'
    'wb'
  end

end # class Worker
|
95
|
+
|
96
|
+
end # module Fetcher
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gerald Bauer
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-06-
|
18
|
+
date: 2012-06-03 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rdoc
|
@@ -63,6 +63,7 @@ files:
|
|
63
63
|
- bin/fetch
|
64
64
|
- lib/fetcher.rb
|
65
65
|
- lib/fetcher/runner.rb
|
66
|
+
- lib/fetcher/worker.rb
|
66
67
|
homepage: http://geraldb.github.com/fetcher
|
67
68
|
licenses: []
|
68
69
|
|