fetcher 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -5,3 +5,4 @@ Rakefile
5
5
  bin/fetch
6
6
  lib/fetcher.rb
7
7
  lib/fetcher/runner.rb
8
+ lib/fetcher/worker.rb
data/README.markdown CHANGED
@@ -10,7 +10,15 @@ TBD
10
10
 
11
11
  require 'fetcher'
12
12
 
13
- Fetcher.new.copy( '/tmp/hoe.html', 'http://geraldb.github.com/rubybook/hoe.html' )
13
+ Fetcher.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
14
+
15
+ or
16
+
17
+ logger = Logger.new( STDOUT )
18
+ worker = Fetcher::Worker.new( logger )
19
+ worker.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
20
+
21
+
14
22
 
15
23
  ## Install
16
24
 
data/lib/fetcher.rb CHANGED
@@ -11,16 +11,23 @@ require 'pp'
11
11
  require 'logger'
12
12
  require 'optparse'
13
13
  require 'fileutils'
14
+ require 'uri'
15
+ require 'net/http'
16
+ require 'net/https'
17
+ require 'ostruct'
18
+ require 'date'
19
+ require 'cgi'
14
20
 
15
21
 
16
22
  # our own code
17
23
 
18
24
  require 'fetcher/runner'
25
+ require 'fetcher/worker'
19
26
 
20
27
 
21
28
  module Fetcher
22
29
 
23
- VERSION = '0.0.1'
30
+ VERSION = '0.1.0'
24
31
 
25
32
  # version string for generator meta tag (includes ruby version)
26
33
  def self.banner
@@ -1,13 +1,101 @@
1
1
 
2
2
  module Fetcher
3
3
 
4
# Convenience entry point: fetches +src+ and stores it in the file +dest+
# by delegating to a freshly created Worker whose logger writes to STDOUT.
#
# src  - source location, handed to Worker#copy unchanged
# dest - destination file name, handed to Worker#copy unchanged
#
# Returns whatever Worker#copy returns.
def self.copy( src, dest )
  stdout_logger = Logger.new( STDOUT )
  worker        = Worker.new( stdout_logger )
  worker.copy( src, dest )
end
7
+
8
+
9
# Small holder for command-line settings. Keys are normalized to strings,
# so symbols and strings address the same entry.
class Opts

  def initialize
    @hash = Hash.new
  end

  # Stores +value+ under +key+ (converted with #to_s); returns +value+.
  def put( key, value )
    @hash.store( key.to_s, value )
  end

  # Output path recorded under 'output'; '.' when nothing was recorded.
  def output_path
    return '.' unless @hash.key?( 'output' )
    @hash[ 'output' ]
  end

end # class Opts
24
+
25
+
4
26
  class Runner
5
27
 
6
- def run(args)
7
- puts "hello from Runner.run"
28
+ attr_reader :logger
29
+ attr_reader :opts
30
+
31
+ def initialize
32
+ @logger = Logger.new(STDOUT)
33
+ @logger.level = Logger::INFO
34
+ @opts = Opts.new
8
35
  end
9
36
 
10
- end
37
+ def run( args )
38
+ opt=OptionParser.new do |cmd|
39
+
40
+ cmd.banner = "Usage: fetch [options] uri"
41
+
42
+ cmd.on( '-o', '--output PATH', 'Output Path' ) { |s| opts.put( 'output', s ) }
43
+
44
+ # todo: find different letter for debug trace switch (use v for version?)
45
+ cmd.on( "-v", "--verbose", "Show debug trace" ) do
46
+ logger.datetime_format = "%H:%H:%S"
47
+ logger.level = Logger::DEBUG
48
+ end
49
+
50
+ usage =<<EOS
51
+
52
+ fetch - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
53
+
54
+ #{cmd.help}
55
+
56
+ Examples:
57
+ fetch http://geraldb.github.com/rubybook/hoe.html
58
+ fetch -o downloads http://geraldb.github.com/rubybook/hoe.html
59
+
60
+ Further information:
61
+ http://geraldb.github.com/fetcher
62
+
63
+ EOS
64
+
65
+ cmd.on_tail( "-h", "--help", "Show this message" ) do
66
+ puts usage
67
+ exit
68
+ end
69
+ end
70
+
71
+ opt.parse!( args )
72
+
73
+ puts Fetcher.banner
74
+
75
+ args.each do |arg|
76
+
77
+ src = arg
78
+ uri = URI.parse( src )
79
+
80
+ logger.debug "uri.host=<#{uri.host}>, uri.path=<#{uri.path}>"
81
+
82
+ if uri.path == '/' || uri.path == ''
83
+ dest = "#{uri.host}"
84
+ else
85
+ dest = "#{uri.host}@#{uri.path.gsub( /[ \-]/, '_').gsub( /[\/\\]/, '-')}"
86
+ end
87
+
88
+ ## todo: use output path option
89
+
90
+ Worker.new( logger ).copy( src, dest )
91
+
92
+ end # each arg
93
+
94
+ puts "Done."
95
+
96
+ end # method run
97
+
98
+ end # class Runner
11
99
 
12
100
  end # module Fetcher
13
101
 
@@ -0,0 +1,96 @@
1
module Fetcher

  # Downloads a single resource via HTTP or HTTPS (optionally through the
  # proxy named in the HTTP_PROXY / http_proxy environment variable) and
  # writes the response body to a local file.
  class Worker

    attr_reader :logger

    # logger - object used for debug tracing (must respond to #debug)
    def initialize( logger )
      @logger = logger
    end


    ## todo: add file protocol

    # Fetches +src+ and stores the response body in the file +dest+.
    #
    # src  - source URI as a string (http or https)
    # dest - path of the file to write
    #
    # Follows up to three redirects (301, 302, 303, 307); a fourth redirect
    # in a row raises ArgumentError ('HTTP redirect too deep').
    # For any other non-200 status an error line is printed and nil is
    # returned without touching +dest+.
    #
    # Returns the result of the file write on success, nil otherwise.
    def copy( src, dest )
      logger.debug "fetch( src: #{src}, dest: #{dest} )"

      response = fetch_response( URI.parse( src ), http_class )
      return if response.nil?   # status already reported by fetch_response

      logger.debug " content_type: #{response.content_type}, content_length: #{response.content_length}"

      flags = write_flags_for( response.content_type )
      logger.debug ' switching to binary' if flags == 'wb'

      File.open( dest, flags ) do |f|
        f.write( response.body )
      end
    end # method copy

    private

    # Picks the Net::HTTP (sub)class to connect with: a proxy class when
    # HTTP_PROXY (or lower-case http_proxy, as used on *nix systems) is set,
    # plain Net::HTTP otherwise.
    def http_class
      proxy = ENV['HTTP_PROXY'] || ENV['http_proxy']

      if proxy
        proxy = URI.parse( proxy )
        logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
        if proxy.user && proxy.password
          logger.debug " using credentials: proxy.user=#{proxy.user}, proxy.password=****"
        else
          logger.debug " using no credentials"
        end
        Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
      else
        logger.debug "using direct net http access; no proxy configured"
        Net::HTTP   # same class Net::HTTP::Proxy returns for a nil proxy host
      end
    end

    # Issues GET requests starting at +uri+, following redirects, until a
    # 200 response arrives. Returns that response, or nil (after printing an
    # error line) for any status that is neither 200 nor a handled redirect.
    def fetch_response( uri, http_class )
      redirect_limit = 4

      loop do
        raise ArgumentError, 'HTTP redirect too deep' if redirect_limit == 0
        redirect_limit -= 1

        http = http_class.new( uri.host, uri.port )

        logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"

        # NOTE(review): the User-Agent still says 'slideshow' - confirm whether
        #   it should identify this library instead
        request = Net::HTTP::Get.new( uri.request_uri, { 'User-Agent'=> 'slideshow'} )
        if uri.instance_of? URI::HTTPS
          http.use_ssl = true
          # NOTE(review): certificate verification is switched off here, so the
          #   server's identity is not checked - confirm this is intended
          http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end

        response = http.request( request )

        case response.code
        when '200'
          logger.debug "#{response.code} #{response.message}"
          return response
        when '301', '302', '303', '307'
          # 301 = moved permanently
          # 302 = found
          # 303 = see other
          # 307 = temporary redirect
          location = response['location']
          logger.debug "#{response.code} #{response.message} location=#{location}"
          newuri = URI.parse( location )
          if newuri.relative?
            logger.debug "url relative; try to make it absolute"
            newuri = uri + location
          end
          uri = newuri
        else
          msg = "#{response.code} #{response.message}"
          puts "*** error: #{msg}"
          return nil  # todo: throw StandardException?
        end
      end
    end

    # File open mode for a response of the given content type: text mode
    # ('w') only for text/* types; everything else - images, archives,
    # documents, or a missing content type - is written in binary mode ('wb')
    # so the bytes reach the file unchanged.
    def write_flags_for( content_type )
      content_type.to_s =~ /\Atext\// ? 'w' : 'wb'
    end

  end # class Worker

end # module Fetcher
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fetcher
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 0
9
8
  - 1
10
- version: 0.0.1
9
+ - 0
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Gerald Bauer
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-06-02 00:00:00 Z
18
+ date: 2012-06-03 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rdoc
@@ -63,6 +63,7 @@ files:
63
63
  - bin/fetch
64
64
  - lib/fetcher.rb
65
65
  - lib/fetcher/runner.rb
66
+ - lib/fetcher/worker.rb
66
67
  homepage: http://geraldb.github.com/fetcher
67
68
  licenses: []
68
69