fetcher 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -5,3 +5,4 @@ Rakefile
5
5
  bin/fetch
6
6
  lib/fetcher.rb
7
7
  lib/fetcher/runner.rb
8
+ lib/fetcher/worker.rb
data/README.markdown CHANGED
@@ -10,7 +10,15 @@ TBD
10
10
 
11
11
  require 'fetcher'
12
12
 
13
- Fetcher.new.copy( '/tmp/hoe.html', 'http://geraldb.github.com/rubybook/hoe.html' )
13
+ Fetcher.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
14
+
15
+ or
16
+
17
+ logger = Logger.new( STDOUT )
18
+ worker = Fetcher::Worker.new( logger )
19
+ worker.copy( 'http://geraldb.github.com/rubybook/hoe.html', '/tmp/hoe.html' )
20
+
21
+
14
22
 
15
23
  ## Install
16
24
 
data/lib/fetcher.rb CHANGED
@@ -11,16 +11,23 @@ require 'pp'
11
11
  require 'logger'
12
12
  require 'optparse'
13
13
  require 'fileutils'
14
+ require 'uri'
15
+ require 'net/http'
16
+ require 'net/https'
17
+ require 'ostruct'
18
+ require 'date'
19
+ require 'cgi'
14
20
 
15
21
 
16
22
  # our own code
17
23
 
18
24
  require 'fetcher/runner'
25
+ require 'fetcher/worker'
19
26
 
20
27
 
21
28
  module Fetcher
22
29
 
23
- VERSION = '0.0.1'
30
+ VERSION = '0.1.0'
24
31
 
25
32
  # version string for generator meta tag (includes ruby version)
26
33
  def self.banner
@@ -1,13 +1,101 @@
1
1
 
2
2
  module Fetcher
3
3
 
4
# Convenience shortcut: fetches +src+ and stores it at +dest+ using a
# throw-away Worker that logs to STDOUT.
#
# src  - source URI string
# dest - path of the file to write
#
# Returns whatever Worker#copy returns.
def self.copy( src, dest )
  stdout_logger = Logger.new( STDOUT )
  worker        = Worker.new( stdout_logger )
  worker.copy( src, dest )
end
7
+
8
+
9
# Small key/value store for the command line options.
# Keys are normalized to strings when stored.
class Opts

  # Starts out with no options set.
  def initialize
    @hash = Hash.new
  end

  # Remembers +value+ under +key+ (the key is converted with to_s).
  # Returns the stored value.
  def put( key, value )
    @hash.store( key.to_s, value )
  end

  # Returns the value stored under 'output'; falls back to '.' only when
  # the key was never set.
  def output_path
    @hash.fetch( 'output' ) { '.' }
  end

end # class Opts
24
+
25
+
4
26
  class Runner
5
27
 
6
- def run(args)
7
- puts "hello from Runner.run"
28
+ attr_reader :logger
29
+ attr_reader :opts
30
+
31
+ def initialize
32
+ @logger = Logger.new(STDOUT)
33
+ @logger.level = Logger::INFO
34
+ @opts = Opts.new
8
35
  end
9
36
 
10
- end
37
+ def run( args )
38
+ opt=OptionParser.new do |cmd|
39
+
40
+ cmd.banner = "Usage: fetch [options] uri"
41
+
42
+ cmd.on( '-o', '--output PATH', 'Output Path' ) { |s| opts.put( 'output', s ) }
43
+
44
+ # todo: find different letter for debug trace switch (use v for version?)
45
+ cmd.on( "-v", "--verbose", "Show debug trace" ) do
46
+ logger.datetime_format = "%H:%H:%S"
47
+ logger.level = Logger::DEBUG
48
+ end
49
+
50
+ usage =<<EOS
51
+
52
+ fetch - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
53
+
54
+ #{cmd.help}
55
+
56
+ Examples:
57
+ fetch http://geraldb.github.com/rubybook/hoe.html
58
+ fetch -o downloads http://geraldb.github.com/rubybook/hoe.html
59
+
60
+ Further information:
61
+ http://geraldb.github.com/fetcher
62
+
63
+ EOS
64
+
65
+ cmd.on_tail( "-h", "--help", "Show this message" ) do
66
+ puts usage
67
+ exit
68
+ end
69
+ end
70
+
71
+ opt.parse!( args )
72
+
73
+ puts Fetcher.banner
74
+
75
+ args.each do |arg|
76
+
77
+ src = arg
78
+ uri = URI.parse( src )
79
+
80
+ logger.debug "uri.host=<#{uri.host}>, uri.path=<#{uri.path}>"
81
+
82
+ if uri.path == '/' || uri.path == ''
83
+ dest = "#{uri.host}"
84
+ else
85
+ dest = "#{uri.host}@#{uri.path.gsub( /[ \-]/, '_').gsub( /[\/\\]/, '-')}"
86
+ end
87
+
88
+ ## todo: use output path option
89
+
90
+ Worker.new( logger ).copy( src, dest )
91
+
92
+ end # each arg
93
+
94
+ puts "Done."
95
+
96
+ end # method run
97
+
98
+ end # class Runner
11
99
 
12
100
  end # module Fetcher
13
101
 
@@ -0,0 +1,96 @@
1
+ module Fetcher
2
+
3
# Downloads a document via HTTP or HTTPS (following redirects and
# honoring the HTTP_PROXY / http_proxy environment variable) and stores
# the response body in a local file.
class Worker

  # status codes that get followed as redirects:
  #  301 = moved permanently
  #  302 = found
  #  303 = see other
  #  307 = temporary redirect
  #  308 = permanent redirect
  REDIRECT_CODES = %w[301 302 303 307 308].freeze

  # max. number of requests issued for a single copy (redirects included)
  REDIRECT_LIMIT = 4

  attr_reader :logger

  # logger - object used for the debug trace (needs to respond to debug)
  def initialize( logger )
    @logger = logger
  end


  ## todo: add file protocol

  # Fetches +src+ and writes the response body to +dest+.
  #
  # src  - source uri string (http or https)
  # dest - path of the file to write
  #
  # Returns the value of the final write on success; returns nil (after
  # printing an error line) if the server answers with anything else
  # than 200 or a followable redirect.
  # Raises ArgumentError if the redirect limit gets exceeded.
  def copy( src, dest )
    logger.debug "fetch( src: #{src}, dest: #{dest} )"

    response = get_response( URI.parse( src ) )
    return if response.nil?   # todo: throw StandardException?

    logger.debug "  content_type: #{response.content_type}, content_length: #{response.content_length}"

    # only text/* content gets written in text mode; everything else
    #  (images, pdfs, archives, unknown types) gets written as is using 'wb'
    if response.content_type.to_s.start_with?( 'text/' )
      flags = 'w'
    else
      logger.debug '  switching to binary'
      flags = 'wb'
    end

    File.open( dest, flags ) do |f|
      f.write( response.body )
    end
  end # method copy

  private

  # Returns the class used to create connections: a proxy-enabled
  # Net::HTTP class if HTTP_PROXY (or http_proxy) is set; otherwise plain
  # Net::HTTP.
  def proxy_class
    proxy = ENV['HTTP_PROXY'] || ENV['http_proxy']   # also try lower case variant (for *nix systems)

    if proxy
      proxy = URI.parse( proxy )
      logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
      if proxy.user && proxy.password
        logger.debug "  using credentials: proxy.user=#{proxy.user}, proxy.password=****"
      else
        logger.debug "  using no credentials"
      end
      Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
    else
      logger.debug "using direct net http access; no proxy configured"
      Net::HTTP
    end
  end

  # Issues GET requests starting at +uri+ until a 200 response shows up.
  #
  # Returns the 200 response or nil if the request failed (an error line
  # gets printed). Raises ArgumentError once REDIRECT_LIMIT requests
  # have been used up.
  def get_response( uri )
    http_class     = proxy_class
    redirect_limit = REDIRECT_LIMIT

    loop do
      raise ArgumentError, 'HTTP redirect too deep' if redirect_limit == 0
      redirect_limit -= 1

      http = http_class.new( uri.host, uri.port )

      logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"

      request = Net::HTTP::Get.new( uri.request_uri, { 'User-Agent'=> 'slideshow'} )
      if uri.is_a?( URI::HTTPS )
        http.use_ssl = true
        # verify the server certificate; VERIFY_NONE would accept any
        #  (forged) certificate
        http.verify_mode = OpenSSL::SSL::VERIFY_PEER
      end

      response = http.request( request )

      if response.code == '200'
        logger.debug "#{response.code} #{response.message}"
        return response
      elsif REDIRECT_CODES.include?( response.code )
        location = response['location']
        logger.debug "#{response.code} #{response.message} location=#{location}"
        if location.nil?
          puts "*** error: #{response.code} #{response.message} (redirect without location)"
          return nil
        end
        newuri = URI.parse( location )
        if newuri.relative?
          logger.debug "url relative; try to make it absolute"
          newuri = uri + location
        end
        uri = newuri
      else
        msg = "#{response.code} #{response.message}"
        puts "*** error: #{msg}"
        return nil
      end
    end
  end

end # class Worker
95
+
96
+ end # module Fetcher
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fetcher
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 0
9
8
  - 1
10
- version: 0.0.1
9
+ - 0
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Gerald Bauer
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-06-02 00:00:00 Z
18
+ date: 2012-06-03 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rdoc
@@ -63,6 +63,7 @@ files:
63
63
  - bin/fetch
64
64
  - lib/fetcher.rb
65
65
  - lib/fetcher/runner.rb
66
+ - lib/fetcher/worker.rb
66
67
  homepage: http://geraldb.github.com/fetcher
67
68
  licenses: []
68
69