fetcher 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  # fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS
2
2
 
3
- * home :: [github.com/geraldb/fetcher](https://github.com/geraldb/fetcher)
4
- * bugs :: [github.com/geraldb/fetcher/issues](https://github.com/geraldb/fetcher/issues)
3
+ * home :: [github.com/rubylibs/fetcher](https://github.com/rubylibs/fetcher)
4
+ * bugs :: [github.com/rubylibs/fetcher/issues](https://github.com/rubylibs/fetcher/issues)
5
5
  * gem :: [rubygems.org/gems/fetcher](https://rubygems.org/gems/fetcher)
6
6
  * rdoc :: [rubydoc.info/gems/fetcher](http://rubydoc.info/gems/fetcher)
7
7
  * forum :: [groups.google.com/group/webslideshow](https://groups.google.com/group/webslideshow)
@@ -56,7 +56,7 @@ Note: The method `get` will return a `Net::HTTPResponse` object
56
56
  ## Command Line
57
57
 
58
58
  ~~~
59
- fetch version 0.7.0 - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
59
+ fetch version 0.5.0 - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
60
60
 
61
61
  Usage: fetch [options] URI
62
62
  -o, --output PATH Output Path (default is '.')
@@ -83,12 +83,12 @@ that lets you create slide shows
83
83
  and author slides in plain text using a wiki-style markup language that's easy-to-write and easy-to-read
84
84
  ships with the `fetcher` gem.
85
85
 
86
- The [`sportdb`](https://github.com/geraldb/sport.db.ruby) gem that lets you read football (soccer) fixtures
87
- and more in plain text
86
+ The [`pluto`](https://github.com/feedreader) gem that lets you build web pages
87
+ from published web feeds
88
88
  ships with the `fetcher` gem.
89
89
 
90
- The [`pluto`](https://github.com/geraldb/pluto) gem that lets you build web pages
91
- from published web feeds
90
+ The [`sportdb`](https://github.com/geraldb/sport.db.ruby) gem that lets you read football (soccer) fixtures
91
+ and more in plain text
92
92
  ships with the `fetcher` gem.
93
93
 
94
94
 
data/Rakefile CHANGED
@@ -8,8 +8,8 @@ Hoe.spec 'fetcher' do
8
8
  self.summary = 'fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS'
9
9
  self.description = summary
10
10
 
11
- self.urls = ['https://github.com/geraldb/fetcher']
12
-
11
+ self.urls = ['https://github.com/rubylibs/fetcher']
12
+
13
13
  self.author = 'Gerald Bauer'
14
14
  self.email = 'webslideshow@googlegroups.com'
15
15
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  module Fetcher
3
3
 
4
- VERSION = '0.4.0'
4
+ VERSION = '0.4.1'
5
5
 
6
6
  end # module Fetcher
7
7
 
@@ -14,8 +14,19 @@ module Fetcher
14
14
  if old_logger_do_not_use != nil
15
15
  puts "*** depreciated API call [Fetcher.initialize] - do NOT pass in logger; no longer required/needed; logger arg will get removed"
16
16
  end
17
+
18
+ ### cache for conditional get (e.g. etags and last-modified headers/checks)
19
+ @cache = {}
20
+ @use_cache = false
17
21
  end
18
22
 
23
+ ## note: use cache[ uri ] = hash of headers plus body plus status code (410, etc.)
24
+ # cache[ uri ]
25
+ def clear_cache() @cache = {}; end
26
+ def cache() @cache; end
27
+ def use_cache=(true_or_false) @use_cache=true_or_false; end # true|false
28
+ def use_cache?() @use_cache; end
29
+
19
30
 
20
31
  def get( src )
21
32
  # return HTTPResponse (code,message,body,etc.)
@@ -95,19 +106,41 @@ module Fetcher
95
106
 
96
107
  logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"
97
108
 
98
- request = Net::HTTP::Get.new( uri.request_uri, { 'User-Agent' => "fetcher gem v#{VERSION}" } )
109
+ headers = { 'User-Agent' => "fetcher gem v#{VERSION}" }
110
+
111
+ if use_cache?
112
+ ## check for existing cache entry in cache store (lookup by uri)
113
+ ## todo/fix: normalize uri!!!! - how?
114
+ cache_entry = cache[ uri.request_uri ]
115
+ if cache_entry
116
+ logger.info "found cache entry for >#{uri.request_uri}<"
117
+ if cache_entry['etag']
118
+ logger.info "adding header If-None-Match (etag) >#{cache_entry['etag']}< for conditional GET"
119
+ headers['If-None-Match'] = cache_entry['etag']
120
+ end
121
+ if cache_entry['last-modified']
122
+ logger.info "adding header If-Modified-Since (last-modified) >#{cache_entry['last-modified']}< for conditional GET"
123
+ headers['If-Modified-Since'] = cache_entry['last-modified']
124
+ end
125
+ end
126
+ end
127
+
128
+ request = Net::HTTP::Get.new( uri.request_uri, headers )
99
129
  if uri.instance_of? URI::HTTPS
100
130
  http.use_ssl = true
101
131
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
102
132
  end
103
-
133
+
104
134
  response = http.request( request )
105
-
135
+
106
136
  if response.code == '200'
107
137
  logger.debug "#{response.code} #{response.message}"
108
138
  logger.debug " content_type: #{response.content_type}, content_length: #{response.content_length}"
109
139
  break # will return response
110
- elsif (response.code == '301' || response.code == '302' || response.code == '303' || response.code == '307' )
140
+ elsif( response.code == '304' ) # -- Not Modified - for conditional GETs (using etag,last-modified)
141
+ logger.debug "#{response.code} #{response.message}"
142
+ break # will return response
143
+ elsif( response.code == '301' || response.code == '302' || response.code == '303' || response.code == '307' )
111
144
  # 301 = moved permanently
112
145
  # 302 = found
113
146
  # 303 = see other
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-14 00:00:00.000000000 Z
12
+ date: 2013-10-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &75519830 !ruby/object:Gem::Requirement
16
+ requirement: &75656580 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.6'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *75519830
24
+ version_requirements: *75656580
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &75537890 !ruby/object:Gem::Requirement
27
+ requirement: &75656270 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *75537890
35
+ version_requirements: *75656270
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &75536950 !ruby/object:Gem::Requirement
38
+ requirement: &75656000 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *75536950
46
+ version_requirements: *75656000
47
47
  description: fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS
48
48
  email: webslideshow@googlegroups.com
49
49
  executables:
@@ -62,7 +62,7 @@ files:
62
62
  - lib/fetcher/cli/runner.rb
63
63
  - lib/fetcher/version.rb
64
64
  - lib/fetcher/worker.rb
65
- homepage: https://github.com/geraldb/fetcher
65
+ homepage: https://github.com/rubylibs/fetcher
66
66
  licenses:
67
67
  - Public Domain
68
68
  post_install_message: