fetcher 0.4.0 → 0.4.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- data/README.markdown +7 -7
- data/Rakefile +2 -2
- data/lib/fetcher/version.rb +1 -1
- data/lib/fetcher/worker.rb +37 -4
- metadata +9 -9
data/README.markdown
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS
|
2
2
|
|
3
|
-
* home :: [github.com/
|
4
|
-
* bugs :: [github.com/
|
3
|
+
* home :: [github.com/rubylibs/fetcher](https://github.com/rubylibs/fetcher)
|
4
|
+
* bugs :: [github.com/rubylibs/fetcher/issues](https://github.com/rubylibs/fetcher/issues)
|
5
5
|
* gem :: [rubygems.org/gems/fetcher](https://rubygems.org/gems/fetcher)
|
6
6
|
* rdoc :: [rubydoc.info/gems/fetcher](http://rubydoc.info/gems/fetcher)
|
7
7
|
* forum :: [groups.google.com/group/webslideshow](https://groups.google.com/group/webslideshow)
|
@@ -56,7 +56,7 @@ Note: The method `get` will return a `Net::HTTPResponse` object
|
|
56
56
|
## Command Line
|
57
57
|
|
58
58
|
~~~
|
59
|
-
fetch version 0.
|
59
|
+
fetch version 0.5.0 - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
|
60
60
|
|
61
61
|
Usage: fetch [options] URI
|
62
62
|
-o, --output PATH Output Path (default is '.')
|
@@ -83,12 +83,12 @@ that lets you create slide shows
|
|
83
83
|
and author slides in plain text using a wiki-style markup language that's easy-to-write and easy-to-read
|
84
84
|
ships with the `fetcher` gem.
|
85
85
|
|
86
|
-
The [`
|
87
|
-
|
86
|
+
The [`pluto`](https://github.com/feedreader) gem that lets you build web pages
|
87
|
+
from published web feeds
|
88
88
|
ships with the `fetcher` gem.
|
89
89
|
|
90
|
-
The [`
|
91
|
-
|
90
|
+
The [`sportdb`](https://github.com/geraldb/sport.db.ruby) gem that lets you read football (soccer) fixtures
|
91
|
+
and more in plain text
|
92
92
|
ships with the `fetcher` gem.
|
93
93
|
|
94
94
|
|
data/Rakefile
CHANGED
@@ -8,8 +8,8 @@ Hoe.spec 'fetcher' do
|
|
8
8
|
self.summary = 'fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS'
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/
|
12
|
-
|
11
|
+
self.urls = ['https://github.com/rubylibs/fetcher']
|
12
|
+
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'webslideshow@googlegroups.com'
|
15
15
|
|
data/lib/fetcher/version.rb
CHANGED
data/lib/fetcher/worker.rb
CHANGED
@@ -14,8 +14,19 @@ module Fetcher
|
|
14
14
|
if old_logger_do_not_use != nil
|
15
15
|
puts "*** depreciated API call [Fetcher.initialize] - do NOT pass in logger; no longer required/needed; logger arg will get removed"
|
16
16
|
end
|
17
|
+
|
18
|
+
### cache for conditional get (e.g. etags and last-modified headers/checks)
|
19
|
+
@cache = {}
|
20
|
+
@use_cache = false
|
17
21
|
end
|
18
22
|
|
23
|
+
## note: use cache[ uri ] = hash for headers+plus body+plus code(410,etc.)
|
24
|
+
# cache[ uri ]
|
25
|
+
def clear_cache() @cache = {}; end
|
26
|
+
def cache() @cache; end
|
27
|
+
def use_cache=(true_or_false) @use_cache=true_or_false; end # true|false
|
28
|
+
def use_cache?() @use_cache; end
|
29
|
+
|
19
30
|
|
20
31
|
def get( src )
|
21
32
|
# return HTTPResponse (code,message,body,etc.)
|
@@ -95,19 +106,41 @@ module Fetcher
|
|
95
106
|
|
96
107
|
logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"
|
97
108
|
|
98
|
-
|
109
|
+
headers = { 'User-Agent' => "fetcher gem v#{VERSION}" }
|
110
|
+
|
111
|
+
if use_cache?
|
112
|
+
## check for existing cache entry in cache store (lookup by uri)
|
113
|
+
## todo/fix: normalize uri!!!! - how?
|
114
|
+
cache_entry = cache[ uri.request_uri ]
|
115
|
+
if cache_entry
|
116
|
+
logger.info "found cache entry for >#{uri.request_uri}<"
|
117
|
+
if cache_entry['etag']
|
118
|
+
logger.info "adding header If-None-Match (etag) >#{cache_entry['etag']}< for conditional GET"
|
119
|
+
headers['If-None-Match'] = cache_entry['etag']
|
120
|
+
end
|
121
|
+
if cache_entry['last-modified']
|
122
|
+
logger.info "adding header If-Modified-Since (last-modified) >#{cache_entry['last-modified']}< for conditional GET"
|
123
|
+
headers['If-Modified-Since'] = cache_entry['last-modified']
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
request = Net::HTTP::Get.new( uri.request_uri, headers )
|
99
129
|
if uri.instance_of? URI::HTTPS
|
100
130
|
http.use_ssl = true
|
101
131
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
102
132
|
end
|
103
|
-
|
133
|
+
|
104
134
|
response = http.request( request )
|
105
|
-
|
135
|
+
|
106
136
|
if response.code == '200'
|
107
137
|
logger.debug "#{response.code} #{response.message}"
|
108
138
|
logger.debug " content_type: #{response.content_type}, content_length: #{response.content_length}"
|
109
139
|
break # will return response
|
110
|
-
elsif
|
140
|
+
elsif( response.code == '304' ) # -- Not Modified - for conditional GETs (using etag,last-modified)
|
141
|
+
logger.debug "#{response.code} #{response.message}"
|
142
|
+
break # will return response
|
143
|
+
elsif( response.code == '301' || response.code == '302' || response.code == '303' || response.code == '307' )
|
111
144
|
# 301 = moved permanently
|
112
145
|
# 302 = found
|
113
146
|
# 303 = see other
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &75656580 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.6'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *75656580
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &75656270 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *75656270
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &75656000 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *75656000
|
47
47
|
description: fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables:
|
@@ -62,7 +62,7 @@ files:
|
|
62
62
|
- lib/fetcher/cli/runner.rb
|
63
63
|
- lib/fetcher/version.rb
|
64
64
|
- lib/fetcher/worker.rb
|
65
|
-
homepage: https://github.com/
|
65
|
+
homepage: https://github.com/rubylibs/fetcher
|
66
66
|
licenses:
|
67
67
|
- Public Domain
|
68
68
|
post_install_message:
|