fetcher 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +7 -7
- data/Rakefile +2 -2
- data/lib/fetcher/version.rb +1 -1
- data/lib/fetcher/worker.rb +37 -4
- metadata +9 -9
data/README.markdown
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS
|
2
2
|
|
3
|
-
* home :: [github.com/
|
4
|
-
* bugs :: [github.com/
|
3
|
+
* home :: [github.com/rubylibs/fetcher](https://github.com/rubylibs/fetcher)
|
4
|
+
* bugs :: [github.com/rubylibs/fetcher/issues](https://github.com/rubylibs/fetcher/issues)
|
5
5
|
* gem :: [rubygems.org/gems/fetcher](https://rubygems.org/gems/fetcher)
|
6
6
|
* rdoc :: [rubydoc.info/gems/fetcher](http://rubydoc.info/gems/fetcher)
|
7
7
|
* forum :: [groups.google.com/group/webslideshow](https://groups.google.com/group/webslideshow)
|
@@ -56,7 +56,7 @@ Note: The method `get` will return a `Net::HTTPResponse` object
|
|
56
56
|
## Command Line
|
57
57
|
|
58
58
|
~~~
|
59
|
-
fetch version 0.
|
59
|
+
fetch version 0.5.0 - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
|
60
60
|
|
61
61
|
Usage: fetch [options] URI
|
62
62
|
-o, --output PATH Output Path (default is '.')
|
@@ -83,12 +83,12 @@ that lets you create slide shows
|
|
83
83
|
and author slides in plain text using a wiki-style markup language that's easy-to-write and easy-to-read
|
84
84
|
ships with the `fetcher` gem.
|
85
85
|
|
86
|
-
The [`
|
87
|
-
|
86
|
+
The [`pluto`](https://github.com/feedreader) gem that lets you build web pages
|
87
|
+
from published web feeds
|
88
88
|
ships with the `fetcher` gem.
|
89
89
|
|
90
|
-
The [`
|
91
|
-
|
90
|
+
The [`sportdb`](https://github.com/geraldb/sport.db.ruby) gem that lets you read football (soccer) fixtures
|
91
|
+
and more in plain text
|
92
92
|
ships with the `fetcher` gem.
|
93
93
|
|
94
94
|
|
data/Rakefile
CHANGED
@@ -8,8 +8,8 @@ Hoe.spec 'fetcher' do
|
|
8
8
|
self.summary = 'fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS'
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/
|
12
|
-
|
11
|
+
self.urls = ['https://github.com/rubylibs/fetcher']
|
12
|
+
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'webslideshow@googlegroups.com'
|
15
15
|
|
data/lib/fetcher/version.rb
CHANGED
data/lib/fetcher/worker.rb
CHANGED
@@ -14,8 +14,19 @@ module Fetcher
|
|
14
14
|
if old_logger_do_not_use != nil
|
15
15
|
puts "*** depreciated API call [Fetcher.initialize] - do NOT pass in logger; no longer required/needed; logger arg will get removed"
|
16
16
|
end
|
17
|
+
|
18
|
+
### cache for conditional get (e.g. etags and last-modified headers/checks)
|
19
|
+
@cache = {}
|
20
|
+
@use_cache = false
|
17
21
|
end
|
18
22
|
|
23
|
+
## note: use cache[ uri ] = hash for headers+plus body+plus code(410,etc.)
|
24
|
+
# cache[ uri ]
|
25
|
+
def clear_cache() @cache = {}; end
|
26
|
+
def cache() @cache; end
|
27
|
+
def use_cache=(true_or_false) @use_cache=true_or_false; end # true|false
|
28
|
+
def use_cache?() @use_cache; end
|
29
|
+
|
19
30
|
|
20
31
|
def get( src )
|
21
32
|
# return HTTPResponse (code,message,body,etc.)
|
@@ -95,19 +106,41 @@ module Fetcher
|
|
95
106
|
|
96
107
|
logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"
|
97
108
|
|
98
|
-
|
109
|
+
headers = { 'User-Agent' => "fetcher gem v#{VERSION}" }
|
110
|
+
|
111
|
+
if use_cache?
|
112
|
+
## check for existing cache entry in cache store (lookup by uri)
|
113
|
+
## todo/fix: normalize uri!!!! - how?
|
114
|
+
cache_entry = cache[ uri.request_uri ]
|
115
|
+
if cache_entry
|
116
|
+
logger.info "found cache entry for >#{uri.request_uri}<"
|
117
|
+
if cache_entry['etag']
|
118
|
+
logger.info "adding header If-None-Match (etag) >#{cache_entry['etag']}< for conditional GET"
|
119
|
+
headers['If-None-Match'] = cache_entry['etag']
|
120
|
+
end
|
121
|
+
if cache_entry['last-modified']
|
122
|
+
logger.info "adding header If-Modified-Since (last-modified) >#{cache_entry['last-modified']}< for conditional GET"
|
123
|
+
headers['If-Modified-Since'] = cache_entry['last-modified']
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
request = Net::HTTP::Get.new( uri.request_uri, headers )
|
99
129
|
if uri.instance_of? URI::HTTPS
|
100
130
|
http.use_ssl = true
|
101
131
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
102
132
|
end
|
103
|
-
|
133
|
+
|
104
134
|
response = http.request( request )
|
105
|
-
|
135
|
+
|
106
136
|
if response.code == '200'
|
107
137
|
logger.debug "#{response.code} #{response.message}"
|
108
138
|
logger.debug " content_type: #{response.content_type}, content_length: #{response.content_length}"
|
109
139
|
break # will return response
|
110
|
-
elsif
|
140
|
+
elsif( response.code == '304' ) # -- Not Modified - for conditional GETs (using etag,last-modified)
|
141
|
+
logger.debug "#{response.code} #{response.message}"
|
142
|
+
break # will return response
|
143
|
+
elsif( response.code == '301' || response.code == '302' || response.code == '303' || response.code == '307' )
|
111
144
|
# 301 = moved permanently
|
112
145
|
# 302 = found
|
113
146
|
# 303 = see other
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &75656580 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.6'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *75656580
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &75656270 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *75656270
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &75656000 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *75656000
|
47
47
|
description: fetcher - Fetch Text Documents or Binary Blobs via HTTP, HTTPS
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables:
|
@@ -62,7 +62,7 @@ files:
|
|
62
62
|
- lib/fetcher/cli/runner.rb
|
63
63
|
- lib/fetcher/version.rb
|
64
64
|
- lib/fetcher/worker.rb
|
65
|
-
homepage: https://github.com/
|
65
|
+
homepage: https://github.com/rubylibs/fetcher
|
66
66
|
licenses:
|
67
67
|
- Public Domain
|
68
68
|
post_install_message:
|