pluto-feedfetcher 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -5
- data/Rakefile +2 -2
- data/lib/pluto/feedfetcher.rb +1 -2
- data/lib/pluto/feedfetcher/basic.rb +9 -9
- data/lib/pluto/feedfetcher/cond_get_with_cache.rb +21 -21
- data/lib/pluto/feedfetcher/version.rb +1 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2400c2cabca1baec24873c06c7a45ca3af9d381a
|
4
|
+
data.tar.gz: '09dda2a27accb6953fbd197424bd1b360099982a'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bfcad496fc5f6c3008f520e19c3bb22d3db32b615a647ce56545ba26e29158b574409b29e80e5c3627082b833e58a3a7d682ab0810ef58171dd2382344cc630
|
7
|
+
data.tar.gz: 8e61e9f1b671398dbe73ed6c543d2b1adacc4706b47821437cdeacea3f53d2f76e7f6545149c392fc13d5697308d2f796c8da182954e13b74fc9c6650ee33ca1
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# pluto-feedfetcher gem - fetch web feeds (w/ conditional HTTP get e.g. use etags, if-modified-since etc.)
|
2
2
|
|
3
|
-
* home :: [github.com/feedreader/pluto
|
4
|
-
* bugs :: [github.com/feedreader/pluto
|
3
|
+
* home :: [github.com/feedreader/pluto](https://github.com/feedreader/pluto)
|
4
|
+
* bugs :: [github.com/feedreader/pluto/issues](https://github.com/feedreader/pluto/issues)
|
5
5
|
* gem :: [rubygems.org/gems/pluto-feedfetcher](https://rubygems.org/gems/pluto-feedfetcher)
|
6
6
|
* rdoc :: [rubydoc.info/gems/pluto-feedfetcher](http://rubydoc.info/gems/pluto-feedfetcher)
|
7
|
-
* forum :: [groups.google.com/group/
|
7
|
+
* forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake)
|
8
8
|
|
9
9
|
|
10
10
|
|
@@ -15,11 +15,12 @@ TBD
|
|
15
15
|
|
16
16
|
## License
|
17
17
|
|
18
|
+
![](https://publicdomainworks.github.io/buttons/zero88x31.png)
|
19
|
+
|
18
20
|
The `pluto-feedfetcher` scripts are dedicated to the public domain.
|
19
21
|
Use it as you please with no restrictions whatsoever.
|
20
22
|
|
21
23
|
## Questions? Comments?
|
22
24
|
|
23
|
-
Send them along to the [
|
25
|
+
Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake).
|
24
26
|
Thanks!
|
25
|
-
|
data/Rakefile
CHANGED
@@ -8,10 +8,10 @@ Hoe.spec 'pluto-feedfetcher' do
|
|
8
8
|
self.summary = "pluto-feedfetcher - fetch web feeds (w/ conditional HTTP get e.g. use etags, if-modified-since etc.)"
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/feedreader/pluto
|
11
|
+
self.urls = ['https://github.com/feedreader/pluto']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
|
-
self.email = '
|
14
|
+
self.email = 'wwwmake@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for gihub formatting
|
17
17
|
self.readme_file = 'README.md'
|
data/lib/pluto/feedfetcher.rb
CHANGED
data/lib/pluto/feedfetcher/basic.rb
CHANGED
@@ -42,11 +42,11 @@ class FeedFetcherBasic
|
|
42
42
|
### todo/fix:
|
43
43
|
### return feed_xml !!! - move FeedUtils::Parser.parse to update or someting !!!
|
44
44
|
|
45
|
-
|
45
|
+
logger.info "Before parsing feed >#{feed_key}<..."
|
46
|
+
|
46
47
|
|
47
|
-
|
48
48
|
feed_xml
|
49
|
-
|
49
|
+
|
50
50
|
## fix/todo: check for feed.nil? -> error parsing!!!
|
51
51
|
# or throw exception
|
52
52
|
# feed = FeedUtils::Parser.parse( feed_xml )
|
@@ -61,17 +61,17 @@ private
|
|
61
61
|
response = @worker.get( url )
|
62
62
|
|
63
63
|
## if debug?
|
64
|
-
|
64
|
+
logger.debug "http status #{response.code} #{response.message}"
|
65
65
|
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
logger.debug "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
67
|
+
logger.debug "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
68
|
+
logger.debug "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
69
69
|
## end
|
70
70
|
|
71
71
|
xml = response.body
|
72
72
|
|
73
73
|
###
|
74
|
-
#
|
74
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
75
75
|
# will mostly be ASCII
|
76
76
|
# - try to change encoding to UTF-8 ourselves
|
77
77
|
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
@@ -83,7 +83,7 @@ private
|
|
83
83
|
# for now "hardcoded" to utf8 - what else can we do?
|
84
84
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
85
85
|
xml = xml.force_encoding( Encoding::UTF_8 )
|
86
|
-
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
86
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
87
87
|
|
88
88
|
xml
|
89
89
|
end
|
data/lib/pluto/feedfetcher/cond_get_with_cache.rb
CHANGED
@@ -36,7 +36,7 @@ class FeedFetcherCondGetWithCache
|
|
36
36
|
rescue SocketError => e
|
37
37
|
## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
|
38
38
|
### will result in SocketError -- getaddrinfo: Name or service not known
|
39
|
-
|
39
|
+
logger.error "*** error: fetching feed '#{feed_key}' - #{e.to_s}"
|
40
40
|
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - #{e.to_s}" )
|
41
41
|
|
42
42
|
### todo/fix: update feed rec in db
|
@@ -47,8 +47,8 @@ class FeedFetcherCondGetWithCache
|
|
47
47
|
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
48
48
|
|
49
49
|
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
50
|
-
|
51
|
-
|
50
|
+
logger.info "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
51
|
+
logger.info "no change; request returns not modified (304); skipping parsing feed"
|
52
52
|
return nil # no updates available; nothing to do
|
53
53
|
end
|
54
54
|
|
@@ -56,8 +56,8 @@ class FeedFetcherCondGetWithCache
|
|
56
56
|
|
57
57
|
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
58
58
|
|
59
|
-
|
60
|
-
|
59
|
+
logger.error "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
60
|
+
|
61
61
|
feed_attribs = {
|
62
62
|
http_code: response.code.to_i,
|
63
63
|
http_server: response.header[ 'server' ],
|
@@ -65,21 +65,21 @@ class FeedFetcherCondGetWithCache
|
|
65
65
|
http_last_modified: nil,
|
66
66
|
body: nil,
|
67
67
|
md5: nil,
|
68
|
-
fetched: feed_fetched
|
68
|
+
fetched: feed_fetched
|
69
69
|
}
|
70
70
|
feed_rec.update_attributes!( feed_attribs )
|
71
|
-
|
71
|
+
|
72
72
|
## add log error activity -- in future add to error log - better - why? why not?
|
73
73
|
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
|
74
74
|
|
75
75
|
return nil # sorry; no feed for parsing available
|
76
76
|
end
|
77
77
|
|
78
|
-
|
78
|
+
logger.info "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
79
79
|
|
80
80
|
feed_xml = response.body
|
81
81
|
###
|
82
|
-
#
|
82
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
83
83
|
# will mostly be ASCII
|
84
84
|
# - try to change encoding to UTF-8 ourselves
|
85
85
|
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
@@ -95,7 +95,7 @@ class FeedFetcherCondGetWithCache
|
|
95
95
|
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
|
96
96
|
unless feed_xml_cleaned.valid_encoding?
|
97
97
|
|
98
|
-
|
98
|
+
logger.warn "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
|
99
99
|
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
|
100
100
|
# Some of it might be old Windows code page
|
101
101
|
# -- (Windows Code Page CP1252 is ISO_8859_1 is Latin-1 - check ??)
|
@@ -103,11 +103,11 @@ class FeedFetcherCondGetWithCache
|
|
103
103
|
# tell ruby the encoding
|
104
104
|
# encode to utf-8
|
105
105
|
## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
|
106
|
-
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
106
|
+
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
107
107
|
end
|
108
108
|
feed_xml = feed_xml_cleaned
|
109
109
|
rescue EncodingError => e
|
110
|
-
|
110
|
+
logger.warn "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
|
111
111
|
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
|
112
112
|
|
113
113
|
# Force it to UTF-8, throwing out invalid bits
|
@@ -128,14 +128,14 @@ class FeedFetcherCondGetWithCache
|
|
128
128
|
|
129
129
|
last_feed_md5 = feed_rec.md5
|
130
130
|
feed_md5 = Digest::MD5.hexdigest( feed_xml )
|
131
|
-
|
131
|
+
|
132
132
|
if last_feed_md5 && last_feed_md5 == feed_md5
|
133
133
|
# not all servers handle conditional gets, so while not much can be
|
134
134
|
# done about the bandwidth, but if the response body is identical
|
135
135
|
# the downstream processing (parsing, caching, ...) can be avoided.
|
136
136
|
# - thanks to planet mars -fido.rb for the idea, cheers.
|
137
|
-
|
138
|
-
|
137
|
+
|
138
|
+
logger.info "no change; md5 digests match; skipping parsing feed"
|
139
139
|
return nil # no updates available; nothing to do
|
140
140
|
end
|
141
141
|
|
@@ -150,9 +150,9 @@ class FeedFetcherCondGetWithCache
|
|
150
150
|
}
|
151
151
|
|
152
152
|
## if debug?
|
153
|
-
|
154
|
-
|
155
|
-
|
153
|
+
logger.debug "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
154
|
+
logger.debug "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
155
|
+
logger.debug "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
156
156
|
## end
|
157
157
|
|
158
158
|
### note: might crash w/ encoding errors when saving in postgress
|
@@ -165,7 +165,7 @@ class FeedFetcherCondGetWithCache
|
|
165
165
|
feed_rec.update_attributes!( feed_attribs )
|
166
166
|
rescue Exception => e
|
167
167
|
# log db error; and continue
|
168
|
-
|
168
|
+
logger.error "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
|
169
169
|
Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
|
170
170
|
return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
|
171
171
|
end
|
@@ -173,8 +173,8 @@ class FeedFetcherCondGetWithCache
|
|
173
173
|
|
174
174
|
logger.debug "feed_xml:"
|
175
175
|
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
176
|
-
|
177
|
-
|
176
|
+
|
177
|
+
logger.info "Before parsing feed >#{feed_key}<..."
|
178
178
|
|
179
179
|
### move to feedutils
|
180
180
|
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-feedfetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pluto-models
|
@@ -58,17 +58,17 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '3.
|
61
|
+
version: '3.16'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '3.
|
68
|
+
version: '3.16'
|
69
69
|
description: pluto-feedfetcher - fetch web feeds (w/ conditional HTTP get e.g. use
|
70
70
|
etags, if-modified-since etc.)
|
71
|
-
email:
|
71
|
+
email: wwwmake@googlegroups.com
|
72
72
|
executables: []
|
73
73
|
extensions: []
|
74
74
|
extra_rdoc_files:
|
@@ -84,7 +84,7 @@ files:
|
|
84
84
|
- lib/pluto/feedfetcher/basic.rb
|
85
85
|
- lib/pluto/feedfetcher/cond_get_with_cache.rb
|
86
86
|
- lib/pluto/feedfetcher/version.rb
|
87
|
-
homepage: https://github.com/feedreader/pluto
|
87
|
+
homepage: https://github.com/feedreader/pluto
|
88
88
|
licenses:
|
89
89
|
- Public Domain
|
90
90
|
metadata: {}
|
@@ -106,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
106
|
version: '0'
|
107
107
|
requirements: []
|
108
108
|
rubyforge_project:
|
109
|
-
rubygems_version: 2.
|
109
|
+
rubygems_version: 2.5.2
|
110
110
|
signing_key:
|
111
111
|
specification_version: 4
|
112
112
|
summary: pluto-feedfetcher - fetch web feeds (w/ conditional HTTP get e.g. use etags,
|