pluto-feedfetcher 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -5
- data/Rakefile +2 -2
- data/lib/pluto/feedfetcher.rb +1 -2
- data/lib/pluto/feedfetcher/basic.rb +9 -9
- data/lib/pluto/feedfetcher/cond_get_with_cache.rb +21 -21
- data/lib/pluto/feedfetcher/version.rb +1 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2400c2cabca1baec24873c06c7a45ca3af9d381a
|
4
|
+
data.tar.gz: '09dda2a27accb6953fbd197424bd1b360099982a'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bfcad496fc5f6c3008f520e19c3bb22d3db32b615a647ce56545ba26e29158b574409b29e80e5c3627082b833e58a3a7d682ab0810ef58171dd2382344cc630
|
7
|
+
data.tar.gz: 8e61e9f1b671398dbe73ed6c543d2b1adacc4706b47821437cdeacea3f53d2f76e7f6545149c392fc13d5697308d2f796c8da182954e13b74fc9c6650ee33ca1
|
data/README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# pluto-feedfetcher gem - fetch web feeds (w/ conditional HTTP get e.g. use etags, if-modified-since etc.)
|
2
2
|
|
3
|
-
* home :: [github.com/feedreader/pluto
|
4
|
-
* bugs :: [github.com/feedreader/pluto
|
3
|
+
* home :: [github.com/feedreader/pluto](https://github.com/feedreader/pluto)
|
4
|
+
* bugs :: [github.com/feedreader/pluto/issues](https://github.com/feedreader/pluto/issues)
|
5
5
|
* gem :: [rubygems.org/gems/pluto-feedfetcher](https://rubygems.org/gems/pluto-feedfetcher)
|
6
6
|
* rdoc :: [rubydoc.info/gems/pluto-feedfetcher](http://rubydoc.info/gems/pluto-feedfetcher)
|
7
|
-
* forum :: [groups.google.com/group/
|
7
|
+
* forum :: [groups.google.com/group/wwwmake](http://groups.google.com/group/wwwmake)
|
8
8
|
|
9
9
|
|
10
10
|
|
@@ -15,11 +15,12 @@ TBD
|
|
15
15
|
|
16
16
|
## License
|
17
17
|
|
18
|
+

|
19
|
+
|
18
20
|
The `pluto-feedfetcher` scripts are dedicated to the public domain.
|
19
21
|
Use it as you please with no restrictions whatsoever.
|
20
22
|
|
21
23
|
## Questions? Comments?
|
22
24
|
|
23
|
-
Send them along to the [
|
25
|
+
Send them along to the [wwwmake Forum/Mailing List](http://groups.google.com/group/wwwmake).
|
24
26
|
Thanks!
|
25
|
-
|
data/Rakefile
CHANGED
@@ -8,10 +8,10 @@ Hoe.spec 'pluto-feedfetcher' do
|
|
8
8
|
self.summary = "pluto-feedfetcher - fetch web feeds (w/ conditional HTTP get e.g. use etags, if-modified-since etc.)"
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/feedreader/pluto
|
11
|
+
self.urls = ['https://github.com/feedreader/pluto']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
|
-
self.email = '
|
14
|
+
self.email = 'wwwmake@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for gihub formatting
|
17
17
|
self.readme_file = 'README.md'
|
data/lib/pluto/feedfetcher.rb
CHANGED
data/lib/pluto/feedfetcher/basic.rb
CHANGED
@@ -42,11 +42,11 @@ class FeedFetcherBasic
|
|
42
42
|
### todo/fix:
|
43
43
|
### return feed_xml !!! - move FeedUtils::Parser.parse to update or someting !!!
|
44
44
|
|
45
|
-
|
45
|
+
logger.info "Before parsing feed >#{feed_key}<..."
|
46
|
+
|
46
47
|
|
47
|
-
|
48
48
|
feed_xml
|
49
|
-
|
49
|
+
|
50
50
|
## fix/todo: check for feed.nil? -> error parsing!!!
|
51
51
|
# or throw exception
|
52
52
|
# feed = FeedUtils::Parser.parse( feed_xml )
|
@@ -61,17 +61,17 @@ private
|
|
61
61
|
response = @worker.get( url )
|
62
62
|
|
63
63
|
## if debug?
|
64
|
-
|
64
|
+
logger.debug "http status #{response.code} #{response.message}"
|
65
65
|
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
logger.debug "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
67
|
+
logger.debug "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
68
|
+
logger.debug "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
69
69
|
## end
|
70
70
|
|
71
71
|
xml = response.body
|
72
72
|
|
73
73
|
###
|
74
|
-
#
|
74
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
75
75
|
# will mostly be ASCII
|
76
76
|
# - try to change encoding to UTF-8 ourselves
|
77
77
|
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
@@ -83,7 +83,7 @@ private
|
|
83
83
|
# for now "hardcoded" to utf8 - what else can we do?
|
84
84
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
85
85
|
xml = xml.force_encoding( Encoding::UTF_8 )
|
86
|
-
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
86
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
87
87
|
|
88
88
|
xml
|
89
89
|
end
|
data/lib/pluto/feedfetcher/cond_get_with_cache.rb
CHANGED
@@ -36,7 +36,7 @@ class FeedFetcherCondGetWithCache
|
|
36
36
|
rescue SocketError => e
|
37
37
|
## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
|
38
38
|
### will result in SocketError -- getaddrinfo: Name or service not known
|
39
|
-
|
39
|
+
logger.error "*** error: fetching feed '#{feed_key}' - #{e.to_s}"
|
40
40
|
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - #{e.to_s}" )
|
41
41
|
|
42
42
|
### todo/fix: update feed rec in db
|
@@ -47,8 +47,8 @@ class FeedFetcherCondGetWithCache
|
|
47
47
|
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
48
48
|
|
49
49
|
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
50
|
-
|
51
|
-
|
50
|
+
logger.info "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
51
|
+
logger.info "no change; request returns not modified (304); skipping parsing feed"
|
52
52
|
return nil # no updates available; nothing to do
|
53
53
|
end
|
54
54
|
|
@@ -56,8 +56,8 @@ class FeedFetcherCondGetWithCache
|
|
56
56
|
|
57
57
|
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
58
58
|
|
59
|
-
|
60
|
-
|
59
|
+
logger.error "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
60
|
+
|
61
61
|
feed_attribs = {
|
62
62
|
http_code: response.code.to_i,
|
63
63
|
http_server: response.header[ 'server' ],
|
@@ -65,21 +65,21 @@ class FeedFetcherCondGetWithCache
|
|
65
65
|
http_last_modified: nil,
|
66
66
|
body: nil,
|
67
67
|
md5: nil,
|
68
|
-
fetched: feed_fetched
|
68
|
+
fetched: feed_fetched
|
69
69
|
}
|
70
70
|
feed_rec.update_attributes!( feed_attribs )
|
71
|
-
|
71
|
+
|
72
72
|
## add log error activity -- in future add to error log - better - why? why not?
|
73
73
|
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
|
74
74
|
|
75
75
|
return nil # sorry; no feed for parsing available
|
76
76
|
end
|
77
77
|
|
78
|
-
|
78
|
+
logger.info "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
79
79
|
|
80
80
|
feed_xml = response.body
|
81
81
|
###
|
82
|
-
#
|
82
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
83
83
|
# will mostly be ASCII
|
84
84
|
# - try to change encoding to UTF-8 ourselves
|
85
85
|
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
@@ -95,7 +95,7 @@ class FeedFetcherCondGetWithCache
|
|
95
95
|
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
|
96
96
|
unless feed_xml_cleaned.valid_encoding?
|
97
97
|
|
98
|
-
|
98
|
+
logger.warn "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
|
99
99
|
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
|
100
100
|
# Some of it might be old Windows code page
|
101
101
|
# -- (Windows Code Page CP1252 is ISO_8859_1 is Latin-1 - check ??)
|
@@ -103,11 +103,11 @@ class FeedFetcherCondGetWithCache
|
|
103
103
|
# tell ruby the encoding
|
104
104
|
# encode to utf-8
|
105
105
|
## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
|
106
|
-
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
106
|
+
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
107
107
|
end
|
108
108
|
feed_xml = feed_xml_cleaned
|
109
109
|
rescue EncodingError => e
|
110
|
-
|
110
|
+
logger.warn "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
|
111
111
|
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
|
112
112
|
|
113
113
|
# Force it to UTF-8, throwing out invalid bits
|
@@ -128,14 +128,14 @@ class FeedFetcherCondGetWithCache
|
|
128
128
|
|
129
129
|
last_feed_md5 = feed_rec.md5
|
130
130
|
feed_md5 = Digest::MD5.hexdigest( feed_xml )
|
131
|
-
|
131
|
+
|
132
132
|
if last_feed_md5 && last_feed_md5 == feed_md5
|
133
133
|
# not all servers handle conditional gets, so while not much can be
|
134
134
|
# done about the bandwidth, but if the response body is identical
|
135
135
|
# the downstream processing (parsing, caching, ...) can be avoided.
|
136
136
|
# - thanks to planet mars -fido.rb for the idea, cheers.
|
137
|
-
|
138
|
-
|
137
|
+
|
138
|
+
logger.info "no change; md5 digests match; skipping parsing feed"
|
139
139
|
return nil # no updates available; nothing to do
|
140
140
|
end
|
141
141
|
|
@@ -150,9 +150,9 @@ class FeedFetcherCondGetWithCache
|
|
150
150
|
}
|
151
151
|
|
152
152
|
## if debug?
|
153
|
-
|
154
|
-
|
155
|
-
|
153
|
+
logger.debug "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
154
|
+
logger.debug "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
155
|
+
logger.debug "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
156
156
|
## end
|
157
157
|
|
158
158
|
### note: might crash w/ encoding errors when saving in postgress
|
@@ -165,7 +165,7 @@ class FeedFetcherCondGetWithCache
|
|
165
165
|
feed_rec.update_attributes!( feed_attribs )
|
166
166
|
rescue Exception => e
|
167
167
|
# log db error; and continue
|
168
|
-
|
168
|
+
logger.error "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
|
169
169
|
Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
|
170
170
|
return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
|
171
171
|
end
|
@@ -173,8 +173,8 @@ class FeedFetcherCondGetWithCache
|
|
173
173
|
|
174
174
|
logger.debug "feed_xml:"
|
175
175
|
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
176
|
-
|
177
|
-
|
176
|
+
|
177
|
+
logger.info "Before parsing feed >#{feed_key}<..."
|
178
178
|
|
179
179
|
### move to feedutils
|
180
180
|
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-feedfetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pluto-models
|
@@ -58,17 +58,17 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '3.
|
61
|
+
version: '3.16'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '3.
|
68
|
+
version: '3.16'
|
69
69
|
description: pluto-feedfetcher - fetch web feeds (w/ conditional HTTP get e.g. use
|
70
70
|
etags, if-modified-since etc.)
|
71
|
-
email:
|
71
|
+
email: wwwmake@googlegroups.com
|
72
72
|
executables: []
|
73
73
|
extensions: []
|
74
74
|
extra_rdoc_files:
|
@@ -84,7 +84,7 @@ files:
|
|
84
84
|
- lib/pluto/feedfetcher/basic.rb
|
85
85
|
- lib/pluto/feedfetcher/cond_get_with_cache.rb
|
86
86
|
- lib/pluto/feedfetcher/version.rb
|
87
|
-
homepage: https://github.com/feedreader/pluto
|
87
|
+
homepage: https://github.com/feedreader/pluto
|
88
88
|
licenses:
|
89
89
|
- Public Domain
|
90
90
|
metadata: {}
|
@@ -106,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
106
|
version: '0'
|
107
107
|
requirements: []
|
108
108
|
rubyforge_project:
|
109
|
-
rubygems_version: 2.
|
109
|
+
rubygems_version: 2.5.2
|
110
110
|
signing_key:
|
111
111
|
specification_version: 4
|
112
112
|
summary: pluto-feedfetcher - fetch web feeds (w/ conditional HTTP get e.g. use etags,
|