pluto-update 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/HISTORY.md +4 -0
- data/Manifest.txt +10 -0
- data/README.md +55 -0
- data/Rakefile +31 -0
- data/lib/pluto/update.rb +40 -0
- data/lib/pluto/update/fetcher.rb +297 -0
- data/lib/pluto/update/refresher.rb +133 -0
- data/lib/pluto/update/subscriber.rb +105 -0
- data/lib/pluto/update/updater.rb +53 -0
- data/lib/pluto/update/version.rb +25 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 68de3691ae194862a74e9478d542fa5c606aa97e
|
4
|
+
data.tar.gz: 480c6f296bfa83495481d3f3f10c6c8e66b4c23b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 42b5f6a7e8b03a82325ab4f5dedbabcdae1ccfe9e103c8d6b23d84efa948cdfd9e37b86ef67d5f05289e0002d1d7962eafd79a2bd2fa292df48786430ddfc189
|
7
|
+
data.tar.gz: 22abcd241d7ae8a2e1565ef9a42e441c88eb8b4dfa0038f4e9f69d6d4ea8842dc104eaf697d5ef8fa561d87651a146aea1129bda7a56e38562bf75bcc6bdb13d
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# pluto-update gem - planet feed 'n' subscription updater
|
2
|
+
|
3
|
+
* home :: [github.com/feedreader/pluto-update](https://github.com/feedreader/pluto-update)
|
4
|
+
* bugs :: [github.com/feedreader/pluto-update/issues](https://github.com/feedreader/pluto-update/issues)
|
5
|
+
* gem :: [rubygems.org/gems/pluto-update](https://rubygems.org/gems/pluto-update)
|
6
|
+
* rdoc :: [rubydoc.info/gems/pluto-update](http://rubydoc.info/gems/pluto-update)
|
7
|
+
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
### Planet Configuration Sample
|
14
|
+
|
15
|
+
`ruby.ini`:
|
16
|
+
|
17
|
+
```
|
18
|
+
title = Planet Ruby
|
19
|
+
|
20
|
+
[rubyflow]
|
21
|
+
title = Ruby Flow
|
22
|
+
link = http://rubyflow.com
|
23
|
+
feed = http://feeds.feedburner.com/Rubyflow?format=xml
|
24
|
+
|
25
|
+
[rubyonrails]
|
26
|
+
title = Ruby on Rails Blog
|
27
|
+
link = http://weblog.rubyonrails.org
|
28
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
29
|
+
|
30
|
+
[viennarb]
|
31
|
+
title = vienna.rb Blog
|
32
|
+
link = http://vienna-rb.at
|
33
|
+
feed = http://vienna-rb.at/atom.xml
|
34
|
+
```
|
35
|
+
|
36
|
+
For more samples, see [`nytimes.ini`](https://github.com/feedreader/pluto.samples/blob/master/nytimes.ini),
|
37
|
+
[`js.ini`](https://github.com/feedreader/pluto.samples/blob/master/js.ini),
|
38
|
+
[`dart.ini`](https://github.com/feedreader/pluto.samples/blob/master/dart.ini),
|
39
|
+
[`haskell.ini`](https://github.com/feedreader/pluto.samples/blob/master/haskell.ini),
|
40
|
+
[`viennarb.ini`](https://github.com/feedreader/pluto.samples/blob/master/viennarb.ini),
|
41
|
+
[`beer.ini`](https://github.com/feedreader/pluto.samples/blob/master/beer.ini),
|
42
|
+
[`football.ini`](https://github.com/feedreader/pluto.samples/blob/master/football.ini).
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
## License
|
48
|
+
|
49
|
+
The `pluto-update` scripts are dedicated to the public domain.
|
50
|
+
Use them as you please with no restrictions whatsoever.
|
51
|
+
|
52
|
+
## Questions? Comments?
|
53
|
+
|
54
|
+
Send them along to the [Planet Pluto and Friends Forum/Mailing List](http://groups.google.com/group/feedreader).
|
55
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/pluto/update/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'pluto-update' do
|
5
|
+
|
6
|
+
self.version = PlutoUpdate::VERSION
|
7
|
+
|
8
|
+
self.summary = "pluto-update - planet feed 'n' subscription updater"
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = ['https://github.com/feedreader/pluto-update']
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'feedreader@googlegroups.com'
|
15
|
+
|
16
|
+
# switch extension to .markdown for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'HISTORY.md'
|
19
|
+
|
20
|
+
self.extra_deps = [
|
21
|
+
['pluto-models', '>= 1.2.2'],
|
22
|
+
['fetcher', '>= 0.4.4'],
|
23
|
+
]
|
24
|
+
|
25
|
+
self.licenses = ['Public Domain']
|
26
|
+
|
27
|
+
self.spec_extras = {
|
28
|
+
required_ruby_version: '>= 1.9.2'
|
29
|
+
}
|
30
|
+
|
31
|
+
end
|
data/lib/pluto/update.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'pluto/models'
|
5
|
+
|
6
|
+
|
7
|
+
# more 3rd party gems
|
8
|
+
require 'fetcher'
|
9
|
+
|
10
|
+
|
11
|
+
# our own code
|
12
|
+
require 'pluto/update/version' # Note: let version always go first
|
13
|
+
require 'pluto/update/fetcher'
|
14
|
+
require 'pluto/update/refresher'
|
15
|
+
require 'pluto/update/subscriber'
|
16
|
+
require 'pluto/update/updater'
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
module Pluto

  # Module-level convenience entry points; each one builds a fresh
  # worker object and delegates to the method of the same name.
  class << self

    # todo: add alias update_site( config ) ??

    # Hands the given config to a new Subscriber.
    def update_subscriptions( config )
      subscriber = Subscriber.new
      subscriber.update_subscriptions( config )
    end

    # Asks a new Refresher to update all feeds.
    def update_feeds
      refresher = Refresher.new
      refresher.update_feeds
    end

    # Asks a new Refresher to update all site configs.
    def update_sites
      refresher = Refresher.new
      refresher.update_sites
    end

  end # class << self

end # module Pluto
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
# say hello
|
40
|
+
puts PlutoUpdate.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
|
@@ -0,0 +1,297 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Fetcher
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
include Models # for easy convenience access for Activity etc.
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@worker = ::Fetcher::Worker.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def debug=(value) @debug = value; end
|
17
|
+
def debug?() @debug || false; end
|
18
|
+
|
19
|
+
|
20
|
+
# Plain (unconditional) fetch of a feed.
#
# url - address handed to the worker's get method
#
# Prints the http status line and the server / etag / last-modified
# response headers, then returns the response body tagged as UTF-8.
def fetch_feed( url )
  reply = @worker.get( url )

  ## if debug?
  puts "http status #{reply.code} #{reply.message}"

  %w[server etag last-modified].each do |name|
    value = reply.header[ name ]
    puts "http header - #{name}: #{value} - #{value.class.name}"
  end
  ## end

  feed_text = reply.body

  # NB: Net::HTTP will NOT set encoding UTF-8 etc.; body will mostly be
  #  ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
  logger.debug "xml.encoding.name (before): #{feed_text.encoding.name}"

  # for now "hardcoded" to utf8 - what else can we do?
  #  note: force_encoding re-labels the very same string object;
  #        the bytes are NOT translated
  feed_text.force_encoding( Encoding::UTF_8 )
  logger.debug "xml.encoding.name (after): #{feed_text.encoding.name}"

  feed_text
end
|
50
|
+
|
51
|
+
|
52
|
+
def feed_by_rec( feed_rec )
|
53
|
+
# simple feed fetcher; use for debugging (only/mostly)
|
54
|
+
# -- will NOT change db records in any way
|
55
|
+
|
56
|
+
feed_url = feed_rec.feed_url
|
57
|
+
feed_key = feed_rec.key
|
58
|
+
|
59
|
+
feed_xml = fetch_feed( feed_url )
|
60
|
+
|
61
|
+
logger.debug "feed_xml:"
|
62
|
+
logger.debug feed_xml[ 0..500 ] # get first 500 chars
|
63
|
+
|
64
|
+
# if opts.verbose? # also write a copy to disk
|
65
|
+
if debug?
|
66
|
+
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
67
|
+
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
68
|
+
f.write( feed_xml )
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
puts "Before parsing feed >#{feed_key}<..."
|
73
|
+
|
74
|
+
## fix/todo: check for feed.nil? -> error parsing!!!
|
75
|
+
# or throw exception
|
76
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
77
|
+
feed
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
def feed_by_rec_if_modified( feed_rec ) # try smart http update; will update db records
|
82
|
+
feed_url = feed_rec.feed_url
|
83
|
+
feed_key = feed_rec.key
|
84
|
+
|
85
|
+
### todo/fix: normalize/unify feed_url
|
86
|
+
## - same in fetcher - use shared utility method or similar
|
87
|
+
|
88
|
+
@worker.use_cache = true
|
89
|
+
@worker.cache[ feed_url ] = {
|
90
|
+
'etag' => feed_rec.http_etag,
|
91
|
+
'last-modified' => feed_rec.http_last_modified
|
92
|
+
}
|
93
|
+
|
94
|
+
begin
|
95
|
+
response = @worker.get( feed_url )
|
96
|
+
rescue SocketError => e
|
97
|
+
## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
|
98
|
+
### will result in SocketError -- getaddrinfo: Name or service not known
|
99
|
+
puts "*** error: fetching feed '#{feed_key}' - #{e.to_s}"
|
100
|
+
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - #{e.to_s}" )
|
101
|
+
|
102
|
+
### todo/fix: update feed rec in db
|
103
|
+
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
104
|
+
return nil
|
105
|
+
end
|
106
|
+
|
107
|
+
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
108
|
+
|
109
|
+
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
110
|
+
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
111
|
+
puts "no change; request returns not modified (304); skipping parsing feed"
|
112
|
+
return nil # no updates available; nothing to do
|
113
|
+
end
|
114
|
+
|
115
|
+
feed_fetched = Time.now
|
116
|
+
|
117
|
+
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
118
|
+
|
119
|
+
puts "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
120
|
+
|
121
|
+
feed_attribs = {
|
122
|
+
http_code: response.code.to_i,
|
123
|
+
http_server: response.header[ 'server' ],
|
124
|
+
http_etag: nil,
|
125
|
+
http_last_modified: nil,
|
126
|
+
body: nil,
|
127
|
+
md5: nil,
|
128
|
+
fetched: feed_fetched
|
129
|
+
}
|
130
|
+
feed_rec.update_attributes!( feed_attribs )
|
131
|
+
|
132
|
+
## add log error activity -- in future add to error log - better - why? why not?
|
133
|
+
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
|
134
|
+
|
135
|
+
return nil # sorry; no feed for parsing available
|
136
|
+
end
|
137
|
+
|
138
|
+
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
139
|
+
|
140
|
+
feed_xml = response.body
|
141
|
+
###
|
142
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
143
|
+
# will mostly be ASCII
|
144
|
+
# - try to change encoding to UTF-8 ourselves
|
145
|
+
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
146
|
+
|
147
|
+
#####
|
148
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
149
|
+
|
150
|
+
## NB:
|
151
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
152
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
153
|
+
feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
154
|
+
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
155
|
+
|
156
|
+
## check for md5 hash for response.body
|
157
|
+
|
158
|
+
last_feed_md5 = feed_rec.md5
|
159
|
+
feed_md5 = Digest::MD5.hexdigest( feed_xml )
|
160
|
+
|
161
|
+
if last_feed_md5 && last_feed_md5 == feed_md5
|
162
|
+
# not all servers handle conditional gets, so while not much can be
|
163
|
+
# done about the bandwidth, but if the response body is identical
|
164
|
+
# the downstream processing (parsing, caching, ...) can be avoided.
|
165
|
+
# - thanks to planet mars -fido.rb for the idea, cheers.
|
166
|
+
|
167
|
+
puts "no change; md5 digests match; skipping parsing feed"
|
168
|
+
return nil # no updates available; nothing to do
|
169
|
+
end
|
170
|
+
|
171
|
+
feed_attribs = {
|
172
|
+
http_code: response.code.to_i,
|
173
|
+
http_server: response.header[ 'server' ],
|
174
|
+
http_etag: response.header[ 'etag' ],
|
175
|
+
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
176
|
+
body: feed_xml,
|
177
|
+
md5: feed_md5,
|
178
|
+
fetched: feed_fetched
|
179
|
+
}
|
180
|
+
|
181
|
+
## if debug?
|
182
|
+
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
183
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
184
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
185
|
+
## end
|
186
|
+
|
187
|
+
feed_rec.update_attributes!( feed_attribs )
|
188
|
+
|
189
|
+
logger.debug "feed_xml:"
|
190
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
191
|
+
|
192
|
+
puts "Before parsing feed >#{feed_key}<..."
|
193
|
+
|
194
|
+
### move to feedutils
|
195
|
+
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
196
|
+
|
197
|
+
## fix/todo: check for feed.nil? -> error parsing!!!
|
198
|
+
# or throw exception
|
199
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
200
|
+
feed
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
# Fetches the (planet) configuration of a site with a conditional HTTP GET
# and records the outcome on the site record.
#
# The worker's cache entry for the site url is seeded with the etag /
# last-modified values stored on the record before the request is made.
#
# site_rec - site record; read via url, key, http_etag, http_last_modified
#            and written via update_attributes!
#
# Returns the parsed config (result of INI.load on the response body), or
# nil when there is nothing to process:
#   - the request raised SocketError (e.g. unknown host name),
#   - the server answered 304 (not modified), or
#   - the server answered with any status other than 200.
def site_by_rec_if_modified( site_rec ) # try smart http update; will update db records
  site_url = site_rec.url
  site_key = site_rec.key

  ### todo/fix: normalize/unify site_url
  ##  - same in fetcher - use shared utility method or similar

  @worker.use_cache = true
  @worker.cache[ site_url ] = {
    'etag'          => site_rec.http_etag,
    'last-modified' => site_rec.http_last_modified
  }

  begin
    response = @worker.get( site_url )
  rescue SocketError => e
    ## same handling as for feeds: an unknown domain name must not abort
    ##  the update run for all remaining sites
    puts "*** error: fetching site '#{site_key}' - #{e.to_s}"
    Activity.create!( text: "*** error: fetching site '#{site_key}' - #{e.to_s}" )

    ### todo/fix: update site rec in db
    return nil
  ensure
    # runs on success and on error alike
    @worker.use_cache = false   # fix/todo: restore old use_cache setting instead of false
  end

  if response.code == '304'  # not modified (conditional GET - e.g. using etag/last-modified)
    puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
    puts "no change; request returns not modified (304); skipping parsing site config"
    return nil   # no updates available; nothing to do
  end

  site_fetched = Time.now

  if response.code != '200'   # note Net::HTTP response.code is a string in ruby

    puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

    site_attribs = {
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      body:               nil,
      md5:                nil,
      fetched:            site_fetched   # was feed_fetched (undefined here) -> NameError
    }
    site_rec.update_attributes!( site_attribs )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )

    return nil   # sorry; no site config for parsing available
  end

  puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

  site_text = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII
  #   - try to change encoding to UTF-8 ourselves
  logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"

  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here

  ## NB:
  # for now "hardcoded" to utf8 - what else can we do?
  # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
  site_text = site_text.force_encoding( Encoding::UTF_8 )
  logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"

  site_attribs = {
    http_code:          response.code.to_i,
    http_server:        response.header[ 'server' ],
    http_etag:          response.header[ 'etag' ],
    http_last_modified: response.header[ 'last-modified' ],  ## note: last_modified header gets stored as plain text (not datetime)
    fetched:            site_fetched
  }

  ## if debug?
  puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
  puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
  puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
  ## end

  site_rec.update_attributes!( site_attribs )

  ## logger.debug "site_text:"
  ## logger.debug site_text[ 0..300 ] # get first 300 chars

  puts "Before parsing site config >#{site_key}<..."

  # assume ini format for now
  site_config = INI.load( site_text )
  site_config
end
|
294
|
+
|
295
|
+
end # class Fetcher
|
296
|
+
|
297
|
+
end # module Pluto
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Refresher
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
include Models
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@worker = Fetcher.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def debug=(value) @debug = value; end
|
17
|
+
def debug?() @debug || false; end
|
18
|
+
|
19
|
+
|
20
|
+
def update_sites( opts={} ) # update all site configs
|
21
|
+
if debug?
|
22
|
+
## turn on logging for sql too
|
23
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
24
|
+
@worker.debug = true # also pass along worker debug flag if set
|
25
|
+
end
|
26
|
+
|
27
|
+
# -- log update activity
|
28
|
+
Activity.create!( text: "update sites (#{Site.count})" )
|
29
|
+
|
30
|
+
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
31
|
+
|
32
|
+
Site.order(:id).each do |site|
|
33
|
+
update_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
|
38
|
+
def update_feeds( opts={} ) # update all feeds
|
39
|
+
if debug?
|
40
|
+
## turn on logging for sql too
|
41
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
42
|
+
@worker.debug = true # also pass along worker debug flag if set
|
43
|
+
end
|
44
|
+
|
45
|
+
# -- log update activity
|
46
|
+
Activity.create!( text: "update feeds (#{Feed.count})" )
|
47
|
+
|
48
|
+
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
49
|
+
|
50
|
+
Feed.order(:id).each do |feed|
|
51
|
+
update_feed_worker( feed )
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
def update_feeds_for( site_key, opts={} )
|
57
|
+
if debug?
|
58
|
+
## turn on logging for sql too
|
59
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
60
|
+
@worker.debug = true # also pass along worker debug flag if set
|
61
|
+
end
|
62
|
+
|
63
|
+
# -- log update activity
|
64
|
+
Activity.create!( text: "update feeds >#{site_key}<" )
|
65
|
+
|
66
|
+
site = Site.find_by_key!( site_key )
|
67
|
+
|
68
|
+
site.feeds.each do |feed|
|
69
|
+
update_feed_worker( feed )
|
70
|
+
end
|
71
|
+
|
72
|
+
end # method update_feeds
|
73
|
+
|
74
|
+
|
75
|
+
private
|
76
|
+
def update_site_worker( site_rec )
|
77
|
+
site_config = @worker.site_by_rec_if_modified( site_rec )
|
78
|
+
|
79
|
+
# on error or if http-not modified etc. skip update/processing
|
80
|
+
return if site_config.nil?
|
81
|
+
|
82
|
+
subscriber = Subscriber.new
|
83
|
+
subscriber.debug = debug? ? true : false # pass along debug flag
|
84
|
+
|
85
|
+
site_key = site_rec.key
|
86
|
+
subscriber.update_subscriptions_for( site_key, site_config )
|
87
|
+
end
|
88
|
+
|
89
|
+
|
90
|
+
def update_feed_worker( feed_rec )
|
91
|
+
feed = @worker.feed_by_rec_if_modified( feed_rec )
|
92
|
+
|
93
|
+
# on error or if http-not modified etc. skip update/processing
|
94
|
+
return if feed.nil?
|
95
|
+
|
96
|
+
## fix/todo: reload feed_red - fetched date updated etc.
|
97
|
+
## check if needed for access to fetched date
|
98
|
+
|
99
|
+
|
100
|
+
## todo/check: move feed_rec update to the end (after item updates??)
|
101
|
+
|
102
|
+
# update feed attribs e.g.
|
103
|
+
# generator
|
104
|
+
# published_at,built_at,touched_at,fetched_at
|
105
|
+
# summary,title2
|
106
|
+
|
107
|
+
## fix:
|
108
|
+
## weird rss exception error on windows w/ dates
|
109
|
+
# e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
|
110
|
+
#
|
111
|
+
# move to_datetime to feedutils!! if it works
|
112
|
+
## todo: move this comments to feedutils??
|
113
|
+
|
114
|
+
|
115
|
+
feed_rec.debug = debug? ? true : false # pass along debug flag
|
116
|
+
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
117
|
+
feed_rec.save_from_struct!( feed ) # todo: find a better name - why? why not??
|
118
|
+
|
119
|
+
|
120
|
+
# update cached value last published for item
|
121
|
+
last_item_rec = feed_rec.items.latest.limit(1).first # note limit(1) will return relation/arrar - use first to get first element or nil from ary
|
122
|
+
if last_item_rec.present?
|
123
|
+
if last_item_rec.published?
|
124
|
+
feed_rec.update_attributes!( last_published: last_item_rec.published )
|
125
|
+
else # try touched
|
126
|
+
feed_rec.update_attributes!( last_published: last_item_rec.touched )
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end # method update_feed_worker
|
130
|
+
|
131
|
+
end # class Refresher
|
132
|
+
|
133
|
+
end # module Pluto
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Subscriber
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
include Models
|
11
|
+
|
12
|
+
def debug=(value) @debug = value; end
|
13
|
+
def debug?() @debug || false; end
|
14
|
+
|
15
|
+
|
16
|
+
def update_subscriptions( config, opts={} )
|
17
|
+
# !!!! -- deprecated API - remove - do NOT use anymore
|
18
|
+
puts "*** warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key )"
|
19
|
+
update_subscriptions_for( 'planet', config, opts ) # default to planet site_key
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
# Creates or updates the site record for +site_key+ and rebuilds its feed
# subscriptions from a parsed planet configuration.
#
# site_key - key of the site record to create or update
# config   - hash-like planet config; top-level string keys (title/name,
#            source/url, ...) describe the site, every other key whose
#            value is a Hash is treated as a feed section
# opts     - currently unused
#
# All existing subscriptions of the site get destroyed first and are then
# re-created, one per feed section. Top-level entries that are not known
# site settings and whose value is not a Hash (e.g. a stray `author = x`
# line) are skipped; they used to end up as bogus feed records.
#
# Returns the result of config.each.
def update_subscriptions_for( site_key, config, opts={} )
  site_attribs = {
    title: config['title']  || config['name'],   # support either title or name
    url:   config['source'] || config['url']     # support source or url for source url for auto-update (optional)
  }

  logger.debug "site_attribs: #{site_attribs.inspect}"

  site_rec = Site.find_by_key( site_key )
  if site_rec.nil?
    site_rec             = Site.new
    site_attribs[ :key ] = site_key

    ## use object_id: site.id and object_type: Site
    ## change - model/table/schema!!!
    Activity.create!( text: "new site >#{site_key}< - #{site_attribs[ :title ]}" )
  end
  site_rec.update_attributes!( site_attribs )

  # -- log update activity
  Activity.create!( text: "update subscriptions >#{site_key}<" )

  # clean out subscriptions and add again
  logger.debug "before site.subscriptions.destroy_all - count: #{site_rec.subscriptions.count}"
  site_rec.subscriptions.destroy_all   # note: use destroy_all NOT delete_all (delete_all tries by default only nullify)
  logger.debug "after site.subscriptions.destroy_all - count: #{site_rec.subscriptions.count}"

  config.each do |key, value|

    ## todo: downcase key - why ??? why not???

    # skip "top-level" feed keys e.g. title, etc. or planet planet sections (e.g. planet,defaults)
    next if ['title','title2','name',
             'source', 'url',
             'include','includes','exclude','excludes',
             'feeds',
             'planet','defaults'].include?( key )

    # a feed section must be a hash of props; anything else (e.g. an unknown
    #  top-level setting holding a plain string) is not a feed
    unless value.is_a?( Hash )
      logger.debug "skipping >#{key}< - no feed section (value is #{value.class.name})"
      next
    end

    ### todo/check:
    ##   check if url or feed_url present
    #    that is, check for required props/key-value pairs

    feed_key  = key.to_s.dup
    feed_hash = value

    # todo/fix: use title from feed?
    #  e.g. fill up auto_title, auto_url, etc.

    feed_attribs = {
      feed_url: feed_hash[ 'feed' ]     || feed_hash[ 'feed_url' ],
      url:      feed_hash[ 'link' ]     || feed_hash[ 'url' ],
      title:    feed_hash[ 'title' ]    || feed_hash[ 'name' ],
      title2:   feed_hash[ 'title2' ],
      includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
      excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
    }

    puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."

    feed_rec = Feed.find_by_key( feed_key )
    if feed_rec.nil?
      feed_rec             = Feed.new
      feed_attribs[ :key ] = feed_key

      ## use object_id: feed.id and object_type: Feed
      ## change - model/table/schema!!!
      ## todo: add parent_action_id - why? why not?
      Activity.create!( text: "new feed >#{feed_key}< - #{feed_attribs[ :title ]}" )
    end

    feed_rec.update_attributes!( feed_attribs )

    # add subscription record
    #  note: subscriptions get cleaned out on update first (see above)
    site_rec.subscriptions.create!( feed_id: feed_rec.id )
  end

end # method update_subscriptions_for
|
102
|
+
|
103
|
+
end # class Subscriber
|
104
|
+
|
105
|
+
end # module Pluto
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Updater
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
### fix!!!!!: change config to text - yes/no - why? why not??
|
11
|
+
# or pass along struct
|
12
|
+
# - with hash and text and format(e.g. ini/yml) as fields???
|
13
|
+
#
|
14
|
+
# - why? - we need to get handle on md5 digest/hash plus on plain text, ideally to store in db
|
15
|
+
## - pass along unparsed text!! - not hash struct
|
16
|
+
# - will get saved in db plus we need to generate md5 hash
|
17
|
+
# - add filename e.g. ruby.ini|ruby.conf|ruby.yml as opt ??
|
18
|
+
# or add config format as opt e.g. ini or yml?
|
19
|
+
|
20
|
+
def initialize( opts, config )
|
21
|
+
@opts = opts
|
22
|
+
@config = config
|
23
|
+
end
|
24
|
+
|
25
|
+
attr_reader :opts, :config
|
26
|
+
|
27
|
+
# Derives the site key from a config (file) name and runs the update.
#
# arg - config name, with or without a trailing .ini / .yml extension
#       (e.g. 'ruby', 'ruby.ini', 'Ruby.YML')
#
# The name gets downcased and only a trailing extension is stripped;
# an '.ini' / '.yml' sequence inside the name (e.g. 'mini.initial.yml')
# is left alone.
#
# Returns whatever update_for returns.
def run( arg )
  site_key = arg.downcase.sub( /\.(?:ini|yml)\z/, '' )   # remove file extension if present

  update_for( site_key )
end
|
32
|
+
|
33
|
+
def update_for( site_key )
|
34
|
+
###################
|
35
|
+
# step 1) update subscriptions
|
36
|
+
subscriber = Subscriber.new
|
37
|
+
|
38
|
+
# pass along debug/verbose setting/switch
|
39
|
+
subscriber.debug = true if opts.verbose?
|
40
|
+
subscriber.update_subscriptions_for( site_key, config )
|
41
|
+
|
42
|
+
##############################
|
43
|
+
# step 2) update feeds
|
44
|
+
refresher = Refresher.new
|
45
|
+
|
46
|
+
# pass along debug/verbose setting/switch
|
47
|
+
refresher.debug = true if opts.verbose?
|
48
|
+
refresher.update_feeds_for( site_key )
|
49
|
+
end # method run
|
50
|
+
|
51
|
+
end # class Updater
|
52
|
+
|
53
|
+
end # module Pluto
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module PlutoUpdate

  # version number parts; VERSION is the dotted string built from them
  MAJOR   = 1
  MINOR   = 1
  PATCH   = 0
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  class << self

    # Returns the version string (VERSION).
    def version
      VERSION
    end

    # Returns a one-line banner with the gem version plus the version,
    # release date and platform of the running ruby.
    def banner
      ### todo: add RUBY_PATCHLEVEL or RUBY_PATCH_LEVEL e.g. -p124
      ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
      "pluto-update/#{VERSION} on #{ruby_info}"
    end

    # Returns the expanded path of the directory four levels above
    # this file.
    def root
      dir = __FILE__
      4.times { dir = File.dirname( dir ) }
      File.expand_path( dir )
    end

  end # class << self

end # module PlutoUpdate
|
25
|
+
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pluto-update
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pluto-models
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.2.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: fetcher
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.4
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.4
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rdoc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: hoe
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.13'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.13'
|
69
|
+
description: pluto-update - planet feed 'n' subscription updater
|
70
|
+
email: feedreader@googlegroups.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files:
|
74
|
+
- HISTORY.md
|
75
|
+
- Manifest.txt
|
76
|
+
- README.md
|
77
|
+
files:
|
78
|
+
- HISTORY.md
|
79
|
+
- Manifest.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- lib/pluto/update.rb
|
83
|
+
- lib/pluto/update/fetcher.rb
|
84
|
+
- lib/pluto/update/refresher.rb
|
85
|
+
- lib/pluto/update/subscriber.rb
|
86
|
+
- lib/pluto/update/updater.rb
|
87
|
+
- lib/pluto/update/version.rb
|
88
|
+
homepage: https://github.com/feedreader/pluto-update
|
89
|
+
licenses:
|
90
|
+
- Public Domain
|
91
|
+
metadata: {}
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options:
|
94
|
+
- "--main"
|
95
|
+
- README.md
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 1.9.2
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 2.4.2
|
111
|
+
signing_key:
|
112
|
+
specification_version: 4
|
113
|
+
summary: pluto-update - planet feed 'n' subscription updater
|
114
|
+
test_files: []
|