pluto-update 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.md +4 -0
- data/Manifest.txt +10 -0
- data/README.md +55 -0
- data/Rakefile +31 -0
- data/lib/pluto/update.rb +40 -0
- data/lib/pluto/update/fetcher.rb +297 -0
- data/lib/pluto/update/refresher.rb +133 -0
- data/lib/pluto/update/subscriber.rb +105 -0
- data/lib/pluto/update/updater.rb +53 -0
- data/lib/pluto/update/version.rb +25 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 68de3691ae194862a74e9478d542fa5c606aa97e
|
4
|
+
data.tar.gz: 480c6f296bfa83495481d3f3f10c6c8e66b4c23b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 42b5f6a7e8b03a82325ab4f5dedbabcdae1ccfe9e103c8d6b23d84efa948cdfd9e37b86ef67d5f05289e0002d1d7962eafd79a2bd2fa292df48786430ddfc189
|
7
|
+
data.tar.gz: 22abcd241d7ae8a2e1565ef9a42e441c88eb8b4dfa0038f4e9f69d6d4ea8842dc104eaf697d5ef8fa561d87651a146aea1129bda7a56e38562bf75bcc6bdb13d
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# pluto-update gem - planet feed 'n' subscription updater
|
2
|
+
|
3
|
+
* home :: [github.com/feedreader/pluto-update](https://github.com/feedreader/pluto-update)
|
4
|
+
* bugs :: [github.com/feedreader/pluto-update/issues](https://github.com/feedreader/pluto-update/issues)
|
5
|
+
* gem :: [rubygems.org/gems/pluto-update](https://rubygems.org/gems/pluto-update)
|
6
|
+
* rdoc :: [rubydoc.info/gems/pluto-update](http://rubydoc.info/gems/pluto-update)
|
7
|
+
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
### Planet Configuration Sample
|
14
|
+
|
15
|
+
`ruby.ini`:
|
16
|
+
|
17
|
+
```
|
18
|
+
title = Planet Ruby
|
19
|
+
|
20
|
+
[rubyflow]
|
21
|
+
title = Ruby Flow
|
22
|
+
link = http://rubyflow.com
|
23
|
+
feed = http://feeds.feedburner.com/Rubyflow?format=xml
|
24
|
+
|
25
|
+
[rubyonrails]
|
26
|
+
title = Ruby on Rails Blog
|
27
|
+
link = http://weblog.rubyonrails.org
|
28
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
29
|
+
|
30
|
+
[viennarb]
|
31
|
+
title = vienna.rb Blog
|
32
|
+
link = http://vienna-rb.at
|
33
|
+
feed = http://vienna-rb.at/atom.xml
|
34
|
+
```
|
35
|
+
|
36
|
+
For more samples, see [`nytimes.ini`](https://github.com/feedreader/pluto.samples/blob/master/nytimes.ini),
|
37
|
+
[`js.ini`](https://github.com/feedreader/pluto.samples/blob/master/js.ini),
|
38
|
+
[`dart.ini`](https://github.com/feedreader/pluto.samples/blob/master/dart.ini),
|
39
|
+
[`haskell.ini`](https://github.com/feedreader/pluto.samples/blob/master/haskell.ini),
|
40
|
+
[`viennarb.ini`](https://github.com/feedreader/pluto.samples/blob/master/viennarb.ini),
|
41
|
+
[`beer.ini`](https://github.com/feedreader/pluto.samples/blob/master/beer.ini),
|
42
|
+
[`football.ini`](https://github.com/feedreader/pluto.samples/blob/master/football.ini).
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
## License
|
48
|
+
|
49
|
+
The `pluto-update` scripts are dedicated to the public domain.
|
50
|
+
Use them as you please with no restrictions whatsoever.
|
51
|
+
|
52
|
+
## Questions? Comments?
|
53
|
+
|
54
|
+
Send them along to the [Planet Pluto and Friends Forum/Mailing List](http://groups.google.com/group/feedreader).
|
55
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/pluto/update/version.rb'
|
3
|
+
|
4
|
+
# Gem specification for pluto-update (declared via hoe).
Hoe.spec 'pluto-update' do

  # the version number is maintained in lib/pluto/update/version.rb
  self.version     = PlutoUpdate::VERSION

  self.summary     = "pluto-update - planet feed 'n' subscription updater"
  self.description = summary

  self.urls        = ['https://github.com/feedreader/pluto-update']

  self.author      = 'Gerald Bauer'
  self.email       = 'feedreader@googlegroups.com'

  # use the markdown (.md) readme and history files (for github formatting)
  self.readme_file  = 'README.md'
  self.history_file = 'HISTORY.md'

  # runtime dependencies
  self.extra_deps = [
    ['pluto-models', '>= 1.2.2'],
    ['fetcher',      '>= 0.4.4'],
  ]

  self.licenses = ['Public Domain']

  self.spec_extras = {
    :required_ruby_version => '>= 1.9.2'
  }

end
|
data/lib/pluto/update.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
require 'pluto/models'
|
5
|
+
|
6
|
+
|
7
|
+
# more 3rd party gems
|
8
|
+
require 'fetcher'
|
9
|
+
|
10
|
+
|
11
|
+
# our own code
|
12
|
+
require 'pluto/update/version' # Note: let version always go first
|
13
|
+
require 'pluto/update/fetcher'
|
14
|
+
require 'pluto/update/refresher'
|
15
|
+
require 'pluto/update/subscriber'
|
16
|
+
require 'pluto/update/updater'
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
# Convenience entry points; every call delegates to a fresh
# Subscriber or Refresher instance.
module Pluto

  class << self

    # todo: add alias update_site( config ) ??

    # Updates the subscriptions described by +config+
    # (delegates to Subscriber#update_subscriptions).
    def update_subscriptions( config )
      subscriber = Subscriber.new
      subscriber.update_subscriptions( config )
    end

    # Updates all feeds (delegates to Refresher#update_feeds).
    def update_feeds
      refresher = Refresher.new
      refresher.update_feeds
    end

    # Updates all site configs (delegates to Refresher#update_sites).
    def update_sites
      refresher = Refresher.new
      refresher.update_sites
    end

  end # class << self

end # module Pluto
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
# say hello
|
40
|
+
puts PlutoUpdate.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
|
@@ -0,0 +1,297 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Fetcher
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
include Models # for easy convenience access for Activity etc.
|
11
|
+
|
12
|
+
# Creates the http worker (::Fetcher::Worker) used for all requests
# made by this class.
def initialize
  @worker = ::Fetcher::Worker.new
end
|
15
|
+
|
16
|
+
# Switches debug mode on or off.
def debug=(value)
  @debug = value
end

# Returns the debug flag as set; false if it was never set (or set to nil/false).
def debug?
  @debug || false
end
|
18
|
+
|
19
|
+
|
20
|
+
# Fetches +url+ with the http worker and returns the response body
# marked as UTF-8.
#
# Note: the http status line and the server/etag/last-modified headers
# get printed to stdout on every call (the debug? guard is commented out).
def fetch_feed( url )
  response = @worker.get( url )

  ## if debug?
  puts "http status #{response.code} #{response.message}"

  ['server', 'etag', 'last-modified'].each do |name|
    value = response.header[ name ]
    puts "http header - #{name}: #{value} - #{value.class.name}"
  end
  ## end

  body = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes here)
  #   - try to change encoding to UTF-8 ourselves
  logger.debug "xml.encoding.name (before): #{body.encoding.name}"

  ## NB:
  #  for now "hardcoded" to utf8 - what else can we do?
  #  - note: force_encoding will NOT change the chars;
  #      it only changes the assumed encoding w/o translation
  body = body.force_encoding( Encoding::UTF_8 )
  logger.debug "xml.encoding.name (after): #{body.encoding.name}"

  body
end
|
50
|
+
|
51
|
+
|
52
|
+
# Simple feed fetcher; use for debugging (only/mostly)
#  -- will NOT change db records in any way.
#
# Fetches the feed for +feed_rec+ (reads feed_url and key) and returns the
# result of FeedUtils::Parser.parse. In debug mode a copy of the fetched
# xml gets saved to ./<key>.xml too.
def feed_by_rec( feed_rec )
  feed_url = feed_rec.feed_url
  feed_key = feed_rec.key

  feed_xml = fetch_feed( feed_url )

  logger.debug "feed_xml:"
  logger.debug feed_xml[ 0..500 ]   # start of the feed only (chars 0..500)

  # if opts.verbose?  # also write a copy to disk
  if debug?
    path = "./#{feed_key}.xml"
    logger.debug "saving feed to >#{path}<..."
    File.open( path, 'w' ) { |f| f.write( feed_xml ) }
  end

  puts "Before parsing feed >#{feed_key}<..."

  ## fix/todo: check for feed.nil? -> error parsing!!!
  #    or throw exception
  FeedUtils::Parser.parse( feed_xml )
end
|
79
|
+
|
80
|
+
|
81
|
+
# Tries a smart http update for the feed of +feed_rec+; will update db records.
#
# Reads feed_url, key, http_etag, http_last_modified and md5 from +feed_rec+
# and stores the outcome of the request with update_attributes!.
#
# Returns the feed as parsed by FeedUtils::Parser.parse, or nil if there is
# nothing to process, that is, if
#  - the request raises a SocketError (e.g. unknown domain name)
#  - the response status is 304 (not modified) or anything else but 200
#  - the md5 digest of the body matches the digest stored in +feed_rec+
def feed_by_rec_if_modified( feed_rec )
  feed_url = feed_rec.feed_url
  feed_key = feed_rec.key

  ### todo/fix: normalize/unify feed_url
  ##   - same in fetcher - use shared utility method or similar

  # hand the etag/last-modified values of the last fetch to the worker cache
  #   (presumably what makes the request a conditional GET - see the 304 check below)
  @worker.use_cache = true
  @worker.cache[ feed_url ] = {
    'etag'          => feed_rec.http_etag,
    'last-modified' => feed_rec.http_last_modified
  }

  begin
    response = @worker.get( feed_url )
  rescue SocketError => e
    ## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
    ###  will result in SocketError -- getaddrinfo: Name or service not known
    error_msg = "*** error: fetching feed '#{feed_key}' - #{e.to_s}"
    puts error_msg
    Activity.create!( text: error_msg )

    ### todo/fix: update feed rec in db
    @worker.use_cache = false   # fix/todo: restore old use_cache setting instead of false
    return nil
  end

  @worker.use_cache = false   # fix/todo: restore old use_cache setting instead of false

  http_status = "HTTP status #{response.code} #{response.message}"

  # not modified (conditional GET - e.g. using etag/last-modified)
  if response.code == '304'
    puts "OK - fetching feed '#{feed_key}' - #{http_status}"
    puts "no change; request returns not modified (304); skipping parsing feed"
    return nil   # no updates available; nothing to do
  end

  feed_fetched = Time.now

  unless response.code == '200'   # note Net::HTTP response.code is a string in ruby
    error_msg = "*** error: fetching feed '#{feed_key}' - #{http_status}"
    puts error_msg

    # record the failed request; clears cached validators, body and digest
    feed_rec.update_attributes!(
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      body:               nil,
      md5:                nil,
      fetched:            feed_fetched
    )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: error_msg )

    return nil   # sorry; no feed for parsing available
  end

  puts "OK - fetching feed '#{feed_key}' - #{http_status}"

  feed_xml = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes here)
  #   - try to change encoding to UTF-8 ourselves
  logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"

  ## NB:
  #  for now "hardcoded" to utf8 - what else can we do?
  #  - note: force_encoding will NOT change the chars;
  #      it only changes the assumed encoding w/o translation
  feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
  logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"

  ## check for md5 hash for response.body
  last_feed_md5 = feed_rec.md5
  feed_md5      = Digest::MD5.hexdigest( feed_xml )

  if last_feed_md5 && last_feed_md5 == feed_md5
    # not all servers handle conditional gets; not much can be done about
    #  the bandwidth, but if the response body is identical
    #  the downstream processing (parsing, caching, ...) can be avoided.
    #  - thanks to planet mars -fido.rb for the idea, cheers.
    puts "no change; md5 digests match; skipping parsing feed"
    return nil   # no updates available; nothing to do
  end

  http_server        = response.header[ 'server' ]
  http_etag          = response.header[ 'etag' ]
  http_last_modified = response.header[ 'last-modified' ]

  ## if debug?
  puts "http header - server: #{http_server} - #{http_server.class.name}"
  puts "http header - etag: #{http_etag} - #{http_etag.class.name}"
  puts "http header - last-modified: #{http_last_modified} - #{http_last_modified.class.name}"
  ## end

  feed_rec.update_attributes!(
    http_code:          response.code.to_i,
    http_server:        http_server,
    http_etag:          http_etag,
    http_last_modified: http_last_modified,   ## note: last_modified header gets stored as plain text (not datetime)
    body:               feed_xml,
    md5:                feed_md5,
    fetched:            feed_fetched
  )

  logger.debug "feed_xml:"
  logger.debug feed_xml[ 0..300 ]   # start of the feed only (chars 0..300)

  puts "Before parsing feed >#{feed_key}<..."

  ### move to feedutils
  ### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"

  ## fix/todo: check for feed.nil? -> error parsing!!!
  #    or throw exception
  FeedUtils::Parser.parse( feed_xml )
end
|
202
|
+
|
203
|
+
|
204
|
+
# Tries a smart http update for the (source) config of +site_rec+;
# will update db records.
#
# Reads url, key, http_etag and http_last_modified from +site_rec+ and
# stores the outcome of the request with update_attributes!.
#
# Returns the site config as loaded by INI.load, or nil if there is nothing
# to process, that is, if the response status is 304 (not modified) or
# anything else but 200.
#
# Note: unlike feed_by_rec_if_modified, errors raised by the request
# (e.g. SocketError) are NOT rescued here.
def site_by_rec_if_modified( site_rec )
  site_url = site_rec.url
  site_key = site_rec.key

  ### todo/fix: normalize/unify site_url
  ##   - same in fetcher - use shared utility method or similar

  # hand the etag/last-modified values of the last fetch to the worker cache
  #   (presumably what makes the request a conditional GET - see the 304 check below)
  @worker.use_cache = true
  @worker.cache[ site_url ] = {
    'etag'          => site_rec.http_etag,
    'last-modified' => site_rec.http_last_modified
  }

  response = @worker.get( site_url )
  @worker.use_cache = false   # fix/todo: restore old use_cache setting instead of false

  if response.code == '304'   # not modified (conditional GET - e.g. using etag/last-modified)
    puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
    puts "no change; request returns not modified (304); skipping parsing site config"
    return nil   # no updates available; nothing to do
  end

  site_fetched = Time.now

  if response.code != '200'   # note Net::HTTP response.code is a string in ruby

    puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

    # record the failed request; clears cached validators, body and digest
    site_attribs = {
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      body:               nil,
      md5:                nil,
      fetched:            site_fetched   # note: must be site_fetched (feed_fetched is not defined in this method)
    }
    site_rec.update_attributes!( site_attribs )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )

    return nil   # sorry; no site config for parsing available
  end

  puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

  site_text = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes here)
  #   - try to change encoding to UTF-8 ourselves
  logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"

  ## NB:
  #  for now "hardcoded" to utf8 - what else can we do?
  #  - note: force_encoding will NOT change the chars;
  #      it only changes the assumed encoding w/o translation
  site_text = site_text.force_encoding( Encoding::UTF_8 )
  logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"

  site_attribs = {
    http_code:          response.code.to_i,
    http_server:        response.header[ 'server' ],
    http_etag:          response.header[ 'etag' ],
    http_last_modified: response.header[ 'last-modified' ],   ## note: last_modified header gets stored as plain text (not datetime)
    fetched:            site_fetched
  }

  ## if debug?
  puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
  puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
  puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
  ## end

  site_rec.update_attributes!( site_attribs )

  ## logger.debug "site_text:"
  ## logger.debug site_text[ 0..300 ] # get first 300 chars

  puts "Before parsing site config >#{site_key}<..."

  # assume ini format for now
  site_config = INI.load( site_text )
  site_config
end
|
294
|
+
|
295
|
+
end # class Fetcher
|
296
|
+
|
297
|
+
end # module Pluto
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Refresher
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
include Models
|
11
|
+
|
12
|
+
# Creates the fetcher (Fetcher) used to load site configs and feeds.
def initialize
  @worker = Fetcher.new
end
|
15
|
+
|
16
|
+
# Switches debug mode on or off.
def debug=(value)
  @debug = value
end

# Returns the debug flag as set; false if it was never set (or set to nil/false).
def debug?
  @debug || false
end
|
18
|
+
|
19
|
+
|
20
|
+
# Updates all site configs; sites without a (source) url get skipped.
# Note: +opts+ is accepted but not used.
def update_sites( opts={} )
  if debug?
    ## turn on logging for sql too
    ActiveRecord::Base.logger = Logger.new( STDOUT )
    @worker.debug = true   # also pass along worker debug flag if set
  end

  # -- log update activity
  Activity.create!( text: "update sites (#{Site.count})" )

  #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
  Site.order(:id).each do |site|
    next unless site.url.present?   # note: only update if (source) url present
    update_site_worker( site )
  end
end
|
36
|
+
|
37
|
+
|
38
|
+
# Updates all feeds (in id order).
# Note: +opts+ is accepted but not used.
def update_feeds( opts={} )
  if debug?
    ## turn on logging for sql too
    ActiveRecord::Base.logger = Logger.new( STDOUT )
    @worker.debug = true   # also pass along worker debug flag if set
  end

  # -- log update activity
  Activity.create!( text: "update feeds (#{Feed.count})" )

  #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
  Feed.order(:id).each { |feed| update_feed_worker( feed ) }
end
|
54
|
+
|
55
|
+
|
56
|
+
# Updates all feeds of the site with the key +site_key+.
# The site gets looked up with Site.find_by_key! (note the bang).
# Note: +opts+ is accepted but not used.
def update_feeds_for( site_key, opts={} )
  if debug?
    ## turn on logging for sql too
    ActiveRecord::Base.logger = Logger.new( STDOUT )
    @worker.debug = true   # also pass along worker debug flag if set
  end

  # -- log update activity
  Activity.create!( text: "update feeds >#{site_key}<" )

  site = Site.find_by_key!( site_key )
  site.feeds.each { |feed| update_feed_worker( feed ) }
end # method update_feeds_for
|
73
|
+
|
74
|
+
|
75
|
+
private
|
76
|
+
# Fetches the config for +site_rec+ and, if a (new) config is available,
# updates the subscriptions of the site from it.
def update_site_worker( site_rec )
  site_config = @worker.site_by_rec_if_modified( site_rec )

  # on error or if http-not modified etc. skip update/processing
  unless site_config.nil?
    subscriber = Subscriber.new
    subscriber.debug = !!debug?   # pass along debug flag (as true|false)

    subscriber.update_subscriptions_for( site_rec.key, site_config )
  end
end
|
88
|
+
|
89
|
+
|
90
|
+
# Fetches the feed for +feed_rec+ and, if a (new) feed is available,
# saves it (save_from_struct!) and refreshes the cached last_published
# value from the latest item.
def update_feed_worker( feed_rec )
  feed = @worker.feed_by_rec_if_modified( feed_rec )

  # on error or if http-not modified etc. skip update/processing
  return if feed.nil?

  ## fix/todo: reload feed_rec - fetched date updated etc.
  ##  check if needed for access to fetched date

  ## todo/check: move feed_rec update to the end (after item updates??)

  # update feed attribs e.g.
  #   generator
  #   published_at,built_at,touched_at,fetched_at
  #   summary,title2

  ## fix:
  ##  weird rss exception error on windows w/ dates
  #   e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
  #
  #  move to_datetime to feedutils!! if it works
  ##   todo: move these comments to feedutils??

  feed_rec.debug = !!debug?   # pass along debug flag (as true|false)
  ## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
  feed_rec.save_from_struct!( feed )   # todo: find a better name - why? why not??

  # update cached value last published for item
  #  note: limit(1) will return a relation/array - use first to get first element or nil
  last_item_rec = feed_rec.items.latest.limit(1).first
  return unless last_item_rec.present?

  # use published if present; otherwise try touched
  last_published = if last_item_rec.published?
                     last_item_rec.published
                   else
                     last_item_rec.touched
                   end

  feed_rec.update_attributes!( last_published: last_published )
end # method update_feed_worker
|
130
|
+
|
131
|
+
end # class Refresher
|
132
|
+
|
133
|
+
end # module Pluto
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Subscriber
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
include Models
|
11
|
+
|
12
|
+
# Switches debug mode on or off.
def debug=(value)
  @debug = value
end

# Returns the debug flag as set; false if it was never set (or set to nil/false).
def debug?
  @debug || false
end
|
14
|
+
|
15
|
+
|
16
|
+
# !!!! -- deprecated API - remove - do NOT use anymore
#
# Prints a warning and forwards to update_subscriptions_for
# using 'planet' as the site key.
def update_subscriptions( config, opts={} )
  puts "*** warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key )"

  default_site_key = 'planet'
  update_subscriptions_for( default_site_key, config, opts )
end
|
21
|
+
|
22
|
+
|
23
|
+
# Creates or updates the site with the key +site_key+ from +config+ and
# rebuilds its subscriptions: all existing subscriptions of the site get
# destroyed and one subscription gets added for every feed section in +config+.
#
# +config+ is read with string keys: title|name and source|url for the site;
# every other key (except for the reserved ones listed below) is expected
# to hold a feed section, that is, a hash with feed|feed_url, link|url,
# title|name, title2, includes|include, excludes|exclude.
# Entries that do not hold a hash (no key? method) get skipped with a warning.
#
# Note: +opts+ is accepted but not used.
def update_subscriptions_for( site_key, config, opts={} )
  site_attribs = {
    title: config['title']  || config['name'],   # support either title or name
    url:   config['source'] || config['url']     # support source or url for source url for auto-update (optional)
  }

  logger.debug "site_attribs: #{site_attribs.inspect}"

  site_rec = Site.find_by_key( site_key )
  if site_rec.nil?
    site_rec = Site.new
    site_attribs[ :key ] = site_key

    ## use object_id: site.id and object_type: Site
    ## change - model/table/schema!!!
    Activity.create!( text: "new site >#{site_key}< - #{site_attribs[ :title ]}" )
  end
  site_rec.update_attributes!( site_attribs )

  # -- log update activity
  Activity.create!( text: "update subscriptions >#{site_key}<" )

  # clean out subscriptions and add again
  logger.debug "before site.subscriptions.delete_all - count: #{site_rec.subscriptions.count}"
  site_rec.subscriptions.destroy_all   # note: use destroy_all NOT delete_all (delete_all tries by default only nullify)
  logger.debug "after site.subscriptions.delete_all - count: #{site_rec.subscriptions.count}"

  # "top-level" keys e.g. title, etc. or planet sections (e.g. planet,defaults);
  #   never treated as feeds
  reserved_keys = ['title','title2','name',
                   'source', 'url',
                   'include','includes','exclude','excludes',
                   'feeds',
                   'planet','defaults']

  config.each do |key, value|

    ## todo: downcase key - why ??? why not???

    next if reserved_keys.include?( key )

    feed_key  = key.to_s.dup
    feed_hash = value

    # value must be a hash (feed section); a stray top-level setting
    #  (e.g. a string or nil) would otherwise end up as a bogus feed record
    unless feed_hash.respond_to?( :key? )
      puts "*** warn - [Pluto::Subscriber] skipping >#{feed_key}< - feed section (hash) expected but got #{feed_hash.class.name}"
      next
    end

    ### todo/check:
    #   check if url or feed_url present
    #   that is, check for required props/key-value pairs

    # todo/fix: use title from feed?
    #  e.g. fill up auto_title, auto_url, etc.

    feed_attribs = {
      feed_url: feed_hash[ 'feed' ]     || feed_hash[ 'feed_url' ],
      url:      feed_hash[ 'link' ]     || feed_hash[ 'url' ],
      title:    feed_hash[ 'title' ]    || feed_hash[ 'name' ],
      title2:   feed_hash[ 'title2' ],
      includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
      excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
    }

    puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."

    feed_rec = Feed.find_by_key( feed_key )
    if feed_rec.nil?
      feed_rec = Feed.new
      feed_attribs[ :key ] = feed_key

      ## use object_id: feed.id and object_type: Feed
      ## change - model/table/schema!!!
      ## todo: add parent_action_id - why? why not?
      Activity.create!( text: "new feed >#{feed_key}< - #{feed_attribs[ :title ]}" )
    end

    feed_rec.update_attributes!( feed_attribs )

    # add subscription record
    #  note: subscriptions get cleaned out on update first (see above)
    site_rec.subscriptions.create!( feed_id: feed_rec.id )
  end

end # method update_subscriptions_for
|
102
|
+
|
103
|
+
end # class Subscriber
|
104
|
+
|
105
|
+
end # module Pluto
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class Updater
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
### fix!!!!!: change config to text - yes/no - why? why not??
|
11
|
+
# or pass along struct
|
12
|
+
# - with hash and text and format(e.g. ini/yml) as fields???
|
13
|
+
#
|
14
|
+
# - why? - we need to get handle on md5 digest/hash plus on plain text, ideally to store in db
|
15
|
+
## - pass along unparsed text!! - not hash struct
|
16
|
+
# - will get saved in db plus we need to generate md5 hash
|
17
|
+
# - add filename e.g. ruby.ini|ruby.conf|ruby.yml as opt ??
|
18
|
+
# or add config format as opt e.g. ini or yml?
|
19
|
+
|
20
|
+
# Stores the options (+opts+ gets asked for verbose? in update_for)
# and the site +config+ (gets passed along to the subscriber).
def initialize( opts, config )
  @opts, @config = opts, config
end
|
24
|
+
|
25
|
+
attr_reader :opts, :config
|
26
|
+
|
27
|
+
# Runs the update for the site named by +arg+.
#
# +arg+ is a site key or a config file name (e.g. ruby or ruby.ini);
# it gets downcased and a trailing .ini or .yml file extension gets removed
# to get the site key. Only an extension at the very end is stripped -
# a name such as news.india.ini results in news.india.
#
# Returns whatever update_for returns.
def run( arg )
  site_key = arg.downcase.sub( /\.(?:ini|yml)\z/, '' )   # remove file extension if present

  update_for( site_key )
end
|
32
|
+
|
33
|
+
# Runs the two update steps for the site with the key +site_key+:
# 1) update the subscriptions from the config, 2) update the feeds of the site.
# Debug mode gets switched on for both steps if opts.verbose? is set.
def update_for( site_key )
  ###################
  # step 1) update subscriptions
  subscriber = Subscriber.new
  subscriber.debug = true   if opts.verbose?   # pass along debug/verbose setting/switch
  subscriber.update_subscriptions_for( site_key, config )

  ##############################
  # step 2) update feeds
  refresher = Refresher.new
  refresher.debug = true   if opts.verbose?    # pass along debug/verbose setting/switch
  refresher.update_feeds_for( site_key )
end # method update_for
|
50
|
+
|
51
|
+
end # class Updater
|
52
|
+
|
53
|
+
end # module Pluto
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
# Version info and helpers for the pluto-update gem.
module PlutoUpdate

  MAJOR   = 1
  MINOR   = 1
  PATCH   = 0
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  class << self

    # Returns the version string (MAJOR.MINOR.PATCH).
    def version
      VERSION
    end

    # Returns a one-line banner with the gem version and
    # the ruby version, release date and platform.
    def banner
      ### todo: add RUBY_PATCHLEVEL or RUBY_PATCH_LEVEL  e.g. -p124
      ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
      "pluto-update/#{VERSION} on #{ruby_info}"
    end

    # Returns the absolute path of the directory four levels up from this file.
    def root
      four_up = 4.times.inject( __FILE__ ) { |path, _| File.dirname( path ) }
      File.expand_path( four_up )
    end

  end # class << self

end # module PlutoUpdate
|
25
|
+
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pluto-update
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pluto-models
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.2.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: fetcher
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.4
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.4
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rdoc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: hoe
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.13'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.13'
|
69
|
+
description: pluto-update - planet feed 'n' subscription updater
|
70
|
+
email: feedreader@googlegroups.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files:
|
74
|
+
- HISTORY.md
|
75
|
+
- Manifest.txt
|
76
|
+
- README.md
|
77
|
+
files:
|
78
|
+
- HISTORY.md
|
79
|
+
- Manifest.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- lib/pluto/update.rb
|
83
|
+
- lib/pluto/update/fetcher.rb
|
84
|
+
- lib/pluto/update/refresher.rb
|
85
|
+
- lib/pluto/update/subscriber.rb
|
86
|
+
- lib/pluto/update/updater.rb
|
87
|
+
- lib/pluto/update/version.rb
|
88
|
+
homepage: https://github.com/feedreader/pluto-update
|
89
|
+
licenses:
|
90
|
+
- Public Domain
|
91
|
+
metadata: {}
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options:
|
94
|
+
- "--main"
|
95
|
+
- README.md
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 1.9.2
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 2.4.2
|
111
|
+
signing_key:
|
112
|
+
specification_version: 4
|
113
|
+
summary: pluto-update - planet feed 'n' subscription updater
|
114
|
+
test_files: []
|