pluto-update 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gemtest +0 -0
- data/Manifest.txt +8 -4
- data/README.md +4 -4
- data/Rakefile +4 -3
- data/lib/pluto/update.rb +14 -9
- data/lib/pluto/update/{refresher.rb → feed_refresher.rb} +25 -51
- data/lib/pluto/update/site_fetcher.rb +134 -0
- data/lib/pluto/update/site_refresher.rb +67 -0
- data/lib/pluto/update/{subscriber.rb → site_updater.rb} +3 -4
- data/lib/pluto/update/version.rb +1 -1
- data/test/data/ruby.ini +18 -0
- data/test/helper.rb +25 -0
- data/test/test_refresh.rb +38 -0
- data/test/test_site.rb +47 -0
- metadata +28 -7
- data/lib/pluto/update/fetcher.rb +0 -357
- data/lib/pluto/update/updater.rb +0 -62
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49adc91fcbc3b7df6d0ee5aed048fa0df4abb186
|
4
|
+
data.tar.gz: 16b22d7a9993c287f592176acd9991ac5d42e3c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0db8f9b7dcb72009df75b4cc001fe522e579f4b55bc8292b7ab5d19f4166aead75084b0da9356444dddd00b999a372e9972abaedf9230497706db138d766f80
|
7
|
+
data.tar.gz: 0fc2ba83226d96ca4b9a71829079c7232322997963230a99cda0948ae4591dc4071725ddc287a2485dcdb083ec8fc1015c0399a50238ff78901f0dd39d908d00
|
data/.gemtest
ADDED
File without changes
|
data/Manifest.txt
CHANGED
@@ -3,8 +3,12 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/pluto/update.rb
|
6
|
-
lib/pluto/update/
|
7
|
-
lib/pluto/update/
|
8
|
-
lib/pluto/update/
|
9
|
-
lib/pluto/update/
|
6
|
+
lib/pluto/update/feed_refresher.rb
|
7
|
+
lib/pluto/update/site_fetcher.rb
|
8
|
+
lib/pluto/update/site_refresher.rb
|
9
|
+
lib/pluto/update/site_updater.rb
|
10
10
|
lib/pluto/update/version.rb
|
11
|
+
test/data/ruby.ini
|
12
|
+
test/helper.rb
|
13
|
+
test/test_refresh.rb
|
14
|
+
test/test_site.rb
|
data/README.md
CHANGED
@@ -17,10 +17,10 @@
|
|
17
17
|
```
|
18
18
|
title = Planet Ruby
|
19
19
|
|
20
|
-
[
|
21
|
-
title
|
22
|
-
link
|
23
|
-
feed
|
20
|
+
[rubylang]
|
21
|
+
title = Ruby Lang News
|
22
|
+
link = http://www.ruby-lang.org/en/news
|
23
|
+
feed = http://www.ruby-lang.org/en/feeds/news.rss
|
24
24
|
|
25
25
|
[rubyonrails]
|
26
26
|
title = Ruby on Rails Blog
|
data/Rakefile
CHANGED
@@ -18,9 +18,10 @@ Hoe.spec 'pluto-update' do
|
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
|
-
['pluto-models',
|
22
|
-
['
|
23
|
-
['
|
21
|
+
['pluto-models', '>= 1.3.2'],
|
22
|
+
['pluto-feedfetcher', '>= 0.1.0'],
|
23
|
+
['fetcher', '>= 0.4.4'],
|
24
|
+
['preproc', '>= 0.1.0'],
|
24
25
|
]
|
25
26
|
|
26
27
|
self.licenses = ['Public Domain']
|
data/lib/pluto/update.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
require 'pluto/models'
|
5
|
+
require 'pluto/feedfetcher'
|
5
6
|
|
6
7
|
|
7
8
|
# more 3rd party gems
|
@@ -11,26 +12,30 @@ require 'preproc' # include preprocessor
|
|
11
12
|
|
12
13
|
# our own code
|
13
14
|
require 'pluto/update/version' # Note: let version always go first
|
14
|
-
require 'pluto/update/
|
15
|
-
require 'pluto/update/
|
16
|
-
require 'pluto/update/
|
17
|
-
require 'pluto/update/
|
15
|
+
require 'pluto/update/feed_refresher'
|
16
|
+
require 'pluto/update/site_refresher'
|
17
|
+
require 'pluto/update/site_fetcher'
|
18
|
+
require 'pluto/update/site_updater'
|
18
19
|
|
19
20
|
|
20
21
|
|
21
22
|
module Pluto
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
Subscriber.new.update_subscriptions( config )
|
24
|
+
def self.refresh_feeds ## refresh == fetch+parse+update
|
25
|
+
FeedRefresher.new.refresh_feeds
|
26
26
|
end
|
27
27
|
|
28
|
+
def self.refresh_sites ## refresh == fetch+parse+update
|
29
|
+
SiteRefresher.new.refresh_sites
|
30
|
+
end
|
31
|
+
|
32
|
+
### convenience alias w/ update_ -- use refresh (only) - why? why not??
|
28
33
|
def self.update_feeds
|
29
|
-
|
34
|
+
FeedRefresher.new.refresh_feeds
|
30
35
|
end
|
31
36
|
|
32
37
|
def self.update_sites
|
33
|
-
|
38
|
+
SiteRefresher.new.refresh_sites
|
34
39
|
end
|
35
40
|
|
36
41
|
end # module Pluto
|
@@ -3,41 +3,26 @@
|
|
3
3
|
|
4
4
|
module Pluto
|
5
5
|
|
6
|
-
|
6
|
+
#######
|
7
|
+
# note: refresh
|
8
|
+
# refresh will fetch feeds, parse feeds and then update feeds
|
9
|
+
# (e.g. update is just one operation of refresh)
|
7
10
|
|
8
|
-
|
11
|
+
class FeedRefresher
|
9
12
|
|
13
|
+
include LogUtils::Logging
|
10
14
|
include Models
|
11
15
|
|
12
16
|
def initialize
|
13
|
-
@worker
|
17
|
+
## @worker = FeedFetcherBasic.new ## -- simple fetch (strategy); no cache, no cond get etc.
|
18
|
+
@worker = FeedFetcherCondGetWithCache.new
|
14
19
|
end
|
15
20
|
|
16
21
|
def debug=(value) @debug = value; end
|
17
22
|
def debug?() @debug || false; end
|
18
23
|
|
19
24
|
|
20
|
-
def
|
21
|
-
if debug?
|
22
|
-
## turn on logging for sql too
|
23
|
-
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
24
|
-
@worker.debug = true # also pass along worker debug flag if set
|
25
|
-
end
|
26
|
-
|
27
|
-
start_time = Time.now
|
28
|
-
Activity.create!( text: "start update sites (#{Site.count})" )
|
29
|
-
|
30
|
-
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
31
|
-
Site.order(:id).each do |site|
|
32
|
-
update_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
33
|
-
end
|
34
|
-
|
35
|
-
total_secs = Time.now - start_time
|
36
|
-
Activity.create!( text: "done update sites (#{Site.count}) in #{total_secs}s" )
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
def update_feeds( opts={} ) # update all feeds
|
25
|
+
def refresh_feeds( opts={} ) # refresh (fetch+parse+update) all feeds
|
41
26
|
if debug?
|
42
27
|
## turn on logging for sql too
|
43
28
|
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
@@ -49,16 +34,16 @@ class Refresher
|
|
49
34
|
|
50
35
|
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
51
36
|
Feed.order(:id).each do |feed|
|
52
|
-
|
37
|
+
refresh_feed_worker( feed )
|
53
38
|
### todo/fix: add catch exception in loop and log to activity log and continue w/ next feed
|
54
39
|
end
|
55
40
|
|
56
41
|
total_secs = Time.now - start_time
|
57
|
-
Activity.create!( text: "done update feeds (#{
|
42
|
+
Activity.create!( text: "done update feeds (#{Feed.count}) in #{total_secs}s" )
|
58
43
|
end
|
59
44
|
|
60
45
|
|
61
|
-
def
|
46
|
+
def refresh_feeds_for( site_key, opts={} ) # refresh (fetch+parse+update) feeds for site
|
62
47
|
if debug?
|
63
48
|
## turn on logging for sql too
|
64
49
|
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
@@ -71,35 +56,23 @@ class Refresher
|
|
71
56
|
site = Site.find_by_key!( site_key )
|
72
57
|
|
73
58
|
site.feeds.each do |feed|
|
74
|
-
|
59
|
+
refresh_feed_worker( feed )
|
75
60
|
end
|
76
61
|
|
77
|
-
end # method
|
62
|
+
end # method refresh_feeds_for
|
78
63
|
|
79
64
|
|
80
65
|
private
|
81
|
-
def
|
82
|
-
|
83
|
-
|
84
|
-
# on error or if http-not modified etc. skip update/processing
|
85
|
-
return if site_config.nil?
|
86
|
-
|
87
|
-
subscriber = Subscriber.new
|
88
|
-
subscriber.debug = debug? ? true : false # pass along debug flag
|
66
|
+
def refresh_feed_worker( feed_rec )
|
67
|
+
feed_xml = @worker.fetch( feed_rec )
|
89
68
|
|
90
|
-
subscriber.update_subscriptions_for( site_rec.key, site_config )
|
91
|
-
end
|
92
|
-
|
93
|
-
|
94
|
-
def update_feed_worker( feed_rec )
|
95
|
-
feed = @worker.feed_by_rec_if_modified( feed_rec )
|
96
|
-
|
97
69
|
# on error or if http-not modified etc. skip update/processing
|
98
|
-
return if
|
70
|
+
return if feed_xml.nil?
|
71
|
+
|
72
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
99
73
|
|
100
74
|
## fix/todo: reload feed_red - fetched date updated etc.
|
101
75
|
## check if needed for access to fetched date
|
102
|
-
|
103
76
|
|
104
77
|
## todo/check: move feed_rec update to the end (after item updates??)
|
105
78
|
|
@@ -107,7 +80,7 @@ private
|
|
107
80
|
# generator
|
108
81
|
# published_at,built_at,touched_at,fetched_at
|
109
82
|
# summary,title2
|
110
|
-
|
83
|
+
|
111
84
|
## fix:
|
112
85
|
## weird rss exception error on windows w/ dates
|
113
86
|
# e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
|
@@ -117,9 +90,10 @@ private
|
|
117
90
|
|
118
91
|
|
119
92
|
feed_rec.debug = debug? ? true : false # pass along debug flag
|
120
|
-
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
121
|
-
feed_rec.save_from_struct!( feed ) # todo: find a better name - why? why not??
|
122
93
|
|
94
|
+
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
95
|
+
# fix/todo: find a better name - why? why not?? => use update_from_struct!
|
96
|
+
feed_rec.save_from_struct!( feed )
|
123
97
|
|
124
98
|
# update cached value last published for item
|
125
99
|
last_item_rec = feed_rec.items.latest.limit(1).first # note limit(1) will return relation/arrar - use first to get first element or nil from ary
|
@@ -130,8 +104,8 @@ private
|
|
130
104
|
feed_rec.update_attributes!( last_published: last_item_rec.touched )
|
131
105
|
end
|
132
106
|
end
|
133
|
-
end # method
|
107
|
+
end # method refresh_feed_worker
|
134
108
|
|
135
|
-
end # class
|
109
|
+
end # class FeedRefresher
|
136
110
|
|
137
111
|
end # module Pluto
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class SiteFetcher
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
include Models # for easy convenience access for Activity etc.
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@worker = Fetcher::Worker.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def debug=(value) @debug = value; end
|
16
|
+
def debug?() @debug || false; end
|
17
|
+
|
18
|
+
def fetch( site_rec )
|
19
|
+
####################################################
|
20
|
+
# try smart http update; will update db records
|
21
|
+
|
22
|
+
site_url = site_rec.url
|
23
|
+
site_key = site_rec.key
|
24
|
+
|
25
|
+
### todo/fix: normalize/unify feed_url
|
26
|
+
## - same in fetcher - use shared utility method or similar
|
27
|
+
|
28
|
+
@worker.use_cache = true
|
29
|
+
@worker.cache[ site_url ] = {
|
30
|
+
'etag' => site_rec.http_etag,
|
31
|
+
'last-modified' => site_rec.http_last_modified
|
32
|
+
}
|
33
|
+
|
34
|
+
response = @worker.get( site_url )
|
35
|
+
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
36
|
+
|
37
|
+
site_fetched = Time.now
|
38
|
+
|
39
|
+
###
|
40
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
41
|
+
# will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
42
|
+
# thus, set/force encoding to utf-8
|
43
|
+
site_text = response.body.to_s
|
44
|
+
site_text = site_text.force_encoding( Encoding::UTF_8 )
|
45
|
+
|
46
|
+
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
47
|
+
|
48
|
+
if site_text.index('@include')
|
49
|
+
## note: if the site_text includes @include
|
50
|
+
## we must revalidate complete file hierarchy(tree) for now
|
51
|
+
### continue;
|
52
|
+
##
|
53
|
+
## fix/todo: use ahead-of-time preprocessor ?? in the future to simplify???
|
54
|
+
else
|
55
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
56
|
+
puts "no change; request returns not modified (304); skipping parsing site config"
|
57
|
+
return nil # no updates available; nothing to do
|
58
|
+
end
|
59
|
+
|
60
|
+
elsif response.code != '200' # note Net::HTTP response.code is a string in ruby
|
61
|
+
|
62
|
+
puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
63
|
+
|
64
|
+
site_attribs = {
|
65
|
+
http_code: response.code.to_i,
|
66
|
+
http_server: response.header[ 'server' ],
|
67
|
+
http_etag: nil,
|
68
|
+
http_last_modified: nil,
|
69
|
+
fetched: site_fetched
|
70
|
+
}
|
71
|
+
site_rec.update_attributes!( site_attribs )
|
72
|
+
|
73
|
+
## add log error activity -- in future add to error log - better - why? why not?
|
74
|
+
Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
|
75
|
+
|
76
|
+
return nil # sorry; no feed for parsing available
|
77
|
+
else
|
78
|
+
# assume 200; continue w/ processing
|
79
|
+
end
|
80
|
+
|
81
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
82
|
+
|
83
|
+
site_attribs = {
|
84
|
+
http_code: response.code.to_i,
|
85
|
+
http_server: response.header[ 'server' ],
|
86
|
+
http_etag: response.header[ 'etag' ],
|
87
|
+
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
88
|
+
fetched: site_fetched
|
89
|
+
}
|
90
|
+
|
91
|
+
## if debug?
|
92
|
+
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
93
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
94
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
95
|
+
## end
|
96
|
+
|
97
|
+
site_rec.update_attributes!( site_attribs )
|
98
|
+
|
99
|
+
|
100
|
+
#################
|
101
|
+
### fix: add support for http_etag cache etc. - how??
|
102
|
+
###
|
103
|
+
### use from_text( text, base: base ) !!!!!!!!
|
104
|
+
### do NOT reissue first request
|
105
|
+
##
|
106
|
+
## fix: use special case/method for update_with_includes!!!
|
107
|
+
## keep it simple w/o includes (do NOT mix in one method)
|
108
|
+
## split into two methods!!!
|
109
|
+
|
110
|
+
## retry w/ preprocessor
|
111
|
+
## refetch if @include found w/ all includes included
|
112
|
+
if site_text.index('@include')
|
113
|
+
site_text = InclPreproc.from_url( site_url ).read
|
114
|
+
end
|
115
|
+
|
116
|
+
## logger.debug "site_text:"
|
117
|
+
## logger.debug site_text[ 0..300 ] # get first 300 chars
|
118
|
+
|
119
|
+
site_text
|
120
|
+
|
121
|
+
###
|
122
|
+
## todo/fix:
|
123
|
+
### move INI.load out of this method!! - return site_text or nil
|
124
|
+
##
|
125
|
+
## puts "Before parsing site config >#{site_key}<..."
|
126
|
+
##
|
127
|
+
# assume ini format for now
|
128
|
+
## site_config = INI.load( site_text )
|
129
|
+
## site_config
|
130
|
+
end
|
131
|
+
|
132
|
+
end # class SiteFetcher
|
133
|
+
|
134
|
+
end # module Pluto
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
#######
|
7
|
+
# note: refresh
|
8
|
+
# refresh will fetch site subscriptions, parse and then update the site subscriptions
|
9
|
+
# (e.g. update is just one operation of refresh)
|
10
|
+
|
11
|
+
|
12
|
+
class SiteRefresher
|
13
|
+
|
14
|
+
include LogUtils::Logging
|
15
|
+
include Models
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@worker = SiteFetcher.new
|
19
|
+
end
|
20
|
+
|
21
|
+
def debug=(value) @debug = value; end
|
22
|
+
def debug?() @debug || false; end
|
23
|
+
|
24
|
+
def refresh_sites( opts={} ) # refresh (fetch+parse+update) all site configs
|
25
|
+
if debug?
|
26
|
+
## turn on logging for sql too
|
27
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
28
|
+
@worker.debug = true # also pass along worker debug flag if set
|
29
|
+
end
|
30
|
+
|
31
|
+
start_time = Time.now
|
32
|
+
Activity.create!( text: "start update sites (#{Site.count})" )
|
33
|
+
|
34
|
+
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
35
|
+
Site.order(:id).each do |site|
|
36
|
+
refresh_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
37
|
+
end
|
38
|
+
|
39
|
+
total_secs = Time.now - start_time
|
40
|
+
Activity.create!( text: "done update sites (#{Site.count}) in #{total_secs}s" )
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
private
|
45
|
+
def refresh_site_worker( site_rec )
|
46
|
+
site_text = @worker.fetch( site_rec )
|
47
|
+
|
48
|
+
# on error or if http-not modified etc. skip update/processing
|
49
|
+
return if site_text.nil?
|
50
|
+
|
51
|
+
site_config = INI.load( site_text )
|
52
|
+
|
53
|
+
site_updater = SiteUpdater.new
|
54
|
+
site_updater.debug = debug? ? true : false # pass along debug flag
|
55
|
+
|
56
|
+
### todo/fix:
|
57
|
+
## allow passing in as first arg database rec!!!
|
58
|
+
## - if passed in database rec - do NOT lookup record by site_config.key!!!
|
59
|
+
## use existing key (lets you change/update key without creating a new duplicate site entry, for example)
|
60
|
+
## - or use a new method (instead of overloading arg) ?? - why? why not??
|
61
|
+
site_updater.update_subscriptions_for( site_rec.key, site_config )
|
62
|
+
end
|
63
|
+
|
64
|
+
end # class SiteRefresher
|
65
|
+
|
66
|
+
end # module Pluto
|
67
|
+
|
@@ -3,10 +3,9 @@
|
|
3
3
|
|
4
4
|
module Pluto
|
5
5
|
|
6
|
-
class
|
6
|
+
class SiteUpdater
|
7
7
|
|
8
8
|
include LogUtils::Logging
|
9
|
-
|
10
9
|
include Models
|
11
10
|
|
12
11
|
def debug=(value) @debug = value; end
|
@@ -156,8 +155,8 @@ class Subscriber
|
|
156
155
|
site_rec.subscriptions.create!( feed_id: feed_rec.id )
|
157
156
|
end
|
158
157
|
|
159
|
-
end # method
|
158
|
+
end # method update_subscriptions_for
|
160
159
|
|
161
|
-
end # class
|
160
|
+
end # class SiteUpdater
|
162
161
|
|
163
162
|
end # module Pluto
|
data/lib/pluto/update/version.rb
CHANGED
data/test/data/ruby.ini
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
title = Planet Ruby
|
2
|
+
source = https://github.com/feedreader/pluto.update/raw/master/test/data/ruby.ini
|
3
|
+
|
4
|
+
[rubylang]
|
5
|
+
title = Ruby Lang News
|
6
|
+
link = http://www.ruby-lang.org/en/news
|
7
|
+
feed = http://www.ruby-lang.org/en/feeds/news.rss
|
8
|
+
|
9
|
+
[rubyonrails]
|
10
|
+
title = Ruby on Rails News
|
11
|
+
link = http://weblog.rubyonrails.org
|
12
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
13
|
+
|
14
|
+
[viennarb]
|
15
|
+
title = Vienna.rb News
|
16
|
+
link = http://vienna-rb.at
|
17
|
+
feed = http://vienna-rb.at/atom.xml
|
18
|
+
|
data/test/helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
|
4
|
+
## minitest setup
|
5
|
+
|
6
|
+
require 'minitest/autorun'
|
7
|
+
|
8
|
+
## our own code
|
9
|
+
require 'pluto/update'
|
10
|
+
|
11
|
+
|
12
|
+
LogUtils::Logger.root.level = :debug
|
13
|
+
|
14
|
+
|
15
|
+
## some shortcuts
|
16
|
+
Log = LogDb::Model::Log
|
17
|
+
Prop = ConfDb::Model::Prop
|
18
|
+
|
19
|
+
Site = Pluto::Model::Site
|
20
|
+
Feed = Pluto::Model::Feed
|
21
|
+
Item = Pluto::Model::Item
|
22
|
+
Subscription = Pluto::Model::Subscription
|
23
|
+
|
24
|
+
|
25
|
+
Pluto.setup_in_memory_db
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_refresh.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestRefresh < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Site.delete_all
|
15
|
+
Feed.delete_all
|
16
|
+
Item.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
|
19
|
+
site_text = File.read( "#{PlutoUpdate.root}/test/data/ruby.ini")
|
20
|
+
site_config = INI.load( site_text )
|
21
|
+
|
22
|
+
site_updater = Pluto::SiteUpdater.new
|
23
|
+
site_updater.update_subscriptions_for( 'ruby', site_config )
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
def test_refresh_sites
|
28
|
+
Pluto.refresh_sites
|
29
|
+
assert true
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_refresh_feeds
|
33
|
+
Pluto.refresh_feeds
|
34
|
+
assert true
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class TestRefresh
|
38
|
+
|
data/test/test_site.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_site.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestSite < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Site.delete_all
|
15
|
+
Feed.delete_all
|
16
|
+
Item.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def test_site_updater
|
22
|
+
site_text = File.read( "#{PlutoUpdate.root}/test/data/ruby.ini")
|
23
|
+
site_config = INI.load( site_text )
|
24
|
+
pp site_config
|
25
|
+
|
26
|
+
assert_equal 0, Site.count
|
27
|
+
assert_equal 0, Feed.count
|
28
|
+
|
29
|
+
site_updater = Pluto::SiteUpdater.new
|
30
|
+
site_updater.update_subscriptions_for( 'ruby', site_config )
|
31
|
+
|
32
|
+
assert_equal 1, Site.count
|
33
|
+
assert_equal 3, Feed.count
|
34
|
+
|
35
|
+
ruby = Site.find_by_key!( 'ruby' )
|
36
|
+
assert_equal 'Planet Ruby', ruby.title
|
37
|
+
assert_equal 3, ruby.subscriptions.count
|
38
|
+
assert_equal 3, ruby.feeds.count
|
39
|
+
|
40
|
+
rubylang = Feed.find_by_key!( 'rubylang' )
|
41
|
+
assert_equal 'Ruby Lang News', rubylang.title
|
42
|
+
assert_equal 'http://www.ruby-lang.org/en/news', rubylang.url
|
43
|
+
assert_equal 'http://www.ruby-lang.org/en/feeds/news.rss', rubylang.feed_url
|
44
|
+
end
|
45
|
+
|
46
|
+
end # class TestSite
|
47
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-update
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pluto-models
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.3.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pluto-feedfetcher
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.1.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.1.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: fetcher
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,16 +103,21 @@ extra_rdoc_files:
|
|
89
103
|
- Manifest.txt
|
90
104
|
- README.md
|
91
105
|
files:
|
106
|
+
- ".gemtest"
|
92
107
|
- HISTORY.md
|
93
108
|
- Manifest.txt
|
94
109
|
- README.md
|
95
110
|
- Rakefile
|
96
111
|
- lib/pluto/update.rb
|
97
|
-
- lib/pluto/update/
|
98
|
-
- lib/pluto/update/
|
99
|
-
- lib/pluto/update/
|
100
|
-
- lib/pluto/update/
|
112
|
+
- lib/pluto/update/feed_refresher.rb
|
113
|
+
- lib/pluto/update/site_fetcher.rb
|
114
|
+
- lib/pluto/update/site_refresher.rb
|
115
|
+
- lib/pluto/update/site_updater.rb
|
101
116
|
- lib/pluto/update/version.rb
|
117
|
+
- test/data/ruby.ini
|
118
|
+
- test/helper.rb
|
119
|
+
- test/test_refresh.rb
|
120
|
+
- test/test_site.rb
|
102
121
|
homepage: https://github.com/feedreader/pluto.update
|
103
122
|
licenses:
|
104
123
|
- Public Domain
|
@@ -125,4 +144,6 @@ rubygems_version: 2.4.2
|
|
125
144
|
signing_key:
|
126
145
|
specification_version: 4
|
127
146
|
summary: pluto-update - planet feed 'n' subscription updater
|
128
|
-
test_files:
|
147
|
+
test_files:
|
148
|
+
- test/test_site.rb
|
149
|
+
- test/test_refresh.rb
|
data/lib/pluto/update/fetcher.rb
DELETED
@@ -1,357 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Pluto
|
5
|
-
|
6
|
-
class Fetcher
|
7
|
-
|
8
|
-
include LogUtils::Logging
|
9
|
-
|
10
|
-
include Models # for easy convenience access for Activity etc.
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@worker = ::Fetcher::Worker.new
|
14
|
-
end
|
15
|
-
|
16
|
-
def debug=(value) @debug = value; end
|
17
|
-
def debug?() @debug || false; end
|
18
|
-
|
19
|
-
|
20
|
-
def fetch_feed( url )
|
21
|
-
response = @worker.get( url )
|
22
|
-
|
23
|
-
## if debug?
|
24
|
-
puts "http status #{response.code} #{response.message}"
|
25
|
-
|
26
|
-
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
27
|
-
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
28
|
-
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
29
|
-
## end
|
30
|
-
|
31
|
-
xml = response.body
|
32
|
-
|
33
|
-
###
|
34
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
35
|
-
# will mostly be ASCII
|
36
|
-
# - try to change encoding to UTF-8 ourselves
|
37
|
-
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
38
|
-
|
39
|
-
#####
|
40
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
41
|
-
|
42
|
-
## NB:
|
43
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
44
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
45
|
-
xml = xml.force_encoding( Encoding::UTF_8 )
|
46
|
-
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
47
|
-
|
48
|
-
xml
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
def feed_by_rec( feed_rec )
|
53
|
-
# simple feed fetcher; use for debugging (only/mostly)
|
54
|
-
# -- will NOT change db records in any way
|
55
|
-
|
56
|
-
feed_url = feed_rec.feed_url
|
57
|
-
feed_key = feed_rec.key
|
58
|
-
|
59
|
-
feed_xml = fetch_feed( feed_url )
|
60
|
-
|
61
|
-
logger.debug "feed_xml:"
|
62
|
-
logger.debug feed_xml[ 0..500 ] # get first 500 chars
|
63
|
-
|
64
|
-
# if opts.verbose? # also write a copy to disk
|
65
|
-
if debug?
|
66
|
-
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
67
|
-
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
68
|
-
f.write( feed_xml )
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
puts "Before parsing feed >#{feed_key}<..."
|
73
|
-
|
74
|
-
## fix/todo: check for feed.nil? -> error parsing!!!
|
75
|
-
# or throw exception
|
76
|
-
feed = FeedUtils::Parser.parse( feed_xml )
|
77
|
-
feed
|
78
|
-
end
|
79
|
-
|
80
|
-
|
81
|
-
def feed_by_rec_if_modified( feed_rec ) # try smart http update; will update db records
|
82
|
-
feed_url = feed_rec.feed_url
|
83
|
-
feed_key = feed_rec.key
|
84
|
-
|
85
|
-
### todo/fix: normalize/unifiy feed_url
|
86
|
-
## - same in fetcher - use shared utitlity method or similar
|
87
|
-
|
88
|
-
@worker.use_cache = true
|
89
|
-
@worker.cache[ feed_url ] = {
|
90
|
-
'etag' => feed_rec.http_etag,
|
91
|
-
'last-modified' => feed_rec.http_last_modified
|
92
|
-
}
|
93
|
-
|
94
|
-
begin
|
95
|
-
response = @worker.get( feed_url )
|
96
|
-
rescue SocketError => e
|
97
|
-
## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
|
98
|
-
### will result in SocketError -- getaddrinfo: Name or service not known
|
99
|
-
puts "*** error: fetching feed '#{feed_key}' - #{e.to_s}"
|
100
|
-
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - #{e.to_s}" )
|
101
|
-
|
102
|
-
### todo/fix: update feed rec in db
|
103
|
-
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
104
|
-
return nil
|
105
|
-
end
|
106
|
-
|
107
|
-
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
108
|
-
|
109
|
-
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
110
|
-
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
111
|
-
puts "no change; request returns not modified (304); skipping parsing feed"
|
112
|
-
return nil # no updates available; nothing to do
|
113
|
-
end
|
114
|
-
|
115
|
-
feed_fetched = Time.now
|
116
|
-
|
117
|
-
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
118
|
-
|
119
|
-
puts "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
120
|
-
|
121
|
-
feed_attribs = {
|
122
|
-
http_code: response.code.to_i,
|
123
|
-
http_server: response.header[ 'server' ],
|
124
|
-
http_etag: nil,
|
125
|
-
http_last_modified: nil,
|
126
|
-
body: nil,
|
127
|
-
md5: nil,
|
128
|
-
fetched: feed_fetched
|
129
|
-
}
|
130
|
-
feed_rec.update_attributes!( feed_attribs )
|
131
|
-
|
132
|
-
## add log error activity -- in future add to error log - better - why? why not?
|
133
|
-
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
|
134
|
-
|
135
|
-
return nil # sorry; no feed for parsing available
|
136
|
-
end
|
137
|
-
|
138
|
-
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
139
|
-
|
140
|
-
feed_xml = response.body
|
141
|
-
###
|
142
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
143
|
-
# will mostly be ASCII
|
144
|
-
# - try to change encoding to UTF-8 ourselves
|
145
|
-
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
146
|
-
|
147
|
-
|
148
|
-
#####
|
149
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
150
|
-
|
151
|
-
# try Converting ASCII-8BIT to UTF-8 based domain-specific guesses
|
152
|
-
begin
|
153
|
-
# Try it as UTF-8 directly
|
154
|
-
# Note: make a copy/dup - otherwise convert fails (because string is already changed/corrupted)
|
155
|
-
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
|
156
|
-
unless feed_xml_cleaned.valid_encoding?
|
157
|
-
|
158
|
-
puts "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
|
159
|
-
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
|
160
|
-
# Some of it might be old Windows code page
|
161
|
-
# -- (Windows Code Page CP1252 is ISO_8859_1 is Latin-1 - check ??)
|
162
|
-
|
163
|
-
# tell ruby the encoding
|
164
|
-
# encode to utf-8
|
165
|
-
## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
|
166
|
-
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
167
|
-
end
|
168
|
-
feed_xml = feed_xml_cleaned
|
169
|
-
rescue EncodingError => e
|
170
|
-
puts "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
|
171
|
-
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
|
172
|
-
|
173
|
-
# Force it to UTF-8, throwing out invalid bits
|
174
|
-
## todo: check options - add ?? or something to mark invalid chars ???
|
175
|
-
feed_xml.encode!( Encoding::UTF_8, :invalid => :replace, :undef => :replace )
|
176
|
-
end
|
177
|
-
|
178
|
-
## NB:
|
179
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
180
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
181
|
-
### old "simple" version
|
182
|
-
## feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
183
|
-
|
184
|
-
|
185
|
-
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
186
|
-
|
187
|
-
## check for md5 hash for response.body
|
188
|
-
|
189
|
-
last_feed_md5 = feed_rec.md5
|
190
|
-
feed_md5 = Digest::MD5.hexdigest( feed_xml )
|
191
|
-
|
192
|
-
if last_feed_md5 && last_feed_md5 == feed_md5
|
193
|
-
# not all servers handle conditional gets, so while not much can be
|
194
|
-
# done about the bandwidth, but if the response body is identical
|
195
|
-
# the downstream processing (parsing, caching, ...) can be avoided.
|
196
|
-
# - thanks to planet mars -fido.rb for the idea, cheers.
|
197
|
-
|
198
|
-
puts "no change; md5 digests match; skipping parsing feed"
|
199
|
-
return nil # no updates available; nothing to do
|
200
|
-
end
|
201
|
-
|
202
|
-
feed_attribs = {
|
203
|
-
http_code: response.code.to_i,
|
204
|
-
http_server: response.header[ 'server' ],
|
205
|
-
http_etag: response.header[ 'etag' ],
|
206
|
-
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
207
|
-
body: feed_xml,
|
208
|
-
md5: feed_md5,
|
209
|
-
fetched: feed_fetched
|
210
|
-
}
|
211
|
-
|
212
|
-
## if debug?
|
213
|
-
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
214
|
-
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
215
|
-
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
216
|
-
## end
|
217
|
-
|
218
|
-
### note: might crash w/ encoding errors when saving in postgress
|
219
|
-
## e.g. PG::CharacterNotInRepertoire: ERROR: ...
|
220
|
-
## catch error, log it and stop for now
|
221
|
-
#
|
222
|
-
# in the future check for different charset than utf-8 ?? possible?? how to deal with non-utf8 charsets??
|
223
|
-
|
224
|
-
begin
|
225
|
-
feed_rec.update_attributes!( feed_attribs )
|
226
|
-
rescue Exception => e
|
227
|
-
# log db error; and continue
|
228
|
-
puts "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
|
229
|
-
Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
|
230
|
-
return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
|
231
|
-
end
|
232
|
-
|
233
|
-
|
234
|
-
logger.debug "feed_xml:"
|
235
|
-
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
236
|
-
|
237
|
-
puts "Before parsing feed >#{feed_key}<..."
|
238
|
-
|
239
|
-
### move to feedutils
|
240
|
-
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
241
|
-
|
242
|
-
## fix/todo: check for feed.nil? -> error parsing!!!
|
243
|
-
# or throw exception
|
244
|
-
feed = FeedUtils::Parser.parse( feed_xml )
|
245
|
-
feed
|
246
|
-
end
|
247
|
-
|
248
|
-
|
249
|
-
##
# Fetch the site config for +site_rec+ with a conditional HTTP GET
# (smart http update); the database record gets updated along the way.
#
# site_rec - record responding to url, key, http_etag, http_last_modified
#            and update_attributes!
#
# Returns the site config as loaded by INI.load, or nil if
#  - the server answers 304 (not modified) and the text has no @include, or
#  - the server answers with any status other than 200/304
#    (the error gets stored in the record and logged as an Activity).
#
# Note: the worker's use_cache setting is switched on for the request only
#  and always put back to its previous value (even if the request raises).
def site_by_rec_if_modified( site_rec )   # try smart http update; will update db records
  site_url = site_rec.url
  site_key = site_rec.key

  ### todo/fix: normalize/unify site_url
  ##   - same in fetcher - use shared utility method or similar

  ## remember the current cache switch so it can be restored after the request;
  ##  fall back to false (the old hard-coded value) if the worker has no reader
  old_use_cache = @worker.respond_to?( :use_cache? ) ? @worker.use_cache? : false

  begin
    @worker.use_cache = true
    @worker.cache[ site_url ] = {
      'etag'          => site_rec.http_etag,
      'last-modified' => site_rec.http_last_modified
    }

    response = @worker.get( site_url )
  ensure
    @worker.use_cache = old_use_cache   # restore - do NOT leave the cache switched on
  end

  site_fetched = Time.now

  ###
  # Note: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  #   thus, set/force encoding to utf-8
  site_text = response.body.to_s
  site_text = site_text.force_encoding( Encoding::UTF_8 )

  if response.code == '304'   # not modified (conditional GET - e.g. using etag/last-modified)

    unless site_text.index('@include')
      puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
      puts "no change; request returns not modified (304); skipping parsing site config"
      return nil   # no updates available; nothing to do
    end

    ## note: if the site_text includes @include
    ##   we must revalidate the complete file hierarchy (tree) for now
    ##   that is, continue (fall through) with processing
    ##
    ## fix/todo: use ahead-of-time preprocessor ?? in the future to simplify???

  elsif response.code != '200'   # note: Net::HTTP response.code is a string in ruby

    puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

    ## record the failure; clear etag/last-modified
    site_attribs = {
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      fetched:            site_fetched
    }
    site_rec.update_attributes!( site_attribs )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )

    return nil   # sorry; no site config for parsing available
  else
    # assume 200; continue w/ processing
  end

  puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

  site_attribs = {
    http_code:          response.code.to_i,
    http_server:        response.header[ 'server' ],
    http_etag:          response.header[ 'etag' ],
    http_last_modified: response.header[ 'last-modified' ],   ## note: last_modified header gets stored as plain text (not datetime)
    fetched:            site_fetched
  }

  ## if debug?
  puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
  puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
  puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
  ## end

  site_rec.update_attributes!( site_attribs )

  #################
  ### fix: add support for http_etag cache etc. - how??
  ###
  ###   use from_text( text, base: base ) !!!!!!!!
  ###     do NOT reissue first request
  ##
  ## fix: use special case/method for update_with_includes!!!
  ##   keep it simple w/o includes (do NOT mix in one method)
  ##   split into two methods!!!

  ## retry w/ preprocessor
  ##  refetch if @include found w/ all includes included
  if site_text.index('@include')
    site_text = InclPreproc.from_url( site_url ).read
  end

  ## logger.debug "site_text:"
  ## logger.debug site_text[ 0..300 ]   # get first 300 chars

  puts "Before parsing site config >#{site_key}<..."

  # assume ini format for now
  site_config = INI.load( site_text )
  site_config
end
|
354
|
-
|
355
|
-
end # class Fetcher
|
356
|
-
|
357
|
-
end # module Pluto
|
data/lib/pluto/update/updater.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Pluto

##
# Runs a complete update for a single site in two steps:
#  1) update the subscriptions (Subscriber#update_subscriptions_for)
#  2) update the feeds         (Refresher#update_feeds_for)
class Updater

  include LogUtils::Logging

  ### fix!!!!!: change config to text - yes/no - why? why not??
  #    or pass along struct
  #      - with hash and text and format (e.g. ini) as fields???
  #
  #   - why? - we need to get a handle on the md5 digest/hash plus on the plain text, ideally to store in db
  ##  - pass along unparsed text!! - not hash struct
  #      - will get saved in db plus we need to generate md5 hash
  #   - add filename e.g. ruby.ini|ruby.conf as opt ??
  #      or add config format as opt e.g. ini?

  # opts   - options; must respond to verbose?
  # config - site config; read with config['key'] and config['slug']
  def initialize( opts, config )
    @opts   = opts
    @config = config
  end

  attr_reader :opts, :config

  # Run the update for +arg+ - a site name with or without the .ini
  #  file extension (e.g. ruby or ruby.ini).
  #
  # Note: only a trailing .ini gets removed (e.g. mini.ini => mini);
  #   any other .ini inside the name (e.g. my.inifile) is left untouched.
  def run( arg )
    name = arg.downcase.sub( /\.ini\z/, '' )   # remove file extension if present

    update_for( name )
  end

  # Update subscriptions and feeds for the site called +name+.
  #
  # The site key is config['key'] or config['slug'] if configured;
  #  otherwise it gets derived from +name+.
  #
  # Returns the result of Refresher#update_feeds_for.
  def update_for( name )

    ## note: allow (optional) config of site key too
    site_key = config['key'] || config['slug']
    if site_key.nil?
      ## if no key configured; use (file)name; remove -_ chars
      ##   e.g. jekyll-meta becomes jekyllmeta etc.
      site_key = name.downcase.gsub( /[\-_]/, '' )
    end

    ###################
    # step 1) update subscriptions
    subscriber = Subscriber.new

    # pass along debug/verbose setting/switch
    subscriber.debug = true if opts.verbose?
    subscriber.update_subscriptions_for( site_key, config )

    ##############################
    # step 2) update feeds
    refresher = Refresher.new

    # pass along debug/verbose setting/switch
    refresher.debug = true if opts.verbose?
    refresher.update_feeds_for( site_key )
  end # method update_for

end # class Updater

end # module Pluto
|