pluto-update 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gemtest +0 -0
- data/Manifest.txt +8 -4
- data/README.md +4 -4
- data/Rakefile +4 -3
- data/lib/pluto/update.rb +14 -9
- data/lib/pluto/update/{refresher.rb → feed_refresher.rb} +25 -51
- data/lib/pluto/update/site_fetcher.rb +134 -0
- data/lib/pluto/update/site_refresher.rb +67 -0
- data/lib/pluto/update/{subscriber.rb → site_updater.rb} +3 -4
- data/lib/pluto/update/version.rb +1 -1
- data/test/data/ruby.ini +18 -0
- data/test/helper.rb +25 -0
- data/test/test_refresh.rb +38 -0
- data/test/test_site.rb +47 -0
- metadata +28 -7
- data/lib/pluto/update/fetcher.rb +0 -357
- data/lib/pluto/update/updater.rb +0 -62
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49adc91fcbc3b7df6d0ee5aed048fa0df4abb186
|
4
|
+
data.tar.gz: 16b22d7a9993c287f592176acd9991ac5d42e3c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0db8f9b7dcb72009df75b4cc001fe522e579f4b55bc8292b7ab5d19f4166aead75084b0da9356444dddd00b999a372e9972abaedf9230497706db138d766f80
|
7
|
+
data.tar.gz: 0fc2ba83226d96ca4b9a71829079c7232322997963230a99cda0948ae4591dc4071725ddc287a2485dcdb083ec8fc1015c0399a50238ff78901f0dd39d908d00
|
data/.gemtest
ADDED
File without changes
|
data/Manifest.txt
CHANGED
@@ -3,8 +3,12 @@ Manifest.txt
|
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
5
|
lib/pluto/update.rb
|
6
|
-
lib/pluto/update/
|
7
|
-
lib/pluto/update/
|
8
|
-
lib/pluto/update/
|
9
|
-
lib/pluto/update/
|
6
|
+
lib/pluto/update/feed_refresher.rb
|
7
|
+
lib/pluto/update/site_fetcher.rb
|
8
|
+
lib/pluto/update/site_refresher.rb
|
9
|
+
lib/pluto/update/site_updater.rb
|
10
10
|
lib/pluto/update/version.rb
|
11
|
+
test/data/ruby.ini
|
12
|
+
test/helper.rb
|
13
|
+
test/test_refresh.rb
|
14
|
+
test/test_site.rb
|
data/README.md
CHANGED
@@ -17,10 +17,10 @@
|
|
17
17
|
```
|
18
18
|
title = Planet Ruby
|
19
19
|
|
20
|
-
[
|
21
|
-
title
|
22
|
-
link
|
23
|
-
feed
|
20
|
+
[rubylang]
|
21
|
+
title = Ruby Lang News
|
22
|
+
link = http://www.ruby-lang.org/en/news
|
23
|
+
feed = http://www.ruby-lang.org/en/feeds/news.rss
|
24
24
|
|
25
25
|
[rubyonrails]
|
26
26
|
title = Ruby on Rails Blog
|
data/Rakefile
CHANGED
@@ -18,9 +18,10 @@ Hoe.spec 'pluto-update' do
|
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
|
-
['pluto-models',
|
22
|
-
['
|
23
|
-
['
|
21
|
+
['pluto-models', '>= 1.3.2'],
|
22
|
+
['pluto-feedfetcher', '>= 0.1.0'],
|
23
|
+
['fetcher', '>= 0.4.4'],
|
24
|
+
['preproc', '>= 0.1.0'],
|
24
25
|
]
|
25
26
|
|
26
27
|
self.licenses = ['Public Domain']
|
data/lib/pluto/update.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
require 'pluto/models'
|
5
|
+
require 'pluto/feedfetcher'
|
5
6
|
|
6
7
|
|
7
8
|
# more 3rd party gems
|
@@ -11,26 +12,30 @@ require 'preproc' # include preprocessor
|
|
11
12
|
|
12
13
|
# our own code
|
13
14
|
require 'pluto/update/version' # Note: let version always go first
|
14
|
-
require 'pluto/update/
|
15
|
-
require 'pluto/update/
|
16
|
-
require 'pluto/update/
|
17
|
-
require 'pluto/update/
|
15
|
+
require 'pluto/update/feed_refresher'
|
16
|
+
require 'pluto/update/site_refresher'
|
17
|
+
require 'pluto/update/site_fetcher'
|
18
|
+
require 'pluto/update/site_updater'
|
18
19
|
|
19
20
|
|
20
21
|
|
21
22
|
module Pluto
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
Subscriber.new.update_subscriptions( config )
|
24
|
+
def self.refresh_feeds ## refresh == fetch+parse+update
|
25
|
+
FeedRefresher.new.refresh_feeds
|
26
26
|
end
|
27
27
|
|
28
|
+
def self.refresh_sites ## refresh == fetch+parse+update
|
29
|
+
SiteRefresher.new.refresh_sites
|
30
|
+
end
|
31
|
+
|
32
|
+
### convenience alias w/ update_ -- use refresh (only) - why? why not??
|
28
33
|
def self.update_feeds
|
29
|
-
|
34
|
+
FeedRefresher.new.refresh_feeds
|
30
35
|
end
|
31
36
|
|
32
37
|
def self.update_sites
|
33
|
-
|
38
|
+
SiteRefresher.new.refresh_sites
|
34
39
|
end
|
35
40
|
|
36
41
|
end # module Pluto
|
@@ -3,41 +3,26 @@
|
|
3
3
|
|
4
4
|
module Pluto
|
5
5
|
|
6
|
-
|
6
|
+
#######
|
7
|
+
# note: refresh
|
8
|
+
# refresh will fetch feeds, parse feeds and then update feeds
|
9
|
+
# (e.g. update is just one operation of refresh)
|
7
10
|
|
8
|
-
|
11
|
+
class FeedRefresher
|
9
12
|
|
13
|
+
include LogUtils::Logging
|
10
14
|
include Models
|
11
15
|
|
12
16
|
def initialize
|
13
|
-
@worker
|
17
|
+
## @worker = FeedFetcherBasic.new ## -- simple fetch (strategy); no cache, no cond get etc.
|
18
|
+
@worker = FeedFetcherCondGetWithCache.new
|
14
19
|
end
|
15
20
|
|
16
21
|
def debug=(value) @debug = value; end
|
17
22
|
def debug?() @debug || false; end
|
18
23
|
|
19
24
|
|
20
|
-
def
|
21
|
-
if debug?
|
22
|
-
## turn on logging for sql too
|
23
|
-
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
24
|
-
@worker.debug = true # also pass along worker debug flag if set
|
25
|
-
end
|
26
|
-
|
27
|
-
start_time = Time.now
|
28
|
-
Activity.create!( text: "start update sites (#{Site.count})" )
|
29
|
-
|
30
|
-
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
31
|
-
Site.order(:id).each do |site|
|
32
|
-
update_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
33
|
-
end
|
34
|
-
|
35
|
-
total_secs = Time.now - start_time
|
36
|
-
Activity.create!( text: "done update sites (#{Site.count}) in #{total_secs}s" )
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
def update_feeds( opts={} ) # update all feeds
|
25
|
+
def refresh_feeds( opts={} ) # refresh (fetch+parse+update) all feeds
|
41
26
|
if debug?
|
42
27
|
## turn on logging for sql too
|
43
28
|
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
@@ -49,16 +34,16 @@ class Refresher
|
|
49
34
|
|
50
35
|
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
51
36
|
Feed.order(:id).each do |feed|
|
52
|
-
|
37
|
+
refresh_feed_worker( feed )
|
53
38
|
### todo/fix: add catch exception in loop and log to activity log and continue w/ next feed
|
54
39
|
end
|
55
40
|
|
56
41
|
total_secs = Time.now - start_time
|
57
|
-
Activity.create!( text: "done update feeds (#{
|
42
|
+
Activity.create!( text: "done update feeds (#{Feed.count}) in #{total_secs}s" )
|
58
43
|
end
|
59
44
|
|
60
45
|
|
61
|
-
def
|
46
|
+
def refresh_feeds_for( site_key, opts={} ) # refresh (fetch+parse+update) feeds for site
|
62
47
|
if debug?
|
63
48
|
## turn on logging for sql too
|
64
49
|
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
@@ -71,35 +56,23 @@ class Refresher
|
|
71
56
|
site = Site.find_by_key!( site_key )
|
72
57
|
|
73
58
|
site.feeds.each do |feed|
|
74
|
-
|
59
|
+
refresh_feed_worker( feed )
|
75
60
|
end
|
76
61
|
|
77
|
-
end # method
|
62
|
+
end # method refresh_feeds_for
|
78
63
|
|
79
64
|
|
80
65
|
private
|
81
|
-
def
|
82
|
-
|
83
|
-
|
84
|
-
# on error or if http-not modified etc. skip update/processing
|
85
|
-
return if site_config.nil?
|
86
|
-
|
87
|
-
subscriber = Subscriber.new
|
88
|
-
subscriber.debug = debug? ? true : false # pass along debug flag
|
66
|
+
def refresh_feed_worker( feed_rec )
|
67
|
+
feed_xml = @worker.fetch( feed_rec )
|
89
68
|
|
90
|
-
subscriber.update_subscriptions_for( site_rec.key, site_config )
|
91
|
-
end
|
92
|
-
|
93
|
-
|
94
|
-
def update_feed_worker( feed_rec )
|
95
|
-
feed = @worker.feed_by_rec_if_modified( feed_rec )
|
96
|
-
|
97
69
|
# on error or if http-not modified etc. skip update/processing
|
98
|
-
return if
|
70
|
+
return if feed_xml.nil?
|
71
|
+
|
72
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
99
73
|
|
100
74
|
## fix/todo: reload feed_red - fetched date updated etc.
|
101
75
|
## check if needed for access to fetched date
|
102
|
-
|
103
76
|
|
104
77
|
## todo/check: move feed_rec update to the end (after item updates??)
|
105
78
|
|
@@ -107,7 +80,7 @@ private
|
|
107
80
|
# generator
|
108
81
|
# published_at,built_at,touched_at,fetched_at
|
109
82
|
# summary,title2
|
110
|
-
|
83
|
+
|
111
84
|
## fix:
|
112
85
|
## weird rss exception error on windows w/ dates
|
113
86
|
# e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
|
@@ -117,9 +90,10 @@ private
|
|
117
90
|
|
118
91
|
|
119
92
|
feed_rec.debug = debug? ? true : false # pass along debug flag
|
120
|
-
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
121
|
-
feed_rec.save_from_struct!( feed ) # todo: find a better name - why? why not??
|
122
93
|
|
94
|
+
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
95
|
+
# fix/todo: find a better name - why? why not?? => use update_from_struct!
|
96
|
+
feed_rec.save_from_struct!( feed )
|
123
97
|
|
124
98
|
# update cached value last published for item
|
125
99
|
last_item_rec = feed_rec.items.latest.limit(1).first # note limit(1) will return relation/arrar - use first to get first element or nil from ary
|
@@ -130,8 +104,8 @@ private
|
|
130
104
|
feed_rec.update_attributes!( last_published: last_item_rec.touched )
|
131
105
|
end
|
132
106
|
end
|
133
|
-
end # method
|
107
|
+
end # method refresh_feed_worker
|
134
108
|
|
135
|
-
end # class
|
109
|
+
end # class FeedRefresher
|
136
110
|
|
137
111
|
end # module Pluto
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
class SiteFetcher
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
include Models # for easy convenience access for Activity etc.
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@worker = Fetcher::Worker.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def debug=(value) @debug = value; end
|
16
|
+
def debug?() @debug || false; end
|
17
|
+
|
18
|
+
def fetch( site_rec )
|
19
|
+
####################################################
|
20
|
+
# try smart http update; will update db records
|
21
|
+
|
22
|
+
site_url = site_rec.url
|
23
|
+
site_key = site_rec.key
|
24
|
+
|
25
|
+
### todo/fix: normalize/unifiy feed_url
|
26
|
+
## - same in fetcher - use shared utitlity method or similar
|
27
|
+
|
28
|
+
@worker.use_cache = true
|
29
|
+
@worker.cache[ site_url ] = {
|
30
|
+
'etag' => site_rec.http_etag,
|
31
|
+
'last-modified' => site_rec.http_last_modified
|
32
|
+
}
|
33
|
+
|
34
|
+
response = @worker.get( site_url )
|
35
|
+
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
36
|
+
|
37
|
+
site_fetched = Time.now
|
38
|
+
|
39
|
+
###
|
40
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
41
|
+
# will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
42
|
+
# thus, set/force encoding to utf-8
|
43
|
+
site_text = response.body.to_s
|
44
|
+
site_text = site_text.force_encoding( Encoding::UTF_8 )
|
45
|
+
|
46
|
+
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
47
|
+
|
48
|
+
if site_text.index('@include')
|
49
|
+
## note: if the site_text includes @include
|
50
|
+
## we must revalidate complete file hierachy(tree) for now
|
51
|
+
### continue;
|
52
|
+
##
|
53
|
+
## fix/todo: use ahead-of-time preprocessor ?? in the future to simplify???
|
54
|
+
else
|
55
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
56
|
+
puts "no change; request returns not modified (304); skipping parsing site config"
|
57
|
+
return nil # no updates available; nothing to do
|
58
|
+
end
|
59
|
+
|
60
|
+
elsif response.code != '200' # note Net::HTTP response.code is a string in ruby
|
61
|
+
|
62
|
+
puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
63
|
+
|
64
|
+
site_attribs = {
|
65
|
+
http_code: response.code.to_i,
|
66
|
+
http_server: response.header[ 'server' ],
|
67
|
+
http_etag: nil,
|
68
|
+
http_last_modified: nil,
|
69
|
+
fetched: site_fetched
|
70
|
+
}
|
71
|
+
site_rec.update_attributes!( site_attribs )
|
72
|
+
|
73
|
+
## add log error activity -- in future add to error log - better - why? why not?
|
74
|
+
Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
|
75
|
+
|
76
|
+
return nil # sorry; no feed for parsing available
|
77
|
+
else
|
78
|
+
# assume 200; continue w/ processing
|
79
|
+
end
|
80
|
+
|
81
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
82
|
+
|
83
|
+
site_attribs = {
|
84
|
+
http_code: response.code.to_i,
|
85
|
+
http_server: response.header[ 'server' ],
|
86
|
+
http_etag: response.header[ 'etag' ],
|
87
|
+
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
88
|
+
fetched: site_fetched
|
89
|
+
}
|
90
|
+
|
91
|
+
## if debug?
|
92
|
+
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
93
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
94
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
95
|
+
## end
|
96
|
+
|
97
|
+
site_rec.update_attributes!( site_attribs )
|
98
|
+
|
99
|
+
|
100
|
+
#################
|
101
|
+
### fix: add support for http_etag cache etc. - how??
|
102
|
+
###
|
103
|
+
### use from_text( text, base: base ) !!!!!!!!
|
104
|
+
### do NOT reissue first request
|
105
|
+
##
|
106
|
+
## fix: use special case/method for update_with_includes!!!
|
107
|
+
## keep it simple w/o includes (do NOT mix in one method)
|
108
|
+
## split into two methods!!!
|
109
|
+
|
110
|
+
## retry w/ preprocesser
|
111
|
+
## refetch if @include found w/ all includes included
|
112
|
+
if site_text.index('@include')
|
113
|
+
site_text = InclPreproc.from_url( site_url ).read
|
114
|
+
end
|
115
|
+
|
116
|
+
## logger.debug "site_text:"
|
117
|
+
## logger.debug site_text[ 0..300 ] # get first 300 chars
|
118
|
+
|
119
|
+
site_text
|
120
|
+
|
121
|
+
###
|
122
|
+
## todo/fix:
|
123
|
+
### move INI.load out of this method!! - return site_text or nil
|
124
|
+
##
|
125
|
+
## puts "Before parsing site config >#{site_key}<..."
|
126
|
+
##
|
127
|
+
# assume ini format for now
|
128
|
+
## site_config = INI.load( site_text )
|
129
|
+
## site_config
|
130
|
+
end
|
131
|
+
|
132
|
+
end # class SiteFetcher
|
133
|
+
|
134
|
+
end # module Pluto
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Pluto
|
5
|
+
|
6
|
+
#######
|
7
|
+
# note: refresh
|
8
|
+
# refresh will fetch site subscriptions, parse and then update the site subscriptions
|
9
|
+
# (e.g. update is just one operation of refresh)
|
10
|
+
|
11
|
+
|
12
|
+
class SiteRefresher
|
13
|
+
|
14
|
+
include LogUtils::Logging
|
15
|
+
include Models
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@worker = SiteFetcher.new
|
19
|
+
end
|
20
|
+
|
21
|
+
def debug=(value) @debug = value; end
|
22
|
+
def debug?() @debug || false; end
|
23
|
+
|
24
|
+
def refresh_sites( opts={} ) # refresh (fetch+parse+update) all site configs
|
25
|
+
if debug?
|
26
|
+
## turn on logging for sql too
|
27
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
28
|
+
@worker.debug = true # also pass along worker debug flag if set
|
29
|
+
end
|
30
|
+
|
31
|
+
start_time = Time.now
|
32
|
+
Activity.create!( text: "start update sites (#{Site.count})" )
|
33
|
+
|
34
|
+
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
35
|
+
Site.order(:id).each do |site|
|
36
|
+
refresh_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
37
|
+
end
|
38
|
+
|
39
|
+
total_secs = Time.now - start_time
|
40
|
+
Activity.create!( text: "done update sites (#{Site.count}) in #{total_secs}s" )
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
private
|
45
|
+
def refresh_site_worker( site_rec )
|
46
|
+
site_text = @worker.fetch( site_rec )
|
47
|
+
|
48
|
+
# on error or if http-not modified etc. skip update/processing
|
49
|
+
return if site_text.nil?
|
50
|
+
|
51
|
+
site_config = INI.load( site_text )
|
52
|
+
|
53
|
+
site_updater = SiteUpdater.new
|
54
|
+
site_updater.debug = debug? ? true : false # pass along debug flag
|
55
|
+
|
56
|
+
### todo/fix:
|
57
|
+
## allow passing in as first arg database rec!!!
|
58
|
+
## - if passed in database rec - do NOT lookup record by site_config.key!!!
|
59
|
+
## use existing key (lets you change/update key without creating a new duplicate site entry, for example)
|
60
|
+
## - or use a new method (instead of overloading arg) ?? - why? why not??
|
61
|
+
site_updater.update_subscriptions_for( site_rec.key, site_config )
|
62
|
+
end
|
63
|
+
|
64
|
+
end # class SiteRefresher
|
65
|
+
|
66
|
+
end # module Pluto
|
67
|
+
|
@@ -3,10 +3,9 @@
|
|
3
3
|
|
4
4
|
module Pluto
|
5
5
|
|
6
|
-
class
|
6
|
+
class SiteUpdater
|
7
7
|
|
8
8
|
include LogUtils::Logging
|
9
|
-
|
10
9
|
include Models
|
11
10
|
|
12
11
|
def debug=(value) @debug = value; end
|
@@ -156,8 +155,8 @@ class Subscriber
|
|
156
155
|
site_rec.subscriptions.create!( feed_id: feed_rec.id )
|
157
156
|
end
|
158
157
|
|
159
|
-
end # method
|
158
|
+
end # method update_subscriptions_for
|
160
159
|
|
161
|
-
end # class
|
160
|
+
end # class SiteUpdater
|
162
161
|
|
163
162
|
end # module Pluto
|
data/lib/pluto/update/version.rb
CHANGED
data/test/data/ruby.ini
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
title = Planet Ruby
|
2
|
+
source = https://github.com/feedreader/pluto.update/raw/master/test/data/ruby.ini
|
3
|
+
|
4
|
+
[rubylang]
|
5
|
+
title = Ruby Lang News
|
6
|
+
link = http://www.ruby-lang.org/en/news
|
7
|
+
feed = http://www.ruby-lang.org/en/feeds/news.rss
|
8
|
+
|
9
|
+
[rubyonrails]
|
10
|
+
title = Ruby on Rails News
|
11
|
+
link = http://weblog.rubyonrails.org
|
12
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
13
|
+
|
14
|
+
[viennarb]
|
15
|
+
title = Vienna.rb News
|
16
|
+
link = http://vienna-rb.at
|
17
|
+
feed = http://vienna-rb.at/atom.xml
|
18
|
+
|
data/test/helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
|
4
|
+
## minitest setup
|
5
|
+
|
6
|
+
require 'minitest/autorun'
|
7
|
+
|
8
|
+
## our own code
|
9
|
+
require 'pluto/update'
|
10
|
+
|
11
|
+
|
12
|
+
LogUtils::Logger.root.level = :debug
|
13
|
+
|
14
|
+
|
15
|
+
## some shortcuts
|
16
|
+
Log = LogDb::Model::Log
|
17
|
+
Prop = ConfDb::Model::Prop
|
18
|
+
|
19
|
+
Site = Pluto::Model::Site
|
20
|
+
Feed = Pluto::Model::Feed
|
21
|
+
Item = Pluto::Model::Item
|
22
|
+
Subscription = Pluto::Model::Subscription
|
23
|
+
|
24
|
+
|
25
|
+
Pluto.setup_in_memory_db
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_refresh.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestRefresh < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Site.delete_all
|
15
|
+
Feed.delete_all
|
16
|
+
Item.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
|
19
|
+
site_text = File.read( "#{PlutoUpdate.root}/test/data/ruby.ini")
|
20
|
+
site_config = INI.load( site_text )
|
21
|
+
|
22
|
+
site_updater = Pluto::SiteUpdater.new
|
23
|
+
site_updater.update_subscriptions_for( 'ruby', site_config )
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
def test_refresh_sites
|
28
|
+
Pluto.refresh_sites
|
29
|
+
assert true
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_refresh_feeds
|
33
|
+
Pluto.refresh_feeds
|
34
|
+
assert true
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class TestRefresh
|
38
|
+
|
data/test/test_site.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_site.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestSite < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Site.delete_all
|
15
|
+
Feed.delete_all
|
16
|
+
Item.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def test_site_updater
|
22
|
+
site_text = File.read( "#{PlutoUpdate.root}/test/data/ruby.ini")
|
23
|
+
site_config = INI.load( site_text )
|
24
|
+
pp site_config
|
25
|
+
|
26
|
+
assert_equal 0, Site.count
|
27
|
+
assert_equal 0, Feed.count
|
28
|
+
|
29
|
+
site_updater = Pluto::SiteUpdater.new
|
30
|
+
site_updater.update_subscriptions_for( 'ruby', site_config )
|
31
|
+
|
32
|
+
assert_equal 1, Site.count
|
33
|
+
assert_equal 3, Feed.count
|
34
|
+
|
35
|
+
ruby = Site.find_by_key!( 'ruby' )
|
36
|
+
assert_equal 'Planet Ruby', ruby.title
|
37
|
+
assert_equal 3, ruby.subscriptions.count
|
38
|
+
assert_equal 3, ruby.feeds.count
|
39
|
+
|
40
|
+
rubylang = Feed.find_by_key!( 'rubylang' )
|
41
|
+
assert_equal 'Ruby Lang News', rubylang.title
|
42
|
+
assert_equal 'http://www.ruby-lang.org/en/news', rubylang.url
|
43
|
+
assert_equal 'http://www.ruby-lang.org/en/feeds/news.rss', rubylang.feed_url
|
44
|
+
end
|
45
|
+
|
46
|
+
end # class TestSite
|
47
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-update
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pluto-models
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.3.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pluto-feedfetcher
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.1.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.1.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: fetcher
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,16 +103,21 @@ extra_rdoc_files:
|
|
89
103
|
- Manifest.txt
|
90
104
|
- README.md
|
91
105
|
files:
|
106
|
+
- ".gemtest"
|
92
107
|
- HISTORY.md
|
93
108
|
- Manifest.txt
|
94
109
|
- README.md
|
95
110
|
- Rakefile
|
96
111
|
- lib/pluto/update.rb
|
97
|
-
- lib/pluto/update/
|
98
|
-
- lib/pluto/update/
|
99
|
-
- lib/pluto/update/
|
100
|
-
- lib/pluto/update/
|
112
|
+
- lib/pluto/update/feed_refresher.rb
|
113
|
+
- lib/pluto/update/site_fetcher.rb
|
114
|
+
- lib/pluto/update/site_refresher.rb
|
115
|
+
- lib/pluto/update/site_updater.rb
|
101
116
|
- lib/pluto/update/version.rb
|
117
|
+
- test/data/ruby.ini
|
118
|
+
- test/helper.rb
|
119
|
+
- test/test_refresh.rb
|
120
|
+
- test/test_site.rb
|
102
121
|
homepage: https://github.com/feedreader/pluto.update
|
103
122
|
licenses:
|
104
123
|
- Public Domain
|
@@ -125,4 +144,6 @@ rubygems_version: 2.4.2
|
|
125
144
|
signing_key:
|
126
145
|
specification_version: 4
|
127
146
|
summary: pluto-update - planet feed 'n' subscription updater
|
128
|
-
test_files:
|
147
|
+
test_files:
|
148
|
+
- test/test_site.rb
|
149
|
+
- test/test_refresh.rb
|
data/lib/pluto/update/fetcher.rb
DELETED
@@ -1,357 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Pluto
|
5
|
-
|
6
|
-
class Fetcher
|
7
|
-
|
8
|
-
include LogUtils::Logging
|
9
|
-
|
10
|
-
include Models # for easy convenience access for Activity etc.
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@worker = ::Fetcher::Worker.new
|
14
|
-
end
|
15
|
-
|
16
|
-
def debug=(value) @debug = value; end
|
17
|
-
def debug?() @debug || false; end
|
18
|
-
|
19
|
-
|
20
|
-
def fetch_feed( url )
|
21
|
-
response = @worker.get( url )
|
22
|
-
|
23
|
-
## if debug?
|
24
|
-
puts "http status #{response.code} #{response.message}"
|
25
|
-
|
26
|
-
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
27
|
-
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
28
|
-
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
29
|
-
## end
|
30
|
-
|
31
|
-
xml = response.body
|
32
|
-
|
33
|
-
###
|
34
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
35
|
-
# will mostly be ASCII
|
36
|
-
# - try to change encoding to UTF-8 ourselves
|
37
|
-
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
38
|
-
|
39
|
-
#####
|
40
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
41
|
-
|
42
|
-
## NB:
|
43
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
44
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
45
|
-
xml = xml.force_encoding( Encoding::UTF_8 )
|
46
|
-
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
47
|
-
|
48
|
-
xml
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
def feed_by_rec( feed_rec )
|
53
|
-
# simple feed fetcher; use for debugging (only/mostly)
|
54
|
-
# -- will NOT change db records in any way
|
55
|
-
|
56
|
-
feed_url = feed_rec.feed_url
|
57
|
-
feed_key = feed_rec.key
|
58
|
-
|
59
|
-
feed_xml = fetch_feed( feed_url )
|
60
|
-
|
61
|
-
logger.debug "feed_xml:"
|
62
|
-
logger.debug feed_xml[ 0..500 ] # get first 500 chars
|
63
|
-
|
64
|
-
# if opts.verbose? # also write a copy to disk
|
65
|
-
if debug?
|
66
|
-
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
67
|
-
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
68
|
-
f.write( feed_xml )
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
puts "Before parsing feed >#{feed_key}<..."
|
73
|
-
|
74
|
-
## fix/todo: check for feed.nil? -> error parsing!!!
|
75
|
-
# or throw exception
|
76
|
-
feed = FeedUtils::Parser.parse( feed_xml )
|
77
|
-
feed
|
78
|
-
end
|
79
|
-
|
80
|
-
|
81
|
-
def feed_by_rec_if_modified( feed_rec ) # try smart http update; will update db records
|
82
|
-
feed_url = feed_rec.feed_url
|
83
|
-
feed_key = feed_rec.key
|
84
|
-
|
85
|
-
### todo/fix: normalize/unifiy feed_url
|
86
|
-
## - same in fetcher - use shared utitlity method or similar
|
87
|
-
|
88
|
-
@worker.use_cache = true
|
89
|
-
@worker.cache[ feed_url ] = {
|
90
|
-
'etag' => feed_rec.http_etag,
|
91
|
-
'last-modified' => feed_rec.http_last_modified
|
92
|
-
}
|
93
|
-
|
94
|
-
begin
|
95
|
-
response = @worker.get( feed_url )
|
96
|
-
rescue SocketError => e
|
97
|
-
## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
|
98
|
-
### will result in SocketError -- getaddrinfo: Name or service not known
|
99
|
-
puts "*** error: fetching feed '#{feed_key}' - #{e.to_s}"
|
100
|
-
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - #{e.to_s}" )
|
101
|
-
|
102
|
-
### todo/fix: update feed rec in db
|
103
|
-
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
104
|
-
return nil
|
105
|
-
end
|
106
|
-
|
107
|
-
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
108
|
-
|
109
|
-
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
110
|
-
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
111
|
-
puts "no change; request returns not modified (304); skipping parsing feed"
|
112
|
-
return nil # no updates available; nothing to do
|
113
|
-
end
|
114
|
-
|
115
|
-
feed_fetched = Time.now
|
116
|
-
|
117
|
-
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
118
|
-
|
119
|
-
puts "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
120
|
-
|
121
|
-
feed_attribs = {
|
122
|
-
http_code: response.code.to_i,
|
123
|
-
http_server: response.header[ 'server' ],
|
124
|
-
http_etag: nil,
|
125
|
-
http_last_modified: nil,
|
126
|
-
body: nil,
|
127
|
-
md5: nil,
|
128
|
-
fetched: feed_fetched
|
129
|
-
}
|
130
|
-
feed_rec.update_attributes!( feed_attribs )
|
131
|
-
|
132
|
-
## add log error activity -- in future add to error log - better - why? why not?
|
133
|
-
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
|
134
|
-
|
135
|
-
return nil # sorry; no feed for parsing available
|
136
|
-
end
|
137
|
-
|
138
|
-
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
139
|
-
|
140
|
-
feed_xml = response.body
|
141
|
-
###
|
142
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
143
|
-
# will mostly be ASCII
|
144
|
-
# - try to change encoding to UTF-8 ourselves
|
145
|
-
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
146
|
-
|
147
|
-
|
148
|
-
#####
|
149
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
150
|
-
|
151
|
-
# try Converting ASCII-8BIT to UTF-8 based domain-specific guesses
|
152
|
-
begin
|
153
|
-
# Try it as UTF-8 directly
|
154
|
-
# Note: make a copy/dup - otherwise convert fails (because string is already changed/corrupted)
|
155
|
-
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
|
156
|
-
unless feed_xml_cleaned.valid_encoding?
|
157
|
-
|
158
|
-
puts "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
|
159
|
-
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
|
160
|
-
# Some of it might be old Windows code page
|
161
|
-
# -- (Windows Code Page CP1252 is ISO_8859_1 is Latin-1 - check ??)
|
162
|
-
|
163
|
-
# tell ruby the encoding
|
164
|
-
# encode to utf-8
|
165
|
-
## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
|
166
|
-
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
167
|
-
end
|
168
|
-
feed_xml = feed_xml_cleaned
|
169
|
-
rescue EncodingError => e
|
170
|
-
puts "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
|
171
|
-
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
|
172
|
-
|
173
|
-
# Force it to UTF-8, throwing out invalid bits
|
174
|
-
## todo: check options - add ?? or something to mark invalid chars ???
|
175
|
-
feed_xml.encode!( Encoding::UTF_8, :invalid => :replace, :undef => :replace )
|
176
|
-
end
|
177
|
-
|
178
|
-
## NB:
|
179
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
180
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
181
|
-
### old "simple" version
|
182
|
-
## feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
183
|
-
|
184
|
-
|
185
|
-
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
186
|
-
|
187
|
-
## check for md5 hash for response.body
|
188
|
-
|
189
|
-
last_feed_md5 = feed_rec.md5
|
190
|
-
feed_md5 = Digest::MD5.hexdigest( feed_xml )
|
191
|
-
|
192
|
-
if last_feed_md5 && last_feed_md5 == feed_md5
|
193
|
-
# not all servers handle conditional gets, so while not much can be
|
194
|
-
# done about the bandwidth, but if the response body is identical
|
195
|
-
# the downstream processing (parsing, caching, ...) can be avoided.
|
196
|
-
# - thanks to planet mars -fido.rb for the idea, cheers.
|
197
|
-
|
198
|
-
puts "no change; md5 digests match; skipping parsing feed"
|
199
|
-
return nil # no updates available; nothing to do
|
200
|
-
end
|
201
|
-
|
202
|
-
feed_attribs = {
|
203
|
-
http_code: response.code.to_i,
|
204
|
-
http_server: response.header[ 'server' ],
|
205
|
-
http_etag: response.header[ 'etag' ],
|
206
|
-
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
207
|
-
body: feed_xml,
|
208
|
-
md5: feed_md5,
|
209
|
-
fetched: feed_fetched
|
210
|
-
}
|
211
|
-
|
212
|
-
## if debug?
|
213
|
-
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
214
|
-
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
215
|
-
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
216
|
-
## end
|
217
|
-
|
218
|
-
### note: might crash w/ encoding errors when saving in postgress
|
219
|
-
## e.g. PG::CharacterNotInRepertoire: ERROR: ...
|
220
|
-
## catch error, log it and stop for now
|
221
|
-
#
|
222
|
-
# in the future check for different charset than utf-8 ?? possible?? how to deal with non-utf8 charsets??
|
223
|
-
|
224
|
-
begin
|
225
|
-
feed_rec.update_attributes!( feed_attribs )
|
226
|
-
rescue Exception => e
|
227
|
-
# log db error; and continue
|
228
|
-
puts "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
|
229
|
-
Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
|
230
|
-
return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
|
231
|
-
end
|
232
|
-
|
233
|
-
|
234
|
-
logger.debug "feed_xml:"
|
235
|
-
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
236
|
-
|
237
|
-
puts "Before parsing feed >#{feed_key}<..."
|
238
|
-
|
239
|
-
### move to feedutils
|
240
|
-
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
241
|
-
|
242
|
-
## fix/todo: check for feed.nil? -> error parsing!!!
|
243
|
-
# or throw exception
|
244
|
-
feed = FeedUtils::Parser.parse( feed_xml )
|
245
|
-
feed
|
246
|
-
end
|
247
|
-
|
248
|
-
|
249
|
-
# Fetches the site config for +site_rec+ with a conditional HTTP GET and
# records the outcome on the database record.
#
# The worker cache is primed with the etag / last-modified values stored on
# the record before the request is issued, so an unchanged config can be
# answered with "304 Not Modified".
#
# site_rec - record responding to url, key, http_etag, http_last_modified
#            and update_attributes!
#
# Returns the config parsed by INI.load, or nil when there is nothing to
# process (304 not modified, or any status other than 200).
# Errors raised by the worker or by update_attributes! are not rescued here.
def site_by_rec_if_modified( site_rec )   # try smart http update; will update db records
  site_url = site_rec.url
  site_key = site_rec.key

  ### todo/fix: normalize/unify site_url
  ##   - same in fetcher - use shared utility method or similar

  begin
    @worker.use_cache = true
    @worker.cache[ site_url ] = {
      'etag'          => site_rec.http_etag,
      'last-modified' => site_rec.http_last_modified
    }

    response = @worker.get( site_url )
  ensure
    # always switch cache mode off again - even if the request raised -
    # so a failed fetch does not leave the shared worker in cache mode
    # fix/todo: restore old use_cache setting instead of false
    @worker.use_cache = false
  end

  site_fetched = Time.now

  ###
  # Note: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  #   thus, set/force encoding to utf-8
  #
  # work on a copy: nil.to_s returns a frozen string on ruby 2.7+,
  #   and force_encoding raises on a frozen receiver
  site_text = response.body.to_s.dup.force_encoding( Encoding::UTF_8 )

  http_status = "HTTP status #{response.code} #{response.message}"

  if response.code == '304'    # not modified (conditional GET - e.g. using etag/last-modified)
    unless site_text.index('@include')
      puts "OK - fetching site '#{site_key}' - #{http_status}"
      puts "no change; request returns not modified (304); skipping parsing site config"
      return nil   # no updates available; nothing to do
    end

    ## note: if the site_text includes @include
    ##   we must revalidate complete file hierarchy (tree) for now; continue
    ##
    ## NOTE(review): a 304 response normally carries no body, so this
    ##   branch is probably never taken - confirm
    ## fix/todo: use ahead-of-time preprocessor ?? in the future to simplify???
  elsif response.code != '200'   # note Net::HTTP response.code is a string in ruby
    error_text = "*** error: fetching site '#{site_key}' - #{http_status}"
    puts error_text

    # forget the validators so the next run issues an unconditional fetch
    site_rec.update_attributes!(
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      fetched:            site_fetched
    )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: error_text )

    return nil  # sorry; no site config for parsing available
  end

  # status 200 (or 304 with includes); continue w/ processing
  puts "OK - fetching site '#{site_key}' - #{http_status}"

  site_attribs = {
    http_code:          response.code.to_i,
    http_server:        response.header[ 'server' ],
    http_etag:          response.header[ 'etag' ],
    http_last_modified: response.header[ 'last-modified' ],  ## note: last_modified header gets stored as plain text (not datetime)
    fetched:            site_fetched
  }

  ## if debug?
  %w[server etag last-modified].each do |name|
    value = response.header[ name ]
    puts "http header - #{name}: #{value} - #{value.class.name}"
  end
  ## end

  site_rec.update_attributes!( site_attribs )

  #################
  ### fix: add support for http_etag cache etc. - how??
  ###   use from_text( text, base: base ) - do NOT reissue first request
  ## fix: use special case/method for update_with_includes!!!
  ##   keep it simple w/o includes (do NOT mix in one method); split into two methods!!!

  ## retry w/ preprocessor
  ##  refetch if @include found w/ all includes included
  if site_text.index('@include')
    site_text = InclPreproc.from_url( site_url ).read
  end

  puts "Before parsing site config >#{site_key}<..."

  # assume ini format for now
  INI.load( site_text )
end
|
354
|
-
|
355
|
-
end # class Fetcher
|
356
|
-
|
357
|
-
end # module Pluto
|
data/lib/pluto/update/updater.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Pluto

# Runs one complete update cycle for a single site:
# step 1 syncs the subscriptions from the site config (Subscriber),
# step 2 refreshes the site's feeds (Refresher).
class Updater

  include LogUtils::Logging

  ### fix!!!!!: change config to text - yes/no - why? why not??
  #   or pass along struct
  #     - with hash and text and format (e.g. ini) as fields???
  #
  #   - why? - we need to get handle on md5 digest/hash plus on plain text, ideally to store in db
  ##  - pass along unparsed text!! - not hash struct
  #     - will get saved in db plus we need to generate md5 hash
  #   - add filename e.g. ruby.ini|ruby.conf as opt ??
  #      or add config format as opt e.g. ini?

  attr_reader :opts, :config

  # opts   - options object; only verbose? is queried here
  # config - site config; read via config['key'] / config['slug']
  def initialize( opts, config )
    @opts   = opts
    @config = config
  end

  # Updates the site named by +arg+ (a site name or an .ini file name).
  #
  # Only a trailing ".ini" is stripped - the pattern is anchored so that
  # names which merely contain ".ini" (e.g. my.initial.ini) stay intact.
  def run( arg )
    name = arg.downcase.sub( /\.ini\z/, '' )   # remove file extension if present

    update_for( name )
  end

  # Updates subscriptions and then feeds for the site called +name+.
  #
  # The site key comes from the config ('key', then 'slug'); if neither
  # is configured it is derived from +name+.
  def update_for( name )
    ## note: allow (optional) config of site key too
    ##  if no key configured; use (file)name; remove -_ chars
    ##    e.g. jekyll-meta becomes jekyllmeta etc.
    site_key = config['key'] || config['slug'] || name.downcase.gsub( /[\-_]/, '' )

    ###################
    # step 1) update subscriptions
    subscriber = Subscriber.new
    subscriber.debug = true   if opts.verbose?   # pass along debug/verbose setting/switch
    subscriber.update_subscriptions_for( site_key, config )

    ##############################
    # step 2) update feeds
    refresher = Refresher.new
    refresher.debug = true    if opts.verbose?   # pass along debug/verbose setting/switch
    refresher.update_feeds_for( site_key )
  end # method update_for

end # class Updater

end # module Pluto
|