pluto 0.8.5 → 0.8.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +10 -1
- data/lib/pluto/fetcher.rb +95 -9
- data/lib/pluto/models/feed.rb +69 -9
- data/lib/pluto/models/item.rb +42 -6
- data/lib/pluto/models/site.rb +1 -0
- data/lib/pluto/models/utils.rb +7 -7
- data/lib/pluto/refresher.rb +20 -73
- data/lib/pluto/schema.rb +24 -11
- data/lib/pluto/subscriber.rb +6 -2
- data/lib/pluto/template_helpers.rb +30 -30
- data/lib/pluto/version.rb +1 -1
- metadata +20 -20
data/README.md
CHANGED
@@ -197,6 +197,15 @@ For more samples, see [`nytimes.yml`](https://github.com/feedreader/pluto.sample
|
|
197
197
|
[`beer.yml`](https://github.com/feedreader/pluto.samples/blob/master/beer.yml),
|
198
198
|
[`football.yml`](https://github.com/feedreader/pluto.samples/blob/master/football.yml).
|
199
199
|
|
200
|
+
|
201
|
+
## Template Packs
|
202
|
+
|
203
|
+
- Blank - default templates; [more »](https://github.com/feedreader/pluto.blank)
|
204
|
+
- News - 'river of news' style templates; [more »](https://github.com/feedreader/pluto.news)
|
205
|
+
- Top - Popurl-style templates; [more »](https://github.com/feedreader/pluto.top)
|
206
|
+
- Classic - Planet Planet-Style templates; [more »](https://github.com/feedreader/pluto.classic)
|
207
|
+
|
208
|
+
|
200
209
|
## Install
|
201
210
|
|
202
211
|
Just install the gem:
|
@@ -222,7 +231,7 @@ Planet Mars by Sam Ruby [(Site)](https://github.com/rubys/mars) - first draft o
|
|
222
231
|
|
223
232
|
### Python
|
224
233
|
|
225
|
-
Planet Planet by Scott James Remnant
|
234
|
+
Planet Planet by Scott James Remnant and Jeff Waugh [(Site)](http://www.planetplanet.org) - uses Mark Pilgrim's universal feed parser (RDF, RSS and Atom) and Tomas Styblo's templating engine; last release version 2.0 in 2006
|
226
235
|
|
227
236
|
Planet Venus by Sam Ruby [(Site)](https://github.com/rubys/venus) - cleaned up Planet Planet code; last change in 2010
|
228
237
|
|
data/lib/pluto/fetcher.rb
CHANGED
@@ -13,14 +13,48 @@ class Fetcher
|
|
13
13
|
def debug?() @debug || false; end
|
14
14
|
|
15
15
|
|
16
|
+
def fetch_feed( url )
|
17
|
+
response = @worker.get( url )
|
18
|
+
|
19
|
+
if debug?
|
20
|
+
puts "http status #{response.code} #{response.message}"
|
21
|
+
|
22
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
23
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
24
|
+
end
|
25
|
+
|
26
|
+
xml = response.body
|
27
|
+
|
28
|
+
###
|
29
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
30
|
+
# will mostly be ASCII
|
31
|
+
# - try to change encoding to UTF-8 ourselves
|
32
|
+
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
33
|
+
|
34
|
+
#####
|
35
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
36
|
+
|
37
|
+
## NB:
|
38
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
39
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
40
|
+
xml = xml.force_encoding( Encoding::UTF_8 )
|
41
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
42
|
+
|
43
|
+
xml
|
44
|
+
end
|
45
|
+
|
46
|
+
|
16
47
|
def feed_by_rec( feed_rec )
|
48
|
+
# simple feed fetcher; use for debugging (only/mostly)
|
49
|
+
# -- will NOT change db records in any way
|
50
|
+
|
17
51
|
feed_url = feed_rec.feed_url
|
18
52
|
feed_key = feed_rec.key
|
19
|
-
|
53
|
+
|
20
54
|
feed_xml = fetch_feed( feed_url )
|
21
55
|
|
22
56
|
logger.debug "feed_xml:"
|
23
|
-
logger.debug feed_xml[ 0..
|
57
|
+
logger.debug feed_xml[ 0..500 ] # get first 500 chars
|
24
58
|
|
25
59
|
# if opts.verbose? # also write a copy to disk
|
26
60
|
if debug?
|
@@ -41,16 +75,43 @@ class Fetcher
|
|
41
75
|
end
|
42
76
|
|
43
77
|
|
44
|
-
def
|
45
|
-
|
78
|
+
def feed_by_rec_if_modified( feed_rec ) # try smart http update; will update db records
|
79
|
+
feed_url = feed_rec.feed_url
|
80
|
+
feed_key = feed_rec.key
|
81
|
+
|
82
|
+
### todo/fix:
|
83
|
+
## add if available http_etag machinery for smarter updates
|
84
|
+
## and http_last_modified headers
|
85
|
+
## and brute force body_old == body_new etc.
|
86
|
+
|
87
|
+
response = @worker.get( feed_url )
|
88
|
+
|
89
|
+
feed_fetched = Time.now
|
90
|
+
|
91
|
+
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
92
|
+
|
93
|
+
puts "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
94
|
+
|
95
|
+
feed_attribs = {
|
96
|
+
http_code: response.code.to_i,
|
97
|
+
http_etag: nil,
|
98
|
+
http_last_modified: nil,
|
99
|
+
body: nil,
|
100
|
+
fetched: feed_fetched
|
101
|
+
}
|
102
|
+
feed_rec.update_attributes!( feed_attribs )
|
103
|
+
return nil # sorry; no feed for parsing available
|
104
|
+
end
|
105
|
+
|
46
106
|
|
47
|
-
|
107
|
+
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
48
108
|
|
109
|
+
feed_xml = response.body
|
49
110
|
###
|
50
111
|
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
51
112
|
# will mostly be ASCII
|
52
113
|
# - try to change encoding to UTF-8 ourselves
|
53
|
-
logger.debug "
|
114
|
+
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
54
115
|
|
55
116
|
#####
|
56
117
|
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
@@ -58,10 +119,35 @@ class Fetcher
|
|
58
119
|
## NB:
|
59
120
|
# for now "hardcoded" to utf8 - what else can we do?
|
60
121
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
61
|
-
|
62
|
-
logger.debug "
|
122
|
+
feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
123
|
+
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
124
|
+
|
125
|
+
feed_attribs = {
|
126
|
+
http_code: response.code.to_i,
|
127
|
+
http_etag: response.header[ 'etag' ],
|
128
|
+
http_last_modified: response.header[ 'last-modified' ],
|
129
|
+
body: feed_xml,
|
130
|
+
fetched: feed_fetched
|
131
|
+
}
|
132
|
+
|
133
|
+
if debug?
|
134
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
135
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
136
|
+
end
|
63
137
|
|
64
|
-
|
138
|
+
feed_rec.update_attributes!( feed_attribs )
|
139
|
+
|
140
|
+
logger.debug "feed_xml:"
|
141
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
142
|
+
|
143
|
+
puts "Before parsing feed >#{feed_key}<..."
|
144
|
+
|
145
|
+
### move to feedutils
|
146
|
+
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
147
|
+
|
148
|
+
## fix/todo: check for feed.nil? -> error parsing!!!
|
149
|
+
# or throw exception
|
150
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
65
151
|
end
|
66
152
|
|
67
153
|
end # class Fetcher
|
data/lib/pluto/models/feed.rb
CHANGED
@@ -15,9 +15,9 @@ class Feed < ActiveRecord::Base
|
|
15
15
|
# note: order by first non-null datetime field
|
16
16
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
17
17
|
|
18
|
-
# note: if not
|
19
|
-
## order( "coalesce(
|
20
|
-
order( "coalesce(
|
18
|
+
# note: if not published, touched or built use hardcoded 1971-01-01 for now
|
19
|
+
## order( "coalesce(published,touched,built,'1971-01-01') desc" )
|
20
|
+
order( "coalesce(last_published,'1971-01-01') desc" )
|
21
21
|
end
|
22
22
|
|
23
23
|
##################################
|
@@ -27,6 +27,12 @@ class Feed < ActiveRecord::Base
|
|
27
27
|
def link() url; end # alias for url
|
28
28
|
def feed() feed_url; end # alias for feed_url
|
29
29
|
|
30
|
+
def last_published_at() last_published; end # legacy attrib reader - deprecated - remove!!
|
31
|
+
def fetched_at() fetched; end # legacy attrib reader - deprecated - remove!!
|
32
|
+
def published_at() published; end # legacy attrib reader - deprecated - remove!!
|
33
|
+
def touched_at() touched; end # legacy attrib reader - deprecated - remove!!
|
34
|
+
def built_at() built; end # legacy attrib reader - deprecated - remove!!
|
35
|
+
|
30
36
|
|
31
37
|
def url?() read_attribute(:url).present?; end
|
32
38
|
def title?() read_attribute(:title).present?; end
|
@@ -39,20 +45,74 @@ class Feed < ActiveRecord::Base
|
|
39
45
|
def feed_url() read_attribute_w_fallbacks( :feed_url, :auto_feed_url ); end
|
40
46
|
|
41
47
|
|
42
|
-
def
|
43
|
-
def
|
48
|
+
def published?() read_attribute(:published).present?; end
|
49
|
+
def touched?() read_attribute(:touched).present?; end
|
50
|
+
|
44
51
|
|
45
|
-
def
|
52
|
+
def published
|
46
53
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
47
54
|
# db backed attribute
|
48
55
|
|
49
56
|
read_attribute_w_fallbacks(
|
50
|
-
:
|
51
|
-
:
|
52
|
-
:
|
57
|
+
:published,
|
58
|
+
:touched, # try touched (aka updated (ATOM))
|
59
|
+
:built # try built (aka lastBuildDate (RSS))
|
53
60
|
)
|
54
61
|
end
|
55
62
|
|
63
|
+
|
64
|
+
def debug=(value) @debug = value; end
|
65
|
+
def debug?() @debug || false; end
|
66
|
+
|
67
|
+
def save_from_struct!( data )
|
68
|
+
|
69
|
+
update_from_struct!( data )
|
70
|
+
|
71
|
+
data.items.each do |item|
|
72
|
+
|
73
|
+
item_rec = Item.find_by_guid( item.guid )
|
74
|
+
if item_rec.nil?
|
75
|
+
item_rec = Item.new
|
76
|
+
puts "** NEW | #{item.title}"
|
77
|
+
else
|
78
|
+
## todo: check if any attribs changed
|
79
|
+
puts "UPDATE | #{item.title}"
|
80
|
+
end
|
81
|
+
|
82
|
+
item_rec.debug = debug? ? true : false # pass along debug flag
|
83
|
+
item_rec.update_from_struct!( self, item )
|
84
|
+
|
85
|
+
end # each item
|
86
|
+
end
|
87
|
+
|
88
|
+
|
89
|
+
def update_from_struct!( data )
|
90
|
+
feed_attribs = {
|
91
|
+
format: data.format,
|
92
|
+
published: data.published? ? data.published : nil,
|
93
|
+
touched: data.updated? ? data.updated : nil,
|
94
|
+
built: data.built? ? data.built : nil,
|
95
|
+
summary: data.summary? ? data.summary : nil,
|
96
|
+
### todo/fix: add/use
|
97
|
+
# auto_title: ???,
|
98
|
+
# auto_url: ???,
|
99
|
+
# auto_feed_url: ???,
|
100
|
+
auto_title2: data.title2? ? data.title2 : nil,
|
101
|
+
generator: data.generator
|
102
|
+
}
|
103
|
+
|
104
|
+
if debug?
|
105
|
+
## puts "*** dump feed_attribs:"
|
106
|
+
## pp feed_attribs
|
107
|
+
puts "*** dump feed_attribs w/ class types:"
|
108
|
+
feed_attribs.each do |key,value|
|
109
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
update_attributes!( feed_attribs )
|
114
|
+
end
|
115
|
+
|
56
116
|
end # class Feed
|
57
117
|
|
58
118
|
|
data/lib/pluto/models/item.rb
CHANGED
@@ -14,27 +14,63 @@ class Item < ActiveRecord::Base
|
|
14
14
|
def description() summary; end # alias for summary -- also add descr shortcut??
|
15
15
|
def link() url; end # alias for url
|
16
16
|
|
17
|
+
def fetched_at() fetched; end # legacy attrib reader - deprecated - remove!!
|
18
|
+
def published_at() published; end # legacy attrib reader - deprecated - remove!!
|
19
|
+
def touched_at() touched; end # legacy attrib reader - deprecated - remove!!
|
20
|
+
|
17
21
|
|
18
22
|
def self.latest
|
19
23
|
# note: order by first non-null datetime field
|
20
24
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
21
25
|
|
22
|
-
# note: if not
|
23
|
-
order( "coalesce(
|
26
|
+
# note: if neither published nor touched is set use hardcoded 1971-01-01 for now
|
27
|
+
order( "coalesce(published,touched,'1971-01-01') desc" )
|
24
28
|
end
|
25
29
|
|
26
|
-
def
|
30
|
+
def published?() read_attribute(:published).present?; end
|
27
31
|
|
28
|
-
def
|
32
|
+
def published
|
29
33
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
30
34
|
# db backed attribute
|
31
35
|
|
32
36
|
read_attribute_w_fallbacks(
|
33
|
-
:
|
34
|
-
:
|
37
|
+
:published,
|
38
|
+
:touched # try touched (aka updated RSS/ATOM)
|
35
39
|
)
|
36
40
|
end
|
37
41
|
|
42
|
+
|
43
|
+
|
44
|
+
def debug=(value) @debug = value; end
|
45
|
+
def debug?() @debug || false; end
|
46
|
+
|
47
|
+
def update_from_struct!( feed_rec, data )
|
48
|
+
## check: new item/record? not saved? add guid
|
49
|
+
# otherwise do not add guid - why? why not?
|
50
|
+
|
51
|
+
item_attribs = {
|
52
|
+
guid: data.guid, # todo: only add for new records???
|
53
|
+
title: data.title,
|
54
|
+
url: data.url,
|
55
|
+
summary: data.summary? ? data.summary : nil,
|
56
|
+
content: data.content? ? data.content : nil,
|
57
|
+
published: data.published? ? data.published : nil,
|
58
|
+
touched: data.updated? ? data.updated : nil,
|
59
|
+
feed_id: feed_rec.id, # add feed_id fk_ref
|
60
|
+
fetched: feed_rec.fetched
|
61
|
+
}
|
62
|
+
|
63
|
+
if debug?
|
64
|
+
puts "*** dump item_attribs w/ class types:"
|
65
|
+
item_attribs.each do |key,value|
|
66
|
+
next if [:summary,:content].include?( key ) # skip summary n content
|
67
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
update_attributes!( item_attribs )
|
72
|
+
end
|
73
|
+
|
38
74
|
end # class Item
|
39
75
|
|
40
76
|
|
data/lib/pluto/models/site.rb
CHANGED
data/lib/pluto/models/utils.rb
CHANGED
@@ -8,16 +8,16 @@ class ItemCursor
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def each
|
11
|
-
|
11
|
+
last_published = Time.local( 1971, 1, 1 )
|
12
12
|
last_feed_id = -1 ## todo: use feed_key instead of id?? why? why not??
|
13
13
|
|
14
14
|
@items.each do |item|
|
15
15
|
|
16
|
-
|
16
|
+
item_published = item.published # cache published value ref
|
17
17
|
|
18
|
-
if
|
19
|
-
|
20
|
-
|
18
|
+
if last_published.year == item_published.year &&
|
19
|
+
last_published.month == item_published.month &&
|
20
|
+
last_published.day == item_published.day
|
21
21
|
new_date = false
|
22
22
|
else
|
23
23
|
new_date = true
|
@@ -31,8 +31,8 @@ class ItemCursor
|
|
31
31
|
|
32
32
|
yield( item, new_date, new_feed )
|
33
33
|
|
34
|
-
|
35
|
-
last_feed_id
|
34
|
+
last_published = item.published
|
35
|
+
last_feed_id = item.feed.id
|
36
36
|
end
|
37
37
|
end # method each
|
38
38
|
|
data/lib/pluto/refresher.rb
CHANGED
@@ -25,18 +25,23 @@ class Refresher
|
|
25
25
|
Action.create!( title: 'update feeds' )
|
26
26
|
|
27
27
|
#####
|
28
|
-
# -- update
|
29
|
-
|
28
|
+
# -- update fetched timestamps for all sites
|
29
|
+
feeds_fetched = Time.now
|
30
30
|
Site.all.each do |site|
|
31
|
-
site.
|
31
|
+
site.fetched = feeds_fetched
|
32
32
|
site.save!
|
33
33
|
end
|
34
34
|
|
35
35
|
Feed.all.each do |feed_rec|
|
36
36
|
|
37
|
-
feed = @worker.
|
37
|
+
feed = @worker.feed_by_rec_if_modified( feed_rec )
|
38
|
+
|
39
|
+
# on error or if http-not modified etc. skip update/processing
|
40
|
+
next if feed.nil?
|
38
41
|
|
39
|
-
|
42
|
+
## fix/todo: reload feed_rec - fetched date updated etc.
|
43
|
+
## check if needed for access to fetched date
|
44
|
+
|
40
45
|
|
41
46
|
## todo/check: move feed_rec update to the end (after item updates??)
|
42
47
|
|
@@ -51,79 +56,21 @@ class Refresher
|
|
51
56
|
#
|
52
57
|
# move to_datetime to feedutils!! if it works
|
53
58
|
## todo: move this comments to feedutils??
|
54
|
-
|
55
|
-
|
56
|
-
feed_attribs = {
|
57
|
-
fetched_at: feed_fetched_at,
|
58
|
-
format: feed.format,
|
59
|
-
published_at: feed.published? ? feed.published : nil,
|
60
|
-
touched_at: feed.updated? ? feed.updated : nil,
|
61
|
-
built_at: feed.built? ? feed.built : nil,
|
62
|
-
summary: feed.summary? ? feed.summary : nil,
|
63
|
-
### todo/fix: add/use
|
64
|
-
# auto_title: ???,
|
65
|
-
# auto_url: ???,
|
66
|
-
# auto_feed_url: ???,
|
67
|
-
auto_title2: feed.title2? ? feed.title2 : nil,
|
68
|
-
generator: feed.generator
|
69
|
-
}
|
70
|
-
|
71
|
-
if debug?
|
72
|
-
## puts "*** dump feed_attribs:"
|
73
|
-
## pp feed_attribs
|
74
|
-
puts "*** dump feed_attribs w/ class types:"
|
75
|
-
feed_attribs.each do |key,value|
|
76
|
-
puts " #{key}: >#{value}< : #{value.class.name}"
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
feed_rec.update_attributes!( feed_attribs )
|
81
59
|
|
82
60
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
fetched_at: feed_fetched_at,
|
87
|
-
title: item.title,
|
88
|
-
url: item.url,
|
89
|
-
summary: item.summary? ? item.summary : nil,
|
90
|
-
content: item.content? ? item.content : nil,
|
91
|
-
published_at: item.published? ? item.published : nil,
|
92
|
-
touched_at: item.updated? ? item.updated : nil,
|
93
|
-
feed_id: feed_rec.id # add feed_id fk_ref
|
94
|
-
}
|
95
|
-
|
96
|
-
if debug?
|
97
|
-
puts "*** dump item_attribs w/ class types:"
|
98
|
-
item_attribs.each do |key,value|
|
99
|
-
next if [:summary,:content].include?( key ) # skip summary n content
|
100
|
-
puts " #{key}: >#{value}< : #{value.class.name}"
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
|
105
|
-
rec = Item.find_by_guid( item.guid )
|
106
|
-
if rec.nil?
|
107
|
-
rec = Item.new
|
108
|
-
item_attribs[ :guid ] = item.guid
|
109
|
-
puts "** NEW | #{item.title}"
|
110
|
-
else
|
111
|
-
## todo: check if any attribs changed
|
112
|
-
puts "UPDATE | #{item.title}"
|
113
|
-
end
|
61
|
+
feed_rec.debug = debug? ? true : false # pass along debug flag
|
62
|
+
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
63
|
+
feed_rec.save_from_struct!( feed ) # todo: find a better name - why? why not??
|
114
64
|
|
115
|
-
rec.update_attributes!( item_attribs )
|
116
|
-
end # each item
|
117
65
|
|
118
|
-
# update cached value
|
119
|
-
|
120
|
-
|
121
|
-
if
|
122
|
-
feed_rec.
|
123
|
-
else # try
|
124
|
-
feed_rec.
|
66
|
+
# update cached value last published for item
|
67
|
+
last_item_rec = feed_rec.items.latest.limit(1).first # note limit(1) will return relation/array - use first to get first element or nil from ary
|
68
|
+
if last_item_rec.present?
|
69
|
+
if last_item_rec.published?
|
70
|
+
feed_rec.update_attributes!( last_published: last_item_rec.published )
|
71
|
+
else # try touched
|
72
|
+
feed_rec.update_attributes!( last_published: last_item_rec.touched )
|
125
73
|
end
|
126
|
-
feed_rec.save!
|
127
74
|
end
|
128
75
|
|
129
76
|
end # each feed
|
data/lib/pluto/schema.rb
CHANGED
@@ -7,7 +7,7 @@ class CreateDb < ActiveRecord::Migration
|
|
7
7
|
create_table :sites do |t|
|
8
8
|
t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
|
9
9
|
t.string :key, :null => false # e.g. ruby, js, etc.
|
10
|
-
t.datetime :
|
10
|
+
t.datetime :fetched # last fetched/checked date -- make not null ??
|
11
11
|
|
12
12
|
t.timestamps # created_at, updated_at
|
13
13
|
end
|
@@ -35,17 +35,30 @@ class CreateDb < ActiveRecord::Migration
|
|
35
35
|
|
36
36
|
t.string :generator # feed generator (e.g. wordpress, etc.) from feed
|
37
37
|
|
38
|
-
t.datetime :
|
39
|
-
t.datetime :
|
40
|
-
t.datetime :
|
38
|
+
t.datetime :published # from feed published(atom)+ pubDate(rss)
|
39
|
+
t.datetime :built # from feed lastBuildDate(rss)
|
40
|
+
t.datetime :touched # from feed updated(atom)
|
41
|
+
|
42
|
+
############
|
43
|
+
# filters
|
44
|
+
t.string :includes # regex
|
45
|
+
t.string :excludes # regex
|
46
|
+
# todo: add generic filter list e.g. t.string :filters (comma,pipe or space separated method names?)
|
41
47
|
|
42
48
|
# -- our own (meta) fields
|
43
|
-
t.datetime :
|
49
|
+
t.datetime :last_published # cache last (latest) published for items
|
44
50
|
|
45
51
|
t.string :key, :null => false
|
46
52
|
t.string :format # e.g. atom (1.0), rss 2.0, rss 0.7 etc.
|
47
|
-
|
48
|
-
t.
|
53
|
+
|
54
|
+
t.integer :http_code # last http status code e.g. 200,404,etc.
|
55
|
+
t.string :http_etag # last http header etag
|
56
|
+
t.datetime :http_last_modified # last http header last-modified
|
57
|
+
|
58
|
+
t.text :body # last http response body (complete feed!)
|
59
|
+
|
60
|
+
t.datetime :fetched # last fetched/checked date
|
61
|
+
|
49
62
|
t.timestamps # created_at, updated_at
|
50
63
|
end
|
51
64
|
|
@@ -56,15 +69,15 @@ class CreateDb < ActiveRecord::Migration
|
|
56
69
|
t.text :summary # e.g. description (rss), summary (atom)
|
57
70
|
t.text :content
|
58
71
|
|
59
|
-
t.datetime :
|
60
|
-
t.datetime :
|
72
|
+
t.datetime :published # from feed (published) + pubDate(rss)
|
73
|
+
t.datetime :touched # from feed updated (atom)
|
61
74
|
|
62
75
|
## todo: add :last_updated_at ?? (NOTE: updated_at already take by auto-timestamps)
|
63
76
|
t.references :feed, :null => false
|
64
77
|
|
65
|
-
t.datetime :
|
78
|
+
t.datetime :fetched # last fetched/check date
|
66
79
|
t.timestamps # created_at, updated_at
|
67
|
-
|
80
|
+
|
68
81
|
## t.string :author
|
69
82
|
## todo: add author/authors, category/categories
|
70
83
|
end
|
data/lib/pluto/subscriber.rb
CHANGED
@@ -54,12 +54,16 @@ class Subscriber
|
|
54
54
|
feed_key = key.to_s.dup
|
55
55
|
feed_hash = value
|
56
56
|
|
57
|
-
# todo: use title from feed?
|
57
|
+
# todo/fix: use title from feed?
|
58
|
+
# e.g. fill up auto_title, auto_url, etc.
|
59
|
+
|
58
60
|
feed_attribs = {
|
59
61
|
feed_url: feed_hash[ 'feed' ] || feed_hash[ 'feed_url' ],
|
60
62
|
url: feed_hash[ 'link' ] || feed_hash[ 'url' ],
|
61
63
|
title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
|
62
|
-
title2: feed_hash[ 'title2' ]
|
64
|
+
title2: feed_hash[ 'title2' ],
|
65
|
+
includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
|
66
|
+
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
|
63
67
|
}
|
64
68
|
|
65
69
|
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
@@ -5,16 +5,16 @@ module Pluto
|
|
5
5
|
|
6
6
|
module TemplateHelper
|
7
7
|
|
8
|
-
def strip_tags(
|
8
|
+
def strip_tags( ht )
|
9
9
|
### tobe done
|
10
10
|
## strip markup tags; return plain text
|
11
|
-
|
11
|
+
ht.gsub( /<[^>]+>/, '' )
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
-
def whitelist(
|
15
|
+
def whitelist( ht, tags, opts={} )
|
16
16
|
|
17
|
-
# note: assumes properly escaped <> in
|
17
|
+
# note: assumes properly escaped <> in ht/hypertext
|
18
18
|
|
19
19
|
###############################################
|
20
20
|
# step one - save whitelisted tags use ‹tag›
|
@@ -25,38 +25,38 @@ module TemplateHelper
|
|
25
25
|
|
26
26
|
# convert xml-style empty tags to simple html empty tags
|
27
27
|
# e.g. <br/> or <br /> becomes <br>
|
28
|
-
|
28
|
+
ht = ht.gsub( /<(#{tag})\s*\/>/i ) { |_| "‹#{$1.downcase}›" } # eg. <br /> or <br/> becomes ‹br›
|
29
29
|
|
30
30
|
# make sure we won't swallow <br> for <b> for example, thus use \s+ before [^>]
|
31
|
-
|
32
|
-
|
31
|
+
ht = ht.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { |_| "‹#{$1.downcase}›" } # opening tag <p>
|
32
|
+
ht = ht.gsub( /<\/(#{tag})\s*>/i ) { |_| "‹/#{$1.downcase}›" } # closing tag e.g. </p>
|
33
33
|
end
|
34
34
|
|
35
35
|
############################
|
36
36
|
# step two - clean tags
|
37
37
|
|
38
38
|
# strip images - special treatment for debugging
|
39
|
-
|
40
|
-
|
39
|
+
ht = ht.gsub( /<img[^>]*>/i, '♦' ) # for debugging use black diamond e.g. ♦
|
40
|
+
ht = ht.gsub( /<\/img>/i, '' ) # should not exists
|
41
41
|
|
42
42
|
# strip all remaining tags
|
43
|
-
|
43
|
+
ht = ht.gsub( /<[^>]+>/, '' )
|
44
44
|
|
45
|
-
pp
|
45
|
+
pp ht # fix: debugging info - remove
|
46
46
|
|
47
47
|
############################################
|
48
48
|
# step three - restore whitelisted tags
|
49
49
|
|
50
|
-
return
|
50
|
+
return ht if opts[:skip_restore].present? # skip step 3 for debugging
|
51
51
|
|
52
52
|
tags.each do |tag|
|
53
|
-
#
|
54
|
-
#
|
55
|
-
|
56
|
-
|
53
|
+
# ht = ht.gsub( /‹(#{tag})›/, "<\1>" ) # opening tag e.g. <p>
|
54
|
+
# ht = ht.gsub( /‹\/(#{tag})›/, "<\/\1>" ) # closing tag e.g. </p>
|
55
|
+
ht = ht.gsub( /‹(#{tag})›/ ) { |_| "<#{$1}>" }
|
56
|
+
ht = ht.gsub( /‹\/(#{tag})›/ ) { |_| "<\/#{$1}>" } # closing tag e.g. </p>
|
57
57
|
end
|
58
58
|
|
59
|
-
|
59
|
+
ht
|
60
60
|
end # method whitelist
|
61
61
|
|
62
62
|
|
@@ -95,37 +95,37 @@ module TemplateHelper
|
|
95
95
|
|
96
96
|
|
97
97
|
|
98
|
-
def textify(
|
98
|
+
def textify( ht, opts={} ) # ht -> hypertext
|
99
99
|
## turn into text
|
100
100
|
# todo: add options for
|
101
101
|
# keep links, images, lists (?too), code, codeblocks
|
102
102
|
|
103
|
-
|
103
|
+
ht = whitelist( ht, [:br, :p, :ul, :ol, :li, :pre, :code, :blockquote, :q, :cite], opts )
|
104
104
|
|
105
105
|
# strip bold
|
106
|
-
#
|
107
|
-
#
|
106
|
+
# ht = ht.gsub( /<b[^>]*>/, '**' ) # fix: will also swallow bxxx tags - add b space
|
107
|
+
# ht = ht.gsub( /<\/b>/, '**' )
|
108
108
|
|
109
109
|
# strip em
|
110
|
-
#
|
111
|
-
#
|
110
|
+
# ht = ht.gsub( /<em[^>]*>/, '__' )
|
111
|
+
# ht = ht.gsub( /<\/em>/, '__' )
|
112
112
|
|
113
113
|
# clean (prettify) literal urls (strip protocol)
|
114
|
-
|
114
|
+
ht = ht.gsub( /(http|https):\/\//, '' )
|
115
115
|
|
116
|
-
#
|
116
|
+
# ht = ht.gsub( / /, ' ' )
|
117
117
|
|
118
118
|
# # try to cleanup whitespaces
|
119
119
|
# # -- keep no more than two spaces
|
120
|
-
#
|
120
|
+
# ht = ht.gsub( /[ \t]{3,}/, ' ' )
|
121
121
|
# # -- keep no more than two new lines
|
122
|
-
#
|
122
|
+
# ht = ht.gsub( /\n{2,}/m, "\n\n" )
|
123
123
|
# # -- remove all trailing spaces
|
124
|
-
#
|
124
|
+
# ht = ht.gsub( /[ \t\n]+$/m, '' )
|
125
125
|
# # -- remove all leading spaces
|
126
|
-
#
|
126
|
+
# ht = ht.gsub( /^[ \t\n]+/m, '' )
|
127
127
|
|
128
|
-
|
128
|
+
ht
|
129
129
|
end
|
130
130
|
|
131
131
|
|
data/lib/pluto/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pakman
|
16
|
-
requirement: &
|
16
|
+
requirement: &75090920 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *75090920
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: fetcher
|
27
|
-
requirement: &
|
27
|
+
requirement: &75090620 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.3'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *75090620
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: logutils
|
38
|
-
requirement: &
|
38
|
+
requirement: &75090350 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.6'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *75090350
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: feedutils
|
49
|
-
requirement: &
|
49
|
+
requirement: &75090050 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.3.2
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *75090050
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: props
|
60
|
-
requirement: &
|
60
|
+
requirement: &75089760 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.2
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *75089760
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: textutils
|
71
|
-
requirement: &
|
71
|
+
requirement: &75089460 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 0.6.8
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *75089460
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: gli
|
82
|
-
requirement: &
|
82
|
+
requirement: &75089160 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: 2.5.6
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *75089160
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: rdoc
|
93
|
-
requirement: &
|
93
|
+
requirement: &75088910 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ~>
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '3.10'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *75088910
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: hoe
|
104
|
-
requirement: &
|
104
|
+
requirement: &75088620 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ~>
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '3.3'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *75088620
|
113
113
|
description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
|
114
114
|
Web Feeds)
|
115
115
|
email: feedreader@googlegroups.com
|