pluto 0.8.5 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +10 -1
- data/lib/pluto/fetcher.rb +95 -9
- data/lib/pluto/models/feed.rb +69 -9
- data/lib/pluto/models/item.rb +42 -6
- data/lib/pluto/models/site.rb +1 -0
- data/lib/pluto/models/utils.rb +7 -7
- data/lib/pluto/refresher.rb +20 -73
- data/lib/pluto/schema.rb +24 -11
- data/lib/pluto/subscriber.rb +6 -2
- data/lib/pluto/template_helpers.rb +30 -30
- data/lib/pluto/version.rb +1 -1
- metadata +20 -20
data/README.md
CHANGED
@@ -197,6 +197,15 @@ For more samples, see [`nytimes.yml`](https://github.com/feedreader/pluto.sample
|
|
197
197
|
[`beer.yml`](https://github.com/feedreader/pluto.samples/blob/master/beer.yml),
|
198
198
|
[`football.yml`](https://github.com/feedreader/pluto.samples/blob/master/football.yml).
|
199
199
|
|
200
|
+
|
201
|
+
## Template Packs
|
202
|
+
|
203
|
+
- Blank - default templates; [more »](https://github.com/feedreader/pluto.blank)
|
204
|
+
- News - 'river of news' style templates; [more »](https://github.com/feedreader/pluto.news)
|
205
|
+
- Top - Popurl-style templates; [more »](https://github.com/feedreader/pluto.top)
|
206
|
+
- Classic - Planet Planet-Style templates; [more »](https://github.com/feedreader/pluto.classic)
|
207
|
+
|
208
|
+
|
200
209
|
## Install
|
201
210
|
|
202
211
|
Just install the gem:
|
@@ -222,7 +231,7 @@ Planet Mars by Sam Ruby [(Site)](https://github.com/rubys/mars) - first draft o
|
|
222
231
|
|
223
232
|
### Python
|
224
233
|
|
225
|
-
Planet Planet by Scott James Remnant
|
234
|
+
Planet Planet by Scott James Remnant and Jeff Waugh [(Site)](http://www.planetplanet.org) - uses Mark Pilgrim's universal feed parser (RDF, RSS and Atom) and Tomas Styblo's templating engine; last release version 2.0 in 2006
|
226
235
|
|
227
236
|
Planet Venus by Sam Ruby [(Site)](https://github.com/rubys/venus) - cleaned up Planet Planet code; last change in 2010
|
228
237
|
|
data/lib/pluto/fetcher.rb
CHANGED
@@ -13,14 +13,48 @@ class Fetcher
|
|
13
13
|
def debug?() @debug || false; end
|
14
14
|
|
15
15
|
|
16
|
+
def fetch_feed( url )
|
17
|
+
response = @worker.get( url )
|
18
|
+
|
19
|
+
if debug?
|
20
|
+
puts "http status #{response.code} #{response.message}"
|
21
|
+
|
22
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
23
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
24
|
+
end
|
25
|
+
|
26
|
+
xml = response.body
|
27
|
+
|
28
|
+
###
|
29
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
30
|
+
# will mostly be ASCII
|
31
|
+
# - try to change encoding to UTF-8 ourselves
|
32
|
+
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
33
|
+
|
34
|
+
#####
|
35
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
36
|
+
|
37
|
+
## NB:
|
38
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
39
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
40
|
+
xml = xml.force_encoding( Encoding::UTF_8 )
|
41
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
42
|
+
|
43
|
+
xml
|
44
|
+
end
|
45
|
+
|
46
|
+
|
16
47
|
def feed_by_rec( feed_rec )
|
48
|
+
# simple feed fetcher; use for debugging (only/mostly)
|
49
|
+
# -- will NOT change db records in any way
|
50
|
+
|
17
51
|
feed_url = feed_rec.feed_url
|
18
52
|
feed_key = feed_rec.key
|
19
|
-
|
53
|
+
|
20
54
|
feed_xml = fetch_feed( feed_url )
|
21
55
|
|
22
56
|
logger.debug "feed_xml:"
|
23
|
-
logger.debug feed_xml[ 0..
|
57
|
+
logger.debug feed_xml[ 0..500 ] # get first 500 chars
|
24
58
|
|
25
59
|
# if opts.verbose? # also write a copy to disk
|
26
60
|
if debug?
|
@@ -41,16 +75,43 @@ class Fetcher
|
|
41
75
|
end
|
42
76
|
|
43
77
|
|
44
|
-
def
|
45
|
-
|
78
|
+
def feed_by_rec_if_modified( feed_rec ) # try smart http update; will update db records
|
79
|
+
feed_url = feed_rec.feed_url
|
80
|
+
feed_key = feed_rec.key
|
81
|
+
|
82
|
+
### todo/fix:
|
83
|
+
## add if available http_etag machinery for smarter updates
|
84
|
+
## and http_last_modified headers
|
85
|
+
## and brute force body_old == body_new etc.
|
86
|
+
|
87
|
+
response = @worker.get( feed_url )
|
88
|
+
|
89
|
+
feed_fetched = Time.now
|
90
|
+
|
91
|
+
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
92
|
+
|
93
|
+
puts "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
94
|
+
|
95
|
+
feed_attribs = {
|
96
|
+
http_code: response.code.to_i,
|
97
|
+
http_etag: nil,
|
98
|
+
http_last_modified: nil,
|
99
|
+
body: nil,
|
100
|
+
fetched: feed_fetched
|
101
|
+
}
|
102
|
+
feed_rec.update_attributes!( feed_attribs )
|
103
|
+
return nil # sorry; no feed for parsing available
|
104
|
+
end
|
105
|
+
|
46
106
|
|
47
|
-
|
107
|
+
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
48
108
|
|
109
|
+
feed_xml = response.body
|
49
110
|
###
|
50
111
|
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
51
112
|
# will mostly be ASCII
|
52
113
|
# - try to change encoding to UTF-8 ourselves
|
53
|
-
logger.debug "
|
114
|
+
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
54
115
|
|
55
116
|
#####
|
56
117
|
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
@@ -58,10 +119,35 @@ class Fetcher
|
|
58
119
|
## NB:
|
59
120
|
# for now "hardcoded" to utf8 - what else can we do?
|
60
121
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
61
|
-
|
62
|
-
logger.debug "
|
122
|
+
feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
123
|
+
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
124
|
+
|
125
|
+
feed_attribs = {
|
126
|
+
http_code: response.code.to_i,
|
127
|
+
http_etag: response.header[ 'etag' ],
|
128
|
+
http_last_modified: response.header[ 'last-modified' ],
|
129
|
+
body: feed_xml,
|
130
|
+
fetched: feed_fetched
|
131
|
+
}
|
132
|
+
|
133
|
+
if debug?
|
134
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
135
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
136
|
+
end
|
63
137
|
|
64
|
-
|
138
|
+
feed_rec.update_attributes!( feed_attribs )
|
139
|
+
|
140
|
+
logger.debug "feed_xml:"
|
141
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
142
|
+
|
143
|
+
puts "Before parsing feed >#{feed_key}<..."
|
144
|
+
|
145
|
+
### move to feedutils
|
146
|
+
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
147
|
+
|
148
|
+
## fix/todo: check for feed.nil? -> error parsing!!!
|
149
|
+
# or throw exception
|
150
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
65
151
|
end
|
66
152
|
|
67
153
|
end # class Fetcher
|
data/lib/pluto/models/feed.rb
CHANGED
@@ -15,9 +15,9 @@ class Feed < ActiveRecord::Base
|
|
15
15
|
# note: order by first non-null datetime field
|
16
16
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
17
17
|
|
18
|
-
# note: if not
|
19
|
-
## order( "coalesce(
|
20
|
-
order( "coalesce(
|
18
|
+
# note: if not published, touched or built use hardcoded 1971-01-01 for now
|
19
|
+
## order( "coalesce(published,touched,built,'1971-01-01') desc" )
|
20
|
+
order( "coalesce(last_published,'1971-01-01') desc" )
|
21
21
|
end
|
22
22
|
|
23
23
|
##################################
|
@@ -27,6 +27,12 @@ class Feed < ActiveRecord::Base
|
|
27
27
|
def link() url; end # alias for url
|
28
28
|
def feed() feed_url; end # alias for feed_url
|
29
29
|
|
30
|
+
def last_published_at() last_published; end # legay attrib reader - depreciated - remove!!
|
31
|
+
def fetched_at() fetched; end # legay attrib reader - depreciated - remove!!
|
32
|
+
def published_at() published; end # legay attrib reader - depreciated - remove!!
|
33
|
+
def touched_at() touched; end # legay attrib reader - depreciated - remove!!
|
34
|
+
def built_at() built; end # legay attrib reader - depreciated - remove!!
|
35
|
+
|
30
36
|
|
31
37
|
def url?() read_attribute(:url).present?; end
|
32
38
|
def title?() read_attribute(:title).present?; end
|
@@ -39,20 +45,74 @@ class Feed < ActiveRecord::Base
|
|
39
45
|
def feed_url() read_attribute_w_fallbacks( :feed_url, :auto_feed_url ); end
|
40
46
|
|
41
47
|
|
42
|
-
def
|
43
|
-
def
|
48
|
+
def published?() read_attribute(:published).present?; end
|
49
|
+
def touched?() read_attribute(:touched).present?; end
|
50
|
+
|
44
51
|
|
45
|
-
def
|
52
|
+
def published
|
46
53
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
47
54
|
# db backed attribute
|
48
55
|
|
49
56
|
read_attribute_w_fallbacks(
|
50
|
-
:
|
51
|
-
:
|
52
|
-
:
|
57
|
+
:published,
|
58
|
+
:touched, # try touched (aka updated (ATOM))
|
59
|
+
:built # try build (aka lastBuildDate (RSS))
|
53
60
|
)
|
54
61
|
end
|
55
62
|
|
63
|
+
|
64
|
+
def debug=(value) @debug = value; end
|
65
|
+
def debug?() @debug || false; end
|
66
|
+
|
67
|
+
def save_from_struct!( data )
|
68
|
+
|
69
|
+
update_from_struct!( data )
|
70
|
+
|
71
|
+
data.items.each do |item|
|
72
|
+
|
73
|
+
item_rec = Item.find_by_guid( item.guid )
|
74
|
+
if item_rec.nil?
|
75
|
+
item_rec = Item.new
|
76
|
+
puts "** NEW | #{item.title}"
|
77
|
+
else
|
78
|
+
## todo: check if any attribs changed
|
79
|
+
puts "UPDATE | #{item.title}"
|
80
|
+
end
|
81
|
+
|
82
|
+
item_rec.debug = debug? ? true : false # pass along debug flag
|
83
|
+
item_rec.update_from_struct!( self, item )
|
84
|
+
|
85
|
+
end # each item
|
86
|
+
end
|
87
|
+
|
88
|
+
|
89
|
+
def update_from_struct!( data )
|
90
|
+
feed_attribs = {
|
91
|
+
format: data.format,
|
92
|
+
published: data.published? ? data.published : nil,
|
93
|
+
touched: data.updated? ? data.updated : nil,
|
94
|
+
built: data.built? ? data.built : nil,
|
95
|
+
summary: data.summary? ? data.summary : nil,
|
96
|
+
### todo/fix: add/use
|
97
|
+
# auto_title: ???,
|
98
|
+
# auto_url: ???,
|
99
|
+
# auto_feed_url: ???,
|
100
|
+
auto_title2: data.title2? ? data.title2 : nil,
|
101
|
+
generator: data.generator
|
102
|
+
}
|
103
|
+
|
104
|
+
if debug?
|
105
|
+
## puts "*** dump feed_attribs:"
|
106
|
+
## pp feed_attribs
|
107
|
+
puts "*** dump feed_attribs w/ class types:"
|
108
|
+
feed_attribs.each do |key,value|
|
109
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
update_attributes!( feed_attribs )
|
114
|
+
end
|
115
|
+
|
56
116
|
end # class Feed
|
57
117
|
|
58
118
|
|
data/lib/pluto/models/item.rb
CHANGED
@@ -14,27 +14,63 @@ class Item < ActiveRecord::Base
|
|
14
14
|
def description() summary; end # alias for summary -- also add descr shortcut??
|
15
15
|
def link() url; end # alias for url
|
16
16
|
|
17
|
+
def fetched_at() fetched; end # legay attrib reader - depreciated - remove!!
|
18
|
+
def published_at() published; end # legay attrib reader - depreciated - remove!!
|
19
|
+
def touched_at() touched; end # legay attrib reader - depreciated - remove!!
|
20
|
+
|
17
21
|
|
18
22
|
def self.latest
|
19
23
|
# note: order by first non-null datetime field
|
20
24
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
21
25
|
|
22
|
-
# note: if not
|
23
|
-
order( "coalesce(
|
26
|
+
# note: if neither published nor touched is set, use hardcoded 1971-01-01 for now
|
27
|
+
order( "coalesce(published,touched,'1971-01-01') desc" )
|
24
28
|
end
|
25
29
|
|
26
|
-
def
|
30
|
+
def published?() read_attribute(:published).present?; end
|
27
31
|
|
28
|
-
def
|
32
|
+
def published
|
29
33
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
30
34
|
# db backed attribute
|
31
35
|
|
32
36
|
read_attribute_w_fallbacks(
|
33
|
-
:
|
34
|
-
:
|
37
|
+
:published,
|
38
|
+
:touched # try touched (aka updated RSS/ATOM)
|
35
39
|
)
|
36
40
|
end
|
37
41
|
|
42
|
+
|
43
|
+
|
44
|
+
def debug=(value) @debug = value; end
|
45
|
+
def debug?() @debug || false; end
|
46
|
+
|
47
|
+
def update_from_struct!( feed_rec, data )
|
48
|
+
## check: new item/record? not saved? add guid
|
49
|
+
# otherwise do not add guid - why? why not?
|
50
|
+
|
51
|
+
item_attribs = {
|
52
|
+
guid: data.guid, # todo: only add for new records???
|
53
|
+
title: data.title,
|
54
|
+
url: data.url,
|
55
|
+
summary: data.summary? ? data.summary : nil,
|
56
|
+
content: data.content? ? data.content : nil,
|
57
|
+
published: data.published? ? data.published : nil,
|
58
|
+
touched: data.updated? ? data.updated : nil,
|
59
|
+
feed_id: feed_rec.id, # add feed_id fk_ref
|
60
|
+
fetched: feed_rec.fetched
|
61
|
+
}
|
62
|
+
|
63
|
+
if debug?
|
64
|
+
puts "*** dump item_attribs w/ class types:"
|
65
|
+
item_attribs.each do |key,value|
|
66
|
+
next if [:summary,:content].include?( key ) # skip summary n content
|
67
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
update_attributes!( item_attribs )
|
72
|
+
end
|
73
|
+
|
38
74
|
end # class Item
|
39
75
|
|
40
76
|
|
data/lib/pluto/models/site.rb
CHANGED
data/lib/pluto/models/utils.rb
CHANGED
@@ -8,16 +8,16 @@ class ItemCursor
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def each
|
11
|
-
|
11
|
+
last_published = Time.local( 1971, 1, 1 )
|
12
12
|
last_feed_id = -1 ## todo: use feed_key instead of id?? why? why not??
|
13
13
|
|
14
14
|
@items.each do |item|
|
15
15
|
|
16
|
-
|
16
|
+
item_published = item.published # cache published value ref
|
17
17
|
|
18
|
-
if
|
19
|
-
|
20
|
-
|
18
|
+
if last_published.year == item_published.year &&
|
19
|
+
last_published.month == item_published.month &&
|
20
|
+
last_published.day == item_published.day
|
21
21
|
new_date = false
|
22
22
|
else
|
23
23
|
new_date = true
|
@@ -31,8 +31,8 @@ class ItemCursor
|
|
31
31
|
|
32
32
|
yield( item, new_date, new_feed )
|
33
33
|
|
34
|
-
|
35
|
-
last_feed_id
|
34
|
+
last_published = item.published
|
35
|
+
last_feed_id = item.feed.id
|
36
36
|
end
|
37
37
|
end # method each
|
38
38
|
|
data/lib/pluto/refresher.rb
CHANGED
@@ -25,18 +25,23 @@ class Refresher
|
|
25
25
|
Action.create!( title: 'update feeds' )
|
26
26
|
|
27
27
|
#####
|
28
|
-
# -- update
|
29
|
-
|
28
|
+
# -- update fetched timestamps for all sites
|
29
|
+
feeds_fetched = Time.now
|
30
30
|
Site.all.each do |site|
|
31
|
-
site.
|
31
|
+
site.fetched = feeds_fetched
|
32
32
|
site.save!
|
33
33
|
end
|
34
34
|
|
35
35
|
Feed.all.each do |feed_rec|
|
36
36
|
|
37
|
-
feed = @worker.
|
37
|
+
feed = @worker.feed_by_rec_if_modified( feed_rec )
|
38
|
+
|
39
|
+
# on error or if http-not modified etc. skip update/processing
|
40
|
+
next if feed.nil?
|
38
41
|
|
39
|
-
|
42
|
+
## fix/todo: reload feed_rec - fetched date updated etc.
|
43
|
+
## check if needed for access to fetched date
|
44
|
+
|
40
45
|
|
41
46
|
## todo/check: move feed_rec update to the end (after item updates??)
|
42
47
|
|
@@ -51,79 +56,21 @@ class Refresher
|
|
51
56
|
#
|
52
57
|
# move to_datetime to feedutils!! if it works
|
53
58
|
## todo: move this comments to feedutils??
|
54
|
-
|
55
|
-
|
56
|
-
feed_attribs = {
|
57
|
-
fetched_at: feed_fetched_at,
|
58
|
-
format: feed.format,
|
59
|
-
published_at: feed.published? ? feed.published : nil,
|
60
|
-
touched_at: feed.updated? ? feed.updated : nil,
|
61
|
-
built_at: feed.built? ? feed.built : nil,
|
62
|
-
summary: feed.summary? ? feed.summary : nil,
|
63
|
-
### todo/fix: add/use
|
64
|
-
# auto_title: ???,
|
65
|
-
# auto_url: ???,
|
66
|
-
# auto_feed_url: ???,
|
67
|
-
auto_title2: feed.title2? ? feed.title2 : nil,
|
68
|
-
generator: feed.generator
|
69
|
-
}
|
70
|
-
|
71
|
-
if debug?
|
72
|
-
## puts "*** dump feed_attribs:"
|
73
|
-
## pp feed_attribs
|
74
|
-
puts "*** dump feed_attribs w/ class types:"
|
75
|
-
feed_attribs.each do |key,value|
|
76
|
-
puts " #{key}: >#{value}< : #{value.class.name}"
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
feed_rec.update_attributes!( feed_attribs )
|
81
59
|
|
82
60
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
fetched_at: feed_fetched_at,
|
87
|
-
title: item.title,
|
88
|
-
url: item.url,
|
89
|
-
summary: item.summary? ? item.summary : nil,
|
90
|
-
content: item.content? ? item.content : nil,
|
91
|
-
published_at: item.published? ? item.published : nil,
|
92
|
-
touched_at: item.updated? ? item.updated : nil,
|
93
|
-
feed_id: feed_rec.id # add feed_id fk_ref
|
94
|
-
}
|
95
|
-
|
96
|
-
if debug?
|
97
|
-
puts "*** dump item_attribs w/ class types:"
|
98
|
-
item_attribs.each do |key,value|
|
99
|
-
next if [:summary,:content].include?( key ) # skip summary n content
|
100
|
-
puts " #{key}: >#{value}< : #{value.class.name}"
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
|
105
|
-
rec = Item.find_by_guid( item.guid )
|
106
|
-
if rec.nil?
|
107
|
-
rec = Item.new
|
108
|
-
item_attribs[ :guid ] = item.guid
|
109
|
-
puts "** NEW | #{item.title}"
|
110
|
-
else
|
111
|
-
## todo: check if any attribs changed
|
112
|
-
puts "UPDATE | #{item.title}"
|
113
|
-
end
|
61
|
+
feed_rec.debug = debug? ? true : false # pass along debug flag
|
62
|
+
## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
|
63
|
+
feed_rec.save_from_struct!( feed ) # todo: find a better name - why? why not??
|
114
64
|
|
115
|
-
rec.update_attributes!( item_attribs )
|
116
|
-
end # each item
|
117
65
|
|
118
|
-
# update cached value
|
119
|
-
|
120
|
-
|
121
|
-
if
|
122
|
-
feed_rec.
|
123
|
-
else # try
|
124
|
-
feed_rec.
|
66
|
+
# update cached value last published for item
|
67
|
+
last_item_rec = feed_rec.items.latest.limit(1).first # note limit(1) will return relation/arrar - use first to get first element or nil from ary
|
68
|
+
if last_item_rec.present?
|
69
|
+
if last_item_rec.published?
|
70
|
+
feed_rec.update_attributes!( last_published: last_item_rec.published )
|
71
|
+
else # try touched
|
72
|
+
feed_rec.update_attributes!( last_published: last_item_rec.touched )
|
125
73
|
end
|
126
|
-
feed_rec.save!
|
127
74
|
end
|
128
75
|
|
129
76
|
end # each feed
|
data/lib/pluto/schema.rb
CHANGED
@@ -7,7 +7,7 @@ class CreateDb < ActiveRecord::Migration
|
|
7
7
|
create_table :sites do |t|
|
8
8
|
t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
|
9
9
|
t.string :key, :null => false # e.g. ruby, js, etc.
|
10
|
-
t.datetime :
|
10
|
+
t.datetime :fetched # last fetched/checked date -- make not null ??
|
11
11
|
|
12
12
|
t.timestamps # created_at, updated_at
|
13
13
|
end
|
@@ -35,17 +35,30 @@ class CreateDb < ActiveRecord::Migration
|
|
35
35
|
|
36
36
|
t.string :generator # feed generator (e.g. wordpress, etc.) from feed
|
37
37
|
|
38
|
-
t.datetime :
|
39
|
-
t.datetime :
|
40
|
-
t.datetime :
|
38
|
+
t.datetime :published # from feed published(atom)+ pubDate(rss)
|
39
|
+
t.datetime :built # from feed lastBuiltDate(rss)
|
40
|
+
t.datetime :touched # from feed updated(atom)
|
41
|
+
|
42
|
+
############
|
43
|
+
# filters
|
44
|
+
t.string :includes # regex
|
45
|
+
t.string :excludes # regex
|
46
|
+
# todo: add generic filter list e.g. t.string :filters (comma,pipe or space separated method names?)
|
41
47
|
|
42
48
|
# -- our own (meta) fields
|
43
|
-
t.datetime :
|
49
|
+
t.datetime :last_published # cache last (latest) published for items
|
44
50
|
|
45
51
|
t.string :key, :null => false
|
46
52
|
t.string :format # e.g. atom (1.0), rss 2.0, rss 0.7 etc.
|
47
|
-
|
48
|
-
t.
|
53
|
+
|
54
|
+
t.integer :http_code # last http status code e.g. 200,404,etc.
|
55
|
+
t.string :http_etag # last http header etag
|
56
|
+
t.datetime :http_last_modified # last http header last-modified
|
57
|
+
|
58
|
+
t.text :body # last http response body (complete feed!)
|
59
|
+
|
60
|
+
t.datetime :fetched # last fetched/checked date
|
61
|
+
|
49
62
|
t.timestamps # created_at, updated_at
|
50
63
|
end
|
51
64
|
|
@@ -56,15 +69,15 @@ class CreateDb < ActiveRecord::Migration
|
|
56
69
|
t.text :summary # e.g. description (rss), summary (atom)
|
57
70
|
t.text :content
|
58
71
|
|
59
|
-
t.datetime :
|
60
|
-
t.datetime :
|
72
|
+
t.datetime :published # from feed (published) + pubDate(rss)
|
73
|
+
t.datetime :touched # from feed updated (atom)
|
61
74
|
|
62
75
|
## todo: add :last_updated_at ?? (NOTE: updated_at already taken by auto-timestamps)
|
63
76
|
t.references :feed, :null => false
|
64
77
|
|
65
|
-
t.datetime :
|
78
|
+
t.datetime :fetched # last fetched/check date
|
66
79
|
t.timestamps # created_at, updated_at
|
67
|
-
|
80
|
+
|
68
81
|
## t.string :author
|
69
82
|
## todo: add author/authors, category/categories
|
70
83
|
end
|
data/lib/pluto/subscriber.rb
CHANGED
@@ -54,12 +54,16 @@ class Subscriber
|
|
54
54
|
feed_key = key.to_s.dup
|
55
55
|
feed_hash = value
|
56
56
|
|
57
|
-
# todo: use title from feed?
|
57
|
+
# todo/fix: use title from feed?
|
58
|
+
# e.g. fill up auto_title, auto_url, etc.
|
59
|
+
|
58
60
|
feed_attribs = {
|
59
61
|
feed_url: feed_hash[ 'feed' ] || feed_hash[ 'feed_url' ],
|
60
62
|
url: feed_hash[ 'link' ] || feed_hash[ 'url' ],
|
61
63
|
title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
|
62
|
-
title2: feed_hash[ 'title2' ]
|
64
|
+
title2: feed_hash[ 'title2' ],
|
65
|
+
includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
|
66
|
+
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
|
63
67
|
}
|
64
68
|
|
65
69
|
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
@@ -5,16 +5,16 @@ module Pluto
|
|
5
5
|
|
6
6
|
module TemplateHelper
|
7
7
|
|
8
|
-
def strip_tags(
|
8
|
+
def strip_tags( ht )
|
9
9
|
### tobe done
|
10
10
|
## strip markup tags; return plain text
|
11
|
-
|
11
|
+
ht.gsub( /<[^>]+>/, '' )
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
-
def whitelist(
|
15
|
+
def whitelist( ht, tags, opts={} )
|
16
16
|
|
17
|
-
# note: assumes properly escaped <> in
|
17
|
+
# note: assumes properly escaped <> in ht/hypertext
|
18
18
|
|
19
19
|
###############################################
|
20
20
|
# step one - save whitelisted tags use ‹tag›
|
@@ -25,38 +25,38 @@ module TemplateHelper
|
|
25
25
|
|
26
26
|
# convert xml-style empty tags to simple html empty tags
|
27
27
|
# e.g. <br/> or <br /> becomes <br>
|
28
|
-
|
28
|
+
ht = ht.gsub( /<(#{tag})\s*\/>/i ) { |_| "‹#{$1.downcase}›" } # eg. <br /> or <br/> becomes ‹br›
|
29
29
|
|
30
30
|
# make sure we won't swallow <br> for <b> for example, thus use \s+ before [^>]
|
31
|
-
|
32
|
-
|
31
|
+
ht = ht.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { |_| "‹#{$1.downcase}›" } # opening tag <p>
|
32
|
+
ht = ht.gsub( /<\/(#{tag})\s*>/i ) { |_| "‹/#{$1.downcase}›" } # closing tag e.g. </p>
|
33
33
|
end
|
34
34
|
|
35
35
|
############################
|
36
36
|
# step two - clean tags
|
37
37
|
|
38
38
|
# strip images - special treatment for debugging
|
39
|
-
|
40
|
-
|
39
|
+
ht = ht.gsub( /<img[^>]*>/i, '♦' ) # for debugging use black diamond e.g. ♦
|
40
|
+
ht = ht.gsub( /<\/img>/i, '' ) # should not exists
|
41
41
|
|
42
42
|
# strip all remaining tags
|
43
|
-
|
43
|
+
ht = ht.gsub( /<[^>]+>/, '' )
|
44
44
|
|
45
|
-
pp
|
45
|
+
pp ht # fix: debugging indo - remove
|
46
46
|
|
47
47
|
############################################
|
48
48
|
# step three - restore whitelisted tags
|
49
49
|
|
50
|
-
return
|
50
|
+
return ht if opts[:skip_restore].present? # skip step 3 for debugging
|
51
51
|
|
52
52
|
tags.each do |tag|
|
53
|
-
#
|
54
|
-
#
|
55
|
-
|
56
|
-
|
53
|
+
# ht = ht.gsub( /‹(#{tag})›/, "<\1>" ) # opening tag e.g. <p>
|
54
|
+
# ht = ht.gsub( /‹\/(#{tag})›/, "<\/\1>" ) # closing tag e.g. </p>
|
55
|
+
ht = ht.gsub( /‹(#{tag})›/ ) { |_| "<#{$1}>" }
|
56
|
+
ht = ht.gsub( /‹\/(#{tag})›/ ) { |_| "<\/#{$1}>" } # closing tag e.g. </p>
|
57
57
|
end
|
58
58
|
|
59
|
-
|
59
|
+
ht
|
60
60
|
end # method whitelist
|
61
61
|
|
62
62
|
|
@@ -95,37 +95,37 @@ module TemplateHelper
|
|
95
95
|
|
96
96
|
|
97
97
|
|
98
|
-
def textify(
|
98
|
+
def textify( ht, opts={} ) # ht -> hypertext
|
99
99
|
## turn into text
|
100
100
|
# todo: add options for
|
101
101
|
# keep links, images, lists (?too), code, codeblocks
|
102
102
|
|
103
|
-
|
103
|
+
ht = whitelist( ht, [:br, :p, :ul, :ol, :li, :pre, :code, :blockquote, :q, :cite], opts )
|
104
104
|
|
105
105
|
# strip bold
|
106
|
-
#
|
107
|
-
#
|
106
|
+
# ht = ht.gsub( /<b[^>]*>/, '**' ) # fix: will also swallow bxxx tags - add b space
|
107
|
+
# ht = ht.gsub( /<\/b>/, '**' )
|
108
108
|
|
109
109
|
# strip em
|
110
|
-
#
|
111
|
-
#
|
110
|
+
# ht = ht.gsub( /<em[^>]*>/, '__' )
|
111
|
+
# ht = ht.gsub( /<\/em>/, '__' )
|
112
112
|
|
113
113
|
# clean (prettify) literal urls (strip protocol)
|
114
|
-
|
114
|
+
ht = ht.gsub( /(http|https):\/\//, '' )
|
115
115
|
|
116
|
-
#
|
116
|
+
# ht = ht.gsub( / /, ' ' )
|
117
117
|
|
118
118
|
# # try to cleanup whitespaces
|
119
119
|
# # -- keep no more than two spaces
|
120
|
-
#
|
120
|
+
# ht = ht.gsub( /[ \t]{3,}/, ' ' )
|
121
121
|
# # -- keep no more than two new lines
|
122
|
-
#
|
122
|
+
# ht = ht.gsub( /\n{2,}/m, "\n\n" )
|
123
123
|
# # -- remove all trailing spaces
|
124
|
-
#
|
124
|
+
# ht = ht.gsub( /[ \t\n]+$/m, '' )
|
125
125
|
# # -- remove all leading spaces
|
126
|
-
#
|
126
|
+
# ht = ht.gsub( /^[ \t\n]+/m, '' )
|
127
127
|
|
128
|
-
|
128
|
+
ht
|
129
129
|
end
|
130
130
|
|
131
131
|
|
data/lib/pluto/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pakman
|
16
|
-
requirement: &
|
16
|
+
requirement: &75090920 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *75090920
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: fetcher
|
27
|
-
requirement: &
|
27
|
+
requirement: &75090620 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.3'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *75090620
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: logutils
|
38
|
-
requirement: &
|
38
|
+
requirement: &75090350 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.6'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *75090350
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: feedutils
|
49
|
-
requirement: &
|
49
|
+
requirement: &75090050 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.3.2
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *75090050
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: props
|
60
|
-
requirement: &
|
60
|
+
requirement: &75089760 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.2
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *75089760
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: textutils
|
71
|
-
requirement: &
|
71
|
+
requirement: &75089460 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 0.6.8
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *75089460
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: gli
|
82
|
-
requirement: &
|
82
|
+
requirement: &75089160 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: 2.5.6
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *75089160
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: rdoc
|
93
|
-
requirement: &
|
93
|
+
requirement: &75088910 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ~>
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '3.10'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *75088910
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: hoe
|
104
|
-
requirement: &
|
104
|
+
requirement: &75088620 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ~>
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '3.3'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *75088620
|
113
113
|
description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
|
114
114
|
Web Feeds)
|
115
115
|
email: feedreader@googlegroups.com
|