pluto 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/Manifest.txt +4 -0
- data/README.md +87 -17
- data/Rakefile +3 -3
- data/lib/pluto.rb +5 -3
- data/lib/pluto/cli/main.rb +94 -58
- data/lib/pluto/fetcher.rb +2 -20
- data/lib/pluto/formatter.rb +17 -9
- data/lib/pluto/refresher.rb +186 -0
- data/lib/pluto/subscriber.rb +85 -0
- data/lib/pluto/template_helpers.rb +104 -11
- data/lib/pluto/updater.rb +18 -233
- data/lib/pluto/version.rb +1 -1
- data/test/helper.rb +18 -0
- data/test/test_helpers.rb +75 -0
- metadata +30 -24
data/lib/pluto/formatter.rb
CHANGED
@@ -14,15 +14,12 @@ class Formatter
|
|
14
14
|
@config = config
|
15
15
|
end
|
16
16
|
|
17
|
-
attr_reader :opts
|
18
|
-
|
19
|
-
def site
|
20
|
-
### fix !!!!!!!!!!
|
21
|
-
## fix/todo: change to db record for site
|
22
|
-
@config
|
23
|
-
end
|
17
|
+
attr_reader :opts, :config, :site
|
24
18
|
|
25
19
|
def run( arg )
|
20
|
+
## fix: change arg to planet_key or just key or similar
|
21
|
+
# todo: rename run to some less generic - merge/build/etc. ??
|
22
|
+
|
26
23
|
manifest_name = opts.manifest
|
27
24
|
manifest_name = manifest_name.downcase.gsub('.txt', '' ) # remove .txt if present
|
28
25
|
|
@@ -51,9 +48,20 @@ class Formatter
|
|
51
48
|
|
52
49
|
manifestsrc = manifests[0][1]
|
53
50
|
pakpath = opts.output_path
|
54
|
-
|
55
|
-
|
51
|
+
|
56
52
|
name = arg
|
53
|
+
|
54
|
+
## for now - use single site w/ key planet
|
55
|
+
##-- fix!! allow multiple sites (planets)
|
56
|
+
|
57
|
+
site_key = 'planet'
|
58
|
+
@site = Site.find_by_key( site_key )
|
59
|
+
if @site.nil?
|
60
|
+
puts "*** warn: no site with key '#{site_key}' found; using untitled site record"
|
61
|
+
@site = Site.new
|
62
|
+
@site.title = 'Planet Untitled'
|
63
|
+
end
|
64
|
+
|
57
65
|
Pakman::Templater.new.merge_pak( manifestsrc, pakpath, binding, name )
|
58
66
|
end
|
59
67
|
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
class Refresher
|
4
|
+
|
5
|
+
include LogUtils::Logging
|
6
|
+
|
7
|
+
include Models
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@worker = ::Fetcher::Worker.new
|
11
|
+
end
|
12
|
+
|
13
|
+
attr_reader :worker
|
14
|
+
|
15
|
+
def debug=(value)
|
16
|
+
@debug = value
|
17
|
+
### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def debug?
|
21
|
+
@debug || false
|
22
|
+
end
|
23
|
+
|
24
|
+
def fetch_feed( url )
|
25
|
+
|
26
|
+
### fix: use worker.get( url ) # check http response code etc.
|
27
|
+
|
28
|
+
xml = worker.read( url )
|
29
|
+
|
30
|
+
###
|
31
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
32
|
+
# will mostly be ASCII
|
33
|
+
# - try to change encoding to UTF-8 ourselves
|
34
|
+
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
35
|
+
|
36
|
+
#####
|
37
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
38
|
+
|
39
|
+
## NB:
|
40
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
41
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
42
|
+
xml = xml.force_encoding( Encoding::UTF_8 )
|
43
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
44
|
+
xml
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
def update_feeds( opts={} )
|
49
|
+
|
50
|
+
if debug?
|
51
|
+
## turn on logging for sql too
|
52
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
53
|
+
end
|
54
|
+
|
55
|
+
### move to feedutils
|
56
|
+
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
57
|
+
|
58
|
+
# -- log update action
|
59
|
+
Action.create!( title: 'update feeds' )
|
60
|
+
|
61
|
+
#####
|
62
|
+
# -- update fetched_at timestamps for all sites
|
63
|
+
feeds_fetched_at = Time.now
|
64
|
+
Site.all.each do |site|
|
65
|
+
site.fetched_at = feeds_fetched_at
|
66
|
+
site.save!
|
67
|
+
end
|
68
|
+
|
69
|
+
Feed.all.each do |feed_rec|
|
70
|
+
|
71
|
+
feed_key = feed_rec.key
|
72
|
+
feed_url = feed_rec.feed_url
|
73
|
+
|
74
|
+
feed_xml = fetch_feed( feed_url )
|
75
|
+
|
76
|
+
logger.debug "feed_xml:"
|
77
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
78
|
+
|
79
|
+
# if opts.verbose? # also write a copy to disk
|
80
|
+
if debug?
|
81
|
+
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
82
|
+
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
83
|
+
f.write( feed_xml )
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
puts "Before parsing feed >#{feed_key}<..."
|
88
|
+
|
89
|
+
## fix/todo: check for feed.nil? -> error parsing!!!
|
90
|
+
# or throw exception
|
91
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
92
|
+
|
93
|
+
feed_fetched_at = Time.now
|
94
|
+
|
95
|
+
## todo/check: move feed_rec update to the end (after item updates??)
|
96
|
+
|
97
|
+
# update feed attribs e.g.
|
98
|
+
# generator
|
99
|
+
# published_at,built_at,touched_at,fetched_at
|
100
|
+
# summary,title2
|
101
|
+
|
102
|
+
## fix:
|
103
|
+
## weird rss exception error on windows w/ dates
|
104
|
+
# e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
|
105
|
+
#
|
106
|
+
# move to_datetime to feedutils!! if it works
|
107
|
+
## todo: move this comments to feedutils??
|
108
|
+
|
109
|
+
|
110
|
+
feed_attribs = {
|
111
|
+
fetched_at: feed_fetched_at,
|
112
|
+
format: feed.format,
|
113
|
+
published_at: feed.published? ? feed.published : nil,
|
114
|
+
touched_at: feed.updated? ? feed.updated : nil,
|
115
|
+
built_at: feed.built? ? feed.built : nil,
|
116
|
+
summary: feed.summary? ? feed.summary : nil,
|
117
|
+
title2: feed.title2? ? feed.title2 : nil,
|
118
|
+
generator: feed.generator
|
119
|
+
}
|
120
|
+
|
121
|
+
if debug?
|
122
|
+
## puts "*** dump feed_attribs:"
|
123
|
+
## pp feed_attribs
|
124
|
+
puts "*** dump feed_attribs w/ class types:"
|
125
|
+
feed_attribs.each do |key,value|
|
126
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
feed_rec.update_attributes!( feed_attribs )
|
131
|
+
|
132
|
+
|
133
|
+
feed.items.each do |item|
|
134
|
+
|
135
|
+
item_attribs = {
|
136
|
+
fetched_at: feed_fetched_at,
|
137
|
+
title: item.title,
|
138
|
+
url: item.url,
|
139
|
+
summary: item.summary? ? item.summary : nil,
|
140
|
+
content: item.content? ? item.content : nil,
|
141
|
+
published_at: item.published? ? item.published : nil,
|
142
|
+
touched_at: item.updated? ? item.updated : nil,
|
143
|
+
feed_id: feed_rec.id # add feed_id fk_ref
|
144
|
+
}
|
145
|
+
|
146
|
+
if debug?
|
147
|
+
puts "*** dump item_attribs w/ class types:"
|
148
|
+
item_attribs.each do |key,value|
|
149
|
+
next if [:summary,:content].include?( key ) # skip summary n content
|
150
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
rec = Item.find_by_guid( item.guid )
|
156
|
+
if rec.nil?
|
157
|
+
rec = Item.new
|
158
|
+
item_attribs[ :guid ] = item.guid
|
159
|
+
puts "** NEW | #{item.title}"
|
160
|
+
else
|
161
|
+
## todo: check if any attribs changed
|
162
|
+
puts "UPDATE | #{item.title}"
|
163
|
+
end
|
164
|
+
|
165
|
+
rec.update_attributes!( item_attribs )
|
166
|
+
end # each item
|
167
|
+
|
168
|
+
# update cached value latest published_at for item
|
169
|
+
item_recs = feed_rec.items.latest.limit(1).all
|
170
|
+
unless item_recs.empty?
|
171
|
+
if item_recs[0].published_at?
|
172
|
+
feed_rec.latest_published_at = item_recs[0].published_at
|
173
|
+
else # try touched_at
|
174
|
+
feed_rec.latest_published_at = item_recs[0].touched_at
|
175
|
+
end
|
176
|
+
feed_rec.save!
|
177
|
+
end
|
178
|
+
|
179
|
+
end # each feed
|
180
|
+
|
181
|
+
end # method update_feeds
|
182
|
+
|
183
|
+
|
184
|
+
end # class Refresher
|
185
|
+
|
186
|
+
end # module Pluto
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
class Subscriber
|
4
|
+
|
5
|
+
include LogUtils::Logging
|
6
|
+
|
7
|
+
include Models
|
8
|
+
|
9
|
+
def debug=(value)
|
10
|
+
@debug = value
|
11
|
+
### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
|
12
|
+
end
|
13
|
+
|
14
|
+
def debug?
|
15
|
+
@debug || false
|
16
|
+
end
|
17
|
+
|
18
|
+
def update_subscriptions( config, opts={} )
|
19
|
+
|
20
|
+
site_attribs = {
|
21
|
+
title: config[ 'title' ] || config[ 'name' ] # support either title or name
|
22
|
+
}
|
23
|
+
|
24
|
+
## for now - use single site w/ key planet
|
25
|
+
## -- fix!! allow multiple sites (planets)
|
26
|
+
|
27
|
+
site_key = 'planet'
|
28
|
+
site_rec = Site.find_by_key( site_key )
|
29
|
+
if site_rec.nil?
|
30
|
+
site_rec = Site.new
|
31
|
+
site_attribs[ :key ] = site_key
|
32
|
+
|
33
|
+
## use object_id: site.id and object_type: Site
|
34
|
+
## change - model/table/schema!!!
|
35
|
+
Action.create!( title: 'new site', object: site_attribs[ :title ] )
|
36
|
+
end
|
37
|
+
site_rec.update_attributes!( site_attribs )
|
38
|
+
|
39
|
+
# -- log update action
|
40
|
+
Action.create!( title: 'update subscriptions' )
|
41
|
+
|
42
|
+
|
43
|
+
config.each do |key, value|
|
44
|
+
|
45
|
+
next if ['title','name','feeds'].include?( key ) # skip "top-level" feed keys e.g. title, etc.
|
46
|
+
|
47
|
+
### todo/check:
|
48
|
+
## check value - must be hash
|
49
|
+
# check if url or feed_url present
|
50
|
+
# that is, check for required props/key-value pairs
|
51
|
+
|
52
|
+
feed_key = key.to_s.dup
|
53
|
+
feed_hash = value
|
54
|
+
|
55
|
+
# todo: use title from feed?
|
56
|
+
feed_attribs = {
|
57
|
+
feed_url: feed_hash[ 'feed' ] || feed_hash[ 'feed_url' ],
|
58
|
+
url: feed_hash[ 'link' ] || feed_hash[ 'site' ] || feed_hash[ 'url' ],
|
59
|
+
title: feed_hash[ 'title' ] || feed_hash[ 'name' ] || feed_hash[ 'author' ]
|
60
|
+
}
|
61
|
+
|
62
|
+
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
63
|
+
|
64
|
+
feed_rec = Feed.find_by_key( feed_key )
|
65
|
+
if feed_rec.nil?
|
66
|
+
feed_rec = Feed.new
|
67
|
+
feed_attribs[ :key ] = feed_key
|
68
|
+
|
69
|
+
## use object_id: feed.id and object_type: Feed
|
70
|
+
## change - model/table/schema!!!
|
71
|
+
## todo: add parent_action_id - why? why not?
|
72
|
+
Action.create!( title: 'new feed', object: feed_attribs[ :title ] )
|
73
|
+
end
|
74
|
+
|
75
|
+
feed_rec.update_attributes!( feed_attribs )
|
76
|
+
|
77
|
+
## todo:
|
78
|
+
# add subscription records (feed,site) - how?
|
79
|
+
end
|
80
|
+
|
81
|
+
end # method update_subscriptions
|
82
|
+
|
83
|
+
end # class Subscriber
|
84
|
+
|
85
|
+
end # module Pluto
|
@@ -1,17 +1,107 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
module Pluto
|
2
4
|
|
3
|
-
####
|
4
|
-
# fix: rename to DateHelper
|
5
5
|
|
6
6
|
module TemplateHelper
|
7
7
|
|
8
|
-
def strip_tags(
|
8
|
+
def strip_tags( hy )
|
9
9
|
### tobe done
|
10
10
|
## strip markup tags; return plain text
|
11
|
-
|
11
|
+
hy.gsub( /<[^>]+>/, '' )
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
+
def whitelist( hy, tags, opts={} )
|
16
|
+
|
17
|
+
# note: assumes properly escaped <> in hy/hypertext
|
18
|
+
|
19
|
+
###############################################
|
20
|
+
# step one - save whitelisted tags use ‹tag›
|
21
|
+
tags.each do |tag|
|
22
|
+
# note: we strip all attribues
|
23
|
+
# note: match all tags case insensitive e.g. allow a,A or br,BR,bR etc.
|
24
|
+
# downcase all tags
|
25
|
+
|
26
|
+
# convert xml-style empty tags to simple html emtpty tags
|
27
|
+
# e.g. <br/> or <br /> becomses <br>
|
28
|
+
hy = hy.gsub( /<(#{tag})\s*\/>/i ) { |_| "‹#{$1.downcase}›" } # eg. <br /> or <br/> becomes ‹br›
|
29
|
+
|
30
|
+
# make sure we won't swall <br> for <b> for example, thus use \s+ before [^>]
|
31
|
+
hy = hy.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { |_| "‹#{$1.downcase}›" } # opening tag <p>
|
32
|
+
hy = hy.gsub( /<\/(#{tag})\s*>/i ) { |_| "‹/#{$1.downcase}›" } # closing tag e.g. </p>
|
33
|
+
end
|
34
|
+
|
35
|
+
############################
|
36
|
+
# step two - clean tags
|
37
|
+
|
38
|
+
# strip images - special treatment for debugging
|
39
|
+
hy = hy.gsub( /<img[^>]*>/i, '♦' ) # for debugging use black diamond e.g. ♦
|
40
|
+
hy = hy.gsub( /<\/img>/i, '' ) # should not exists
|
41
|
+
|
42
|
+
# strip all remaining tags
|
43
|
+
hy = hy.gsub( /<[^>]+>/, '' )
|
44
|
+
|
45
|
+
pp hy # fix: debugging indo - remove
|
46
|
+
|
47
|
+
############################################
|
48
|
+
# step three - restore whitelisted tags
|
49
|
+
|
50
|
+
return hy if opts[:skip_restore].present? # skip step 3 for debugging
|
51
|
+
|
52
|
+
tags.each do |tag|
|
53
|
+
# hy = hy.gsub( /‹(#{tag})›/, "<\1>" ) # opening tag e.g. <p>
|
54
|
+
# hy = hy.gsub( /‹\/(#{tag})›/, "<\/\1>" ) # closing tag e.g. </p>
|
55
|
+
hy = hy.gsub( /‹(#{tag})›/ ) { |_| "<#{$1}>" }
|
56
|
+
hy = hy.gsub( /‹\/(#{tag})›/ ) { |_| "<\/#{$1}>" } # closing tag e.g. </p>
|
57
|
+
end
|
58
|
+
|
59
|
+
hy
|
60
|
+
end # method whitelist
|
61
|
+
|
62
|
+
|
63
|
+
## change to simple_hypertext or
|
64
|
+
# hypertext_simple or
|
65
|
+
# sanitize ???
|
66
|
+
|
67
|
+
def textify( hy, opts={} ) # hy -> hypertext
|
68
|
+
## turn into text
|
69
|
+
# todo: add options for
|
70
|
+
# keep links, images, lists (?too), code, codeblocks
|
71
|
+
|
72
|
+
hy = whitelist( hy, [:br, :p, :ul, :ol, :li, :pre, :code], opts )
|
73
|
+
|
74
|
+
# strip bold
|
75
|
+
# hy = hy.gsub( /<b[^>]*>/, '**' ) # fix: will also swallow bxxx tags - add b space
|
76
|
+
# hy = hy.gsub( /<\/b>/, '**' )
|
77
|
+
|
78
|
+
# strip em
|
79
|
+
# hy = hy.gsub( /<em[^>]*>/, '__' )
|
80
|
+
# hy = hy.gsub( /<\/em>/, '__' )
|
81
|
+
|
82
|
+
# clean (prettify) literal urls (strip protocoll)
|
83
|
+
hy = hy.gsub( /(http|https):\/\//, '' )
|
84
|
+
|
85
|
+
# hy = hy.gsub( / /, ' ' )
|
86
|
+
|
87
|
+
# # try to cleanup whitespaces
|
88
|
+
# # -- keep no more than two spaces
|
89
|
+
# hy = hy.gsub( /[ \t]{3,}/, ' ' )
|
90
|
+
# # -- keep no more than two new lines
|
91
|
+
# hy = hy.gsub( /\n{2,}/m, "\n\n" )
|
92
|
+
# # -- remove all trailing spaces
|
93
|
+
# hy = hy.gsub( /[ \t\n]+$/m, '' )
|
94
|
+
# # -- remove all leading spaces
|
95
|
+
# hy = hy.gsub( /^[ \t\n]+/m, '' )
|
96
|
+
|
97
|
+
hy
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
####
|
102
|
+
# fix: move to DateHelper ??
|
103
|
+
|
104
|
+
|
15
105
|
def time_ago_in_words( from_time )
|
16
106
|
from_time = from_time.to_time
|
17
107
|
to_time = Time.now
|
@@ -27,23 +117,26 @@ module TemplateHelper
|
|
27
117
|
case distance_in_minutes
|
28
118
|
when 0..1 then "just now"
|
29
119
|
when 2...45 then "%d minutes ago" % distance_in_minutes
|
30
|
-
when 45...90 then "
|
120
|
+
when 45...90 then "an hour ago" ## use one instead of 1 ?? why? why not?
|
31
121
|
# 90 mins up to 24 hours
|
32
|
-
when 90...1440 then "
|
122
|
+
when 90...1440 then "%d hours ago" % (distance_in_minutes.to_f / 60.0).round
|
33
123
|
# 24 hours up to 42 hours
|
34
|
-
when 1440...2520 then "
|
124
|
+
when 1440...2520 then "a day ago" ## use one day ago - why? why not?
|
35
125
|
# 42 hours up to 30 days
|
36
126
|
when 2520...43200 then "%d days ago" % (distance_in_minutes.to_f / 1440.0).round
|
37
127
|
# 30 days up to 60 days
|
38
|
-
# fix: use pluralize for months
|
39
|
-
when 43200...86400 then "
|
128
|
+
# fix: use pluralize for months - fix: change when - use just for a month ago
|
129
|
+
when 43200...86400 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
|
40
130
|
# 60 days up to 365 days
|
41
131
|
when 86400...525600 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
|
42
132
|
## fix - add number of years ago
|
43
|
-
else "
|
133
|
+
else "over a year ago" #todo: use over a year ago???
|
134
|
+
# fix: split into two - use
|
135
|
+
# 1) a year ago
|
136
|
+
# 2) (x) years ago
|
44
137
|
end
|
45
138
|
end
|
46
139
|
|
47
140
|
|
48
141
|
end # module TemplateHelper
|
49
|
-
end # module Pluto
|
142
|
+
end # module Pluto
|