pluto 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/Manifest.txt +4 -0
- data/README.md +87 -17
- data/Rakefile +3 -3
- data/lib/pluto.rb +5 -3
- data/lib/pluto/cli/main.rb +94 -58
- data/lib/pluto/fetcher.rb +2 -20
- data/lib/pluto/formatter.rb +17 -9
- data/lib/pluto/refresher.rb +186 -0
- data/lib/pluto/subscriber.rb +85 -0
- data/lib/pluto/template_helpers.rb +104 -11
- data/lib/pluto/updater.rb +18 -233
- data/lib/pluto/version.rb +1 -1
- data/test/helper.rb +18 -0
- data/test/test_helpers.rb +75 -0
- metadata +30 -24
data/lib/pluto/formatter.rb
CHANGED
@@ -14,15 +14,12 @@ class Formatter
|
|
14
14
|
@config = config
|
15
15
|
end
|
16
16
|
|
17
|
-
attr_reader :opts
|
18
|
-
|
19
|
-
def site
|
20
|
-
### fix !!!!!!!!!!
|
21
|
-
## fix/todo: change to db record for site
|
22
|
-
@config
|
23
|
-
end
|
17
|
+
attr_reader :opts, :config, :site
|
24
18
|
|
25
19
|
def run( arg )
|
20
|
+
## fix: change arg to planet_key or just key or similar
|
21
|
+
# todo: rename run to something less generic - merge/build/etc. ??
|
22
|
+
|
26
23
|
manifest_name = opts.manifest
|
27
24
|
manifest_name = manifest_name.downcase.gsub('.txt', '' ) # remove .txt if present
|
28
25
|
|
@@ -51,9 +48,20 @@ class Formatter
|
|
51
48
|
|
52
49
|
manifestsrc = manifests[0][1]
|
53
50
|
pakpath = opts.output_path
|
54
|
-
|
55
|
-
|
51
|
+
|
56
52
|
name = arg
|
53
|
+
|
54
|
+
## for now - use single site w/ key planet
|
55
|
+
##-- fix!! allow multiple sites (planets)
|
56
|
+
|
57
|
+
site_key = 'planet'
|
58
|
+
@site = Site.find_by_key( site_key )
|
59
|
+
if @site.nil?
|
60
|
+
puts "*** warn: no site with key '#{site_key}' found; using untitled site record"
|
61
|
+
@site = Site.new
|
62
|
+
@site.title = 'Planet Untitled'
|
63
|
+
end
|
64
|
+
|
57
65
|
Pakman::Templater.new.merge_pak( manifestsrc, pakpath, binding, name )
|
58
66
|
end
|
59
67
|
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
class Refresher
|
4
|
+
|
5
|
+
include LogUtils::Logging
|
6
|
+
|
7
|
+
include Models
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@worker = ::Fetcher::Worker.new
|
11
|
+
end
|
12
|
+
|
13
|
+
attr_reader :worker
|
14
|
+
|
15
|
+
def debug=(value)
|
16
|
+
@debug = value
|
17
|
+
### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def debug?
|
21
|
+
@debug || false
|
22
|
+
end
|
23
|
+
|
24
|
+
def fetch_feed( url )
|
25
|
+
|
26
|
+
### fix: use worker.get( url ) # check http response code etc.
|
27
|
+
|
28
|
+
xml = worker.read( url )
|
29
|
+
|
30
|
+
###
|
31
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
32
|
+
# will mostly be ASCII
|
33
|
+
# - try to change encoding to UTF-8 ourselves
|
34
|
+
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
35
|
+
|
36
|
+
#####
|
37
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
38
|
+
|
39
|
+
## NB:
|
40
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
41
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
42
|
+
xml = xml.force_encoding( Encoding::UTF_8 )
|
43
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
44
|
+
xml
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
def update_feeds( opts={} )
|
49
|
+
|
50
|
+
if debug?
|
51
|
+
## turn on logging for sql too
|
52
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
53
|
+
end
|
54
|
+
|
55
|
+
### move to feedutils
|
56
|
+
### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
57
|
+
|
58
|
+
# -- log update action
|
59
|
+
Action.create!( title: 'update feeds' )
|
60
|
+
|
61
|
+
#####
|
62
|
+
# -- update fetched_at timestamps for all sites
|
63
|
+
feeds_fetched_at = Time.now
|
64
|
+
Site.all.each do |site|
|
65
|
+
site.fetched_at = feeds_fetched_at
|
66
|
+
site.save!
|
67
|
+
end
|
68
|
+
|
69
|
+
Feed.all.each do |feed_rec|
|
70
|
+
|
71
|
+
feed_key = feed_rec.key
|
72
|
+
feed_url = feed_rec.feed_url
|
73
|
+
|
74
|
+
feed_xml = fetch_feed( feed_url )
|
75
|
+
|
76
|
+
logger.debug "feed_xml:"
|
77
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
78
|
+
|
79
|
+
# if opts.verbose? # also write a copy to disk
|
80
|
+
if debug?
|
81
|
+
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
82
|
+
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
83
|
+
f.write( feed_xml )
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
puts "Before parsing feed >#{feed_key}<..."
|
88
|
+
|
89
|
+
## fix/todo: check for feed.nil? -> error parsing!!!
|
90
|
+
# or throw exception
|
91
|
+
feed = FeedUtils::Parser.parse( feed_xml )
|
92
|
+
|
93
|
+
feed_fetched_at = Time.now
|
94
|
+
|
95
|
+
## todo/check: move feed_rec update to the end (after item updates??)
|
96
|
+
|
97
|
+
# update feed attribs e.g.
|
98
|
+
# generator
|
99
|
+
# published_at,built_at,touched_at,fetched_at
|
100
|
+
# summary,title2
|
101
|
+
|
102
|
+
## fix:
|
103
|
+
## weird rss exception error on windows w/ dates
|
104
|
+
# e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
|
105
|
+
#
|
106
|
+
# move to_datetime to feedutils!! if it works
|
107
|
+
## todo: move these comments to feedutils??
|
108
|
+
|
109
|
+
|
110
|
+
feed_attribs = {
|
111
|
+
fetched_at: feed_fetched_at,
|
112
|
+
format: feed.format,
|
113
|
+
published_at: feed.published? ? feed.published : nil,
|
114
|
+
touched_at: feed.updated? ? feed.updated : nil,
|
115
|
+
built_at: feed.built? ? feed.built : nil,
|
116
|
+
summary: feed.summary? ? feed.summary : nil,
|
117
|
+
title2: feed.title2? ? feed.title2 : nil,
|
118
|
+
generator: feed.generator
|
119
|
+
}
|
120
|
+
|
121
|
+
if debug?
|
122
|
+
## puts "*** dump feed_attribs:"
|
123
|
+
## pp feed_attribs
|
124
|
+
puts "*** dump feed_attribs w/ class types:"
|
125
|
+
feed_attribs.each do |key,value|
|
126
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
feed_rec.update_attributes!( feed_attribs )
|
131
|
+
|
132
|
+
|
133
|
+
feed.items.each do |item|
|
134
|
+
|
135
|
+
item_attribs = {
|
136
|
+
fetched_at: feed_fetched_at,
|
137
|
+
title: item.title,
|
138
|
+
url: item.url,
|
139
|
+
summary: item.summary? ? item.summary : nil,
|
140
|
+
content: item.content? ? item.content : nil,
|
141
|
+
published_at: item.published? ? item.published : nil,
|
142
|
+
touched_at: item.updated? ? item.updated : nil,
|
143
|
+
feed_id: feed_rec.id # add feed_id fk_ref
|
144
|
+
}
|
145
|
+
|
146
|
+
if debug?
|
147
|
+
puts "*** dump item_attribs w/ class types:"
|
148
|
+
item_attribs.each do |key,value|
|
149
|
+
next if [:summary,:content].include?( key ) # skip summary n content
|
150
|
+
puts " #{key}: >#{value}< : #{value.class.name}"
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
rec = Item.find_by_guid( item.guid )
|
156
|
+
if rec.nil?
|
157
|
+
rec = Item.new
|
158
|
+
item_attribs[ :guid ] = item.guid
|
159
|
+
puts "** NEW | #{item.title}"
|
160
|
+
else
|
161
|
+
## todo: check if any attribs changed
|
162
|
+
puts "UPDATE | #{item.title}"
|
163
|
+
end
|
164
|
+
|
165
|
+
rec.update_attributes!( item_attribs )
|
166
|
+
end # each item
|
167
|
+
|
168
|
+
# update cached value latest published_at for item
|
169
|
+
item_recs = feed_rec.items.latest.limit(1).all
|
170
|
+
unless item_recs.empty?
|
171
|
+
if item_recs[0].published_at?
|
172
|
+
feed_rec.latest_published_at = item_recs[0].published_at
|
173
|
+
else # try touched_at
|
174
|
+
feed_rec.latest_published_at = item_recs[0].touched_at
|
175
|
+
end
|
176
|
+
feed_rec.save!
|
177
|
+
end
|
178
|
+
|
179
|
+
end # each feed
|
180
|
+
|
181
|
+
end # method update_feeds
|
182
|
+
|
183
|
+
|
184
|
+
end # class Refresher
|
185
|
+
|
186
|
+
end # module Pluto
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
class Subscriber
|
4
|
+
|
5
|
+
include LogUtils::Logging
|
6
|
+
|
7
|
+
include Models
|
8
|
+
|
9
|
+
def debug=(value)
|
10
|
+
@debug = value
|
11
|
+
### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
|
12
|
+
end
|
13
|
+
|
14
|
+
def debug?
|
15
|
+
@debug || false
|
16
|
+
end
|
17
|
+
|
18
|
+
def update_subscriptions( config, opts={} )
|
19
|
+
|
20
|
+
site_attribs = {
|
21
|
+
title: config[ 'title' ] || config[ 'name' ] # support either title or name
|
22
|
+
}
|
23
|
+
|
24
|
+
## for now - use single site w/ key planet
|
25
|
+
## -- fix!! allow multiple sites (planets)
|
26
|
+
|
27
|
+
site_key = 'planet'
|
28
|
+
site_rec = Site.find_by_key( site_key )
|
29
|
+
if site_rec.nil?
|
30
|
+
site_rec = Site.new
|
31
|
+
site_attribs[ :key ] = site_key
|
32
|
+
|
33
|
+
## use object_id: site.id and object_type: Site
|
34
|
+
## change - model/table/schema!!!
|
35
|
+
Action.create!( title: 'new site', object: site_attribs[ :title ] )
|
36
|
+
end
|
37
|
+
site_rec.update_attributes!( site_attribs )
|
38
|
+
|
39
|
+
# -- log update action
|
40
|
+
Action.create!( title: 'update subscriptions' )
|
41
|
+
|
42
|
+
|
43
|
+
config.each do |key, value|
|
44
|
+
|
45
|
+
next if ['title','name','feeds'].include?( key ) # skip "top-level" feed keys e.g. title, etc.
|
46
|
+
|
47
|
+
### todo/check:
|
48
|
+
## check value - must be hash
|
49
|
+
# check if url or feed_url present
|
50
|
+
# that is, check for required props/key-value pairs
|
51
|
+
|
52
|
+
feed_key = key.to_s.dup
|
53
|
+
feed_hash = value
|
54
|
+
|
55
|
+
# todo: use title from feed?
|
56
|
+
feed_attribs = {
|
57
|
+
feed_url: feed_hash[ 'feed' ] || feed_hash[ 'feed_url' ],
|
58
|
+
url: feed_hash[ 'link' ] || feed_hash[ 'site' ] || feed_hash[ 'url' ],
|
59
|
+
title: feed_hash[ 'title' ] || feed_hash[ 'name' ] || feed_hash[ 'author' ]
|
60
|
+
}
|
61
|
+
|
62
|
+
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
63
|
+
|
64
|
+
feed_rec = Feed.find_by_key( feed_key )
|
65
|
+
if feed_rec.nil?
|
66
|
+
feed_rec = Feed.new
|
67
|
+
feed_attribs[ :key ] = feed_key
|
68
|
+
|
69
|
+
## use object_id: feed.id and object_type: Feed
|
70
|
+
## change - model/table/schema!!!
|
71
|
+
## todo: add parent_action_id - why? why not?
|
72
|
+
Action.create!( title: 'new feed', object: feed_attribs[ :title ] )
|
73
|
+
end
|
74
|
+
|
75
|
+
feed_rec.update_attributes!( feed_attribs )
|
76
|
+
|
77
|
+
## todo:
|
78
|
+
# add subscription records (feed,site) - how?
|
79
|
+
end
|
80
|
+
|
81
|
+
end # method update_subscriptions
|
82
|
+
|
83
|
+
end # class Subscriber
|
84
|
+
|
85
|
+
end # module Pluto
|
@@ -1,17 +1,107 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
module Pluto
|
2
4
|
|
3
|
-
####
|
4
|
-
# fix: rename to DateHelper
|
5
5
|
|
6
6
|
module TemplateHelper
|
7
7
|
|
8
|
-
def strip_tags(
|
8
|
+
def strip_tags( hy )
|
9
9
|
### to be done
|
10
10
|
## strip markup tags; return plain text
|
11
|
-
|
11
|
+
hy.gsub( /<[^>]+>/, '' )
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
+
def whitelist( hy, tags, opts={} )
|
16
|
+
|
17
|
+
# note: assumes properly escaped <> in hy/hypertext
|
18
|
+
|
19
|
+
###############################################
|
20
|
+
# step one - save whitelisted tags use ‹tag›
|
21
|
+
tags.each do |tag|
|
22
|
+
# note: we strip all attributes
|
23
|
+
# note: match all tags case insensitive e.g. allow a,A or br,BR,bR etc.
|
24
|
+
# downcase all tags
|
25
|
+
|
26
|
+
# convert xml-style empty tags to simple html empty tags
|
27
|
+
# e.g. <br/> or <br /> becomes <br>
|
28
|
+
hy = hy.gsub( /<(#{tag})\s*\/>/i ) { |_| "‹#{$1.downcase}›" } # eg. <br /> or <br/> becomes ‹br›
|
29
|
+
|
30
|
+
# make sure we won't swallow <br> for <b> for example, thus use \s+ before [^>]
|
31
|
+
hy = hy.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { |_| "‹#{$1.downcase}›" } # opening tag <p>
|
32
|
+
hy = hy.gsub( /<\/(#{tag})\s*>/i ) { |_| "‹/#{$1.downcase}›" } # closing tag e.g. </p>
|
33
|
+
end
|
34
|
+
|
35
|
+
############################
|
36
|
+
# step two - clean tags
|
37
|
+
|
38
|
+
# strip images - special treatment for debugging
|
39
|
+
hy = hy.gsub( /<img[^>]*>/i, '♦' ) # for debugging use black diamond e.g. ♦
|
40
|
+
hy = hy.gsub( /<\/img>/i, '' ) # should not exists
|
41
|
+
|
42
|
+
# strip all remaining tags
|
43
|
+
hy = hy.gsub( /<[^>]+>/, '' )
|
44
|
+
|
45
|
+
pp hy # fix: debugging indo - remove
|
46
|
+
|
47
|
+
############################################
|
48
|
+
# step three - restore whitelisted tags
|
49
|
+
|
50
|
+
return hy if opts[:skip_restore].present? # skip step 3 for debugging
|
51
|
+
|
52
|
+
tags.each do |tag|
|
53
|
+
# hy = hy.gsub( /‹(#{tag})›/, "<\1>" ) # opening tag e.g. <p>
|
54
|
+
# hy = hy.gsub( /‹\/(#{tag})›/, "<\/\1>" ) # closing tag e.g. </p>
|
55
|
+
hy = hy.gsub( /‹(#{tag})›/ ) { |_| "<#{$1}>" }
|
56
|
+
hy = hy.gsub( /‹\/(#{tag})›/ ) { |_| "<\/#{$1}>" } # closing tag e.g. </p>
|
57
|
+
end
|
58
|
+
|
59
|
+
hy
|
60
|
+
end # method whitelist
|
61
|
+
|
62
|
+
|
63
|
+
## change to simple_hypertext or
|
64
|
+
# hypertext_simple or
|
65
|
+
# sanitize ???
|
66
|
+
|
67
|
+
def textify( hy, opts={} ) # hy -> hypertext
|
68
|
+
## turn into text
|
69
|
+
# todo: add options for
|
70
|
+
# keep links, images, lists (?too), code, codeblocks
|
71
|
+
|
72
|
+
hy = whitelist( hy, [:br, :p, :ul, :ol, :li, :pre, :code], opts )
|
73
|
+
|
74
|
+
# strip bold
|
75
|
+
# hy = hy.gsub( /<b[^>]*>/, '**' ) # fix: will also swallow bxxx tags - add b space
|
76
|
+
# hy = hy.gsub( /<\/b>/, '**' )
|
77
|
+
|
78
|
+
# strip em
|
79
|
+
# hy = hy.gsub( /<em[^>]*>/, '__' )
|
80
|
+
# hy = hy.gsub( /<\/em>/, '__' )
|
81
|
+
|
82
|
+
# clean (prettify) literal urls (strip protocol)
|
83
|
+
hy = hy.gsub( /(http|https):\/\//, '' )
|
84
|
+
|
85
|
+
# hy = hy.gsub( / /, ' ' )
|
86
|
+
|
87
|
+
# # try to cleanup whitespaces
|
88
|
+
# # -- keep no more than two spaces
|
89
|
+
# hy = hy.gsub( /[ \t]{3,}/, ' ' )
|
90
|
+
# # -- keep no more than two new lines
|
91
|
+
# hy = hy.gsub( /\n{2,}/m, "\n\n" )
|
92
|
+
# # -- remove all trailing spaces
|
93
|
+
# hy = hy.gsub( /[ \t\n]+$/m, '' )
|
94
|
+
# # -- remove all leading spaces
|
95
|
+
# hy = hy.gsub( /^[ \t\n]+/m, '' )
|
96
|
+
|
97
|
+
hy
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
####
|
102
|
+
# fix: move to DateHelper ??
|
103
|
+
|
104
|
+
|
15
105
|
def time_ago_in_words( from_time )
|
16
106
|
from_time = from_time.to_time
|
17
107
|
to_time = Time.now
|
@@ -27,23 +117,26 @@ module TemplateHelper
|
|
27
117
|
case distance_in_minutes
|
28
118
|
when 0..1 then "just now"
|
29
119
|
when 2...45 then "%d minutes ago" % distance_in_minutes
|
30
|
-
when 45...90 then "
|
120
|
+
when 45...90 then "an hour ago" ## use one instead of 1 ?? why? why not?
|
31
121
|
# 90 mins up to 24 hours
|
32
|
-
when 90...1440 then "
|
122
|
+
when 90...1440 then "%d hours ago" % (distance_in_minutes.to_f / 60.0).round
|
33
123
|
# 24 hours up to 42 hours
|
34
|
-
when 1440...2520 then "
|
124
|
+
when 1440...2520 then "a day ago" ## use one day ago - why? why not?
|
35
125
|
# 42 hours up to 30 days
|
36
126
|
when 2520...43200 then "%d days ago" % (distance_in_minutes.to_f / 1440.0).round
|
37
127
|
# 30 days up to 60 days
|
38
|
-
# fix: use pluralize for months
|
39
|
-
when 43200...86400 then "
|
128
|
+
# fix: use pluralize for months - fix: change when - use just for a month ago
|
129
|
+
when 43200...86400 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
|
40
130
|
# 60 days up to 365 days
|
41
131
|
when 86400...525600 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
|
42
132
|
## fix - add number of years ago
|
43
|
-
else "
|
133
|
+
else "over a year ago" #todo: use over a year ago???
|
134
|
+
# fix: split into two - use
|
135
|
+
# 1) a year ago
|
136
|
+
# 2) (x) years ago
|
44
137
|
end
|
45
138
|
end
|
46
139
|
|
47
140
|
|
48
141
|
end # module TemplateHelper
|
49
|
-
end # module Pluto
|
142
|
+
end # module Pluto
|