pluto 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,15 +14,12 @@ class Formatter
14
14
  @config = config
15
15
  end
16
16
 
17
- attr_reader :opts
18
-
19
- def site
20
- ### fix !!!!!!!!!!
21
- ## fix/todo: change to db record for site
22
- @config
23
- end
17
+ attr_reader :opts, :config, :site
24
18
 
25
19
  def run( arg )
20
+ ## fix: change arg to planet_key or just key or similar
21
+ # todo: rename run to some less generic - merge/build/etc. ??
22
+
26
23
  manifest_name = opts.manifest
27
24
  manifest_name = manifest_name.downcase.gsub('.txt', '' ) # remove .txt if present
28
25
 
@@ -51,9 +48,20 @@ class Formatter
51
48
 
52
49
  manifestsrc = manifests[0][1]
53
50
  pakpath = opts.output_path
54
-
55
-
51
+
56
52
  name = arg
53
+
54
+ ## for now - use single site w/ key planet
55
+ ##-- fix!! allow multiple sites (planets)
56
+
57
+ site_key = 'planet'
58
+ @site = Site.find_by_key( site_key )
59
+ if @site.nil?
60
+ puts "*** warn: no site with key '#{site_key}' found; using untitled site record"
61
+ @site = Site.new
62
+ @site.title = 'Planet Untitled'
63
+ end
64
+
57
65
  Pakman::Templater.new.merge_pak( manifestsrc, pakpath, binding, name )
58
66
  end
59
67
 
@@ -0,0 +1,186 @@
1
module Pluto

# Fetches every subscribed feed and syncs the parsed result into the
# Feed and Item records; also stamps all Site records with the time of
# the refresh and logs an Action record for the run.
class Refresher

  include LogUtils::Logging

  include Models

  def initialize
    @worker = ::Fetcher::Worker.new
  end

  # worker used by fetch_feed for reading a feed url
  attr_reader :worker

  # Turns the debug mode on or off (see debug?).
  def debug=(value)
    @debug = value
    ### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
  end

  # Returns true if debug mode is on; defaults to false.
  def debug?
    @debug || false
  end

  # Reads the feed at url via the worker and returns the text tagged as UTF-8.
  #
  # url - feed url passed on as is to worker.read
  #
  # Returns the feed text (string) with its encoding set to UTF-8.
  def fetch_feed( url )

    ### fix: use worker.get( url )  # check http response code etc.

    xml = worker.read( url )

    ###
    # NB: Net::HTTP will NOT set encoding UTF-8 etc.
    #  will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
    #  - try to change encoding to UTF-8 ourselves
    logger.debug "xml.encoding.name (before): #{xml.encoding.name}"

    ## NB:
    # for now "hardcoded" to utf8 - what else can we do?
    # - note: force_encoding will NOT change the bytes;
    #     it only changes the assumed encoding w/o translation
    xml = xml.force_encoding( Encoding::UTF_8 )
    logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
    xml
  end


  # Refreshes all feeds.
  #
  # Logs an 'update feeds' action, stamps all sites with the current time
  # and then fetches, parses and stores every Feed record one by one.
  # A feed that cannot be parsed gets reported and skipped
  # (the remaining feeds still get refreshed).
  #
  # opts - options hash; not used for now
  def update_feeds( opts={} )

    ## turn on logging for sql too
    ActiveRecord::Base.logger = Logger.new( STDOUT )   if debug?

    ### move to feedutils
    ### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"

    # -- log update action
    Action.create!( title: 'update feeds' )

    # -- update fetched_at timestamps for all sites
    touch_sites( Time.now )

    Feed.all.each { |feed_rec| refresh_feed( feed_rec ) }

  end # method update_feeds


private

  # Stamps every site record with the passed in fetched_at time.
  def touch_sites( fetched_at )
    Site.all.each do |site|
      site.fetched_at = fetched_at
      site.save!
    end
  end

  # Fetches and parses a single feed; updates the feed record and its item records.
  def refresh_feed( feed_rec )
    feed_key = feed_rec.key
    feed_xml = fetch_feed( feed_rec.feed_url )

    logger.debug "feed_xml:"
    logger.debug feed_xml[ 0..300 ]   # log the start of the feed only

    # if opts.verbose?  # also write a copy to disk
    if debug?
      logger.debug "saving feed to >./#{feed_key}.xml<..."
      File.open( "./#{feed_key}.xml", 'w' ) { |f| f.write( feed_xml ) }
    end

    puts "Before parsing feed >#{feed_key}<..."

    feed = FeedUtils::Parser.parse( feed_xml )
    if feed.nil?
      # no feed - nothing to store; do not take down the whole refresh run
      puts "*** error: parsing feed >#{feed_key}< failed; skipping feed"
      return
    end

    feed_fetched_at = Time.now

    ## todo/check: move feed_rec update to the end (after item updates??)

    ## fix:
    ## weird rss exception error on windows w/ dates
    #  e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
    #
    #  move to_datetime to feedutils!! if it works
    ## todo: move this comments to feedutils??

    feed_attribs = build_feed_attribs( feed, feed_fetched_at )
    dump_attribs( 'feed_attribs', feed_attribs )   if debug?
    feed_rec.update_attributes!( feed_attribs )

    feed.items.each do |item|
      item_attribs = build_item_attribs( item, feed_rec, feed_fetched_at )
      # skip summary n content - too much text for a debug dump
      dump_attribs( 'item_attribs', item_attribs, [:summary,:content] )   if debug?
      save_item( item, item_attribs )
    end

    update_latest_published_at( feed_rec )
  end

  # Builds the attributes for the feed record from the parsed feed;
  #  optional values not present in the feed get stored as nil.
  def build_feed_attribs( feed, fetched_at )
    {
      fetched_at:   fetched_at,
      format:       feed.format,
      published_at: feed.published? ? feed.published : nil,
      touched_at:   feed.updated?   ? feed.updated   : nil,
      built_at:     feed.built?     ? feed.built     : nil,
      summary:      feed.summary?   ? feed.summary   : nil,
      title2:       feed.title2?    ? feed.title2    : nil,
      generator:    feed.generator
    }
  end

  # Builds the attributes for an item record from the parsed feed item;
  #  optional values not present in the item get stored as nil.
  def build_item_attribs( item, feed_rec, fetched_at )
    {
      fetched_at:   fetched_at,
      title:        item.title,
      url:          item.url,
      summary:      item.summary?   ? item.summary   : nil,
      content:      item.content?   ? item.content   : nil,
      published_at: item.published? ? item.published : nil,
      touched_at:   item.updated?   ? item.updated   : nil,
      feed_id:      feed_rec.id    # add feed_id fk_ref
    }
  end

  # Prints all attributes (key, value and class of value) except for skip_keys.
  def dump_attribs( name, attribs, skip_keys=[] )
    puts "*** dump #{name} w/ class types:"
    attribs.each do |key,value|
      next if skip_keys.include?( key )
      puts " #{key}: >#{value}< : #{value.class.name}"
    end
  end

  # Creates the item record if no record w/ the item's guid exists; otherwise updates it.
  def save_item( item, item_attribs )
    rec = Item.find_by_guid( item.guid )
    if rec.nil?
      rec = Item.new
      item_attribs[ :guid ] = item.guid
      puts "** NEW | #{item.title}"
    else
      ## todo: check if any attribs changed
      puts "UPDATE | #{item.title}"
    end

    rec.update_attributes!( item_attribs )
  end

  # Updates the cached latest published_at value of the feed record
  #  using its latest item (falls back to touched_at if published_at is missing).
  def update_latest_published_at( feed_rec )
    latest_item = feed_rec.items.latest.first
    return if latest_item.nil?

    if latest_item.published_at?
      feed_rec.latest_published_at = latest_item.published_at
    else  # try touched_at
      feed_rec.latest_published_at = latest_item.touched_at
    end
    feed_rec.save!
  end

end # class Refresher

end # module Pluto
@@ -0,0 +1,85 @@
1
module Pluto

# Syncs the subscriptions found in a planet configuration into
# the Site and Feed records and logs Action records along the way.
class Subscriber

  include LogUtils::Logging

  include Models

  # "top-level" config keys that are settings for the site itself and NOT feeds
  TOP_LEVEL_KEYS = ['title','name','feeds'].freeze

  # Turns the debug mode on or off (see debug?).
  def debug=(value)
    @debug = value
    ### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
  end

  # Returns true if debug mode is on; defaults to false.
  def debug?
    @debug || false
  end

  # Creates or updates the site record and one feed record per feed entry in config.
  #
  # config - hash-like planet configuration; read via config[ 'title' ] / config[ 'name' ]
  #            and config.each; every entry that is not a top-level key
  #            is expected to be a hash of feed settings keyed by the feed key
  # opts   - options hash; not used for now
  #
  # Entries that are not a hash get reported and skipped.
  def update_subscriptions( config, opts={} )

    update_site( config )

    # -- log update action
    Action.create!( title: 'update subscriptions' )

    config.each do |key, value|

      next if TOP_LEVEL_KEYS.include?( key )   # skip "top-level" feed keys e.g. title, etc.

      # a scalar (e.g. a string setting) cannot hold feed settings;
      #  indexing it like a hash would raise or result in a bogus feed record
      unless value.is_a?( Hash )
        puts "*** warn: skipping >#{key}< - expected hash w/ feed settings but got #{value.class.name}"
        next
      end

      ### todo/check:
      # check if url or feed_url present
      #  that is, check for required props/key-value pairs

      update_feed( key.to_s.dup, value )

      ## todo:
      #  add subscription records (feed,site) - how?
    end

  end # method update_subscriptions


private

  # Creates or updates the (single) site record.
  def update_site( config )
    site_attribs = {
      title: config[ 'title' ] || config[ 'name' ]  # support either title or name
    }

    ## for now - use single site w/ key planet
    ## -- fix!! allow multiple sites (planets)

    site_key = 'planet'
    site_rec = Site.find_by_key( site_key )
    if site_rec.nil?
      site_rec = Site.new
      site_attribs[ :key ] = site_key

      ## use object_id: site.id and object_type: Site
      ## change - model/table/schema!!!
      Action.create!( title: 'new site', object: site_attribs[ :title ] )
    end
    site_rec.update_attributes!( site_attribs )
  end

  # Creates or updates the feed record for feed_key using the settings in feed_hash.
  def update_feed( feed_key, feed_hash )
    # todo: use title from feed?
    feed_attribs = {
      feed_url: feed_hash[ 'feed' ]  || feed_hash[ 'feed_url' ],
      url:      feed_hash[ 'link' ]  || feed_hash[ 'site' ] || feed_hash[ 'url' ],
      title:    feed_hash[ 'title' ] || feed_hash[ 'name' ] || feed_hash[ 'author' ]
    }

    puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."

    feed_rec = Feed.find_by_key( feed_key )
    if feed_rec.nil?
      feed_rec = Feed.new
      feed_attribs[ :key ] = feed_key

      ## use object_id: feed.id and object_type: Feed
      ## change - model/table/schema!!!
      ## todo: add parent_action_id - why? why not?
      Action.create!( title: 'new feed', object: feed_attribs[ :title ] )
    end

    feed_rec.update_attributes!( feed_attribs )
  end

end # class Subscriber

end # module Pluto
@@ -1,17 +1,107 @@
1
+ # encoding: UTF-8
2
+
1
3
  module Pluto
2
4
 
3
- ####
4
- # fix: rename to DateHelper
5
5
 
6
6
  module TemplateHelper
7
7
 
8
# Strips all markup tags and returns the plain text.
#
# hy - hypertext (string); nil is treated like an empty string
#
# Returns a new string w/ everything matching <...> removed.
def strip_tags( hy )
  ### tobe done
  ## strip markup tags; return plain text
  hy.to_s.gsub( /<[^>]+>/, '' )
end
13
13
 
14
14
 
15
# Strips all markup tags except for the whitelisted tags.
#
# Works in three steps:
#  1) whitelisted tags get saved as ‹tag› / ‹/tag› placeholders
#       (all attributes get stripped, tag names get downcased and
#        xml-style empty tags e.g. <br/> or <br /> become <br>)
#  2) images get replaced by a ♦ marker and all remaining tags get removed
#  3) the placeholders get restored to <tag> / </tag>
#
# hy   - hypertext (string); nil is treated like an empty string
#          note: assumes properly escaped <> in hy/hypertext
# tags - tag names to keep (symbols or strings) e.g. [:br, :p]
# opts - options hash;
#          :skip_restore - if set, skip step three and
#                           return the text w/ placeholders (for debugging)
#
# Returns a new string; the passed in string is not changed.
#
# NOTE(review): text that already includes a placeholder e.g. ‹p›
#   for a whitelisted tag will get turned into a tag in step three.
def whitelist( hy, tags, opts={} )

  hy = hy.to_s

  # tag names ready for use in a regex (downcased to match the placeholders)
  names = tags.map { |tag| Regexp.escape( tag.to_s.downcase ) }

  ###############################################
  # step one - save whitelisted tags use ‹tag›
  names.each do |name|
    # note: match all tags case insensitive e.g. allow a,A or br,BR,bR etc.

    # e.g. <br /> or <br/> becomes ‹br›
    hy = hy.gsub( /<(#{name})\s*\/>/i ) { "‹#{$1.downcase}›" }

    # make sure we won't swallow <br> for <b> for example, thus use \s+ before [^>]
    hy = hy.gsub( /<(#{name})(\s+[^>]*)?>/i ) { "‹#{$1.downcase}›" }   # opening tag e.g. <p>
    hy = hy.gsub( /<\/(#{name})\s*>/i )       { "‹/#{$1.downcase}›" }  # closing tag e.g. </p>
  end

  ############################
  # step two - clean tags

  # strip images - special treatment for debugging
  hy = hy.gsub( /<img[^>]*>/i, '♦' )   # for debugging use black diamond e.g. ♦
  hy = hy.gsub( /<\/img>/i, '' )       # should not exist

  # strip all remaining tags
  hy = hy.gsub( /<[^>]+>/, '' )

  ############################################
  # step three - restore whitelisted tags

  # note: nil, false or an empty value all count as option not set
  skip_restore = opts[:skip_restore]
  skip_restore = false   if skip_restore.respond_to?( :empty? ) && skip_restore.empty?
  return hy  if skip_restore   # skip step 3 for debugging

  names.each do |name|
    hy = hy.gsub( /‹(#{name})›/ )   { "<#{$1}>" }    # opening tag e.g. <p>
    hy = hy.gsub( /‹\/(#{name})›/ ) { "</#{$1}>" }   # closing tag e.g. </p>
  end

  hy
end # method whitelist
61
+
62
+
63
+ ## change to simple_hypertext or
64
+ # hypertext_simple or
65
+ # sanitize ???
66
+
67
# Turns hypertext into (almost) plain text.
#
# Keeps only a handful of block-level tags (via whitelist) and
# prettifies literal urls by dropping the leading protocol.
#
# hy   - hypertext
# opts - options hash; passed on as is to whitelist
#
# todo: add options for
#   keep links, images, lists (?too), code, codeblocks
# todo/ideas (not active for now):
#   turn bold into **..** and em into __..__;
#   turn &nbsp; into a space; cleanup whitespaces
#    (keep no more than two spaces / two new lines;
#     remove all leading and trailing spaces)
def textify( hy, opts={} )   # hy -> hypertext
  kept_tags = [:br, :p, :ul, :ol, :li, :pre, :code]
  text = whitelist( hy, kept_tags, opts )

  # clean (prettify) literal urls (strip protocol)
  text.gsub( /(http|https):\/\//, '' )
end
99
+
100
+
101
+ ####
102
+ # fix: move to DateHelper ??
103
+
104
+
15
105
  def time_ago_in_words( from_time )
16
106
  from_time = from_time.to_time
17
107
  to_time = Time.now
@@ -27,23 +117,26 @@ module TemplateHelper
27
117
  case distance_in_minutes
28
118
  when 0..1 then "just now"
29
119
  when 2...45 then "%d minutes ago" % distance_in_minutes
30
- when 45...90 then "about 1 hour ago" ## use one instead of 1 ?? why? why not?
120
+ when 45...90 then "an hour ago" ## use one instead of 1 ?? why? why not?
31
121
  # 90 mins up to 24 hours
32
- when 90...1440 then "about %d hours ago" % (distance_in_minutes.to_f / 60.0).round
122
+ when 90...1440 then "%d hours ago" % (distance_in_minutes.to_f / 60.0).round
33
123
  # 24 hours up to 42 hours
34
- when 1440...2520 then "1 day ago" ## use one day ago - why? why not?
124
+ when 1440...2520 then "a day ago" ## use one day ago - why? why not?
35
125
  # 42 hours up to 30 days
36
126
  when 2520...43200 then "%d days ago" % (distance_in_minutes.to_f / 1440.0).round
37
127
  # 30 days up to 60 days
38
- # fix: use pluralize for months
39
- when 43200...86400 then "about %d months ago" % (distance_in_minutes.to_f / 43200.0).round
128
+ # fix: use pluralize for months - fix: change when - use just for a month ago
129
+ when 43200...86400 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
40
130
  # 60 days up to 365 days
41
131
  when 86400...525600 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
42
132
  ## fix - add number of years ago
43
- else "about a year ago" #todo: use over a year ago???
133
+ else "over a year ago" #todo: use over a year ago???
134
+ # fix: split into two - use
135
+ # 1) a year ago
136
+ # 2) (x) years ago
44
137
  end
45
138
  end
46
139
 
47
140
 
48
141
  end # module TemplateHelper
49
- end # module Pluto
142
+ end # module Pluto