pluto 0.8.2 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,15 +14,12 @@ class Formatter
14
14
  @config = config
15
15
  end
16
16
 
17
- attr_reader :opts
18
-
19
- def site
20
- ### fix !!!!!!!!!!
21
- ## fix/todo: change to db record for site
22
- @config
23
- end
17
+ attr_reader :opts, :config, :site
24
18
 
25
19
  def run( arg )
20
+ ## fix: change arg to planet_key or just key or similar
21
+ # todo: rename run to some less generic - merge/build/etc. ??
22
+
26
23
  manifest_name = opts.manifest
27
24
  manifest_name = manifest_name.downcase.gsub('.txt', '' ) # remove .txt if present
28
25
 
@@ -51,9 +48,20 @@ class Formatter
51
48
 
52
49
  manifestsrc = manifests[0][1]
53
50
  pakpath = opts.output_path
54
-
55
-
51
+
56
52
  name = arg
53
+
54
+ ## for now - use single site w/ key planet
55
+ ##-- fix!! allow multiple sites (planets)
56
+
57
+ site_key = 'planet'
58
+ @site = Site.find_by_key( site_key )
59
+ if @site.nil?
60
+ puts "*** warn: no site with key '#{site_key}' found; using untitled site record"
61
+ @site = Site.new
62
+ @site.title = 'Planet Untitled'
63
+ end
64
+
57
65
  Pakman::Templater.new.merge_pak( manifestsrc, pakpath, binding, name )
58
66
  end
59
67
 
@@ -0,0 +1,186 @@
1
module Pluto

##
# Refreshes all subscribed feeds: reads each feed url, parses the xml
#  and stores the feed and item attributes in the database records.
#
#  note: relies on LogUtils, Fetcher, FeedUtils and the Pluto models
#        being loaded by the surrounding library

class Refresher

  include LogUtils::Logging

  include Models

  def initialize
    @worker = ::Fetcher::Worker.new
  end

  attr_reader :worker

  # turn debug mode on/off (debug mode adds sql logging, feed copies on disk and attribute dumps)
  def debug=(value)
    @debug = value
  end

  def debug?
    @debug || false
  end

  ##
  # Read the feed at url using the fetcher worker and
  #  return the text tagged as UTF-8.
  def fetch_feed( url )

    ### fix: use worker.get( url )  # check http response code etc.

    xml = worker.read( url )

    ###
    # NB: Net::HTTP will NOT set encoding UTF-8 etc.
    #  will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
    #  - try to change encoding to UTF-8 ourselves
    logger.debug "xml.encoding.name (before): #{xml.encoding.name}"

    ## NB:
    #  for now "hardcoded" to utf8 - what else can we do?
    #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    xml = xml.force_encoding( Encoding::UTF_8 )
    logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
    xml
  end

  ##
  # Fetch, parse and store all feeds.
  #
  #  - logs an 'update feeds' action
  #  - stamps all sites with a shared fetched_at timestamp
  #  - updates every feed record and its items
  #
  #  opts is accepted for compatibility; it is not used (yet)
  def update_feeds( opts={} )

    ## turn on logging for sql too
    ActiveRecord::Base.logger = Logger.new( STDOUT ) if debug?

    ### move to feedutils
    ### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"

    # -- log update action
    Action.create!( title: 'update feeds' )

    touch_sites( Time.now )

    Feed.all.each { |feed_rec| refresh_feed( feed_rec ) }

  end # method update_feeds


private

  # -- update fetched_at timestamps for all sites
  def touch_sites( fetched_at )
    Site.all.each do |site|
      site.fetched_at = fetched_at
      site.save!
    end
  end

  # fetch and parse a single feed; update the feed record and all its items
  def refresh_feed( feed_rec )
    feed_key = feed_rec.key
    feed_xml = fetch_feed( feed_rec.feed_url )

    logger.debug "feed_xml:"
    logger.debug feed_xml[ 0...300 ] # get first 300 chars

    save_feed_copy( feed_key, feed_xml ) if debug?

    puts "Before parsing feed >#{feed_key}<..."

    feed = FeedUtils::Parser.parse( feed_xml )

    ## NOTE(review): assumes the parser signals a parse failure w/ nil - confirm w/ feedutils
    if feed.nil?
      puts "*** warn: parsing feed >#{feed_key}< failed; skipping feed"
      return
    end

    feed_fetched_at = Time.now

    ## todo/check: move feed_rec update to the end (after item updates??)

    ## fix:
    ##  weird rss exception error on windows w/ dates
    #  e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
    #
    #  move to_datetime to feedutils!! if it works

    feed_attribs = build_feed_attribs( feed, feed_fetched_at )
    dump_attribs( 'feed_attribs', feed_attribs ) if debug?

    feed_rec.update_attributes!( feed_attribs )

    feed.items.each do |item|
      update_item( item, feed_rec, feed_fetched_at )
    end

    update_latest_published_at( feed_rec )
  end

  # also write a copy of the fetched feed to disk (in the working folder)
  def save_feed_copy( feed_key, feed_xml )
    logger.debug "saving feed to >./#{feed_key}.xml<..."
    File.open( "./#{feed_key}.xml", 'w' ) do |f|
      f.write( feed_xml )
    end
  end

  # map parsed feed to feed record attributes; optional values not present get stored as nil
  def build_feed_attribs( feed, fetched_at )
    {
      fetched_at:   fetched_at,
      format:       feed.format,
      published_at: feed.published? ? feed.published : nil,
      touched_at:   feed.updated?   ? feed.updated   : nil,
      built_at:     feed.built?     ? feed.built     : nil,
      summary:      feed.summary?   ? feed.summary   : nil,
      title2:       feed.title2?    ? feed.title2    : nil,
      generator:    feed.generator
    }
  end

  # map parsed item to item record attributes (w/o guid - gets added for new records only)
  def build_item_attribs( item, feed_rec, fetched_at )
    {
      fetched_at:   fetched_at,
      title:        item.title,
      url:          item.url,
      summary:      item.summary?   ? item.summary   : nil,
      content:      item.content?   ? item.content   : nil,
      published_at: item.published? ? item.published : nil,
      touched_at:   item.updated?   ? item.updated   : nil,
      feed_id:      feed_rec.id    # add feed_id fk_ref
    }
  end

  # create or update the item record matching the item's guid
  def update_item( item, feed_rec, fetched_at )
    item_attribs = build_item_attribs( item, feed_rec, fetched_at )

    # skip summary n content (too long for dump)
    dump_attribs( 'item_attribs', item_attribs, [:summary,:content] ) if debug?

    rec = Item.find_by_guid( item.guid )
    if rec.nil?
      rec = Item.new
      item_attribs[ :guid ] = item.guid
      puts "** NEW | #{item.title}"
    else
      ## todo: check if any attribs changed
      puts "UPDATE | #{item.title}"
    end

    rec.update_attributes!( item_attribs )
  end

  # update cached value latest published_at for item
  def update_latest_published_at( feed_rec )
    item_recs = feed_rec.items.latest.limit(1).all
    return if item_recs.empty?

    latest = item_recs[0]
    if latest.published_at?
      feed_rec.latest_published_at = latest.published_at
    else # try touched_at
      feed_rec.latest_published_at = latest.touched_at
    end
    feed_rec.save!
  end

  # debug helper - print attributes w/ class types; skip keys listed in skip_keys
  def dump_attribs( name, attribs, skip_keys=[] )
    puts "*** dump #{name} w/ class types:"
    attribs.each do |key,value|
      next if skip_keys.include?( key )
      puts " #{key}: >#{value}< : #{value.class.name}"
    end
  end

end # class Refresher

end # module Pluto
@@ -0,0 +1,85 @@
1
module Pluto

##
# Syncs the site and feed records in the database
#  with the subscriptions listed in a planet configuration.
#
#  note: relies on LogUtils and the Pluto models
#        being loaded by the surrounding library

class Subscriber

  include LogUtils::Logging

  include Models

  # turn debug mode on/off
  def debug=(value)
    @debug = value
  end

  def debug?
    @debug || false
  end

  ##
  # Create or update the site record and one feed record per feed entry.
  #
  #  config - hash-like; 'title' (or 'name') is used for the site title;
  #           all keys except 'title', 'name' and 'feeds' are treated as feed keys
  #           w/ a hash of feed properties ('feed'/'feed_url', 'link'/'site'/'url',
  #           'title'/'name'/'author') as value
  #  opts   - accepted for compatibility; not used (yet)
  #
  #  entries that are not a hash or have no feed url get skipped w/ a warning
  def update_subscriptions( config, opts={} )

    update_site( config )

    # -- log update action
    Action.create!( title: 'update subscriptions' )

    config.each do |key, value|

      next if ['title','name','feeds'].include?( key )   # skip "top-level" feed keys e.g. title, etc.

      feed_key  = key.to_s.dup
      feed_hash = value

      # check value - must be hash
      unless feed_hash.is_a?( Hash )
        puts "*** warn: skipping >#{feed_key}< - expected hash with feed props but got #{feed_hash.class.name}"
        next
      end

      # todo: use title from feed?
      feed_attribs = {
        feed_url: feed_hash[ 'feed' ]  || feed_hash[ 'feed_url' ],
        url:      feed_hash[ 'link' ]  || feed_hash[ 'site' ] || feed_hash[ 'url' ],
        title:    feed_hash[ 'title' ] || feed_hash[ 'name' ] || feed_hash[ 'author' ]
      }

      # check for required props - no feed url, no subscription
      if feed_attribs[ :feed_url ].nil?
        puts "*** warn: skipping >#{feed_key}< - no feed url (feed or feed_url) found"
        next
      end

      puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."

      feed_rec = Feed.find_by_key( feed_key )
      if feed_rec.nil?
        feed_rec = Feed.new
        feed_attribs[ :key ] = feed_key

        ## use object_id: feed.id and object_type: Feed
        ## change - model/table/schema!!!
        ## todo: add parent_action_id - why? why not?
        Action.create!( title: 'new feed', object: feed_attribs[ :title ] )
      end

      feed_rec.update_attributes!( feed_attribs )

      ## todo:
      #  add subscription records (feed,site) - how?
    end

  end # method update_subscriptions


private

  # create or update the (single) site record
  def update_site( config )
    site_attribs = {
      title: config[ 'title' ] || config[ 'name' ]  # support either title or name
    }

    ## for now - use single site w/ key planet
    ## -- fix!! allow multiple sites (planets)

    site_key = 'planet'
    site_rec = Site.find_by_key( site_key )
    if site_rec.nil?
      site_rec = Site.new
      site_attribs[ :key ] = site_key

      ## use object_id: site.id and object_type: Site
      ## change - model/table/schema!!!
      Action.create!( title: 'new site', object: site_attribs[ :title ] )
    end
    site_rec.update_attributes!( site_attribs )
  end

end # class Subscriber

end # module Pluto
@@ -1,17 +1,107 @@
1
+ # encoding: UTF-8
2
+
1
3
  module Pluto
2
4
 
3
- ####
4
- # fix: rename to DateHelper
5
5
 
6
6
  module TemplateHelper
7
7
 
8
- def strip_tags( hypertext )
8
+ def strip_tags( hy )
9
9
  ### tobe done
10
10
  ## strip markup tags; return plain text
11
- hypertext.gsub( /<[^>]+>/, '' )
11
+ hy.gsub( /<[^>]+>/, '' )
12
12
  end
13
13
 
14
14
 
15
##
# Strip all markup tags from hy except for the tags listed in tags.
#
#  hy   - hypertext (string); assumes properly escaped <> in text
#  tags - list of tag names to keep e.g. [:p, :br]
#  opts - skip_restore: true returns the text after step two,
#         that is, w/ whitelisted tags still marked as ‹tag› (for debugging)
#
#  whitelisted tags get returned downcased and w/o attributes;
#  images get replaced by ♦; all other tags get removed
def whitelist( hy, tags, opts={} )

  ###############################################
  # step one - save whitelisted tags use ‹tag›
  tags.each do |tag|
    # note: we strip all attributes
    # note: match all tags case insensitive e.g. allow a,A or br,BR,bR etc.
    #         downcase all tags

    # convert xml-style empty tags to simple html empty tags
    #  e.g. <br/> or <br /> becomes <br>
    hy = hy.gsub( /<(#{tag})\s*\/>/i ) { "‹#{$1.downcase}›" }

    # make sure we won't swallow <br> for <b> for example, thus use \s+ before [^>]
    hy = hy.gsub( /<(#{tag})(\s+[^>]*)?>/i ) { "‹#{$1.downcase}›" }   # opening tag e.g. <p>
    hy = hy.gsub( /<\/(#{tag})\s*>/i )       { "‹/#{$1.downcase}›" }  # closing tag e.g. </p>
  end

  ############################
  # step two - clean tags

  # strip images - special treatment for debugging
  hy = hy.gsub( /<img[^>]*>/i, '♦' )   # for debugging use black diamond e.g. ♦
  hy = hy.gsub( /<\/img>/i, '' )       # should not exist

  # strip all remaining tags
  hy = hy.gsub( /<[^>]+>/, '' )

  ############################################
  # step three - restore whitelisted tags

  return hy if opts[:skip_restore]    # skip step 3 for debugging

  tags.each do |tag|
    hy = hy.gsub( /‹(#{tag})›/ )   { "<#{$1}>" }    # opening tag e.g. <p>
    hy = hy.gsub( /‹\/(#{tag})›/ ) { "</#{$1}>" }   # closing tag e.g. </p>
  end

  hy
end # method whitelist
61
+
62
+
63
+ ## change to simple_hypertext or
64
+ # hypertext_simple or
65
+ # sanitize ???
66
+
67
##
# Turn hypertext into (almost) plain text.
#
#  hy   - hypertext (string)
#  opts - passed on as is to whitelist
#
#  keeps the tags br, p, ul, ol, li, pre and code only and
#  strips the protocol (http:// or https://) from literal urls
#
#  todo: add options for
#    keep links, images, lists (?too), code, codeblocks
#  todo: strip bold (use **) and em (use __); replace &nbsp;
#  todo: try to cleanup whitespaces
#    (no more than two spaces or new lines; no leading or trailing spaces)
def textify( hy, opts={} )
  kept_tags = [:br, :p, :ul, :ol, :li, :pre, :code]
  text = whitelist( hy, kept_tags, opts )

  # clean (prettify) literal urls (strip protocol)
  text.gsub( /(http|https):\/\//, '' )
end
99
+
100
+
101
+ ####
102
+ # fix: move to DateHelper ??
103
+
104
+
15
105
  def time_ago_in_words( from_time )
16
106
  from_time = from_time.to_time
17
107
  to_time = Time.now
@@ -27,23 +117,26 @@ module TemplateHelper
27
117
  case distance_in_minutes
28
118
  when 0..1 then "just now"
29
119
  when 2...45 then "%d minutes ago" % distance_in_minutes
30
- when 45...90 then "about 1 hour ago" ## use one instead of 1 ?? why? why not?
120
+ when 45...90 then "an hour ago" ## use one instead of 1 ?? why? why not?
31
121
  # 90 mins up to 24 hours
32
- when 90...1440 then "about %d hours ago" % (distance_in_minutes.to_f / 60.0).round
122
+ when 90...1440 then "%d hours ago" % (distance_in_minutes.to_f / 60.0).round
33
123
  # 24 hours up to 42 hours
34
- when 1440...2520 then "1 day ago" ## use one day ago - why? why not?
124
+ when 1440...2520 then "a day ago" ## use one day ago - why? why not?
35
125
  # 42 hours up to 30 days
36
126
  when 2520...43200 then "%d days ago" % (distance_in_minutes.to_f / 1440.0).round
37
127
  # 30 days up to 60 days
38
- # fix: use pluralize for months
39
- when 43200...86400 then "about %d months ago" % (distance_in_minutes.to_f / 43200.0).round
128
+ # fix: use pluralize for months - fix: change when - use just for a month ago
129
+ when 43200...86400 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
40
130
  # 60 days up to 365 days
41
131
  when 86400...525600 then "%d months ago" % (distance_in_minutes.to_f / 43200.0).round
42
132
  ## fix - add number of years ago
43
- else "about a year ago" #todo: use over a year ago???
133
+ else "over a year ago" #todo: use over a year ago???
134
+ # fix: split into two - use
135
+ # 1) a year ago
136
+ # 2) (x) years ago
44
137
  end
45
138
  end
46
139
 
47
140
 
48
141
  end # module TemplateHelper
49
- end # module Pluto
142
+ end # module Pluto