pluto-models 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
module Pluto
  module Models

    # A single feed entry (post/article) stored in the 'items' table.
    # Every item belongs to one feed.
    class Item < ActiveRecord::Base
      self.table_name = 'items'

      include Pluto::ActiveRecordMethods   # e.g. read_attribute_w_fallbacks

      belongs_to :feed

      # debug flag - set via debug=, query via debug?
      attr_writer :debug

      def debug?
        @debug || false
      end

      ##################################
      # attribute reader aliases

      # alias for title
      def name
        title
      end

      # alias for summary -- also add descr shortcut??
      def description
        summary
      end

      # alias for url
      def link
        url
      end

      # Scope: order items newest first.
      #
      # Orders by the first non-null datetime column (published, then touched);
      # coalesce is supported by sqlite and postgres. Items without any date
      # sort as if dated 1971-01-01 (hardcoded for now).
      def self.latest
        newest_first = "coalesce(items.published,items.touched,'1971-01-01') desc"
        order( newest_first )
      end

      # True if the db-backed published column itself holds a value
      # (the touched fallback used by #published is NOT considered).
      def published?
        raw_published = read_attribute( :published )
        raw_published.present?
      end

      # Published date with fallback: looks at published first, then at
      # touched (aka updated in RSS/ATOM) via read_attribute_w_fallbacks.
      #
      # todo/fix: use a new name - do NOT squeeze a convenience lookup into
      #           an existing db-backed attribute
      def published
        read_attribute_w_fallbacks( :published, :touched )
      end

      # Copies the values of a parsed feed item (data) into this record and
      # saves it; raises if the save fails (update_attributes!).
      #
      # feed_rec - owning feed record; supplies feed_id and the fetched date
      # data     - parsed item; must respond to guid, title, url, summary,
      #            content, published and updated
      #
      # check: new item/record? not saved? add guid
      #        otherwise do not add guid - why? why not?
      def update_from_struct!( feed_rec, data )
        item_attribs = {
          guid:      data.guid,        # todo: only add for new records???
          title:     data.title,
          url:       data.url,
          summary:   data.summary,
          content:   data.content,
          published: data.published,
          touched:   data.updated,
          feed_id:   feed_rec.id,      # add feed_id fk_ref
          fetched:   feed_rec.fetched
        }

        if debug?
          puts "*** dump item_attribs w/ class types:"
          item_attribs.each do |key, value|
            # skip the (potentially huge) summary n content texts
            unless key == :summary || key == :content
              puts " #{key}: >#{value}< : #{value.class.name}"
            end
          end
        end

        update_attributes!( item_attribs )
      end

    end # class Item

  end # module Models
end # module Pluto
@@ -0,0 +1,19 @@
1
module Pluto
  module Models

    # A planet site stored in the 'sites' table. A site reaches its feeds
    # through subscriptions and its items through those feeds.
    class Site < ActiveRecord::Base
      self.table_name = 'sites'

      has_many :subscriptions
      has_many :feeds, through: :subscriptions
      has_many :items, through: :feeds

      ##################################
      # attribute reader aliases

      # alias for title
      def name
        title
      end

      # legacy attrib reader for fetched -- remove!!!
      def fetched_at
        fetched
      end

    end # class Site

  end # module Models
end # module Pluto
@@ -0,0 +1,14 @@
1
module Pluto
  module Models

    # Join record of the 'subscriptions' table: links one site to one feed.
    class Subscription < ActiveRecord::Base

      self.table_name = 'subscriptions'

      belongs_to :site
      belongs_to :feed

    end # class Subscription

  end # module Models
end # module Pluto
@@ -0,0 +1,47 @@
1
module Pluto
  module Models

    # Walks a list of items and reports, for every item, whether it starts a
    # new calendar day and/or a new feed section. Used for grouping items by
    # day and, within a day, by feed (follows planet planet convention).
    class ItemCursor

      # Stand-in date for items that have neither a published nor a touched
      # date; the same date Item.latest uses for ordering such items.
      UNDATED = Time.local( 1971, 1, 1 )

      # items - any collection responding to each; every item must respond to
      #         published (a time-like value or nil) and feed (responding to id)
      def initialize( items )
        @items = items
      end

      # Yields item, new_date, new_feed for every item (in collection order).
      #
      # new_date - true if the item's published day differs from the previous
      #            item's day; always true for the first item
      # new_feed - true if new_date is true or the item's feed differs from
      #            the previous item's feed
      #
      # Items without a published date are treated as dated UNDATED instead of
      # raising. Returns an Enumerator if no block is given; otherwise returns
      # whatever the wrapped collection's each returns.
      def each
        return enum_for( :each ) unless block_given?

        last_day     = nil   # [year, month, day] of previous item; nil before the first item
        last_feed_id = nil   ## todo: use feed_key instead of id?? why? why not??

        @items.each do |item|
          # look up once - published may run a fallback lookup, feed may hit the db
          published = item.published || UNDATED
          feed_id   = item.feed.id

          day      = [published.year, published.month, published.day]
          new_date = day != last_day

          ## note: new date also **always** starts new feed
          new_feed = new_date || feed_id != last_feed_id

          yield( item, new_date, new_feed )

          last_day     = day
          last_feed_id = feed_id
        end
      end # method each

    end # class ItemCursor

  end # module Models
end # module Pluto
@@ -0,0 +1,130 @@
1
module Pluto

  # Re-fetches site configs and feeds through a Fetcher and stores the results
  # in the database.
  class Refresher

    include LogUtils::Logging

    include Models

    # debug flag - set via debug=, query via debug?
    attr_writer :debug

    def initialize
      @worker = Fetcher.new
    end

    def debug?
      @debug || false
    end


    # Update all site configs. Only sites with a (source) url get updated.
    # opts is accepted but not used.
    def update_sites( opts={} )
      enable_debug_output if debug?

      # -- log update activity
      Activity.create!( text: "update sites (#{Site.count})" )

      #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
      Site.order( :id ).each do |site|
        next unless site.url.present?   # note: only update if (source) url present
        update_site_worker( site )
      end
    end


    # Update all feeds. opts is accepted but not used.
    def update_feeds( opts={} )
      enable_debug_output if debug?

      # -- log update activity
      Activity.create!( text: "update feeds (#{Feed.count})" )

      #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
      Feed.order( :id ).each { |feed| update_feed_worker( feed ) }
    end


    # Update all feeds of the site with the given key; Site.find_by_key! is
    # used for the lookup (bang finder). opts is accepted but not used.
    def update_feeds_for( site_key, opts={} )
      enable_debug_output if debug?

      # -- log update activity
      Activity.create!( text: "update feeds >#{site_key}<" )

      site = Site.find_by_key!( site_key )
      site.feeds.each { |feed| update_feed_worker( feed ) }
    end # method update_feeds_for


  private

    # Turn on sql logging to stdout and pass the debug flag along to the worker.
    def enable_debug_output
      ActiveRecord::Base.logger = Logger.new( STDOUT )
      @worker.debug = true
    end

    # Fetch the site config for site_rec and, if one came back, update the
    # site's subscriptions from it.
    def update_site_worker( site_rec )
      site_config = @worker.site_by_rec_if_modified( site_rec )

      # on error or if http-not modified etc. skip update/processing
      return if site_config.nil?

      subscriber = Subscriber.new
      subscriber.debug = !!debug?    # pass along debug flag

      subscriber.update_subscriptions_for( site_rec.key, site_config )
    end


    # Fetch the feed for feed_rec and, if one came back, save it and refresh
    # the feed's cached last_published date.
    def update_feed_worker( feed_rec )
      feed = @worker.feed_by_rec_if_modified( feed_rec )

      # on error or if http-not modified etc. skip update/processing
      return if feed.nil?

      ## fix/todo: reload feed_rec - fetched date updated etc.
      ##   check if needed for access to fetched date
      ## todo/check: move feed_rec update to the end (after item updates??)
      ##
      ## fix:
      ##   weird rss exception error on windows w/ dates
      ##   e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
      ##   move to_datetime to feedutils!! if it works
      ## todo: move these comments to feedutils??

      ## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
      feed_rec.debug = !!debug?            # pass along debug flag
      feed_rec.save_from_struct!( feed )   # todo: find a better name - why? why not??

      # update cached value last published for item
      #  note: limit(1) returns a relation/array - use first to get the first element or nil
      last_item_rec = feed_rec.items.latest.limit( 1 ).first
      return unless last_item_rec.present?

      # use published if set; otherwise try touched
      last_published = if last_item_rec.published?
                         last_item_rec.published
                       else
                         last_item_rec.touched
                       end
      feed_rec.update_attributes!( last_published: last_published )
    end # method update_feed_worker

  end # class Refresher

end # module Pluto
@@ -0,0 +1,139 @@
1
+
2
module Pluto

  # Migration creating the pluto schema: sites, subscriptions, feeds and items.
  class CreateDb < ActiveRecord::Migration

    # Create all four tables.
    def up
      create_table :sites do |t|
        t.string :title, null: false   # e.g Planet Ruby, Planet JavaScript, etc.
        t.string :key,   null: false   # e.g. ruby, js, etc.

        ############
        # filters (site-wide)
        t.string :includes   # regex
        t.string :excludes   # regex

        ######################
        # for auto-update of feed list/site config

        t.string :url   # source url for auto-update (optional)

        ## note: make sure to use same fields for update check as feed

        # note: fetched is declared exactly once; declaring a column a second
        #  time in the same create_table block is rejected by newer
        #  activerecord versions
        t.datetime :fetched              # last fetched/checked date -- make not null ??
        t.integer  :http_code            # last http status code e.g. 200,404,etc.
        t.string   :http_etag            # last http header etag
        ## note: save last-modified header as text (not datetime) - pass through as is
        t.string   :http_last_modified   # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
        t.string   :http_server          # last http server header if present

        # note: do NOT store body content (that is, text) and md5 digest
        #   use git! and github! commit will be http_etag!!

        #############
        # more fields

        t.timestamps   # created_at, updated_at
      end

      create_table :subscriptions do |t|   # has_many join table (sites/feeds)
        t.references :site, null: false
        t.references :feed, null: false
        t.timestamps
      end

      create_table :feeds do |t|
        t.string :title         # user supplied title
        t.string :auto_title    # "fallback" - auto(fill) title from feed

        t.string :title2        # user supplied title2
        t.string :auto_title2   # "fallback" - auto(fill) title2 from feed e.g. subtitle (atom)

        t.string :url           # user supplied site url
        t.string :auto_url      # "fallback" - auto(fill) url from feed

        t.string :feed_url        # user supplied feed url
        t.string :auto_feed_url   # "fallback" - auto discovery feed url from (site) url

        t.text :summary   # e.g. description (rss)

        t.string :generator   # feed generator (e.g. wordpress, etc.) from feed

        t.datetime :published   # from feed published(atom)+ pubDate(rss)
        t.datetime :built       # from feed lastBuiltDate(rss)
        t.datetime :touched     # from feed updated(atom)

        ### extras (move to array for custom fields or similar??)
        t.string :github    # github handle (optional)
        t.string :twitter   # twitter handle (optional)
        t.string :avatar    # gravatar or hackergotchi handle (optional)

        ### add class/kind field e.g.
        # - personal feed/blog/site, that is, individual author
        # - team blog/site
        # - org (anization) or com(pany blog/site)
        # - newsfeed (composite)
        # - other (link blog?, podcast?) - why? why not??

        ############
        # filters
        t.string :includes   # regex
        t.string :excludes   # regex
        # todo: add generic filter list e.g. t.string :filters (comma,pipe or space separated method names?)

        # -- our own (meta) fields
        t.datetime :last_published   # cache last (latest) published for items

        t.string :key, null: false
        t.string :format   # e.g. atom (1.0), rss 2.0, rss 0.7 etc.

        t.integer :http_code            # last http status code e.g. 200,404,etc.
        t.string  :http_etag            # last http header etag
        ## note: save last-modified header as text (not datetime) - pass through as is
        t.string  :http_last_modified   # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
        t.string  :http_server          # last http server header if present

        t.string :md5    # md5 hash of body
        t.text   :body   # last http response body (complete feed!)

        t.datetime :fetched   # last fetched/checked date

        t.timestamps   # created_at, updated_at
      end


      create_table :items do |t|
        t.string :guid
        t.string :url

        ## note: title may contain more than 255 chars!! use text for sure!
        ##   e.g. Rails Girls blog has massive titles in feed

        t.text :title     # todo: add some :null => false ??
        t.text :summary   # e.g. description (rss), summary (atom)
        t.text :content

        t.datetime :published   # from feed (published) + pubDate(rss)
        t.datetime :touched     # from feed updated (atom)

        ## todo: add :last_updated_at ?? (NOTE: updated_at already taken by auto-timestamps)
        t.references :feed, null: false

        t.datetime :fetched   # last fetched/check date
        t.timestamps          # created_at, updated_at

        ## t.string :author
        ## todo: add author/authors, category/categories
      end

    end

    # Rolling back is not supported; always raises
    # ActiveRecord::IrreversibleMigration.
    def down
      raise ActiveRecord::IrreversibleMigration
    end

  end # class CreateDb

end # module Pluto
@@ -0,0 +1,102 @@
1
module Pluto

  # Syncs a site's feed subscriptions in the database with a site config
  # (a hash of site-level settings plus one hash of settings per feed key).
  class Subscriber

    include LogUtils::Logging

    include Models

    # "top-level" config keys that hold site settings (e.g. title, etc.) or
    # planet planet sections (e.g. planet,defaults) - never feed entries
    NON_FEED_KEYS = ['title','title2','name',
                     'source', 'url',
                     'include','includes','exclude','excludes',
                     'feeds',
                     'planet','defaults'].freeze

    def debug=(value)  @debug = value;   end
    def debug?()       @debug || false;  end


    # !!!! -- deprecated API - remove - do NOT use anymore
    # Prints a warning and delegates to update_subscriptions_for using the
    # default site key 'planet'.
    def update_subscriptions( config, opts={} )
      puts "*** warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key )"
      update_subscriptions_for( 'planet', config, opts )   # default to planet site_key
    end


    # Create or update the site record for site_key from config, then drop all
    # of the site's subscriptions and re-create one per feed entry in config
    # (creating or updating the feed records along the way).
    #
    # site_key - key of the site record to create/update
    # config   - hash-like config; 'title'/'name' and 'source'/'url' are read
    #            for the site; every key not listed in NON_FEED_KEYS is taken
    #            as a feed key and its value must be a Hash of feed settings
    # opts     - accepted but not used
    #
    # Entries whose value is not a Hash get skipped with a warning. Indexing a
    # scalar such as a String with ['feed'] is a substring lookup, not a
    # settings lookup, and would otherwise end up as a bogus feed record
    # plus subscription.
    #
    # Raises whatever the bang methods (create!, update_attributes!) raise on
    # failure. Returns the result of config.each.
    def update_subscriptions_for( site_key, config, opts={} )
      site_attribs = {
        title: config['title']  || config['name'],   # support either title or name
        url:   config['source'] || config['url']     # support source or url for source url for auto-update (optional)
      }

      logger.debug "site_attribs: #{site_attribs.inspect}"

      site_rec = Site.find_by_key( site_key )
      if site_rec.nil?
        site_rec             = Site.new
        site_attribs[ :key ] = site_key

        ## use object_id: site.id and object_type: Site
        ## change - model/table/schema!!!
        Activity.create!( text: "new site >#{site_key}< - #{site_attribs[ :title ]}" )
      end
      site_rec.update_attributes!( site_attribs )

      # -- log update activity
      Activity.create!( text: "update subscriptions >#{site_key}<" )

      # clean out subscriptions and add again
      logger.debug "before site.subscriptions.delete_all - count: #{site_rec.subscriptions.count}"
      site_rec.subscriptions.destroy_all   # note: use destroy_all NOT delete_all (delete_all tries by default only nullify)
      logger.debug "after site.subscriptions.delete_all - count: #{site_rec.subscriptions.count}"

      config.each do |key, value|

        ## todo: downcase key - why ??? why not???

        next if NON_FEED_KEYS.include?( key )

        # feed settings must be a hash - skip anything else (e.g. a stray
        #  top-level string or number setting)
        unless value.is_a?( Hash )
          puts "*** warn - [Pluto::Subscriber] skipping >#{key}< - feed settings must be a hash (got #{value.class.name})"
          next
        end

        ### todo/check:
        ##   check if url or feed_url present
        ##   that is, check for required props/key-value pairs

        feed_key  = key.to_s.dup
        feed_hash = value

        # todo/fix: use title from feed?
        #  e.g. fill up auto_title, auto_url, etc.

        feed_attribs = {
          feed_url: feed_hash[ 'feed' ]     || feed_hash[ 'feed_url' ],
          url:      feed_hash[ 'link' ]     || feed_hash[ 'url' ],
          title:    feed_hash[ 'title' ]    || feed_hash[ 'name' ],
          title2:   feed_hash[ 'title2' ],
          includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
          excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
        }

        puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."

        feed_rec = Feed.find_by_key( feed_key )
        if feed_rec.nil?
          feed_rec             = Feed.new
          feed_attribs[ :key ] = feed_key

          ## use object_id: feed.id and object_type: Feed
          ## change - model/table/schema!!!
          ## todo: add parent_action_id - why? why not?
          Activity.create!( text: "new feed >#{feed_key}< - #{feed_attribs[ :title ]}" )
        end

        feed_rec.update_attributes!( feed_attribs )

        # add subscription record
        #  note: subscriptions get cleaned out on update first (see above)
        site_rec.subscriptions.create!( feed_id: feed_rec.id )
      end

    end # method update_subscriptions_for

  end # class Subscriber

end # module Pluto