pluto-models 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,73 @@
module Pluto
  module Models

    # A single feed entry (post/article), stored in the 'items' table.
    # Every item belongs to exactly one feed.
    class Item < ActiveRecord::Base
      self.table_name = 'items'

      include Pluto::ActiveRecordMethods   # e.g. read_attribute_w_fallbacks

      belongs_to :feed

      # In-memory debug flag (not a db column); when set,
      # update_from_struct! prints the attributes it is about to save.
      attr_writer :debug

      ##################################
      # attribute reader aliases

      # alias for title
      def name
        title
      end

      # alias for summary -- also add descr shortcut??
      def description
        summary
      end

      # alias for url
      def link
        url
      end

      # Orders items newest first, using the first non-null datetime field.
      #   note: coalesce is supported by sqlite and postgres;
      #         items without published or touched fall back to the
      #         hardcoded date 1971-01-01 (for now)
      def self.latest
        order( "coalesce(items.published,items.touched,'1971-01-01') desc" )
      end

      # True only if the db-backed published column itself is set
      # (that is, the touched fallback is NOT considered here).
      def published?
        read_attribute( :published ).present?
      end

      # Published date with fallback: tries published first,
      # then touched (aka updated in RSS/ATOM).
      #
      # todo/fix: use a new name - do NOT squeeze a convenience lookup
      #           into an existing db-backed attribute
      def published
        read_attribute_w_fallbacks( :published, :touched )
      end

      def debug?
        @debug || false
      end

      # Copies the fields of a parsed feed item (data) into this record
      # and saves it; raises if the save fails (update_attributes!).
      #
      #   feed_rec - owning feed record; supplies the feed_id foreign key
      #              and the fetched timestamp
      #   data     - parsed item; must respond to guid, title, url,
      #              summary, content, published and updated
      #
      # check: new item/record? not saved? add guid
      #        otherwise do not add guid - why? why not?
      def update_from_struct!( feed_rec, data )
        item_attribs = {
          guid:      data.guid,        # todo: only add for new records???
          title:     data.title,
          url:       data.url,
          summary:   data.summary,
          content:   data.content,
          published: data.published,
          touched:   data.updated,
          feed_id:   feed_rec.id,      # add feed_id fk_ref
          fetched:   feed_rec.fetched
        }

        if debug?
          puts "*** dump item_attribs w/ class types:"
          # summary n content get skipped (too long for a debug dump)
          printable = item_attribs.reject { |attr_name, _| attr_name == :summary || attr_name == :content }
          printable.each do |attr_name, attr_value|
            puts " #{attr_name}: >#{attr_value}< : #{attr_value.class.name}"
          end
        end

        update_attributes!( item_attribs )
      end

    end # class Item

  end # module Models
end # module Pluto
@@ -0,0 +1,19 @@
module Pluto
  module Models

    # A planet site, stored in the 'sites' table.
    # A site reaches its feeds through the subscriptions join table
    # and its items through those feeds.
    class Site < ActiveRecord::Base
      self.table_name = 'sites'

      has_many :subscriptions
      has_many :feeds, through: :subscriptions
      has_many :items, through: :feeds

      ##################################
      # attribute reader aliases

      # alias for title
      def name
        title
      end

      # legacy attrib reader for fetched -- remove!!!
      def fetched_at
        fetched
      end

    end # class Site

  end # module Models
end # module Pluto
@@ -0,0 +1,14 @@
module Pluto
  module Models

    # Join record linking one site to one feed,
    # stored in the 'subscriptions' table.
    class Subscription < ActiveRecord::Base
      self.table_name = "subscriptions"

      belongs_to :site
      belongs_to :feed

    end # class Subscription

  end # module Models
end # module Pluto
@@ -0,0 +1,47 @@
module Pluto
  module Models

    # Walks a list of items and tells the caller, for every item,
    # whether it starts a new (calendar) day and/or a new feed compared
    # to the item before it. Handy for rendering grouped listings.
    class ItemCursor

      # items - anything that responds to each; every item must respond
      #         to published (a time-like value or nil) and to feed
      #         (an object that responds to id)
      def initialize( items )
        @items = items
      end

      # Yields ( item, new_date, new_feed ) for every item.
      #
      #   new_date - true if year/month/day of the item's published value
      #              differ from the previous item's
      #   new_feed - true if new_date is true or the feed id differs from
      #              the previous item's
      #              note: a new date **always** starts a new feed too
      #                    - e.g. used for grouping within day
      #                      (follows planet planet convention)
      #
      # Items without a published value are treated as dated 1971-01-01
      # (the same placeholder the cursor starts from) instead of raising
      # a NoMethodError on nil.
      #
      # Returns whatever @items.each returns; returns an Enumerator
      # if no block is given.
      def each
        return enum_for( :each ) unless block_given?

        last_published = placeholder_date
        last_feed_id   = -1    ## todo: use feed_key instead of id?? why? why not??

        @items.each do |item|
          # read published and feed id once per item and reuse below
          item_published = item.published || placeholder_date
          item_feed_id   = item.feed.id

          new_date = !same_day?( last_published, item_published )
          new_feed = new_date || last_feed_id != item_feed_id

          yield( item, new_date, new_feed )

          last_published = item_published
          last_feed_id   = item_feed_id
        end
      end # method each

      private

      # Start value / stand-in for items that carry no date at all.
      def placeholder_date
        Time.local( 1971, 1, 1 )
      end

      # True if both time-like values fall on the same calendar day.
      def same_day?( a, b )
        a.year == b.year && a.month == b.month && a.day == b.day
      end

    end # class ItemCursor

  end # module Models
end # module Pluto
@@ -0,0 +1,130 @@
module Pluto

  # Re-fetches site configs and feeds (through a Fetcher worker) and
  # stores whatever changed in the database.
  class Refresher

    include LogUtils::Logging

    include Models

    def initialize
      @worker = Fetcher.new
    end

    # debug flag; gets passed along to the worker, subscriber and feed records
    attr_writer :debug

    def debug?
      @debug || false
    end

    # Updates all site configs.
    #   note: only sites with a (source) url present get updated
    def update_sites( opts={} )
      enable_debug_logging   if debug?

      # -- log update activity
      Activity.create!( text: "update sites (#{Site.count})" )

      #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
      Site.order(:id).each do |site|
        next unless site.url.present?

        update_site_worker( site )
      end
    end

    # Updates all feeds.
    def update_feeds( opts={} )
      enable_debug_logging   if debug?

      # -- log update activity
      Activity.create!( text: "update feeds (#{Feed.count})" )

      #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
      Feed.order(:id).each { |feed| update_feed_worker( feed ) }
    end

    # Updates only the feeds of the site with the given key;
    # Site.find_by_key! is used for the lookup.
    def update_feeds_for( site_key, opts={} )
      enable_debug_logging   if debug?

      # -- log update activity
      Activity.create!( text: "update feeds >#{site_key}<" )

      site = Site.find_by_key!( site_key )
      site.feeds.each { |feed| update_feed_worker( feed ) }
    end # method update_feeds_for


  private

    # Turns on logging for sql too and passes the debug flag along to the worker.
    def enable_debug_logging
      ActiveRecord::Base.logger = Logger.new( STDOUT )
      @worker.debug = true
    end

    def update_site_worker( site_rec )
      site_config = @worker.site_by_rec_if_modified( site_rec )

      # on error or if http-not modified etc. skip update/processing
      return if site_config.nil?

      subscriber = Subscriber.new
      subscriber.debug = debug? ? true : false    # pass along debug flag

      subscriber.update_subscriptions_for( site_rec.key, site_config )
    end

    def update_feed_worker( feed_rec )
      feed = @worker.feed_by_rec_if_modified( feed_rec )

      # on error or if http-not modified etc. skip update/processing
      return if feed.nil?

      ## fix/todo: reload feed_rec - fetched date updated etc.
      ##   check if needed for access to fetched date
      ## todo/check: move feed_rec update to the end (after item updates??)
      ##
      ## feed attribs getting updated include e.g.
      ##   generator
      ##   published_at,built_at,touched_at,fetched_at
      ##   summary,title2
      ##
      ## fix: weird rss exception error on windows w/ dates
      ##   e.g. /lib/ruby/1.9.1/rss/rss.rb:37:in `w3cdtf': wrong number of arguments (1 for 0) (ArgumentError)
      ##   move to_datetime to feedutils!! if it works
      ##   todo: move these comments to feedutils??

      ## fix/todo: pass debug flag as opts - debug: true|false !!!!!!
      feed_rec.debug = debug? ? true : false     # pass along debug flag
      feed_rec.save_from_struct!( feed )         # todo: find a better name - why? why not??

      # update cached value last published for item
      #   note: limit(1) returns a relation/array - use first to get
      #         the first element or nil
      newest_item = feed_rec.items.latest.limit(1).first
      return unless newest_item.present?

      # use published if set; otherwise try touched
      newest_date = newest_item.published? ? newest_item.published : newest_item.touched
      feed_rec.update_attributes!( last_published: newest_date )
    end # method update_feed_worker

  end # class Refresher

end # module Pluto
@@ -0,0 +1,139 @@
module Pluto

  # Creates the complete pluto schema in one go:
  #   sites, subscriptions (sites/feeds join table), feeds and items.
  # Not reversible - down raises ActiveRecord::IrreversibleMigration.
  class CreateDb < ActiveRecord::Migration

    def up
      create_table :sites do |t|
        t.string :title, :null => false    # e.g Planet Ruby, Planet JavaScript, etc.
        t.string :key,   :null => false    # e.g. ruby, js, etc.

        ############
        # filters (site-wide)
        t.string :includes    # regex
        t.string :excludes    # regex

        ######################
        # for auto-update of feed list/site config

        t.string :url     # source url for auto-update (optional)

        ## note: make sure to use same fields for update check as feed

        ## note: fetched gets declared exactly once; a repeated declaration
        ##   of the same column is redundant at best, and newer ActiveRecord
        ##   versions are expected to reject it with an ArgumentError
        t.datetime :fetched              # last fetched/checked date -- make not null ??
        t.integer  :http_code            # last http status code e.g. 200,404,etc.
        t.string   :http_etag            # last http header etag
        ## note: save last-modified header as text (not datetime) - pass through as is
        t.string   :http_last_modified   # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
        t.string   :http_server          # last http server header if present

        # note: do NOT store body content (that is, text) and md5 digest
        #   use git! and github! commit will be http_etag!!

        #############
        # more fields

        t.timestamps  # created_at, updated_at
      end

      create_table :subscriptions do |t|   # has_many join table (sites/feeds)
        t.references :site, :null => false
        t.references :feed, :null => false
        t.timestamps
      end

      create_table :feeds do |t|
        t.string :title          # user supplied title
        t.string :auto_title     # "fallback" - auto(fill) title from feed

        t.string :title2         # user supplied title2
        t.string :auto_title2    # "fallback" - auto(fill) title2 from feed e.g. subtitle (atom)

        t.string :url            # user supplied site url
        t.string :auto_url       # "fallback" - auto(fill) url from feed

        t.string :feed_url       # user supplied feed url
        t.string :auto_feed_url  # "fallback" - auto discovery feed url from (site) url

        t.text   :summary        # e.g. description (rss)

        t.string :generator      # feed generator (e.g. wordpress, etc.) from feed

        t.datetime :published    # from feed published(atom)+ pubDate(rss)
        t.datetime :built        # from feed lastBuiltDate(rss)
        t.datetime :touched      # from feed updated(atom)

        ### extras (move to array for custom fields or similar??)
        t.string :github     # github handle (optional)
        t.string :twitter    # twitter handle (optional)
        t.string :avatar     # gravatar or hackergotchi handle (optional)

        ### add class/kind field e.g.
        # - personal feed/blog/site, that is, individual author
        # - team blog/site
        # - org (anization) or com(pany blog/site)
        # - newsfeed (composite)
        # - other (link blog?, podcast?) - why? why not??

        ############
        # filters
        t.string :includes    # regex
        t.string :excludes    # regex
        # todo: add generic filter list e.g. t.string :filters (comma,pipe or space separated method names?)

        # -- our own (meta) fields
        t.datetime :last_published   # cache last (latest) published for items

        t.string  :key, :null => false
        t.string  :format            # e.g. atom (1.0), rss 2.0, rss 0.7 etc.

        t.integer :http_code            # last http status code e.g. 200,404,etc.
        t.string  :http_etag            # last http header etag
        ## note: save last-modified header as text (not datetime) - pass through as is
        t.string  :http_last_modified   # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
        t.string  :http_server          # last http server header if present

        t.string :md5        # md5 hash of body
        t.text   :body       # last http response body (complete feed!)

        t.datetime :fetched  # last fetched/checked date

        t.timestamps   # created_at, updated_at
      end


      create_table :items do |t|
        t.string :guid
        t.string :url

        ## note: title may contain more than 255 chars!! use text for sure!
        ##   e.g. Rails Girls blog has massive titles in feed

        t.text :title      # todo: add some :null => false ??
        t.text :summary    # e.g. description (rss), summary (atom)
        t.text :content

        t.datetime :published   # from feed (published) + pubDate(rss)
        t.datetime :touched     # from feed updated (atom)

        ## todo: add :last_updated_at ?? (NOTE: updated_at already taken by auto-timestamps)
        t.references :feed, :null => false

        t.datetime :fetched   # last fetched/check date
        t.timestamps          # created_at, updated_at

        ## t.string :author
        ## todo: add author/authors, category/categories
      end

    end

    def down
      raise ActiveRecord::IrreversibleMigration
    end

  end # class CreateDb

end # module Pluto
@@ -0,0 +1,102 @@
module Pluto

  # Syncs the site / feed / subscription records in the database
  # with a planet configuration (a hash of settings).
  class Subscriber

    include LogUtils::Logging

    include Models

    # "top-level" config keys that are site settings (e.g. title, etc.)
    # or planet planet sections (e.g. planet,defaults) - never feed keys
    RESERVED_KEYS = ['title','title2','name',
                     'source', 'url',
                     'include','includes','exclude','excludes',
                     'feeds',
                     'planet','defaults'].freeze

    def debug=(value)  @debug = value;   end
    def debug?()       @debug || false;  end


    # !!!! -- deprecated API - remove - do NOT use anymore
    #   use update_subscriptions_for( site_key, config ) instead
    def update_subscriptions( config, opts={} )
      puts "*** warn - [Pluto::Subscriber] deprecated API -- use update_subscriptions_for( site_key )"
      update_subscriptions_for( 'planet', config, opts )  # default to planet site_key
    end


    # Creates or updates the site record for site_key, then rebuilds its
    # subscriptions from scratch: all existing subscriptions of the site
    # get destroyed and one new subscription gets created per feed entry
    # in config (feed records get created or updated along the way).
    #
    #   site_key - key of the site record e.g. ruby, js, etc.
    #   config   - settings hash; site settings are read from 'title'/'name'
    #              and 'source'/'url'; every other key that is not listed
    #              in RESERVED_KEYS is treated as a feed key whose value
    #              must be a hash of feed settings
    #   opts     - not used (for now)
    #
    # Entries whose value is not a hash get skipped with a warning
    # (instead of ending up as junk feed records or raising a TypeError).
    #
    # Raises if saving a record fails (update_attributes! / create!).
    def update_subscriptions_for( site_key, config, opts={} )
      site_attribs = {
        title: config['title']  || config['name'],   # support either title or name
        url:   config['source'] || config['url']     # support source or url for source url for auto-update (optional)
      }

      logger.debug "site_attribs: #{site_attribs.inspect}"

      site_rec = Site.find_by_key( site_key )
      if site_rec.nil?
        site_rec             = Site.new
        site_attribs[ :key ] = site_key

        ## use object_id: site.id and object_type: Site
        ## change - model/table/schema!!!
        Activity.create!( text: "new site >#{site_key}< - #{site_attribs[ :title ]}" )
      end
      site_rec.update_attributes!( site_attribs )

      # -- log update activity
      Activity.create!( text: "update subscriptions >#{site_key}<" )

      # clean out subscriptions and add again
      #   note: use destroy_all NOT delete_all (delete_all tries by default only nullify)
      logger.debug "before site.subscriptions.destroy_all - count: #{site_rec.subscriptions.count}"
      site_rec.subscriptions.destroy_all
      logger.debug "after site.subscriptions.destroy_all - count: #{site_rec.subscriptions.count}"

      config.each do |key, value|

        ## todo: downcase key - why ??? why not???

        next if RESERVED_KEYS.include?( key )

        # feed settings must be a hash; anything else (e.g. a stray
        #  top-level string or number setting) is not a feed
        unless value.is_a?( Hash )
          puts "*** warn - [Pluto::Subscriber] skipping >#{key}< - feed settings must be a hash (got #{value.class.name})"
          next
        end

        ### todo/check:
        ##   check if url or feed_url present
        ##   that is, check for required props/key-value pairs

        feed_key   = key.to_s.dup
        feed_hash  = value

        # todo/fix: use title from feed?
        #  e.g. fill up auto_title, auto_url, etc.

        feed_attribs = {
          feed_url: feed_hash[ 'feed' ]     || feed_hash[ 'feed_url' ],
          url:      feed_hash[ 'link' ]     || feed_hash[ 'url' ],
          title:    feed_hash[ 'title' ]    || feed_hash[ 'name' ],
          title2:   feed_hash[ 'title2' ],
          includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
          excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
        }

        puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."

        feed_rec = Feed.find_by_key( feed_key )
        if feed_rec.nil?
          feed_rec             = Feed.new
          feed_attribs[ :key ] = feed_key

          ## use object_id: feed.id and object_type: Feed
          ## change - model/table/schema!!!
          ## todo: add parent_action_id - why? why not?
          Activity.create!( text: "new feed >#{feed_key}< - #{feed_attribs[ :title ]}" )
        end

        feed_rec.update_attributes!( feed_attribs )

        # add subscription record
        #   note: subscriptions get cleaned out on update first (see above)
        site_rec.subscriptions.create!( feed_id: feed_rec.id )
      end

    end # method update_subscriptions_for

  end # class Subscriber

end # module Pluto