pluto-models 1.3.2 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/README.md +4 -3
- data/Rakefile +4 -3
- data/lib/pluto/models.rb +3 -41
- data/lib/pluto/models/feed.rb +54 -56
- data/lib/pluto/models/item.rb +20 -15
- data/lib/pluto/models/site.rb +171 -1
- data/lib/pluto/models/utils.rb +7 -7
- data/lib/pluto/schema.rb +25 -26
- data/lib/pluto/version.rb +2 -2
- data/test/data/ruby.ini +18 -0
- data/test/test_filter.rb +9 -6
- data/test/test_helpers.rb +7 -1
- data/test/test_site.rb +74 -0
- metadata +25 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2adea6bc1a5429eea27ded5314e603e339ff2595
|
4
|
+
data.tar.gz: 50f68d6d907924a99745c74a5ba71545cafcf746
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81b50daf9bfc77c04abedece3600bca9e9355f46f3e2df182ef6b22676fbdeed6529b6511142707cc3dc666ff82384ce86b71f24b04a421282f6f74357229844
|
7
|
+
data.tar.gz: cd4cd39e1a282a1ca242d9b8a743672bf37db241890485a520355310b8cdc5904c8e147ceae0d277b3163d8b74524468463fc921e50f8be63f5b5a2e4090ff5c
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# pluto-models gem - planet schema 'n' models for easy (re)use
|
2
2
|
|
3
|
-
* home :: [github.com/feedreader/pluto
|
4
|
-
* bugs :: [github.com/feedreader/pluto
|
3
|
+
* home :: [github.com/feedreader/pluto.models](https://github.com/feedreader/pluto.models)
|
4
|
+
* bugs :: [github.com/feedreader/pluto.models/issues](https://github.com/feedreader/pluto.models/issues)
|
5
5
|
* gem :: [rubygems.org/gems/pluto-models](https://rubygems.org/gems/pluto-models)
|
6
6
|
* rdoc :: [rubydoc.info/gems/pluto-models](http://rubydoc.info/gems/pluto-models)
|
7
7
|
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
@@ -85,7 +85,8 @@ end
|
|
85
85
|
## Real World Usage
|
86
86
|
|
87
87
|
- [`pluto`](https://github.com/feedreader/pluto) - planet generator command line tool using the pluto-models gem
|
88
|
-
- [`pluto.live`](https://github.com/feedreader/pluto.live) - sample planet site; sinatra web app
|
88
|
+
- [`pluto.live.starter`](https://github.com/feedreader/pluto.live.starter) - sample planet site; sinatra web app starter template in ruby using the pluto-models gem
|
89
|
+
- [`pluto.live`](https://github.com/feedreader/pluto.live) - sample planet site; rails web app in ruby using the pluto-models gem
|
89
90
|
|
90
91
|
|
91
92
|
|
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ Hoe.spec 'pluto-models' do
|
|
8
8
|
self.summary = "pluto-models - planet schema 'n' models for easy (re)use"
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/feedreader/pluto
|
11
|
+
self.urls = ['https://github.com/feedreader/pluto.models']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'feedreader@googlegroups.com'
|
@@ -20,8 +20,9 @@ Hoe.spec 'pluto-models' do
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['props', '>= 1.1.2'],
|
22
22
|
['logutils', '>= 0.6.1'],
|
23
|
-
['
|
24
|
-
['
|
23
|
+
['feedparser', '>= 1.0.0'],
|
24
|
+
['feedfilter', '>= 1.1.1'],
|
25
|
+
['textutils', '>= 1.0.1'],
|
25
26
|
['activerecord'],
|
26
27
|
['logutils-activerecord', '>= 0.2.0'],
|
27
28
|
['props-activerecord', '>= 0.1.0'],
|
data/lib/pluto/models.rb
CHANGED
@@ -19,8 +19,10 @@ require 'active_record'
|
|
19
19
|
|
20
20
|
require 'props' # manage settings/env
|
21
21
|
require 'logutils'
|
22
|
-
require 'feedutils'
|
23
22
|
require 'textutils'
|
23
|
+
require 'feedparser'
|
24
|
+
require 'feedfilter'
|
25
|
+
|
24
26
|
|
25
27
|
## add more activerecords addons/utils
|
26
28
|
require 'activerecord/utils' # add macros e.g. read_attr_w_fallbacks etc.
|
@@ -108,45 +110,5 @@ end # module Pluto
|
|
108
110
|
|
109
111
|
|
110
112
|
|
111
|
-
######
|
112
|
-
# todo - move to ext/array.rb or similar
|
113
|
-
|
114
|
-
class Array
|
115
|
-
|
116
|
-
## todo: check if there's already a builtin method for this
|
117
|
-
#
|
118
|
-
# note:
|
119
|
-
# in rails ary.in_groups(3) results in
|
120
|
-
# top-to-bottom, left-to-right.
|
121
|
-
# and not left-to-right first and than top-to-bottom.
|
122
|
-
#
|
123
|
-
# rename to in_groups_vertical(3) ???
|
124
|
-
|
125
|
-
def in_columns( cols ) # alias for convenience for chunks - needed? why? why not?
|
126
|
-
chunks( cols )
|
127
|
-
end
|
128
|
-
|
129
|
-
def chunks( number_of_chunks )
|
130
|
-
## NB: use chunks - columns might be in use by ActiveRecord!
|
131
|
-
###
|
132
|
-
# e.g.
|
133
|
-
# [1,2,3,4,5,6,7,8,9,10].columns(3)
|
134
|
-
# becomes:
|
135
|
-
# [[1,4,7,10],
|
136
|
-
# [2,5,8],
|
137
|
-
# [3,6,9]]
|
138
|
-
|
139
|
-
## check/todo: make a copy of the array first??
|
140
|
-
# for now reference to original items get added to columns
|
141
|
-
chunks = (1..number_of_chunks).collect { [] }
|
142
|
-
each_with_index do |item,index|
|
143
|
-
chunks[ index % number_of_chunks ] << item
|
144
|
-
end
|
145
|
-
chunks
|
146
|
-
end
|
147
|
-
|
148
|
-
end
|
149
|
-
|
150
|
-
|
151
113
|
# say hello
|
152
114
|
puts Pluto.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
|
data/lib/pluto/models/feed.rb
CHANGED
@@ -21,9 +21,9 @@ class Feed < ActiveRecord::Base
|
|
21
21
|
# note: order by first non-null datetime field
|
22
22
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
23
23
|
|
24
|
-
# note: if not
|
25
|
-
## order( "coalesce(published,
|
26
|
-
order( "coalesce(feeds.
|
24
|
+
# note: if not updated or published use hardcoded 1971-01-01 for now
|
25
|
+
## order( "coalesce(updated,published,'1971-01-01') desc" )
|
26
|
+
order( "coalesce(feeds.items_last_updated,'1971-01-01') desc" )
|
27
27
|
end
|
28
28
|
|
29
29
|
##################################
|
@@ -34,6 +34,7 @@ class Feed < ActiveRecord::Base
|
|
34
34
|
def name() title; end # alias for title
|
35
35
|
def description() summary; end # alias for summary
|
36
36
|
def desc() summary; end # alias(2) for summary
|
37
|
+
def subtitle() summary; end # alias(3) for summary
|
37
38
|
def link() url; end # alias for url
|
38
39
|
def feed() feed_url; end # alias for feed_url
|
39
40
|
|
@@ -46,28 +47,28 @@ class Feed < ActiveRecord::Base
|
|
46
47
|
|
47
48
|
def url?() read_attribute(:url).present?; end
|
48
49
|
def title?() read_attribute(:title).present?; end
|
49
|
-
def title2?() read_attribute(:title2).present?; end
|
50
50
|
def feed_url?() read_attribute(:feed_url).present?; end
|
51
51
|
|
52
52
|
def url() read_attribute_w_fallbacks( :url, :auto_url ); end
|
53
53
|
def title() read_attribute_w_fallbacks( :title, :auto_title ); end
|
54
|
-
def title2() read_attribute_w_fallbacks( :title2, :auto_title2 ); end
|
55
54
|
def feed_url() read_attribute_w_fallbacks( :feed_url, :auto_feed_url ); end
|
56
55
|
|
56
|
+
def summary?() read_attribute(:summary).present?; end
|
57
57
|
|
58
|
+
|
59
|
+
def updated?() read_attribute(:updated).present?; end
|
58
60
|
def published?() read_attribute(:published).present?; end
|
59
|
-
def touched?() read_attribute(:touched).present?; end
|
60
61
|
|
62
|
+
def updated
|
63
|
+
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
64
|
+
# db backed attribute
|
65
|
+
read_attribute_w_fallbacks( :updated, :published )
|
66
|
+
end
|
61
67
|
|
62
68
|
def published
|
63
69
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
64
70
|
# db backed attribute
|
65
|
-
|
66
|
-
read_attribute_w_fallbacks(
|
67
|
-
:published,
|
68
|
-
:touched, # try touched (aka updated (ATOM))
|
69
|
-
:built # try build (aka lastBuildDate (RSS))
|
70
|
-
)
|
71
|
+
read_attribute_w_fallbacks( :published, :updated )
|
71
72
|
end
|
72
73
|
|
73
74
|
|
@@ -75,44 +76,23 @@ class Feed < ActiveRecord::Base
|
|
75
76
|
def debug?() @debug || false; end
|
76
77
|
|
77
78
|
|
78
|
-
def
|
79
|
-
return false if text.blank? ## allow/guard against nil and empty string
|
79
|
+
def deep_update_from_struct!( data )
|
80
80
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
81
|
+
######
|
82
|
+
## check for filters (includes/excludes) if present
|
83
|
+
## for now just check for includes
|
84
|
+
##
|
85
|
+
if includes.present?
|
86
|
+
includesFilter = FeedFilter::IncludeFilters.new( includes )
|
87
|
+
else
|
88
|
+
includesFilter = nil
|
85
89
|
end
|
86
90
|
|
87
|
-
false # no term match found
|
88
|
-
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
def save_from_struct!( data )
|
93
|
-
|
94
|
-
update_from_struct!( data )
|
95
|
-
|
96
91
|
data.items.each do |item|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
##
|
102
|
-
if includes.present?
|
103
|
-
## split terms (allow comma,pipe) - do NOT use space; allows e.g. terms such as github pages
|
104
|
-
terms = includes.split( /\s*[,|]\s*/ )
|
105
|
-
## remove leading and trailing white spaces - check - still required when using \s* ??
|
106
|
-
terms = terms.map { |term| term.strip }
|
107
|
-
match = match_terms?( terms, item.title ) ||
|
108
|
-
match_terms?( terms, item.summary) ||
|
109
|
-
match_terms?( terms, item.content)
|
110
|
-
|
111
|
-
if match == false
|
112
|
-
puts "** SKIPPING | #{item.title}"
|
113
|
-
puts " no include terms match: #{terms.join('|')}"
|
114
|
-
next ## skip to next item
|
115
|
-
end
|
92
|
+
if includesFilter && includesFilter.match_item?( item ) == false
|
93
|
+
puts "** SKIPPING | #{item.title}"
|
94
|
+
puts " no include terms match: #{includes}"
|
95
|
+
next ## skip to next item
|
116
96
|
end
|
117
97
|
|
118
98
|
item_rec = Item.find_by_guid( item.guid )
|
@@ -123,17 +103,37 @@ class Feed < ActiveRecord::Base
|
|
123
103
|
## todo: check if any attribs changed
|
124
104
|
puts "UPDATE | #{item.title}"
|
125
105
|
end
|
126
|
-
|
106
|
+
|
127
107
|
item_rec.debug = debug? ? true : false # pass along debug flag
|
128
|
-
|
108
|
+
|
109
|
+
item_rec.feed_id = id # feed_rec.id - add feed_id fk_ref
|
110
|
+
item_rec.fetched = fetched # feed_rec.fetched
|
111
|
+
|
112
|
+
item_rec.update_from_struct!( item )
|
129
113
|
|
130
114
|
end # each item
|
131
|
-
|
115
|
+
|
116
|
+
|
117
|
+
# update cached value last published for item
|
118
|
+
## todo/check: force reload of items - why? why not??
|
119
|
+
last_item_rec = items.latest.limit(1).first # note limit(1) will return relation/array - use first to get first element or nil from ary
|
120
|
+
if last_item_rec.present?
|
121
|
+
if last_item_rec.updated?
|
122
|
+
self.items_last_updated = last_item_rec.updated
|
123
|
+
## save! ## note: will get saved w/ update_from_struct! - why? why not??
|
124
|
+
else # try published
|
125
|
+
self.items_last_updated = last_item_rec.published
|
126
|
+
## save! ## note: will get saved w/ update_from_struct! - why? why not??
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
update_from_struct!( data )
|
131
|
+
end # method deep_update_from_struct!
|
132
132
|
|
133
133
|
|
134
134
|
def update_from_struct!( data )
|
135
135
|
|
136
|
-
## todo: move to
|
136
|
+
## todo: move to FeedParser::Feed ??? why? why not??
|
137
137
|
if data.generator
|
138
138
|
generator_full = ''
|
139
139
|
generator_full << data.generator
|
@@ -145,9 +145,9 @@ class Feed < ActiveRecord::Base
|
|
145
145
|
|
146
146
|
##
|
147
147
|
# todo:
|
148
|
-
## strip all tags from
|
148
|
+
## strip all tags from summary (subtitle)
|
149
149
|
## limit to 255 chars
|
150
|
-
## e.g.
|
150
|
+
## e.g. summary (subtitle) such as this exist
|
151
151
|
## This is a low-traffic announce-only list for people interested
|
152
152
|
## in hearing news about Polymer (<a href="http://polymer-project.org">http://polymer-project.org</a>).
|
153
153
|
## The higher-traffic mailing list for all kinds of discussion is
|
@@ -156,16 +156,14 @@ class Feed < ActiveRecord::Base
|
|
156
156
|
|
157
157
|
feed_attribs = {
|
158
158
|
format: data.format,
|
159
|
+
updated: data.updated,
|
159
160
|
published: data.published,
|
160
|
-
touched: data.updated,
|
161
|
-
built: data.built,
|
162
161
|
summary: data.summary,
|
162
|
+
generator: generator_full,
|
163
163
|
### todo/fix: add/use
|
164
164
|
# auto_title: ???,
|
165
165
|
# auto_url: ???,
|
166
166
|
# auto_feed_url: ???,
|
167
|
-
auto_title2: data.title2 ? strip_tags(data.title2)[0...255] : data.title2, # limit to 255 chars; strip tags
|
168
|
-
generator: generator_full
|
169
167
|
}
|
170
168
|
|
171
169
|
if debug?
|
data/lib/pluto/models/item.rb
CHANGED
@@ -11,6 +11,7 @@ class Item < ActiveRecord::Base
|
|
11
11
|
## todo/fix:
|
12
12
|
## use a module ref or something; do NOT include all methods - why? why not?
|
13
13
|
include TextUtils::HypertextHelper ## e.g. lets us use strip_tags( ht )
|
14
|
+
include FeedFilter::AdsFilter ## e.g. lets us use strip_ads( ht )
|
14
15
|
|
15
16
|
|
16
17
|
##################################
|
@@ -25,41 +26,45 @@ class Item < ActiveRecord::Base
|
|
25
26
|
# note: order by first non-null datetime field
|
26
27
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
27
28
|
|
28
|
-
# note: if not published
|
29
|
-
order( "coalesce(items.
|
29
|
+
# note: if not updated,published use hardcoded 1971-01-01 for now
|
30
|
+
order( "coalesce(items.updated,items.published,'1971-01-01') desc" )
|
30
31
|
end
|
31
32
|
|
32
|
-
def
|
33
|
+
def updated?() read_attribute(:updated).present?; end
|
34
|
+
def published?() read_attribute(:published).present?; end # note: published is basically an alias for created
|
33
35
|
|
34
|
-
def
|
36
|
+
def updated
|
35
37
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
36
38
|
# db backed attribute
|
37
|
-
|
38
|
-
read_attribute_w_fallbacks(
|
39
|
-
:published,
|
40
|
-
:touched # try touched (aka updated RSS/ATOM)
|
41
|
-
)
|
39
|
+
read_attribute_w_fallbacks( :updated, :published )
|
42
40
|
end
|
43
41
|
|
42
|
+
def published
|
43
|
+
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
44
|
+
# db backed attribute
|
45
|
+
read_attribute_w_fallbacks( :published, :updated )
|
46
|
+
end
|
44
47
|
|
45
48
|
|
46
49
|
def debug=(value) @debug = value; end
|
47
50
|
def debug?() @debug || false; end
|
48
51
|
|
49
|
-
|
52
|
+
|
53
|
+
def update_from_struct!( data )
|
50
54
|
## check: new item/record? not saved? add guid
|
51
55
|
# otherwise do not add guid - why? why not?
|
52
56
|
|
57
|
+
## note: for now also strip ads in summary
|
58
|
+
## fix/todo: summary (in the future) is supposed to be only plain vanilla text
|
59
|
+
|
53
60
|
item_attribs = {
|
54
61
|
guid: data.guid, # todo: only add for new records???
|
55
62
|
title: data.title ? strip_tags(data.title)[0...255] : data.title, ## limit to 255 chars; strip tags
|
56
63
|
url: data.url,
|
57
|
-
summary: data.summary,
|
58
|
-
content: data.content,
|
64
|
+
summary: data.summary.blank? ? data.summary : strip_ads( data.summary ).strip,
|
65
|
+
content: data.content.blank? ? data.content : strip_ads( data.content ).strip,
|
66
|
+
updated: data.updated,
|
59
67
|
published: data.published,
|
60
|
-
touched: data.updated,
|
61
|
-
feed_id: feed_rec.id, # add feed_id fk_ref
|
62
|
-
fetched: feed_rec.fetched
|
63
68
|
}
|
64
69
|
|
65
70
|
if debug?
|
data/lib/pluto/models/site.rb
CHANGED
@@ -13,7 +13,6 @@ class Site < ActiveRecord::Base
|
|
13
13
|
##################################
|
14
14
|
# attribute reader aliases
|
15
15
|
def name() title; end # alias for title
|
16
|
-
def fetched_at() fetched; end # - legacy attrib reader -- remove!!!
|
17
16
|
|
18
17
|
def owner_name() author; end # alias for author
|
19
18
|
def owner() author; end # alias(2) for author
|
@@ -22,8 +21,179 @@ class Site < ActiveRecord::Base
|
|
22
21
|
def owner_email() email; end # alias for email
|
23
22
|
def author_email() email; end # alias(2) for email
|
24
23
|
|
24
|
+
|
25
|
+
|
26
|
+
def self.deep_create_or_update_from_hash!( name, config, opts={} )
|
27
|
+
|
28
|
+
## note: allow (optional) config of site key too
|
29
|
+
site_key = config['key'] || config['slug']
|
30
|
+
if site_key.nil?
|
31
|
+
## if no key configured; use (file)name; remove -_ chars
|
32
|
+
## e.g. jekyll-meta becomes jekyllmeta etc.
|
33
|
+
site_key = name.downcase.gsub( /[\-_]/, '' )
|
34
|
+
end
|
35
|
+
|
36
|
+
site_rec = Site.find_by_key( site_key )
|
37
|
+
if site_rec.nil?
|
38
|
+
site_rec = Site.new
|
39
|
+
site_rec.key = site_key
|
40
|
+
end
|
41
|
+
|
42
|
+
site_rec.deep_update_from_hash!( config, opts )
|
43
|
+
site_rec
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
def deep_update_from_hash!( config, opts={} )
|
48
|
+
|
49
|
+
site_attribs = {
|
50
|
+
title: config['title'] || config['name'], # support either title or name
|
51
|
+
url: config['source'] || config['url'], # support source or url for source url for auto-update (optional)
|
52
|
+
author: config['author'] || config['owner'],
|
53
|
+
email: config['email'],
|
54
|
+
updated: Time.now, ## track last_update via pluto (w/ update_subscription_for fn)
|
55
|
+
}
|
56
|
+
|
57
|
+
## note: allow (optional) config of site key too
|
58
|
+
site_key = config['key'] || config['slug']
|
59
|
+
site_attribs[:key] = site_key if site_key
|
60
|
+
|
61
|
+
|
62
|
+
logger = LogUtils::Logger.root
|
63
|
+
logger.debug "site_attribs: #{site_attribs.inspect}"
|
64
|
+
|
65
|
+
if new_record?
|
66
|
+
## use object_id: site.id and object_type: Site
|
67
|
+
## change - model/table/schema!!!
|
68
|
+
Activity.create!( text: "new site >#{key}< - #{title}" )
|
69
|
+
end
|
70
|
+
|
71
|
+
update_attributes!( site_attribs )
|
72
|
+
|
73
|
+
|
74
|
+
# -- log update activity
|
75
|
+
## Activity.create!( text: "update subscriptions for site >#{key}<" )
|
76
|
+
|
77
|
+
#### todo/fix:
|
78
|
+
## double check - how to handle delete
|
79
|
+
## feeds might get referenced by other sites
|
80
|
+
## cannot just delete feeds; only safe to delete join table (subscriptions)
|
81
|
+
## check if feed "lingers" on with no reference (to site)???
|
82
|
+
|
83
|
+
# clean out subscriptions and add again
|
84
|
+
logger.debug "before site.subscriptions.delete_all - count: #{subscriptions.count}"
|
85
|
+
# note: use destroy_all NOT delete_all (delete_all by default only tries to nullify)
|
86
|
+
subscriptions.destroy_all
|
87
|
+
logger.debug "after site.subscriptions.delete_all - count: #{subscriptions.count}"
|
88
|
+
|
89
|
+
|
90
|
+
config.each do |k, v|
|
91
|
+
|
92
|
+
## todo: downcase key - why ??? why not???
|
93
|
+
|
94
|
+
# skip "top-level" feed keys e.g. title, etc. or planet planet sections (e.g. planet,defaults)
|
95
|
+
next if ['key','slug',
|
96
|
+
'title','name','name2','title2','subtitle',
|
97
|
+
'source', 'url',
|
98
|
+
'include','includes','exclude','excludes',
|
99
|
+
'feeds',
|
100
|
+
'author', 'owner', 'email',
|
101
|
+
'planet','defaults'].include?( k )
|
102
|
+
|
103
|
+
### todo/check:
|
104
|
+
## check value - must be hash
|
105
|
+
# check if url or feed_url present
|
106
|
+
# that is, check for required props/key-value pairs
|
107
|
+
|
108
|
+
feed_key = k.to_s.dup
|
109
|
+
feed_hash = v
|
110
|
+
|
111
|
+
# todo/fix: use title from feed?
|
112
|
+
# e.g. fill up auto_title, auto_url, etc.
|
113
|
+
|
114
|
+
feed_attribs = {
|
115
|
+
feed_url: feed_hash[ 'feed' ] || feed_hash[ 'feed_url' ] || feed_hash[ 'xml_url' ],
|
116
|
+
url: feed_hash[ 'link' ] || feed_hash[ 'url' ] || feed_hash[ 'html_url' ],
|
117
|
+
title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
|
118
|
+
## note: title2 no longer supported; use summary or subtitle?
|
119
|
+
### title2: feed_hash[ 'title2' ] || feed_hash[ 'name2' ] || feed_hash[ 'subtitle'],
|
120
|
+
includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
|
121
|
+
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ],
|
122
|
+
author: feed_hash[ 'author' ] || feed_hash[ 'owner' ],
|
123
|
+
email: feed_hash[ 'email' ],
|
124
|
+
avatar: feed_hash[ 'avatar' ] || feed_hash[ 'face'],
|
125
|
+
location: feed_hash[ 'location' ],
|
126
|
+
github: feed_hash[ 'github' ],
|
127
|
+
twitter: feed_hash[ 'twitter' ],
|
128
|
+
rubygems: feed_hash[ 'rubygems' ],
|
129
|
+
meetup: feed_hash[ 'meetup' ], ### -- remove from schema - virtual attrib ?? - why? why not??
|
130
|
+
}
|
131
|
+
|
132
|
+
feed_attribs[:encoding] = feed_hash['encoding']||feed_hash['charset'] if feed_hash['encoding']||feed_hash['charset']
|
133
|
+
|
134
|
+
#####
|
135
|
+
##
|
136
|
+
# auto-fill; convenience helpers
|
137
|
+
|
138
|
+
if feed_hash['meetup']
|
139
|
+
## link/url = http://www.meetup.com/vienna-rb
|
140
|
+
## feed/feed_url = http://www.meetup.com/vienna-rb/events/rss/vienna.rb/
|
141
|
+
|
142
|
+
feed_attribs[:url] = "http://www.meetup.com/#{feed_hash['meetup']}" if feed_attribs[:url].nil?
|
143
|
+
feed_attribs[:feed_url] = "http://www.meetup.com/#{feed_hash['meetup']}/events/rss/#{feed_hash['meetup']}/" if feed_attribs[:feed_url].nil?
|
144
|
+
end
|
145
|
+
|
146
|
+
if feed_hash['googlegroups']
|
147
|
+
## link/url = https://groups.google.com/group/beerdb or
|
148
|
+
## https://groups.google.com/forum/#!forum/beerdb
|
149
|
+
## feed/feed_url = https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15
|
150
|
+
|
151
|
+
feed_attribs[:url] = "https://groups.google.com/group/#{feed_hash['googlegroups']}" if feed_attribs[:url].nil?
|
152
|
+
feed_attribs[:feed_url] = "https://groups.google.com/forum/feed//#{feed_hash['googlegroups']}/topics/atom.xml?num=15" if feed_attribs[:feed_url].nil?
|
153
|
+
end
|
154
|
+
|
155
|
+
if feed_hash['github'] && feed_hash['github'].index('/') ## e.g. jekyll/jekyll
|
156
|
+
## link/url = https://github.com/jekyll/jekyll
|
157
|
+
## feed/feed_url = https://github.com/jekyll/jekyll/commits/master.atom
|
158
|
+
|
159
|
+
feed_attribs[:url] = "https://github.com/#{feed_hash['github']}" if feed_attribs[:url].nil?
|
160
|
+
feed_attribs[:feed_url] = "https://github.com/#{feed_hash['github']}/commits/master.atom" if feed_attribs[:feed_url].nil?
|
161
|
+
end
|
162
|
+
|
163
|
+
if feed_hash['rubygems'] && feed_attribs[:url].nil? && feed_attribs[:feed_url].nil?
|
164
|
+
## link/url = http://rubygems.org/gems/jekyll
|
165
|
+
## feed/feed_url = http://rubygems.org/gems/jekyll/versions.atom
|
166
|
+
|
167
|
+
feed_attribs[:url] = "http://rubygems.org/gems/#{feed_hash['rubygems']}" if feed_attribs[:url].nil?
|
168
|
+
feed_attribs[:feed_url] = "http://rubygems.org/gems/#{feed_hash['rubygems']}/versions.atom" if feed_attribs[:feed_url].nil?
|
169
|
+
end
|
170
|
+
|
171
|
+
|
172
|
+
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
173
|
+
|
174
|
+
feed_rec = Feed.find_by_key( feed_key )
|
175
|
+
if feed_rec.nil?
|
176
|
+
feed_rec = Feed.new
|
177
|
+
feed_attribs[:key] = feed_key
|
178
|
+
|
179
|
+
## use object_id: feed.id and object_type: Feed
|
180
|
+
## change - model/table/schema!!!
|
181
|
+
## todo: add parent_action_id - why? why not?
|
182
|
+
Activity.create!( text: "new feed >#{feed_key}< - #{feed_attribs[:title]}" )
|
183
|
+
end
|
184
|
+
|
185
|
+
feed_rec.update_attributes!( feed_attribs )
|
186
|
+
|
187
|
+
# add subscription record
|
188
|
+
# note: subscriptions get cleaned out on update first (see above)
|
189
|
+
subscriptions.create!( feed_id: feed_rec.id )
|
190
|
+
end
|
191
|
+
|
192
|
+
end # method deep_update_from_hash!
|
193
|
+
|
25
194
|
end # class Site
|
26
195
|
|
27
196
|
|
28
197
|
end # module Model
|
29
198
|
end # module Pluto
|
199
|
+
|
data/lib/pluto/models/utils.rb
CHANGED
@@ -10,16 +10,16 @@ class ItemCursor
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def each
|
13
|
-
|
13
|
+
last_updated = Time.local( 1971, 1, 1 )
|
14
14
|
last_feed_id = -1 ## todo: use feed_key instead of id?? why? why not??
|
15
|
-
|
15
|
+
|
16
16
|
@items.each do |item|
|
17
17
|
|
18
|
-
|
18
|
+
item_updated = item.updated # cache updated value ref
|
19
19
|
|
20
|
-
if
|
21
|
-
|
22
|
-
|
20
|
+
if last_updated.year == item_updated.year &&
|
21
|
+
last_updated.month == item_updated.month &&
|
22
|
+
last_updated.day == item_updated.day
|
23
23
|
new_date = false
|
24
24
|
else
|
25
25
|
new_date = true
|
@@ -37,7 +37,7 @@ class ItemCursor
|
|
37
37
|
|
38
38
|
yield( item, new_date, new_feed )
|
39
39
|
|
40
|
-
|
40
|
+
last_updated = item.updated
|
41
41
|
last_feed_id = item.feed.id
|
42
42
|
end
|
43
43
|
end # method each
|
data/lib/pluto/schema.rb
CHANGED
@@ -35,6 +35,7 @@ class CreateDb < ActiveRecord::Migration
|
|
35
35
|
|
36
36
|
# note: do NOT store body content (that is, text) and md5 digest
|
37
37
|
# use git! and github! commit will be http_etag!!
|
38
|
+
t.string :md5 # md5 hash of body
|
38
39
|
|
39
40
|
|
40
41
|
#############
|
@@ -53,37 +54,35 @@ class CreateDb < ActiveRecord::Migration
|
|
53
54
|
create_table :feeds do |t|
|
54
55
|
t.string :key, null: false
|
55
56
|
t.string :encoding, null: false, default: 'utf8' # charset encoding; default to utf8
|
56
|
-
t.string :format # e.g. atom (1.0), rss 2.0,
|
57
|
-
|
58
|
-
t.string :title # user supplied title
|
59
|
-
t.string :auto_title # "fallback" - auto(fill) title from feed
|
60
|
-
|
61
|
-
t.string :title2 # user supplied title2
|
62
|
-
t.string :auto_title2 # "fallback" - auto(fill) title2 from feed e.g. subtitle (atom)
|
63
|
-
|
64
|
-
t.string :url # user supplied site url
|
65
|
-
t.string :auto_url # "fallback" - auto(fill) url from feed
|
57
|
+
t.string :format # e.g. atom (1.0), rss 2.0, etc.
|
66
58
|
|
59
|
+
t.string :title # user supplied title
|
60
|
+
t.string :url # user supplied site url
|
67
61
|
t.string :feed_url # user supplied feed url
|
62
|
+
|
63
|
+
t.string :auto_title # "fallback" - auto(fill) title from feed
|
64
|
+
t.string :auto_url # "fallback" - auto(fill) url from feed
|
68
65
|
t.string :auto_feed_url # "fallback" - auto discovery feed url from (site) url
|
69
66
|
|
70
|
-
t.text :summary # e.g. description (rss)
|
67
|
+
t.text :summary # e.g. description (rss), subtitle (atom)
|
68
|
+
## todo: add auto_summary - why? why not?
|
71
69
|
|
72
70
|
t.string :generator # feed generator (e.g. wordpress, etc.) from feed
|
73
|
-
|
74
|
-
t.datetime :
|
75
|
-
t.datetime :
|
76
|
-
t.datetime :touched # from feed updated(atom)
|
71
|
+
|
72
|
+
t.datetime :updated # from feed updated(atom) + lastBuildDate(rss)
|
73
|
+
t.datetime :published # from feed published(atom) + pubDate(rss) - note: published is basically an alias for created
|
77
74
|
|
78
75
|
|
79
76
|
### extras (move to array for custom fields or similar??)
|
80
|
-
t.string :author
|
81
|
-
t.string :email
|
82
|
-
t.string :avatar
|
77
|
+
t.string :author # author_name, owner_name
|
78
|
+
t.string :email # author_email, owner_email
|
79
|
+
t.string :avatar # gravatar or hackergotchi handle (optional)
|
80
|
+
t.string :location # e.g. Vienna > Austria, Bamberg > Germany etc. (optional)
|
83
81
|
|
84
|
-
t.string :github
|
85
|
-
t.string :
|
86
|
-
t.string :
|
82
|
+
t.string :github # github handle (optional)
|
83
|
+
t.string :rubygems # rubygems handle (optional)
|
84
|
+
t.string :twitter # twitter handle (optional)
|
85
|
+
t.string :meetup # meetup handle (optional)
|
87
86
|
|
88
87
|
|
89
88
|
### add class/kind field e.g.
|
@@ -100,7 +99,7 @@ class CreateDb < ActiveRecord::Migration
|
|
100
99
|
# todo: add generic filter list e.g. t.string :filters (comma,pipe or space separated method names?)
|
101
100
|
|
102
101
|
# -- our own (meta) fields
|
103
|
-
t.datetime :
|
102
|
+
t.datetime :items_last_updated # cache last (latest) updated for items - e.g. latest date from updated item
|
104
103
|
t.datetime :fetched # last fetched date via pluto
|
105
104
|
|
106
105
|
t.integer :http_code # last http status code e.g. 200,404,etc.
|
@@ -123,15 +122,15 @@ class CreateDb < ActiveRecord::Migration
|
|
123
122
|
|
124
123
|
## note: title may contain more than 255 chars!!
|
125
124
|
## e.g. Rails Girls blog has massive titles in feed
|
126
|
-
## cut-off/limit to 255
|
125
|
+
## cut-off/limit to 255 - why?? why not??
|
127
126
|
## also strip tags in titles - why? why not?? - see feed.title2/auto_title2
|
128
127
|
|
129
|
-
t.
|
128
|
+
t.text :title # todo: add some :null => false ??
|
130
129
|
t.text :summary # e.g. description (rss), summary (atom)
|
131
130
|
t.text :content
|
132
131
|
|
133
|
-
t.datetime :
|
134
|
-
t.datetime :
|
132
|
+
t.datetime :updated # from feed updated (atom) + pubDate(rss)
|
133
|
+
t.datetime :published # from feed published (atom) -- note: published is basically an alias for created
|
135
134
|
|
136
135
|
## todo: add :last_updated_at ?? (NOTE: updated_at already taken by auto-timestamps)
|
137
136
|
t.references :feed, null: false
|
data/lib/pluto/version.rb
CHANGED
data/test/data/ruby.ini
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
title = Planet Ruby
|
2
|
+
source = https://github.com/feedreader/pluto.models/raw/master/test/data/ruby.ini
|
3
|
+
|
4
|
+
[rubylang]
|
5
|
+
title = Ruby Lang News
|
6
|
+
link = http://www.ruby-lang.org/en/news
|
7
|
+
feed = http://www.ruby-lang.org/en/feeds/news.rss
|
8
|
+
|
9
|
+
[rubyonrails]
|
10
|
+
title = Ruby on Rails News
|
11
|
+
link = http://weblog.rubyonrails.org
|
12
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
13
|
+
|
14
|
+
[viennarb]
|
15
|
+
title = Vienna.rb News
|
16
|
+
link = http://vienna-rb.at
|
17
|
+
feed = http://vienna-rb.at/atom.xml
|
18
|
+
|
data/test/test_filter.rb
CHANGED
@@ -23,34 +23,37 @@ class TestFilter < MiniTest::Test
|
|
23
23
|
title: 'Test'
|
24
24
|
)
|
25
25
|
|
26
|
-
feed_data =
|
26
|
+
feed_data = FeedParser::Feed.new
|
27
27
|
feed_data.title = 'Test'
|
28
28
|
feed_data.items = []
|
29
29
|
|
30
|
-
item_data =
|
30
|
+
item_data = FeedParser::Item.new
|
31
31
|
item_data.title = 'Test #1'
|
32
32
|
item_data.summary = 'Test'
|
33
33
|
item_data.content = 'Test'
|
34
|
+
item_data.updated = Date.today
|
34
35
|
|
35
36
|
feed_data.items << item_data
|
36
37
|
|
37
|
-
item_data =
|
38
|
+
item_data = FeedParser::Item.new
|
38
39
|
item_data.title = 'Test #2'
|
39
40
|
item_data.summary = "Test\nTest\nTest1"
|
40
41
|
item_data.content = 'Test'
|
42
|
+
item_data.updated = Date.today
|
41
43
|
|
42
44
|
feed_data.items << item_data
|
43
45
|
|
44
46
|
|
45
|
-
item_data =
|
47
|
+
item_data = FeedParser::Item.new
|
46
48
|
item_data.title = 'Test #3'
|
47
49
|
item_data.summary = "Test\nTest\nTest"
|
48
50
|
item_data.content = 'Test\nTest\nGitHub Pages'
|
51
|
+
item_data.updated = Date.today
|
49
52
|
|
50
53
|
feed_data.items << item_data
|
51
54
|
|
52
|
-
feed1.
|
53
|
-
|
55
|
+
feed1.deep_update_from_struct!( feed_data ) ## check w/ includes
|
56
|
+
feed2.deep_update_from_struct!( feed_data ) ## check w/o includes
|
54
57
|
|
55
58
|
assert true ## if we get here it should work
|
56
59
|
end
|
data/test/test_helpers.rb
CHANGED
@@ -10,7 +10,13 @@ require 'helper'
|
|
10
10
|
|
11
11
|
class TestHelper < MiniTest::Test
|
12
12
|
|
13
|
-
|
13
|
+
def setup
|
14
|
+
Log.delete_all
|
15
|
+
Site.delete_all
|
16
|
+
Feed.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
Item.delete_all
|
19
|
+
end
|
14
20
|
|
15
21
|
def test_banner
|
16
22
|
puts Pluto.banner
|
data/test/test_site.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_site.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestSite < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Log.delete_all
|
15
|
+
Site.delete_all
|
16
|
+
Feed.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
Item.delete_all
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
def test_site_create
|
23
|
+
site_text = File.read( "#{Pluto.root}/test/data/ruby.ini")
|
24
|
+
site_config = INI.load( site_text )
|
25
|
+
## pp site_config
|
26
|
+
|
27
|
+
assert_equal 0, Site.count
|
28
|
+
assert_equal 0, Feed.count
|
29
|
+
|
30
|
+
Site.deep_create_or_update_from_hash!( 'ruby', site_config )
|
31
|
+
|
32
|
+
assert_equal 1, Site.count
|
33
|
+
assert_equal 3, Feed.count
|
34
|
+
|
35
|
+
ruby = Site.find_by_key!( 'ruby' )
|
36
|
+
assert_equal 'Planet Ruby', ruby.title
|
37
|
+
assert_equal 3, ruby.subscriptions.count
|
38
|
+
assert_equal 3, ruby.feeds.count
|
39
|
+
|
40
|
+
rubylang = Feed.find_by_key!( 'rubylang' )
|
41
|
+
assert_equal 'Ruby Lang News', rubylang.title
|
42
|
+
assert_equal 'http://www.ruby-lang.org/en/news', rubylang.url
|
43
|
+
assert_equal 'http://www.ruby-lang.org/en/feeds/news.rss', rubylang.feed_url
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_site_update
|
47
|
+
site_text = File.read( "#{Pluto.root}/test/data/ruby.ini")
|
48
|
+
site_config = INI.load( site_text )
|
49
|
+
## pp site_config
|
50
|
+
|
51
|
+
assert_equal 0, Site.count
|
52
|
+
assert_equal 0, Feed.count
|
53
|
+
|
54
|
+
## note: call twice (first time for create, second time for update)
|
55
|
+
Site.deep_create_or_update_from_hash!( 'ruby', site_config )
|
56
|
+
Site.deep_create_or_update_from_hash!( 'ruby', site_config )
|
57
|
+
|
58
|
+
assert_equal 1, Site.count
|
59
|
+
assert_equal 3, Feed.count
|
60
|
+
|
61
|
+
ruby = Site.find_by_key!( 'ruby' )
|
62
|
+
assert_equal 'Planet Ruby', ruby.title
|
63
|
+
assert_equal 3, ruby.subscriptions.count
|
64
|
+
assert_equal 3, ruby.feeds.count
|
65
|
+
|
66
|
+
rubylang = Feed.find_by_key!( 'rubylang' )
|
67
|
+
assert_equal 'Ruby Lang News', rubylang.title
|
68
|
+
assert_equal 'http://www.ruby-lang.org/en/news', rubylang.url
|
69
|
+
assert_equal 'http://www.ruby-lang.org/en/feeds/news.rss', rubylang.feed_url
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
end # class TestSite
|
74
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-models
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.2
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: props
|
@@ -39,33 +39,47 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.6.1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: feedparser
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 1.0.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 1.0.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: feedfilter
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.1.1
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.1.1
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: textutils
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - ">="
|
60
74
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
75
|
+
version: 1.0.1
|
62
76
|
type: :runtime
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
82
|
+
version: 1.0.1
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: activerecord
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -188,10 +202,12 @@ files:
|
|
188
202
|
- lib/pluto/models/utils.rb
|
189
203
|
- lib/pluto/schema.rb
|
190
204
|
- lib/pluto/version.rb
|
205
|
+
- test/data/ruby.ini
|
191
206
|
- test/helper.rb
|
192
207
|
- test/test_filter.rb
|
193
208
|
- test/test_helpers.rb
|
194
|
-
|
209
|
+
- test/test_site.rb
|
210
|
+
homepage: https://github.com/feedreader/pluto.models
|
195
211
|
licenses:
|
196
212
|
- Public Domain
|
197
213
|
metadata: {}
|
@@ -219,4 +235,5 @@ specification_version: 4
|
|
219
235
|
summary: pluto-models - planet schema 'n' models for easy (re)use
|
220
236
|
test_files:
|
221
237
|
- test/test_filter.rb
|
238
|
+
- test/test_site.rb
|
222
239
|
- test/test_helpers.rb
|