pluto-models 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/README.md +4 -3
- data/Rakefile +4 -3
- data/lib/pluto/models.rb +3 -41
- data/lib/pluto/models/feed.rb +54 -56
- data/lib/pluto/models/item.rb +20 -15
- data/lib/pluto/models/site.rb +171 -1
- data/lib/pluto/models/utils.rb +7 -7
- data/lib/pluto/schema.rb +25 -26
- data/lib/pluto/version.rb +2 -2
- data/test/data/ruby.ini +18 -0
- data/test/test_filter.rb +9 -6
- data/test/test_helpers.rb +7 -1
- data/test/test_site.rb +74 -0
- metadata +25 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2adea6bc1a5429eea27ded5314e603e339ff2595
|
4
|
+
data.tar.gz: 50f68d6d907924a99745c74a5ba71545cafcf746
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81b50daf9bfc77c04abedece3600bca9e9355f46f3e2df182ef6b22676fbdeed6529b6511142707cc3dc666ff82384ce86b71f24b04a421282f6f74357229844
|
7
|
+
data.tar.gz: cd4cd39e1a282a1ca242d9b8a743672bf37db241890485a520355310b8cdc5904c8e147ceae0d277b3163d8b74524468463fc921e50f8be63f5b5a2e4090ff5c
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# pluto-models gem - planet schema 'n' models for easy (re)use
|
2
2
|
|
3
|
-
* home :: [github.com/feedreader/pluto
|
4
|
-
* bugs :: [github.com/feedreader/pluto
|
3
|
+
* home :: [github.com/feedreader/pluto.models](https://github.com/feedreader/pluto.models)
|
4
|
+
* bugs :: [github.com/feedreader/pluto.models/issues](https://github.com/feedreader/pluto.models/issues)
|
5
5
|
* gem :: [rubygems.org/gems/pluto-models](https://rubygems.org/gems/pluto-models)
|
6
6
|
* rdoc :: [rubydoc.info/gems/pluto-models](http://rubydoc.info/gems/pluto-models)
|
7
7
|
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
@@ -85,7 +85,8 @@ end
|
|
85
85
|
## Real World Usage
|
86
86
|
|
87
87
|
- [`pluto`](https://github.com/feedreader/pluto) - planet generator command line tool using the pluto-models gem
|
88
|
-
- [`pluto.live`](https://github.com/feedreader/pluto.live) - sample planet site; sinatra web app
|
88
|
+
- [`pluto.live.starter`](https://github.com/feedreader/pluto.live.starter) - sample planet site; sinatra web app starter template in ruby using the pluto-models gem
|
89
|
+
- [`pluto.live`](https://github.com/feedreader/pluto.live) - sample planet site; rails web app in ruby using the pluto-models gem
|
89
90
|
|
90
91
|
|
91
92
|
|
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ Hoe.spec 'pluto-models' do
|
|
8
8
|
self.summary = "pluto-models - planet schema 'n' models for easy (re)use"
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/feedreader/pluto
|
11
|
+
self.urls = ['https://github.com/feedreader/pluto.models']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'feedreader@googlegroups.com'
|
@@ -20,8 +20,9 @@ Hoe.spec 'pluto-models' do
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['props', '>= 1.1.2'],
|
22
22
|
['logutils', '>= 0.6.1'],
|
23
|
-
['
|
24
|
-
['
|
23
|
+
['feedparser', '>= 1.0.0'],
|
24
|
+
['feedfilter', '>= 1.1.1'],
|
25
|
+
['textutils', '>= 1.0.1'],
|
25
26
|
['activerecord'],
|
26
27
|
['logutils-activerecord', '>= 0.2.0'],
|
27
28
|
['props-activerecord', '>= 0.1.0'],
|
data/lib/pluto/models.rb
CHANGED
@@ -19,8 +19,10 @@ require 'active_record'
|
|
19
19
|
|
20
20
|
require 'props' # manage settings/env
|
21
21
|
require 'logutils'
|
22
|
-
require 'feedutils'
|
23
22
|
require 'textutils'
|
23
|
+
require 'feedparser'
|
24
|
+
require 'feedfilter'
|
25
|
+
|
24
26
|
|
25
27
|
## add more activerecords addons/utils
|
26
28
|
require 'activerecord/utils' # add macros e.g. read_attr_w_fallbacks etc.
|
@@ -108,45 +110,5 @@ end # module Pluto
|
|
108
110
|
|
109
111
|
|
110
112
|
|
111
|
-
######
|
112
|
-
# todo - move to ext/array.rb or similar
|
113
|
-
|
114
|
-
class Array
|
115
|
-
|
116
|
-
## todo: check if there's already a builtin method for this
|
117
|
-
#
|
118
|
-
# note:
|
119
|
-
# in rails ary.in_groups(3) results in
|
120
|
-
# top-to-bottom, left-to-right.
|
121
|
-
# and not left-to-right first and than top-to-bottom.
|
122
|
-
#
|
123
|
-
# rename to in_groups_vertical(3) ???
|
124
|
-
|
125
|
-
def in_columns( cols ) # alias for convenience for chunks - needed? why? why not?
|
126
|
-
chunks( cols )
|
127
|
-
end
|
128
|
-
|
129
|
-
def chunks( number_of_chunks )
|
130
|
-
## NB: use chunks - columns might be in use by ActiveRecord!
|
131
|
-
###
|
132
|
-
# e.g.
|
133
|
-
# [1,2,3,4,5,6,7,8,9,10].columns(3)
|
134
|
-
# becomes:
|
135
|
-
# [[1,4,7,10],
|
136
|
-
# [2,5,8],
|
137
|
-
# [3,6,9]]
|
138
|
-
|
139
|
-
## check/todo: make a copy of the array first??
|
140
|
-
# for now reference to original items get added to columns
|
141
|
-
chunks = (1..number_of_chunks).collect { [] }
|
142
|
-
each_with_index do |item,index|
|
143
|
-
chunks[ index % number_of_chunks ] << item
|
144
|
-
end
|
145
|
-
chunks
|
146
|
-
end
|
147
|
-
|
148
|
-
end
|
149
|
-
|
150
|
-
|
151
113
|
# say hello
|
152
114
|
puts Pluto.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
|
data/lib/pluto/models/feed.rb
CHANGED
@@ -21,9 +21,9 @@ class Feed < ActiveRecord::Base
|
|
21
21
|
# note: order by first non-null datetime field
|
22
22
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
23
23
|
|
24
|
-
# note: if not
|
25
|
-
## order( "coalesce(published,
|
26
|
-
order( "coalesce(feeds.
|
24
|
+
# note: if not updated or published use hardcoded 1971-01-01 for now
|
25
|
+
## order( "coalesce(updated,published,'1971-01-01') desc" )
|
26
|
+
order( "coalesce(feeds.items_last_updated,'1971-01-01') desc" )
|
27
27
|
end
|
28
28
|
|
29
29
|
##################################
|
@@ -34,6 +34,7 @@ class Feed < ActiveRecord::Base
|
|
34
34
|
def name() title; end # alias for title
|
35
35
|
def description() summary; end # alias for summary
|
36
36
|
def desc() summary; end # alias(2) for summary
|
37
|
+
def subtitle() summary; end # alias(3) for summary
|
37
38
|
def link() url; end # alias for url
|
38
39
|
def feed() feed_url; end # alias for feed_url
|
39
40
|
|
@@ -46,28 +47,28 @@ class Feed < ActiveRecord::Base
|
|
46
47
|
|
47
48
|
def url?() read_attribute(:url).present?; end
|
48
49
|
def title?() read_attribute(:title).present?; end
|
49
|
-
def title2?() read_attribute(:title2).present?; end
|
50
50
|
def feed_url?() read_attribute(:feed_url).present?; end
|
51
51
|
|
52
52
|
def url() read_attribute_w_fallbacks( :url, :auto_url ); end
|
53
53
|
def title() read_attribute_w_fallbacks( :title, :auto_title ); end
|
54
|
-
def title2() read_attribute_w_fallbacks( :title2, :auto_title2 ); end
|
55
54
|
def feed_url() read_attribute_w_fallbacks( :feed_url, :auto_feed_url ); end
|
56
55
|
|
56
|
+
def summary?() read_attribute(:summary).present?; end
|
57
57
|
|
58
|
+
|
59
|
+
def updated?() read_attribute(:updated).present?; end
|
58
60
|
def published?() read_attribute(:published).present?; end
|
59
|
-
def touched?() read_attribute(:touched).present?; end
|
60
61
|
|
62
|
+
def updated
|
63
|
+
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
64
|
+
# db backed attribute
|
65
|
+
read_attribute_w_fallbacks( :updated, :published )
|
66
|
+
end
|
61
67
|
|
62
68
|
def published
|
63
69
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
64
70
|
# db backed attribute
|
65
|
-
|
66
|
-
read_attribute_w_fallbacks(
|
67
|
-
:published,
|
68
|
-
:touched, # try touched (aka updated (ATOM))
|
69
|
-
:built # try build (aka lastBuildDate (RSS))
|
70
|
-
)
|
71
|
+
read_attribute_w_fallbacks( :published, :updated )
|
71
72
|
end
|
72
73
|
|
73
74
|
|
@@ -75,44 +76,23 @@ class Feed < ActiveRecord::Base
|
|
75
76
|
def debug?() @debug || false; end
|
76
77
|
|
77
78
|
|
78
|
-
def
|
79
|
-
return false if text.blank? ## allow/guard against nil and empty string
|
79
|
+
def deep_update_from_struct!( data )
|
80
80
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
81
|
+
######
|
82
|
+
## check for filters (includes/excludes) if present
|
83
|
+
## for now just check for includes
|
84
|
+
##
|
85
|
+
if includes.present?
|
86
|
+
includesFilter = FeedFilter::IncludeFilters.new( includes )
|
87
|
+
else
|
88
|
+
includesFilter = nil
|
85
89
|
end
|
86
90
|
|
87
|
-
false # no term match found
|
88
|
-
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
def save_from_struct!( data )
|
93
|
-
|
94
|
-
update_from_struct!( data )
|
95
|
-
|
96
91
|
data.items.each do |item|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
##
|
102
|
-
if includes.present?
|
103
|
-
## split terms (allow comma,pipe) - do NOT use space; allows e.g. terms such as github pages
|
104
|
-
terms = includes.split( /\s*[,|]\s*/ )
|
105
|
-
## remove leading and trailing white spaces - check - still required when using \s* ??
|
106
|
-
terms = terms.map { |term| term.strip }
|
107
|
-
match = match_terms?( terms, item.title ) ||
|
108
|
-
match_terms?( terms, item.summary) ||
|
109
|
-
match_terms?( terms, item.content)
|
110
|
-
|
111
|
-
if match == false
|
112
|
-
puts "** SKIPPING | #{item.title}"
|
113
|
-
puts " no include terms match: #{terms.join('|')}"
|
114
|
-
next ## skip to next item
|
115
|
-
end
|
92
|
+
if includesFilter && includesFilter.match_item?( item ) == false
|
93
|
+
puts "** SKIPPING | #{item.title}"
|
94
|
+
puts " no include terms match: #{includes}"
|
95
|
+
next ## skip to next item
|
116
96
|
end
|
117
97
|
|
118
98
|
item_rec = Item.find_by_guid( item.guid )
|
@@ -123,17 +103,37 @@ class Feed < ActiveRecord::Base
|
|
123
103
|
## todo: check if any attribs changed
|
124
104
|
puts "UPDATE | #{item.title}"
|
125
105
|
end
|
126
|
-
|
106
|
+
|
127
107
|
item_rec.debug = debug? ? true : false # pass along debug flag
|
128
|
-
|
108
|
+
|
109
|
+
item_rec.feed_id = id # feed_rec.id - add feed_id fk_ref
|
110
|
+
item_rec.fetched = fetched # feed_rec.fetched
|
111
|
+
|
112
|
+
item_rec.update_from_struct!( item )
|
129
113
|
|
130
114
|
end # each item
|
131
|
-
|
115
|
+
|
116
|
+
|
117
|
+
# update cached value last published for item
|
118
|
+
## todo/check: force reload of items - why? why not??
|
119
|
+
last_item_rec = items.latest.limit(1).first # note limit(1) will return relation/array - use first to get first element or nil from ary
|
120
|
+
if last_item_rec.present?
|
121
|
+
if last_item_rec.updated?
|
122
|
+
self.items_last_updated = last_item_rec.updated
|
123
|
+
## save! ## note: will get save w/ update_from_struct! - why? why not??
|
124
|
+
else # try published
|
125
|
+
self.items_last_updated = last_item_rec.published
|
126
|
+
## save! ## note: will get save w/ update_from_struct! - why? why not??
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
update_from_struct!( data )
|
131
|
+
end # method deep_update_from_struct!
|
132
132
|
|
133
133
|
|
134
134
|
def update_from_struct!( data )
|
135
135
|
|
136
|
-
## todo: move to
|
136
|
+
## todo: move to FeedParser::Feed ??? why? why not??
|
137
137
|
if data.generator
|
138
138
|
generator_full = ''
|
139
139
|
generator_full << data.generator
|
@@ -145,9 +145,9 @@ class Feed < ActiveRecord::Base
|
|
145
145
|
|
146
146
|
##
|
147
147
|
# todo:
|
148
|
-
## strip all tags from
|
148
|
+
## strip all tags from summary (subtitle)
|
149
149
|
## limit to 255 chars
|
150
|
-
## e.g.
|
150
|
+
## e.g. summary (subtitle) such as this exist
|
151
151
|
## This is a low-traffic announce-only list for people interested
|
152
152
|
## in hearing news about Polymer (<a href="http://polymer-project.org">http://polymer-project.org</a>).
|
153
153
|
## The higher-traffic mailing list for all kinds of discussion is
|
@@ -156,16 +156,14 @@ class Feed < ActiveRecord::Base
|
|
156
156
|
|
157
157
|
feed_attribs = {
|
158
158
|
format: data.format,
|
159
|
+
updated: data.updated,
|
159
160
|
published: data.published,
|
160
|
-
touched: data.updated,
|
161
|
-
built: data.built,
|
162
161
|
summary: data.summary,
|
162
|
+
generator: generator_full,
|
163
163
|
### todo/fix: add/use
|
164
164
|
# auto_title: ???,
|
165
165
|
# auto_url: ???,
|
166
166
|
# auto_feed_url: ???,
|
167
|
-
auto_title2: data.title2 ? strip_tags(data.title2)[0...255] : data.title2, # limit to 255 chars; strip tags
|
168
|
-
generator: generator_full
|
169
167
|
}
|
170
168
|
|
171
169
|
if debug?
|
data/lib/pluto/models/item.rb
CHANGED
@@ -11,6 +11,7 @@ class Item < ActiveRecord::Base
|
|
11
11
|
## todo/fix:
|
12
12
|
## use a module ref or something; do NOT include all methods - why? why not?
|
13
13
|
include TextUtils::HypertextHelper ## e.g. lets us use strip_tags( ht )
|
14
|
+
include FeedFilter::AdsFilter ## e.g. lets us use strip_ads( ht )
|
14
15
|
|
15
16
|
|
16
17
|
##################################
|
@@ -25,41 +26,45 @@ class Item < ActiveRecord::Base
|
|
25
26
|
# note: order by first non-null datetime field
|
26
27
|
# coalesce - supported by sqlite (yes), postgres (yes)
|
27
28
|
|
28
|
-
# note: if not published
|
29
|
-
order( "coalesce(items.
|
29
|
+
# note: if not updated,published use hardcoded 1971-01-01 for now
|
30
|
+
order( "coalesce(items.updated,items.published,'1971-01-01') desc" )
|
30
31
|
end
|
31
32
|
|
32
|
-
def
|
33
|
+
def updated?() read_attribute(:updated).present?; end
|
34
|
+
def published?() read_attribute(:published).present?; end # note: published is basically an alias for created
|
33
35
|
|
34
|
-
def
|
36
|
+
def updated
|
35
37
|
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
36
38
|
# db backed attribute
|
37
|
-
|
38
|
-
read_attribute_w_fallbacks(
|
39
|
-
:published,
|
40
|
-
:touched # try touched (aka updated RSS/ATOM)
|
41
|
-
)
|
39
|
+
read_attribute_w_fallbacks( :updated, :published )
|
42
40
|
end
|
43
41
|
|
42
|
+
def published
|
43
|
+
## todo/fix: use a new name - do NOT squeeze convenience lookup into existing
|
44
|
+
# db backed attribute
|
45
|
+
read_attribute_w_fallbacks( :published, :updated )
|
46
|
+
end
|
44
47
|
|
45
48
|
|
46
49
|
def debug=(value) @debug = value; end
|
47
50
|
def debug?() @debug || false; end
|
48
51
|
|
49
|
-
|
52
|
+
|
53
|
+
def update_from_struct!( data )
|
50
54
|
## check: new item/record? not saved? add guid
|
51
55
|
# otherwise do not add guid - why? why not?
|
52
56
|
|
57
|
+
## note: for now also strip ads in summary
|
58
|
+
## fix/todo: summary (in the future) is supposed to be only plain vanilla text
|
59
|
+
|
53
60
|
item_attribs = {
|
54
61
|
guid: data.guid, # todo: only add for new records???
|
55
62
|
title: data.title ? strip_tags(data.title)[0...255] : data.title, ## limit to 255 chars; strip tags
|
56
63
|
url: data.url,
|
57
|
-
summary: data.summary,
|
58
|
-
content: data.content,
|
64
|
+
summary: data.summary.blank? ? data.summary : strip_ads( data.summary ).strip,
|
65
|
+
content: data.content.blank? ? data.content : strip_ads( data.content ).strip,
|
66
|
+
updated: data.updated,
|
59
67
|
published: data.published,
|
60
|
-
touched: data.updated,
|
61
|
-
feed_id: feed_rec.id, # add feed_id fk_ref
|
62
|
-
fetched: feed_rec.fetched
|
63
68
|
}
|
64
69
|
|
65
70
|
if debug?
|
data/lib/pluto/models/site.rb
CHANGED
@@ -13,7 +13,6 @@ class Site < ActiveRecord::Base
|
|
13
13
|
##################################
|
14
14
|
# attribute reader aliases
|
15
15
|
def name() title; end # alias for title
|
16
|
-
def fetched_at() fetched; end # - legacy attrib reader -- remove!!!
|
17
16
|
|
18
17
|
def owner_name() author; end # alias for author
|
19
18
|
def owner() author; end # alias(2) for author
|
@@ -22,8 +21,179 @@ class Site < ActiveRecord::Base
|
|
22
21
|
def owner_email() email; end # alias for email
|
23
22
|
def author_email() email; end # alias(2) for email
|
24
23
|
|
24
|
+
|
25
|
+
|
26
|
+
def self.deep_create_or_update_from_hash!( name, config, opts={} )
|
27
|
+
|
28
|
+
## note: allow (optional) config of site key too
|
29
|
+
site_key = config['key'] || config['slug']
|
30
|
+
if site_key.nil?
|
31
|
+
## if no key configured; use (file)name; remove -_ chars
|
32
|
+
## e.g. jekyll-meta becomes jekyllmeta etc.
|
33
|
+
site_key = name.downcase.gsub( /[\-_]/, '' )
|
34
|
+
end
|
35
|
+
|
36
|
+
site_rec = Site.find_by_key( site_key )
|
37
|
+
if site_rec.nil?
|
38
|
+
site_rec = Site.new
|
39
|
+
site_rec.key = site_key
|
40
|
+
end
|
41
|
+
|
42
|
+
site_rec.deep_update_from_hash!( config, opts )
|
43
|
+
site_rec
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
def deep_update_from_hash!( config, opts={} )
|
48
|
+
|
49
|
+
site_attribs = {
|
50
|
+
title: config['title'] || config['name'], # support either title or name
|
51
|
+
url: config['source'] || config['url'], # support source or url for source url for auto-update (optional)
|
52
|
+
author: config['author'] || config['owner'],
|
53
|
+
email: config['email'],
|
54
|
+
updated: Time.now, ## track last_update via pluto (w/ update_subscription_for fn)
|
55
|
+
}
|
56
|
+
|
57
|
+
## note: allow (optional) config of site key too
|
58
|
+
site_key = config['key'] || config['slug']
|
59
|
+
site_attribs[:key] = site_key if site_key
|
60
|
+
|
61
|
+
|
62
|
+
logger = LogUtils::Logger.root
|
63
|
+
logger.debug "site_attribs: #{site_attribs.inspect}"
|
64
|
+
|
65
|
+
if new_record?
|
66
|
+
## use object_id: site.id and object_type: Site
|
67
|
+
## change - model/table/schema!!!
|
68
|
+
Activity.create!( text: "new site >#{key}< - #{title}" )
|
69
|
+
end
|
70
|
+
|
71
|
+
update_attributes!( site_attribs )
|
72
|
+
|
73
|
+
|
74
|
+
# -- log update activity
|
75
|
+
## Activity.create!( text: "update subscriptions for site >#{key}<" )
|
76
|
+
|
77
|
+
#### todo/fix:
|
78
|
+
## double check - how to handle delete
|
79
|
+
## feeds might get referenced by other sites
|
80
|
+
## cannot just delete feeds; only safe to delete join table (subscriptions)
|
81
|
+
## check if feed "lingers" on with no reference (to site)???
|
82
|
+
|
83
|
+
# clean out subscriptions and add again
|
84
|
+
logger.debug "before site.subscriptions.delete_all - count: #{subscriptions.count}"
|
85
|
+
# note: use destroy_all NOT delete_all (delete_all tries by default only nullify)
|
86
|
+
subscriptions.destroy_all
|
87
|
+
logger.debug "after site.subscriptions.delete_all - count: #{subscriptions.count}"
|
88
|
+
|
89
|
+
|
90
|
+
config.each do |k, v|
|
91
|
+
|
92
|
+
## todo: downcase key - why ??? why not???
|
93
|
+
|
94
|
+
# skip "top-level" feed keys e.g. title, etc. or planet planet sections (e.g. planet,defaults)
|
95
|
+
next if ['key','slug',
|
96
|
+
'title','name','name2','title2','subtitle',
|
97
|
+
'source', 'url',
|
98
|
+
'include','includes','exclude','excludes',
|
99
|
+
'feeds',
|
100
|
+
'author', 'owner', 'email',
|
101
|
+
'planet','defaults'].include?( k )
|
102
|
+
|
103
|
+
### todo/check:
|
104
|
+
## check value - must be hash
|
105
|
+
# check if url or feed_url present
|
106
|
+
# that is, check for required props/key-value pairs
|
107
|
+
|
108
|
+
feed_key = k.to_s.dup
|
109
|
+
feed_hash = v
|
110
|
+
|
111
|
+
# todo/fix: use title from feed?
|
112
|
+
# e.g. fill up auto_title, auto_url, etc.
|
113
|
+
|
114
|
+
feed_attribs = {
|
115
|
+
feed_url: feed_hash[ 'feed' ] || feed_hash[ 'feed_url' ] || feed_hash[ 'xml_url' ],
|
116
|
+
url: feed_hash[ 'link' ] || feed_hash[ 'url' ] || feed_hash[ 'html_url' ],
|
117
|
+
title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
|
118
|
+
## note: title2 no longer supported; use summary or subtitle?
|
119
|
+
### title2: feed_hash[ 'title2' ] || feed_hash[ 'name2' ] || feed_hash[ 'subtitle'],
|
120
|
+
includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
|
121
|
+
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ],
|
122
|
+
author: feed_hash[ 'author' ] || feed_hash[ 'owner' ],
|
123
|
+
email: feed_hash[ 'email' ],
|
124
|
+
avatar: feed_hash[ 'avatar' ] || feed_hash[ 'face'],
|
125
|
+
location: feed_hash[ 'location' ],
|
126
|
+
github: feed_hash[ 'github' ],
|
127
|
+
twitter: feed_hash[ 'twitter' ],
|
128
|
+
rubygems: feed_hash[ 'rubygems' ],
|
129
|
+
meetup: feed_hash[ 'meetup' ], ### -- remove from schema - virtual attrib ?? - why? why not??
|
130
|
+
}
|
131
|
+
|
132
|
+
feed_attribs[:encoding] = feed_hash['encoding']||feed_hash['charset'] if feed_hash['encoding']||feed_hash['charset']
|
133
|
+
|
134
|
+
#####
|
135
|
+
##
|
136
|
+
# auto-fill; convenience helpers
|
137
|
+
|
138
|
+
if feed_hash['meetup']
|
139
|
+
## link/url = http://www.meetup.com/vienna-rb
|
140
|
+
## feed/feed_url = http://www.meetup.com/vienna-rb/events/rss/vienna.rb/
|
141
|
+
|
142
|
+
feed_attribs[:url] = "http://www.meetup.com/#{feed_hash['meetup']}" if feed_attribs[:url].nil?
|
143
|
+
feed_attribs[:feed_url] = "http://www.meetup.com/#{feed_hash['meetup']}/events/rss/#{feed_hash['meetup']}/" if feed_attribs[:feed_url].nil?
|
144
|
+
end
|
145
|
+
|
146
|
+
if feed_hash['googlegroups']
|
147
|
+
## link/url = https://groups.google.com/group/beerdb or
|
148
|
+
## https://groups.google.com/forum/#!forum/beerdb
|
149
|
+
## feed/feed_url = https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15
|
150
|
+
|
151
|
+
feed_attribs[:url] = "https://groups.google.com/group/#{feed_hash['googlegroups']}" if feed_attribs[:url].nil?
|
152
|
+
feed_attribs[:feed_url] = "https://groups.google.com/forum/feed//#{feed_hash['googlegroups']}/topics/atom.xml?num=15" if feed_attribs[:feed_url].nil?
|
153
|
+
end
|
154
|
+
|
155
|
+
if feed_hash['github'] && feed_hash['github'].index('/') ## e.g. jekyll/jekyll
|
156
|
+
## link/url = https://github.com/jekyll/jekyll
|
157
|
+
## feed/feed_url = https://github.com/jekyll/jekyll/commits/master.atom
|
158
|
+
|
159
|
+
feed_attribs[:url] = "https://github.com/#{feed_hash['github']}" if feed_attribs[:url].nil?
|
160
|
+
feed_attribs[:feed_url] = "https://github.com/#{feed_hash['github']}/commits/master.atom" if feed_attribs[:feed_url].nil?
|
161
|
+
end
|
162
|
+
|
163
|
+
if feed_hash['rubygems'] && feed_attribs[:url].nil? && feed_attribs[:feed_url].nil?
|
164
|
+
## link/url = http://rubygems.org/gems/jekyll
|
165
|
+
## feed/feed_url = http://rubygems.org/gems/jekyll/versions.atom
|
166
|
+
|
167
|
+
feed_attribs[:url] = "http://rubygems.org/gems/#{feed_hash['rubygems']}" if feed_attribs[:url].nil?
|
168
|
+
feed_attribs[:feed_url] = "http://rubygems.org/gems/#{feed_hash['rubygems']}/versions.atom" if feed_attribs[:feed_url].nil?
|
169
|
+
end
|
170
|
+
|
171
|
+
|
172
|
+
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
173
|
+
|
174
|
+
feed_rec = Feed.find_by_key( feed_key )
|
175
|
+
if feed_rec.nil?
|
176
|
+
feed_rec = Feed.new
|
177
|
+
feed_attribs[:key] = feed_key
|
178
|
+
|
179
|
+
## use object_id: feed.id and object_type: Feed
|
180
|
+
## change - model/table/schema!!!
|
181
|
+
## todo: add parent_action_id - why? why not?
|
182
|
+
Activity.create!( text: "new feed >#{feed_key}< - #{feed_attribs[:title]}" )
|
183
|
+
end
|
184
|
+
|
185
|
+
feed_rec.update_attributes!( feed_attribs )
|
186
|
+
|
187
|
+
# add subscription record
|
188
|
+
# note: subscriptions get cleaned out on update first (see above)
|
189
|
+
subscriptions.create!( feed_id: feed_rec.id )
|
190
|
+
end
|
191
|
+
|
192
|
+
end # method deep_update_from_hash!
|
193
|
+
|
25
194
|
end # class Site
|
26
195
|
|
27
196
|
|
28
197
|
end # module Model
|
29
198
|
end # module Pluto
|
199
|
+
|
data/lib/pluto/models/utils.rb
CHANGED
@@ -10,16 +10,16 @@ class ItemCursor
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def each
|
13
|
-
|
13
|
+
last_updated = Time.local( 1971, 1, 1 )
|
14
14
|
last_feed_id = -1 ## todo: use feed_key instead of id?? why? why not??
|
15
|
-
|
15
|
+
|
16
16
|
@items.each do |item|
|
17
17
|
|
18
|
-
|
18
|
+
item_updated = item.updated # cache updated value ref
|
19
19
|
|
20
|
-
if
|
21
|
-
|
22
|
-
|
20
|
+
if last_updated.year == item_updated.year &&
|
21
|
+
last_updated.month == item_updated.month &&
|
22
|
+
last_updated.day == item_updated.day
|
23
23
|
new_date = false
|
24
24
|
else
|
25
25
|
new_date = true
|
@@ -37,7 +37,7 @@ class ItemCursor
|
|
37
37
|
|
38
38
|
yield( item, new_date, new_feed )
|
39
39
|
|
40
|
-
|
40
|
+
last_updated = item.updated
|
41
41
|
last_feed_id = item.feed.id
|
42
42
|
end
|
43
43
|
end # method each
|
data/lib/pluto/schema.rb
CHANGED
@@ -35,6 +35,7 @@ class CreateDb < ActiveRecord::Migration
|
|
35
35
|
|
36
36
|
# note: do NOT store body content (that is, text) and md5 digest
|
37
37
|
# use git! and github! commit will be http_etag!!
|
38
|
+
t.string :md5 # md5 hash of body
|
38
39
|
|
39
40
|
|
40
41
|
#############
|
@@ -53,37 +54,35 @@ class CreateDb < ActiveRecord::Migration
|
|
53
54
|
create_table :feeds do |t|
|
54
55
|
t.string :key, null: false
|
55
56
|
t.string :encoding, null: false, default: 'utf8' # charset encoding; default to utf8
|
56
|
-
t.string :format # e.g. atom (1.0), rss 2.0,
|
57
|
-
|
58
|
-
t.string :title # user supplied title
|
59
|
-
t.string :auto_title # "fallback" - auto(fill) title from feed
|
60
|
-
|
61
|
-
t.string :title2 # user supplied title2
|
62
|
-
t.string :auto_title2 # "fallback" - auto(fill) title2 from feed e.g. subtitle (atom)
|
63
|
-
|
64
|
-
t.string :url # user supplied site url
|
65
|
-
t.string :auto_url # "fallback" - auto(fill) url from feed
|
57
|
+
t.string :format # e.g. atom (1.0), rss 2.0, etc.
|
66
58
|
|
59
|
+
t.string :title # user supplied title
|
60
|
+
t.string :url # user supplied site url
|
67
61
|
t.string :feed_url # user supplied feed url
|
62
|
+
|
63
|
+
t.string :auto_title # "fallback" - auto(fill) title from feed
|
64
|
+
t.string :auto_url # "fallback" - auto(fill) url from feed
|
68
65
|
t.string :auto_feed_url # "fallback" - auto discovery feed url from (site) url
|
69
66
|
|
70
|
-
t.text :summary # e.g. description (rss)
|
67
|
+
t.text :summary # e.g. description (rss), subtitle (atom)
|
68
|
+
## todo: add auto_summary - why? why not?
|
71
69
|
|
72
70
|
t.string :generator # feed generator (e.g. wordpress, etc.) from feed
|
73
|
-
|
74
|
-
t.datetime :
|
75
|
-
t.datetime :
|
76
|
-
t.datetime :touched # from feed updated(atom)
|
71
|
+
|
72
|
+
t.datetime :updated # from feed updated(atom) + lastBuildDate(rss)
|
73
|
+
t.datetime :published # from feed published(atom) + pubDate(rss) - note: published basically an alias for created
|
77
74
|
|
78
75
|
|
79
76
|
### extras (move to array for custom fields or similar??)
|
80
|
-
t.string :author
|
81
|
-
t.string :email
|
82
|
-
t.string :avatar
|
77
|
+
t.string :author # author_name, owner_name
|
78
|
+
t.string :email # author_email, owner_email
|
79
|
+
t.string :avatar # gravatar or hackergotchi handle (optional)
|
80
|
+
t.string :location # e.g. Vienna > Austria, Bamberg > Germany etc. (optional)
|
83
81
|
|
84
|
-
t.string :github
|
85
|
-
t.string :
|
86
|
-
t.string :
|
82
|
+
t.string :github # github handle (optional)
|
83
|
+
t.string :rubygems # rubygems handle (optional)
|
84
|
+
t.string :twitter # twitter handle (optional)
|
85
|
+
t.string :meetup # meetup handle (optional)
|
87
86
|
|
88
87
|
|
89
88
|
### add class/kind field e.g.
|
@@ -100,7 +99,7 @@ class CreateDb < ActiveRecord::Migration
|
|
100
99
|
# todo: add generic filter list e.g. t.string :filters (comma,pipe or space separated method names?)
|
101
100
|
|
102
101
|
# -- our own (meta) fields
|
103
|
-
t.datetime :
|
102
|
+
t.datetime :items_last_updated # cache last (latest) updated for items - e.g. latest date from updated item
|
104
103
|
t.datetime :fetched # last fetched date via pluto
|
105
104
|
|
106
105
|
t.integer :http_code # last http status code e.g. 200,404,etc.
|
@@ -123,15 +122,15 @@ class CreateDb < ActiveRecord::Migration
|
|
123
122
|
|
124
123
|
## note: title may contain more than 255 chars!!
|
125
124
|
## e.g. Rails Girls blog has massive titles in feed
|
126
|
-
## cut-off/limit to 255
|
125
|
+
## cut-off/limit to 255 - why?? why not??
|
127
126
|
## also strip tags in titles - why? why not?? - see feed.title2/auto_title2
|
128
127
|
|
129
|
-
t.
|
128
|
+
t.text :title # todo: add some :null => false ??
|
130
129
|
t.text :summary # e.g. description (rss), summary (atom)
|
131
130
|
t.text :content
|
132
131
|
|
133
|
-
t.datetime :
|
134
|
-
t.datetime :
|
132
|
+
t.datetime :updated # from feed updated (atom) + pubDate(rss)
|
133
|
+
t.datetime :published # from feed published (atom) -- note: published is basically an alias for created
|
135
134
|
|
136
135
|
## todo: add :last_updated_at ?? (NOTE: updated_at already take by auto-timestamps)
|
137
136
|
t.references :feed, null: false
|
data/lib/pluto/version.rb
CHANGED
data/test/data/ruby.ini
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
title = Planet Ruby
|
2
|
+
source = https://github.com/feedreader/pluto.models/raw/master/test/data/ruby.ini
|
3
|
+
|
4
|
+
[rubylang]
|
5
|
+
title = Ruby Lang News
|
6
|
+
link = http://www.ruby-lang.org/en/news
|
7
|
+
feed = http://www.ruby-lang.org/en/feeds/news.rss
|
8
|
+
|
9
|
+
[rubyonrails]
|
10
|
+
title = Ruby on Rails News
|
11
|
+
link = http://weblog.rubyonrails.org
|
12
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
13
|
+
|
14
|
+
[viennarb]
|
15
|
+
title = Vienna.rb News
|
16
|
+
link = http://vienna-rb.at
|
17
|
+
feed = http://vienna-rb.at/atom.xml
|
18
|
+
|
data/test/test_filter.rb
CHANGED
@@ -23,34 +23,37 @@ class TestFilter < MiniTest::Test
|
|
23
23
|
title: 'Test'
|
24
24
|
)
|
25
25
|
|
26
|
-
feed_data =
|
26
|
+
feed_data = FeedParser::Feed.new
|
27
27
|
feed_data.title = 'Test'
|
28
28
|
feed_data.items = []
|
29
29
|
|
30
|
-
item_data =
|
30
|
+
item_data = FeedParser::Item.new
|
31
31
|
item_data.title = 'Test #1'
|
32
32
|
item_data.summary = 'Test'
|
33
33
|
item_data.content = 'Test'
|
34
|
+
item_data.updated = Date.today
|
34
35
|
|
35
36
|
feed_data.items << item_data
|
36
37
|
|
37
|
-
item_data =
|
38
|
+
item_data = FeedParser::Item.new
|
38
39
|
item_data.title = 'Test #2'
|
39
40
|
item_data.summary = "Test\nTest\nTest1"
|
40
41
|
item_data.content = 'Test'
|
42
|
+
item_data.updated = Date.today
|
41
43
|
|
42
44
|
feed_data.items << item_data
|
43
45
|
|
44
46
|
|
45
|
-
item_data =
|
47
|
+
item_data = FeedParser::Item.new
|
46
48
|
item_data.title = 'Test #3'
|
47
49
|
item_data.summary = "Test\nTest\nTest"
|
48
50
|
item_data.content = 'Test\nTest\nGitHub Pages'
|
51
|
+
item_data.updated = Date.today
|
49
52
|
|
50
53
|
feed_data.items << item_data
|
51
54
|
|
52
|
-
feed1.
|
53
|
-
|
55
|
+
feed1.deep_update_from_struct!( feed_data ) ## check w/ includes
|
56
|
+
feed2.deep_update_from_struct!( feed_data ) ## check w/o includes
|
54
57
|
|
55
58
|
assert true ## if we get here it should work
|
56
59
|
end
|
data/test/test_helpers.rb
CHANGED
@@ -10,7 +10,13 @@ require 'helper'
|
|
10
10
|
|
11
11
|
class TestHelper < MiniTest::Test
|
12
12
|
|
13
|
-
|
13
|
+
def setup
|
14
|
+
Log.delete_all
|
15
|
+
Site.delete_all
|
16
|
+
Feed.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
Item.delete_all
|
19
|
+
end
|
14
20
|
|
15
21
|
def test_banner
|
16
22
|
puts Pluto.banner
|
data/test/test_site.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_site.rb
|
6
|
+
# or better
|
7
|
+
# rake test
|
8
|
+
|
9
|
+
require 'helper'
|
10
|
+
|
11
|
+
class TestSite < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Log.delete_all
|
15
|
+
Site.delete_all
|
16
|
+
Feed.delete_all
|
17
|
+
Subscription.delete_all
|
18
|
+
Item.delete_all
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
def test_site_create
|
23
|
+
site_text = File.read( "#{Pluto.root}/test/data/ruby.ini")
|
24
|
+
site_config = INI.load( site_text )
|
25
|
+
## pp site_config
|
26
|
+
|
27
|
+
assert_equal 0, Site.count
|
28
|
+
assert_equal 0, Feed.count
|
29
|
+
|
30
|
+
Site.deep_create_or_update_from_hash!( 'ruby', site_config )
|
31
|
+
|
32
|
+
assert_equal 1, Site.count
|
33
|
+
assert_equal 3, Feed.count
|
34
|
+
|
35
|
+
ruby = Site.find_by_key!( 'ruby' )
|
36
|
+
assert_equal 'Planet Ruby', ruby.title
|
37
|
+
assert_equal 3, ruby.subscriptions.count
|
38
|
+
assert_equal 3, ruby.feeds.count
|
39
|
+
|
40
|
+
rubylang = Feed.find_by_key!( 'rubylang' )
|
41
|
+
assert_equal 'Ruby Lang News', rubylang.title
|
42
|
+
assert_equal 'http://www.ruby-lang.org/en/news', rubylang.url
|
43
|
+
assert_equal 'http://www.ruby-lang.org/en/feeds/news.rss', rubylang.feed_url
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_site_update
|
47
|
+
site_text = File.read( "#{Pluto.root}/test/data/ruby.ini")
|
48
|
+
site_config = INI.load( site_text )
|
49
|
+
## pp site_config
|
50
|
+
|
51
|
+
assert_equal 0, Site.count
|
52
|
+
assert_equal 0, Feed.count
|
53
|
+
|
54
|
+
## note: call twice (first time for create, second time for update)
|
55
|
+
Site.deep_create_or_update_from_hash!( 'ruby', site_config )
|
56
|
+
Site.deep_create_or_update_from_hash!( 'ruby', site_config )
|
57
|
+
|
58
|
+
assert_equal 1, Site.count
|
59
|
+
assert_equal 3, Feed.count
|
60
|
+
|
61
|
+
ruby = Site.find_by_key!( 'ruby' )
|
62
|
+
assert_equal 'Planet Ruby', ruby.title
|
63
|
+
assert_equal 3, ruby.subscriptions.count
|
64
|
+
assert_equal 3, ruby.feeds.count
|
65
|
+
|
66
|
+
rubylang = Feed.find_by_key!( 'rubylang' )
|
67
|
+
assert_equal 'Ruby Lang News', rubylang.title
|
68
|
+
assert_equal 'http://www.ruby-lang.org/en/news', rubylang.url
|
69
|
+
assert_equal 'http://www.ruby-lang.org/en/feeds/news.rss', rubylang.feed_url
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
end # class TestSite
|
74
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-models
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.2
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: props
|
@@ -39,33 +39,47 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.6.1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: feedparser
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 1.0.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 1.0.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: feedfilter
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.1.1
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.1.1
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: textutils
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - ">="
|
60
74
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
75
|
+
version: 1.0.1
|
62
76
|
type: :runtime
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
82
|
+
version: 1.0.1
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: activerecord
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -188,10 +202,12 @@ files:
|
|
188
202
|
- lib/pluto/models/utils.rb
|
189
203
|
- lib/pluto/schema.rb
|
190
204
|
- lib/pluto/version.rb
|
205
|
+
- test/data/ruby.ini
|
191
206
|
- test/helper.rb
|
192
207
|
- test/test_filter.rb
|
193
208
|
- test/test_helpers.rb
|
194
|
-
|
209
|
+
- test/test_site.rb
|
210
|
+
homepage: https://github.com/feedreader/pluto.models
|
195
211
|
licenses:
|
196
212
|
- Public Domain
|
197
213
|
metadata: {}
|
@@ -219,4 +235,5 @@ specification_version: 4
|
|
219
235
|
summary: pluto-models - planet schema 'n' models for easy (re)use
|
220
236
|
test_files:
|
221
237
|
- test/test_filter.rb
|
238
|
+
- test/test_site.rb
|
222
239
|
- test/test_helpers.rb
|