feedutils 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +7 -0
- data/Rakefile +1 -1
- data/lib/feedutils.rb +1 -1
- data/lib/feedutils/builder/atom.rb +15 -26
- data/lib/feedutils/builder/rss.rb +10 -13
- data/lib/feedutils/feed.rb +6 -18
- data/lib/feedutils/item.rb +6 -14
- data/lib/feedutils/parser.rb +3 -1
- data/lib/feedutils/version.rb +1 -1
- data/test/test_atom.rb +16 -3
- data/test/test_rss.rb +14 -4
- metadata +9 -9
data/README.md
CHANGED
@@ -6,6 +6,8 @@ feedutils gems - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
|
6
6
|
* bugs :: [github.com/rubylibs/feedutils/issues](https://github.com/rubylibs/feedutils/issues)
|
7
7
|
* gem :: [rubygems.org/gems/feedutils](https://rubygems.org/gems/feedutils)
|
8
8
|
* rdoc :: [rubydoc.info/gems/feedutils](http://rubydoc.info/gems/feedutils)
|
9
|
+
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
10
|
+
|
9
11
|
|
10
12
|
## Usage
|
11
13
|
|
@@ -13,8 +15,13 @@ TBD
|
|
13
15
|
|
14
16
|
## Alternatives
|
15
17
|
|
18
|
+
- [`syndication`](http://syndication.rubyforge.org) [(Source)](https://github.com/lpar/syndication) - by Mathew (aka lpar); RSS 1.0, 2.0, Atom, and understands namespaces; optional support for Dublin Core, iTunes/podcast feeds, and RSS 1.0 Syndication and Content modules
|
19
|
+
- [`simple-rss`](http://rubyforge.org/projects/simple-rss)
|
20
|
+
- [`feedtools`](http://rubyforge.org/projects/feedtools)
|
21
|
+
|
16
22
|
TBD
|
17
23
|
|
24
|
+
|
18
25
|
## License
|
19
26
|
|
20
27
|
The `feedutils` scripts are dedicated to the public domain.
|
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'feedutils' do
|
|
11
11
|
self.urls = ['https://github.com/rubylibs/feedutils']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
|
-
self.email = '
|
14
|
+
self.email = 'feedreader@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
17
|
self.readme_file = 'README.md'
|
data/lib/feedutils.rb
CHANGED
@@ -23,7 +23,7 @@ require 'feedutils/parser'
|
|
23
23
|
module FeedUtils
|
24
24
|
|
25
25
|
def self.banner
|
26
|
-
"feedutils
|
26
|
+
"feedutils/#{VERSION} (with stdlib rss/#{RSS::VERSION}) on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
27
27
|
end
|
28
28
|
|
29
29
|
=begin
|
@@ -21,10 +21,11 @@ class AtomFeedBuilder
|
|
21
21
|
|
22
22
|
def build_feed( atom_feed )
|
23
23
|
feed = Feed.new
|
24
|
-
feed.object = atom_feed
|
24
|
+
## feed.object = atom_feed # not used for now
|
25
25
|
feed.format = 'atom'
|
26
26
|
|
27
27
|
feed.title = atom_feed.title.content
|
28
|
+
logger.debug " atom | title.content >#{atom_feed.title.content}< : #{atom_feed.title.content.class.name}"
|
28
29
|
|
29
30
|
if atom_feed.updated
|
30
31
|
# NOTE: empty updated.content e.g. used by google groups feed
|
@@ -39,6 +40,12 @@ class AtomFeedBuilder
|
|
39
40
|
if atom_feed.generator
|
40
41
|
feed.generator = atom_feed.generator.content
|
41
42
|
logger.debug " atom | generator.content >#{atom_feed.generator.content}< : #{atom_feed.generator.content.class.name}"
|
43
|
+
|
44
|
+
# pp atom_feed.generator
|
45
|
+
feed.generator_version = atom_feed.generator.version
|
46
|
+
feed.generator_uri = atom_feed.generator.uri
|
47
|
+
logger.debug " atom | generator.version >#{atom_feed.generator.version}< : #{atom_feed.generator.version.class.name}"
|
48
|
+
logger.debug " atom | generator.uri >#{atom_feed.generator.uri}< : #{atom_feed.generator.uri.class.name}"
|
42
49
|
end
|
43
50
|
|
44
51
|
if atom_feed.subtitle
|
@@ -58,7 +65,7 @@ class AtomFeedBuilder
|
|
58
65
|
|
59
66
|
def build_feed_item( atom_item )
|
60
67
|
item = Item.new # Item.new
|
61
|
-
item.object = atom_item
|
68
|
+
## item.object = atom_item # not used for now
|
62
69
|
|
63
70
|
item.title = atom_item.title.content
|
64
71
|
item.url = atom_item.link.href
|
@@ -68,36 +75,26 @@ class AtomFeedBuilder
|
|
68
75
|
|
69
76
|
|
70
77
|
if atom_item.updated
|
71
|
-
## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
|
72
|
-
|
73
|
-
item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
|
74
|
-
|
75
78
|
## change time to utc if present? why? why not?
|
79
|
+
# -- .utc.strftime( "%Y-%m-%d %H:%M" )
|
80
|
+
|
81
|
+
## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
|
76
82
|
|
83
|
+
item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime
|
77
84
|
logger.debug " atom | item.updated.content >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
|
78
85
|
end
|
79
|
-
|
86
|
+
|
80
87
|
if atom_item.published
|
81
88
|
## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
|
82
89
|
|
83
|
-
item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime
|
90
|
+
item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime
|
84
91
|
logger.debug " atom | item.published.content >#{atom_item.published.content}< : #{atom_item.published.content.class.name}"
|
85
92
|
end
|
86
93
|
|
87
|
-
# - todo/check: does it exist in atom format?
|
88
|
-
# item.published = item.updated # fix: check if publshed set
|
89
94
|
|
90
95
|
item.guid = atom_item.id.content
|
91
|
-
|
92
96
|
logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
|
93
97
|
|
94
|
-
# todo: move logic to updater or something
|
95
|
-
# - not part of normalize
|
96
|
-
|
97
|
-
|
98
|
-
## fix/todo:
|
99
|
-
# also save/include full content in content
|
100
|
-
|
101
98
|
if atom_item.content
|
102
99
|
item.content = atom_item.content.content
|
103
100
|
end
|
@@ -106,14 +103,6 @@ class AtomFeedBuilder
|
|
106
103
|
item.summary = atom_item.summary.content
|
107
104
|
end
|
108
105
|
|
109
|
-
# let client deal w/ missing summary - move to attic - delete
|
110
|
-
# text = atom_item.content.content
|
111
|
-
# ## strip all html tags
|
112
|
-
# text = text.gsub( /<[^>]+>/, '' )
|
113
|
-
# text = text[ 0..400 ] # get first 400 chars
|
114
|
-
# ## todo: check for length if > 400 add ... at the end???
|
115
|
-
# item.summary = text
|
116
|
-
|
117
106
|
item
|
118
107
|
end # method build_feed_item
|
119
108
|
|
@@ -24,7 +24,7 @@ class RssFeedBuilder
|
|
24
24
|
|
25
25
|
def build_feed( rss_feed )
|
26
26
|
feed = Feed.new
|
27
|
-
feed.object = rss_feed
|
27
|
+
## feed.object = rss_feed # not used for now
|
28
28
|
feed.format = "rss #{rss_feed.rss_version}"
|
29
29
|
|
30
30
|
feed.title = rss_feed.channel.title # required
|
@@ -65,11 +65,14 @@ class RssFeedBuilder
|
|
65
65
|
def build_feed_item( rss_item )
|
66
66
|
|
67
67
|
item = Item.new
|
68
|
-
item.object = rss_item
|
68
|
+
## item.object = rss_item # not used for now
|
69
69
|
|
70
70
|
item.title = rss_item.title
|
71
71
|
item.url = rss_item.link
|
72
72
|
|
73
|
+
logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
|
74
|
+
logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
|
75
|
+
|
73
76
|
## todo:
|
74
77
|
## check if feedburner:origLink present - if yes, use it for url/link
|
75
78
|
## example: use
|
@@ -80,8 +83,10 @@ class RssFeedBuilder
|
|
80
83
|
|
81
84
|
item.summary = rss_item.description
|
82
85
|
|
83
|
-
|
84
|
-
|
86
|
+
# check for <content:encoded>
|
87
|
+
# -- using RSS 1.0 content module in RSS 2.0
|
88
|
+
item.content = rss_item.content_encoded
|
89
|
+
logger.debug " rss | item.content_encoded[0..40]: >#{rss_item.content_encoded ? rss_item.content_encoded[0..40] : ''}< : #{rss_item.content_encoded.class.name}"
|
85
90
|
|
86
91
|
# NOTE:
|
87
92
|
# All date-times in RSS conform
|
@@ -94,15 +99,7 @@ class RssFeedBuilder
|
|
94
99
|
item.published = rss_item.pubDate.nil? ? nil : rss_item.pubDate.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
|
95
100
|
|
96
101
|
logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
|
97
|
-
|
98
|
-
## fix/todo: add
|
99
|
-
## check for <content:encoded>
|
100
|
-
## full content (example use e.g. in sitepoint/ruby/feed/)
|
101
|
-
# content: item.content_encoded,
|
102
|
-
|
103
|
-
# if item.content_encoded.nil?
|
104
|
-
# puts " using description for content"
|
105
|
-
# end
|
102
|
+
|
106
103
|
|
107
104
|
## fix/todo: check if rss_item.guid present? !!!!
|
108
105
|
##
|
data/lib/feedutils/feed.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module FeedUtils
|
2
2
|
|
3
3
|
class Feed
|
4
|
-
attr_accessor :object
|
4
|
+
### attr_accessor :object # not used for now
|
5
5
|
|
6
6
|
attr_accessor :format # e.g. atom|rss 2.0|etc.
|
7
7
|
attr_accessor :title
|
@@ -11,29 +11,15 @@ class Feed
|
|
11
11
|
attr_accessor :items
|
12
12
|
|
13
13
|
def summary?() @summary.nil? == false; end
|
14
|
-
#
|
15
|
-
def summary() summary? ? @summary : @title2; end
|
16
|
-
attr_writer :summary # e.g. description (rss)
|
17
|
-
|
14
|
+
attr_accessor :summary # e.g. description (rss)
|
18
15
|
attr_accessor :summary_type # e.g. text|html|html-escaped
|
19
16
|
|
20
17
|
def title2?() @title2.nil? == false; end
|
21
18
|
attr_accessor :title2 # e.g. subtitle (atom)
|
22
|
-
|
23
19
|
attr_accessor :title2_type # e.g. text|html|html-escaped
|
24
20
|
|
25
21
|
def published?() @published.nil? == false; end
|
26
|
-
|
27
|
-
def published
|
28
|
-
if published?
|
29
|
-
@published
|
30
|
-
elsif updated?
|
31
|
-
@updated
|
32
|
-
else
|
33
|
-
@built
|
34
|
-
end
|
35
|
-
end
|
36
|
-
attr_writer :published
|
22
|
+
attr_accessor :published
|
37
23
|
|
38
24
|
def updated?() @updated.nil? == false; end
|
39
25
|
attr_accessor :updated
|
@@ -42,11 +28,13 @@ class Feed
|
|
42
28
|
attr_accessor :built
|
43
29
|
|
44
30
|
attr_accessor :generator
|
45
|
-
|
31
|
+
attr_accessor :generator_version # e.g. @version (atom)
|
32
|
+
attr_accessor :generator_uri # e.g. @uri (atom) - use alias url/link ???
|
46
33
|
|
47
34
|
## fix:
|
48
35
|
# add pretty printer/inspect (exclude object)
|
49
36
|
|
37
|
+
|
50
38
|
end # class Feed
|
51
39
|
|
52
40
|
end # module FeedUtils
|
data/lib/feedutils/item.rb
CHANGED
@@ -1,35 +1,27 @@
|
|
1
1
|
module FeedUtils
|
2
2
|
|
3
3
|
class Item
|
4
|
-
attr_accessor :object # orginal object (e.g RSS item or ATOM entry etc.)
|
4
|
+
## attr_accessor :object # not used for now -- original object (e.g. RSS item or ATOM entry etc.)
|
5
5
|
|
6
6
|
attr_accessor :title
|
7
7
|
attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
|
8
8
|
attr_accessor :url # todo: rename to link (use alias) ??
|
9
9
|
|
10
10
|
|
11
|
-
# no content? try/return summary
|
12
|
-
def content() content? ? @content : @summary; end
|
13
11
|
def content?() @content.nil? == false; end
|
14
|
-
|
12
|
+
attr_accessor :content
|
13
|
+
attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
|
15
14
|
|
16
|
-
attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
|
17
|
-
|
18
|
-
# no summary? try/return content
|
19
|
-
def summary() summary? ? @summary : @content; end
|
20
15
|
def summary?() @summary.nil? == false; end
|
21
|
-
|
22
|
-
|
23
|
-
attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
|
16
|
+
attr_accessor :summary
|
17
|
+
attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
|
24
18
|
|
25
19
|
## todo: add summary (alias description) ???
|
26
20
|
## todo: add author/authors
|
27
21
|
## todo: add category/categories
|
28
22
|
|
29
|
-
# no published date? try/return updated
|
30
|
-
def published() published? ? @published : @updated; end
|
31
23
|
def published?() @published.nil? == false; end
|
32
|
-
|
24
|
+
attr_accessor :published
|
33
25
|
|
34
26
|
def updated?() @updated.nil? == false; end
|
35
27
|
attr_accessor :updated
|
data/lib/feedutils/parser.rb
CHANGED
data/lib/feedutils/version.rb
CHANGED
data/test/test_atom.rb
CHANGED
@@ -10,17 +10,30 @@ class TestAtom < MiniTest::Unit::TestCase
|
|
10
10
|
|
11
11
|
def test_rubyonrails
|
12
12
|
feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
|
13
|
-
|
13
|
+
|
14
|
+
assert_equal 'atom', feed.format
|
14
15
|
end
|
15
16
|
|
16
17
|
def test_railstutorial
|
17
18
|
feed = parse_feed( 'http://feeds.feedburner.com/railstutorial?format=xml' )
|
18
|
-
|
19
|
+
|
20
|
+
assert_equal 'atom', feed.format
|
19
21
|
end
|
20
22
|
|
21
23
|
def test_googlegroup
|
22
24
|
feed = parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' )
|
23
|
-
|
25
|
+
|
26
|
+
assert_equal 'atom', feed.format
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
def test_headius
|
31
|
+
feed = parse_feed( 'http://blog.headius.com/feeds/posts/default' )
|
32
|
+
|
33
|
+
assert_equal 'atom', feed.format
|
34
|
+
assert_equal 'Blogger', feed.generator
|
35
|
+
assert_equal 'Headius', feed.title
|
36
|
+
assert_equal 'Helping the JVM Into the 21st Century', feed.title2 # aka subtitle in atom
|
24
37
|
end
|
25
38
|
|
26
39
|
end
|
data/test/test_rss.rb
CHANGED
@@ -11,18 +11,28 @@ class TestRss < MiniTest::Unit::TestCase
|
|
11
11
|
|
12
12
|
def test_rubyflow
|
13
13
|
feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
|
14
|
-
|
14
|
+
|
15
|
+
assert_equal 'rss 2.0', feed.format
|
15
16
|
end
|
16
17
|
|
17
18
|
def test_sitepointruby
|
18
19
|
feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
|
19
|
-
|
20
|
+
|
21
|
+
assert_equal 'rss 2.0', feed.format
|
20
22
|
end
|
21
23
|
|
22
24
|
def test_lambdatheultimate
|
23
25
|
## check - has no item.guid - will use item.link for guid
|
24
26
|
feed = parse_feed( 'http://lambda-the-ultimate.org/rss.xml' )
|
25
|
-
|
27
|
+
|
28
|
+
assert_equal 'rss 2.0', feed.format
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_rubymine
|
32
|
+
# includes item/content:encoded
|
33
|
+
feed = parse_feed( 'http://feeds.feedburner.com/jetbrains_rubymine?format=xml' )
|
34
|
+
|
35
|
+
assert_equal 'rss 2.0', feed.format
|
26
36
|
end
|
27
37
|
|
28
|
-
end
|
38
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-09
|
12
|
+
date: 2013-10-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &74494180 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *74494180
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &74493890 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *74493890
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &74493660 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,9 +43,9 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *74493660
|
47
47
|
description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
48
|
-
email:
|
48
|
+
email: feedreader@googlegroups.com
|
49
49
|
executables: []
|
50
50
|
extensions: []
|
51
51
|
extra_rdoc_files:
|