feedutils 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -0
- data/Rakefile +1 -1
- data/lib/feedutils.rb +1 -1
- data/lib/feedutils/builder/atom.rb +15 -26
- data/lib/feedutils/builder/rss.rb +10 -13
- data/lib/feedutils/feed.rb +6 -18
- data/lib/feedutils/item.rb +6 -14
- data/lib/feedutils/parser.rb +3 -1
- data/lib/feedutils/version.rb +1 -1
- data/test/test_atom.rb +16 -3
- data/test/test_rss.rb +14 -4
- metadata +9 -9
data/README.md
CHANGED
@@ -6,6 +6,8 @@ feedutils gems - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
|
6
6
|
* bugs :: [github.com/rubylibs/feedutils/issues](https://github.com/rubylibs/feedutils/issues)
|
7
7
|
* gem :: [rubygems.org/gems/feedutils](https://rubygems.org/gems/feedutils)
|
8
8
|
* rdoc :: [rubydoc.info/gems/feedutils](http://rubydoc.info/gems/feedutils)
|
9
|
+
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
10
|
+
|
9
11
|
|
10
12
|
## Usage
|
11
13
|
|
@@ -13,8 +15,13 @@ TBD
|
|
13
15
|
|
14
16
|
## Alternatives
|
15
17
|
|
18
|
+
- [`syndication`](http://syndication.rubyforge.org) [(Source)](https://github.com/lpar/syndication) - by Mathew (aka lpar); RSS 1.0, 2.0, Atom, and understands namespaces; optional support for Dublin Core, iTunes/podcast feeds, and RSS 1.0 Syndication and Content modules
|
19
|
+
- [`simple-rss`](http://rubyforge.org/projects/simple-rss)
|
20
|
+
- [`feedtools`](http://rubyforge.org/projects/feedtools)
|
21
|
+
|
16
22
|
TBD
|
17
23
|
|
24
|
+
|
18
25
|
## License
|
19
26
|
|
20
27
|
The `feedutils` scripts are dedicated to the public domain.
|
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'feedutils' do
|
|
11
11
|
self.urls = ['https://github.com/rubylibs/feedutils']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
|
-
self.email = '
|
14
|
+
self.email = 'feedreader@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
17
|
self.readme_file = 'README.md'
|
data/lib/feedutils.rb
CHANGED
@@ -23,7 +23,7 @@ require 'feedutils/parser'
|
|
23
23
|
module FeedUtils
|
24
24
|
|
25
25
|
def self.banner
|
26
|
-
"feedutils
|
26
|
+
"feedutils/#{VERSION} (with stdlib rss/#{RSS::VERSION}) on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
27
27
|
end
|
28
28
|
|
29
29
|
=begin
|
@@ -21,10 +21,11 @@ class AtomFeedBuilder
|
|
21
21
|
|
22
22
|
def build_feed( atom_feed )
|
23
23
|
feed = Feed.new
|
24
|
-
feed.object = atom_feed
|
24
|
+
## feed.object = atom_feed # not used for now
|
25
25
|
feed.format = 'atom'
|
26
26
|
|
27
27
|
feed.title = atom_feed.title.content
|
28
|
+
logger.debug " atom | title.content >#{atom_feed.title.content}< : #{atom_feed.title.content.class.name}"
|
28
29
|
|
29
30
|
if atom_feed.updated
|
30
31
|
# NOTE: empty updated.content e.g. used by google groups feed
|
@@ -39,6 +40,12 @@ class AtomFeedBuilder
|
|
39
40
|
if atom_feed.generator
|
40
41
|
feed.generator = atom_feed.generator.content
|
41
42
|
logger.debug " atom | generator.content >#{atom_feed.generator.content}< : #{atom_feed.generator.content.class.name}"
|
43
|
+
|
44
|
+
# pp atom_feed.generator
|
45
|
+
feed.generator_version = atom_feed.generator.version
|
46
|
+
feed.generator_uri = atom_feed.generator.uri
|
47
|
+
logger.debug " atom | generator.version >#{atom_feed.generator.version}< : #{atom_feed.generator.version.class.name}"
|
48
|
+
logger.debug " atom | generator.uri >#{atom_feed.generator.uri}< : #{atom_feed.generator.uri.class.name}"
|
42
49
|
end
|
43
50
|
|
44
51
|
if atom_feed.subtitle
|
@@ -58,7 +65,7 @@ class AtomFeedBuilder
|
|
58
65
|
|
59
66
|
def build_feed_item( atom_item )
|
60
67
|
item = Item.new # Item.new
|
61
|
-
item.object = atom_item
|
68
|
+
## item.object = atom_item # not used for now
|
62
69
|
|
63
70
|
item.title = atom_item.title.content
|
64
71
|
item.url = atom_item.link.href
|
@@ -68,36 +75,26 @@ class AtomFeedBuilder
|
|
68
75
|
|
69
76
|
|
70
77
|
if atom_item.updated
|
71
|
-
## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
|
72
|
-
|
73
|
-
item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
|
74
|
-
|
75
78
|
## change time to utc if present? why? why not?
|
79
|
+
# -- .utc.strftime( "%Y-%m-%d %H:%M" )
|
80
|
+
|
81
|
+
## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
|
76
82
|
|
83
|
+
item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime
|
77
84
|
logger.debug " atom | item.updated.content >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
|
78
85
|
end
|
79
|
-
|
86
|
+
|
80
87
|
if atom_item.published
|
81
88
|
## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
|
82
89
|
|
83
|
-
item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime
|
90
|
+
item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime
|
84
91
|
logger.debug " atom | item.published.content >#{atom_item.published.content}< : #{atom_item.published.content.class.name}"
|
85
92
|
end
|
86
93
|
|
87
|
-
# - todo/check: does it exist in atom format?
|
88
|
-
# item.published = item.updated # fix: check if publshed set
|
89
94
|
|
90
95
|
item.guid = atom_item.id.content
|
91
|
-
|
92
96
|
logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
|
93
97
|
|
94
|
-
# todo: move logic to updater or something
|
95
|
-
# - not part of normalize
|
96
|
-
|
97
|
-
|
98
|
-
## fix/todo:
|
99
|
-
# also save/include full content in content
|
100
|
-
|
101
98
|
if atom_item.content
|
102
99
|
item.content = atom_item.content.content
|
103
100
|
end
|
@@ -106,14 +103,6 @@ class AtomFeedBuilder
|
|
106
103
|
item.summary = atom_item.summary.content
|
107
104
|
end
|
108
105
|
|
109
|
-
# let client deal w/ missing summary - move to attic - delete
|
110
|
-
# text = atom_item.content.content
|
111
|
-
# ## strip all html tags
|
112
|
-
# text = text.gsub( /<[^>]+>/, '' )
|
113
|
-
# text = text[ 0..400 ] # get first 400 chars
|
114
|
-
# ## todo: check for length if > 400 add ... at the end???
|
115
|
-
# item.summary = text
|
116
|
-
|
117
106
|
item
|
118
107
|
end # method build_feed_item
|
119
108
|
|
@@ -24,7 +24,7 @@ class RssFeedBuilder
|
|
24
24
|
|
25
25
|
def build_feed( rss_feed )
|
26
26
|
feed = Feed.new
|
27
|
-
feed.object = rss_feed
|
27
|
+
## feed.object = rss_feed # not used for now
|
28
28
|
feed.format = "rss #{rss_feed.rss_version}"
|
29
29
|
|
30
30
|
feed.title = rss_feed.channel.title # required
|
@@ -65,11 +65,14 @@ class RssFeedBuilder
|
|
65
65
|
def build_feed_item( rss_item )
|
66
66
|
|
67
67
|
item = Item.new
|
68
|
-
item.object = rss_item
|
68
|
+
## item.object = rss_item # not used for now
|
69
69
|
|
70
70
|
item.title = rss_item.title
|
71
71
|
item.url = rss_item.link
|
72
72
|
|
73
|
+
logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
|
74
|
+
logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
|
75
|
+
|
73
76
|
## todo:
|
74
77
|
## check if feedburner:origLink present - if yes, use it for url/link
|
75
78
|
## example: use
|
@@ -80,8 +83,10 @@ class RssFeedBuilder
|
|
80
83
|
|
81
84
|
item.summary = rss_item.description
|
82
85
|
|
83
|
-
|
84
|
-
|
86
|
+
# check for <content:encoded>
|
87
|
+
# -- using RSS 1.0 content module in RSS 2.0
|
88
|
+
item.content = rss_item.content_encoded
|
89
|
+
logger.debug " rss | item.content_encoded[0..40]: >#{rss_item.content_encoded ? rss_item.content_encoded[0..40] : ''}< : #{rss_item.content_encoded.class.name}"
|
85
90
|
|
86
91
|
# NOTE:
|
87
92
|
# All date-times in RSS conform
|
@@ -94,15 +99,7 @@ class RssFeedBuilder
|
|
94
99
|
item.published = rss_item.pubDate.nil? ? nil : rss_item.pubDate.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
|
95
100
|
|
96
101
|
logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
|
97
|
-
|
98
|
-
## fix/todo: add
|
99
|
-
## check for <content:encoded>
|
100
|
-
## full content (example use e.g. in sitepoint/ruby/feed/)
|
101
|
-
# content: item.content_encoded,
|
102
|
-
|
103
|
-
# if item.content_encoded.nil?
|
104
|
-
# puts " using description for content"
|
105
|
-
# end
|
102
|
+
|
106
103
|
|
107
104
|
## fix/todo: check if rss_item.guid present? !!!!
|
108
105
|
##
|
data/lib/feedutils/feed.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module FeedUtils
|
2
2
|
|
3
3
|
class Feed
|
4
|
-
attr_accessor :object
|
4
|
+
### attr_accessor :object # not used for now
|
5
5
|
|
6
6
|
attr_accessor :format # e.g. atom|rss 2.0|etc.
|
7
7
|
attr_accessor :title
|
@@ -11,29 +11,15 @@ class Feed
|
|
11
11
|
attr_accessor :items
|
12
12
|
|
13
13
|
def summary?() @summary.nil? == false; end
|
14
|
-
#
|
15
|
-
def summary() summary? ? @summary : @title2; end
|
16
|
-
attr_writer :summary # e.g. description (rss)
|
17
|
-
|
14
|
+
attr_accessor :summary # e.g. description (rss)
|
18
15
|
attr_accessor :summary_type # e.g. text|html|html-escaped
|
19
16
|
|
20
17
|
def title2?() @title2.nil? == false; end
|
21
18
|
attr_accessor :title2 # e.g. subtitle (atom)
|
22
|
-
|
23
19
|
attr_accessor :title2_type # e.g. text|html|html-escaped
|
24
20
|
|
25
21
|
def published?() @published.nil? == false; end
|
26
|
-
|
27
|
-
def published
|
28
|
-
if published?
|
29
|
-
@published
|
30
|
-
elsif updated?
|
31
|
-
@updated
|
32
|
-
else
|
33
|
-
@built
|
34
|
-
end
|
35
|
-
end
|
36
|
-
attr_writer :published
|
22
|
+
attr_accessor :published
|
37
23
|
|
38
24
|
def updated?() @updated.nil? == false; end
|
39
25
|
attr_accessor :updated
|
@@ -42,11 +28,13 @@ class Feed
|
|
42
28
|
attr_accessor :built
|
43
29
|
|
44
30
|
attr_accessor :generator
|
45
|
-
|
31
|
+
attr_accessor :generator_version # e.g. @version (atom)
|
32
|
+
attr_accessor :generator_uri # e.g. @uri (atom) - use alias url/link ???
|
46
33
|
|
47
34
|
## fix:
|
48
35
|
# add pretty printer/inspect (exclude object)
|
49
36
|
|
37
|
+
|
50
38
|
end # class Feed
|
51
39
|
|
52
40
|
end # module FeedUtils
|
data/lib/feedutils/item.rb
CHANGED
@@ -1,35 +1,27 @@
|
|
1
1
|
module FeedUtils
|
2
2
|
|
3
3
|
class Item
|
4
|
-
attr_accessor :object # orginal object (e.g RSS item or ATOM entry etc.)
|
4
|
+
## attr_accessor :object # not used for now -- original object (e.g. RSS item or ATOM entry etc.)
|
5
5
|
|
6
6
|
attr_accessor :title
|
7
7
|
attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
|
8
8
|
attr_accessor :url # todo: rename to link (use alias) ??
|
9
9
|
|
10
10
|
|
11
|
-
# no content? try/return summary
|
12
|
-
def content() content? ? @content : @summary; end
|
13
11
|
def content?() @content.nil? == false; end
|
14
|
-
|
12
|
+
attr_accessor :content
|
13
|
+
attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
|
15
14
|
|
16
|
-
attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
|
17
|
-
|
18
|
-
# no summary? try/return content
|
19
|
-
def summary() summary? ? @summary : @content; end
|
20
15
|
def summary?() @summary.nil? == false; end
|
21
|
-
|
22
|
-
|
23
|
-
attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
|
16
|
+
attr_accessor :summary
|
17
|
+
attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
|
24
18
|
|
25
19
|
## todo: add summary (alias description) ???
|
26
20
|
## todo: add author/authors
|
27
21
|
## todo: add category/categories
|
28
22
|
|
29
|
-
# no published date? try/return updated
|
30
|
-
def published() published? ? @published : @updated; end
|
31
23
|
def published?() @published.nil? == false; end
|
32
|
-
|
24
|
+
attr_accessor :published
|
33
25
|
|
34
26
|
def updated?() @updated.nil? == false; end
|
35
27
|
attr_accessor :updated
|
data/lib/feedutils/parser.rb
CHANGED
data/lib/feedutils/version.rb
CHANGED
data/test/test_atom.rb
CHANGED
@@ -10,17 +10,30 @@ class TestAtom < MiniTest::Unit::TestCase
|
|
10
10
|
|
11
11
|
def test_rubyonrails
|
12
12
|
feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
|
13
|
-
|
13
|
+
|
14
|
+
assert_equal 'atom', feed.format
|
14
15
|
end
|
15
16
|
|
16
17
|
def test_railstutorial
|
17
18
|
feed = parse_feed( 'http://feeds.feedburner.com/railstutorial?format=xml' )
|
18
|
-
|
19
|
+
|
20
|
+
assert_equal 'atom', feed.format
|
19
21
|
end
|
20
22
|
|
21
23
|
def test_googlegroup
|
22
24
|
feed = parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' )
|
23
|
-
|
25
|
+
|
26
|
+
assert_equal 'atom', feed.format
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
def test_headius
|
31
|
+
feed = parse_feed( 'http://blog.headius.com/feeds/posts/default' )
|
32
|
+
|
33
|
+
assert_equal 'atom', feed.format
|
34
|
+
assert_equal 'Blogger', feed.generator
|
35
|
+
assert_equal 'Headius', feed.title
|
36
|
+
assert_equal 'Helping the JVM Into the 21st Century', feed.title2 # aka subtitle in atom
|
24
37
|
end
|
25
38
|
|
26
39
|
end
|
data/test/test_rss.rb
CHANGED
@@ -11,18 +11,28 @@ class TestRss < MiniTest::Unit::TestCase
|
|
11
11
|
|
12
12
|
def test_rubyflow
|
13
13
|
feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
|
14
|
-
|
14
|
+
|
15
|
+
assert_equal 'rss 2.0', feed.format
|
15
16
|
end
|
16
17
|
|
17
18
|
def test_sitepointruby
|
18
19
|
feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
|
19
|
-
|
20
|
+
|
21
|
+
assert_equal 'rss 2.0', feed.format
|
20
22
|
end
|
21
23
|
|
22
24
|
def test_lambdatheultimate
|
23
25
|
## check - has no item.guid - will use item.link for guid
|
24
26
|
feed = parse_feed( 'http://lambda-the-ultimate.org/rss.xml' )
|
25
|
-
|
27
|
+
|
28
|
+
assert_equal 'rss 2.0', feed.format
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_rubymine
|
32
|
+
# includes item/content:encoded
|
33
|
+
feed = parse_feed( 'http://feeds.feedburner.com/jetbrains_rubymine?format=xml' )
|
34
|
+
|
35
|
+
assert_equal 'rss 2.0', feed.format
|
26
36
|
end
|
27
37
|
|
28
|
-
end
|
38
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-09
|
12
|
+
date: 2013-10-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &74494180 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *74494180
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &74493890 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *74493890
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &74493660 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,9 +43,9 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *74493660
|
47
47
|
description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
48
|
-
email:
|
48
|
+
email: feedreader@googlegroups.com
|
49
49
|
executables: []
|
50
50
|
extensions: []
|
51
51
|
extra_rdoc_files:
|