feedutils 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/Manifest.txt +3 -0
- data/Rakefile +2 -0
- data/lib/feedutils/builder/atom.rb +23 -15
- data/lib/feedutils/builder/rss.rb +66 -18
- data/lib/feedutils/utils.rb +109 -6
- data/lib/feedutils/version.rb +1 -1
- data/test/helper.rb +25 -0
- data/test/test_atom.rb +16 -0
- data/test/test_rss.rb +23 -0
- metadata +15 -9
data/.gemtest
ADDED
File without changes
|
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -3,6 +3,8 @@ module FeedUtils
|
|
3
3
|
|
4
4
|
class AtomFeedBuilder
|
5
5
|
|
6
|
+
include LogUtils::Logging
|
7
|
+
|
6
8
|
def initialize( atom_feed )
|
7
9
|
@feed = build_feed( atom_feed )
|
8
10
|
end
|
@@ -39,19 +41,36 @@ class AtomFeedBuilder
|
|
39
41
|
item.title = atom_item.title.content
|
40
42
|
item.url = atom_item.link.href
|
41
43
|
|
44
|
+
logger.debug " atom | item.title.content: >#{atom_item.title.content}< : #{atom_item.title.content.class.name}"
|
45
|
+
logger.debug " atom | item.link.href: >#{atom_item.link.href}< : #{atom_item.link.href.class.name}"
|
46
|
+
|
47
|
+
|
42
48
|
## todo: check if updated or published present
|
43
49
|
# set
|
44
|
-
item.updated = atom_item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )
|
45
|
-
|
50
|
+
item.updated = atom_item.updated.content # .utc.strftime( "%Y-%m-%d %H:%M" )
|
51
|
+
|
52
|
+
|
53
|
+
## change time to utc if present? why? why not?
|
54
|
+
|
55
|
+
### todo: use/try published first? why? why not?
|
56
|
+
logger.debug " atom | item.updated >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
|
57
|
+
|
58
|
+
# - todo/check: does it exist in atom format?
|
59
|
+
# item.published = item.updated # fix: check if published set
|
46
60
|
|
47
61
|
item.guid = atom_item.id.content
|
48
62
|
|
63
|
+
logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
|
49
64
|
|
50
65
|
# todo: move logic to updater or something
|
51
66
|
# - not part of normalize
|
52
67
|
|
68
|
+
|
69
|
+
## fix/todo:
|
70
|
+
# also save/include full content in content
|
71
|
+
|
53
72
|
if atom_item.summary
|
54
|
-
item.
|
73
|
+
item.summary = atom_item.summary.content
|
55
74
|
else
|
56
75
|
if atom_item.content
|
57
76
|
text = atom_item.content.content.dup
|
@@ -59,21 +78,10 @@ class AtomFeedBuilder
|
|
59
78
|
text = text.gsub( /<[^>]+>/, '' )
|
60
79
|
text = text[ 0..400 ] # get first 400 chars
|
61
80
|
## todo: check for length if > 400 add ... at the end???
|
62
|
-
item.
|
81
|
+
item.summary = text
|
63
82
|
end
|
64
83
|
end
|
65
84
|
|
66
|
-
puts "- #{atom_item.title.content}"
|
67
|
-
puts " link >#{atom_item.link.href}<"
|
68
|
-
puts " id (~guid) >#{atom_item.id.content}<"
|
69
|
-
|
70
|
-
### todo: use/try published first? why? why not?
|
71
|
-
puts " updated (~pubDate) >#{atom_item.updated.content}< >#{atom_item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{atom_item.updated.content.class.name}"
|
72
|
-
puts
|
73
|
-
|
74
|
-
# puts "*** dump item:"
|
75
|
-
# pp item
|
76
|
-
|
77
85
|
item
|
78
86
|
end # method build_feed_item
|
79
87
|
|
@@ -1,8 +1,13 @@
|
|
1
1
|
|
2
2
|
module FeedUtils
|
3
3
|
|
4
|
+
### todo/fix:
|
5
|
+
# rename to Rss20FeedBuilder?? or FeedBuilderRss20 ??
|
6
|
+
|
4
7
|
class RssFeedBuilder
|
5
8
|
|
9
|
+
include LogUtils::Logging
|
10
|
+
|
6
11
|
def initialize( rss_feed )
|
7
12
|
@feed = build_feed( rss_feed )
|
8
13
|
end
|
@@ -20,9 +25,30 @@ class RssFeedBuilder
|
|
20
25
|
def build_feed( rss_feed )
|
21
26
|
feed = Feed.new
|
22
27
|
feed.object = rss_feed
|
23
|
-
feed.title = rss_feed.channel.title
|
24
28
|
feed.format = "rss #{rss_feed.rss_version}"
|
25
29
|
|
30
|
+
feed.title = rss_feed.channel.title # required
|
31
|
+
feed.url = rss_feed.channel.link # required
|
32
|
+
feed.summary = rss_feed.channel.description # required
|
33
|
+
|
34
|
+
# NOTE:
|
35
|
+
# All date-times in RSS conform
|
36
|
+
# to the Date and Time Specification of RFC 822
|
37
|
+
# e.g. Sun, 19 May 2012 15:21:36 GMT or
|
38
|
+
# Sat, 07 Sep 2013 00:00:01 GMT
|
39
|
+
|
40
|
+
feed.built = rss_feed.channel.lastBuildDate # optional
|
41
|
+
feed.published = rss_feed.channel.pubDate # optional
|
42
|
+
|
43
|
+
logger.debug " rss | channel.lastBuildDate: >#{rss_feed.channel.lastBuildDate}< : #{rss_feed.channel.lastBuildDate.class.name}"
|
44
|
+
logger.debug " rss | channel.pubDate: >#{rss_feed.channel.pubDate}< : #{rss_feed.channel.pubDate.class.name}"
|
45
|
+
|
46
|
+
|
47
|
+
feed.generator = rss_feed.channel.generator # optional
|
48
|
+
|
49
|
+
logger.debug " rss | channel.generator: >#{rss_feed.channel.generator}< : #{rss_feed.channel.generator.class.name}"
|
50
|
+
|
51
|
+
|
26
52
|
items = []
|
27
53
|
rss_feed.items.each do |rss_item|
|
28
54
|
items << build_feed_item( rss_item )
|
@@ -39,30 +65,52 @@ class RssFeedBuilder
|
|
39
65
|
|
40
66
|
item.title = rss_item.title
|
41
67
|
item.url = rss_item.link
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
68
|
+
|
69
|
+
## todo:
|
70
|
+
## check if feedburner:origLink present - if yes, use it for url/link
|
71
|
+
## example: use
|
72
|
+
## - <feedburner:origLink>http://www.rubyflow.com/items/9803-gotta-ruby-s-syntax</feedburner:origLink></item>
|
73
|
+
## instead of
|
74
|
+
## - <link>http://feedproxy.google.com/~r/Rubyflow/~3/Ym9Sltg_2_c/9803-gotta-ruby-s-syntax</link>
|
75
|
+
|
76
|
+
|
77
|
+
item.summary = rss_item.description
|
78
|
+
|
79
|
+
logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
|
80
|
+
logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
|
81
|
+
|
82
|
+
# NOTE:
|
83
|
+
# All date-times in RSS conform
|
84
|
+
# to the Date and Time Specification of RFC 822
|
85
|
+
# e.g. Sun, 19 May 2012 15:21:36 GMT or
|
86
|
+
# Sat, 07 Sep 2013 00:00:01 GMT
|
87
|
+
|
88
|
+
item.published = rss_item.pubDate # .utc.strftime( "%Y-%m-%d %H:%M" )
|
89
|
+
|
90
|
+
logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
|
91
|
+
|
92
|
+
## fix/todo: add
|
93
|
+
## check for <content:encoded>
|
94
|
+
## full content (example use e.g. in sitepoint/ruby/feed/)
|
48
95
|
# content: item.content_encoded,
|
49
96
|
|
50
97
|
# if item.content_encoded.nil?
|
51
98
|
# puts " using description for content"
|
52
|
-
|
53
|
-
item.content = rss_item.description
|
54
99
|
# end
|
55
|
-
|
100
|
+
|
101
|
+
## fix/todo: check if rss_item.guid present? !!!!
|
56
102
|
item.guid = rss_item.guid.content
|
57
103
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
#
|
65
|
-
#
|
104
|
+
logger.debug " rss | item.guid.content: >#{rss_item.guid.content}< : #{rss_item.guid.content.class.name}"
|
105
|
+
|
106
|
+
### todo: add support for authors (incl. dc:creator)
|
107
|
+
## <dc:creator>Dhaivat Pandya</dc:creator>
|
108
|
+
|
109
|
+
# todo: categories
|
110
|
+
# <category><![CDATA[Gems]]></category>
|
111
|
+
# <category><![CDATA[Ruby]]></category>
|
112
|
+
# <category><![CDATA[Ruby on Rails]]></category>
|
113
|
+
|
66
114
|
|
67
115
|
item
|
68
116
|
end # method build_feed_item
|
data/lib/feedutils/utils.rb
CHANGED
@@ -7,10 +7,67 @@ module FeedUtils
|
|
7
7
|
|
8
8
|
attr_accessor :format # e.g. atom|rss 2.0|etc.
|
9
9
|
attr_accessor :title
|
10
|
-
attr_accessor :title_type # e.g. text|html (optional) -use - why?? why not??
|
10
|
+
attr_accessor :title_type # e.g. text|html|html-escaped (optional) -use - why?? why not??
|
11
|
+
attr_accessor :url
|
11
12
|
|
12
13
|
attr_accessor :items
|
13
14
|
|
15
|
+
attr_accessor :summary # e.g. description (rss)
|
16
|
+
attr_accessor :summary_type # e.g. text|html|html-escaped
|
17
|
+
attr_accessor :title2 # e.g. subtitle (atom)
|
18
|
+
attr_accessor :title2_type # e.g. text|html|html-escaped
|
19
|
+
|
20
|
+
attr_accessor :published
|
21
|
+
attr_accessor :updated
|
22
|
+
attr_accessor :built
|
23
|
+
|
24
|
+
attr_accessor :generator
|
25
|
+
|
26
|
+
|
27
|
+
def title2?
|
28
|
+
@title2.nil? == false
|
29
|
+
end
|
30
|
+
|
31
|
+
def summary?
|
32
|
+
@summary.nil? == false
|
33
|
+
end
|
34
|
+
|
35
|
+
def built?
|
36
|
+
@built.nil? == false
|
37
|
+
end
|
38
|
+
|
39
|
+
def updated?
|
40
|
+
@updated.nil? == false
|
41
|
+
end
|
42
|
+
|
43
|
+
def published?
|
44
|
+
@published.nil? == false
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
def summary
|
49
|
+
# no summary? try/return title2
|
50
|
+
if summary?
|
51
|
+
@summary
|
52
|
+
else
|
53
|
+
@title2
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def published
|
58
|
+
# no published date? try/return updated or built
|
59
|
+
if published?
|
60
|
+
@published
|
61
|
+
elsif updated?
|
62
|
+
@updated
|
63
|
+
else
|
64
|
+
@built
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
## fix:
|
69
|
+
# add pretty printer/inspect (exclude object)
|
70
|
+
|
14
71
|
end # class Feed
|
15
72
|
|
16
73
|
|
@@ -18,24 +75,65 @@ module FeedUtils
|
|
18
75
|
attr_accessor :object # original object (e.g. RSS item or ATOM entry etc.)
|
19
76
|
|
20
77
|
attr_accessor :title
|
21
|
-
attr_accessor :title_type # optional for now (text|html) - not yet set
|
78
|
+
attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
|
22
79
|
attr_accessor :url # todo: rename to link (use alias) ??
|
23
80
|
attr_accessor :content
|
24
|
-
attr_accessor :content_type # optional for now (text|html) - not yet set
|
81
|
+
attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
|
82
|
+
attr_accessor :summary
|
83
|
+
attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
|
25
84
|
|
26
85
|
## todo: add summary (alias description) ???
|
27
86
|
## todo: add author/authors
|
28
87
|
## todo: add category/categories
|
29
88
|
|
30
|
-
attr_accessor :updated
|
31
89
|
attr_accessor :published
|
90
|
+
attr_accessor :updated
|
32
91
|
|
33
92
|
attr_accessor :guid # todo: rename to id (use alias) ??
|
93
|
+
|
94
|
+
|
95
|
+
def summary?
|
96
|
+
@summary.nil? == false
|
97
|
+
end
|
98
|
+
|
99
|
+
def content?
|
100
|
+
@content.nil? == false
|
101
|
+
end
|
102
|
+
|
103
|
+
def published?
|
104
|
+
@published.nil? == false
|
105
|
+
end
|
106
|
+
|
107
|
+
def updated?
|
108
|
+
@updated.nil? == false
|
109
|
+
end
|
110
|
+
|
111
|
+
def content
|
112
|
+
# no content? try/return summary
|
113
|
+
if content?
|
114
|
+
@content
|
115
|
+
else
|
116
|
+
@summary
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def published
|
121
|
+
# no published date? try/return updated
|
122
|
+
if published?
|
123
|
+
@published
|
124
|
+
else
|
125
|
+
@updated
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
|
34
130
|
end # class Item
|
35
131
|
|
36
132
|
|
37
133
|
class Parser
|
38
134
|
|
135
|
+
include LogUtils::Logging
|
136
|
+
|
39
137
|
### Note: let's keep/use same API as RSS::Parser for now
|
40
138
|
def initialize( xml )
|
41
139
|
@xml = xml
|
@@ -49,7 +147,7 @@ module FeedUtils
|
|
49
147
|
puts "Parsing feed..."
|
50
148
|
feed_wild = parser.parse # not yet normalized
|
51
149
|
|
52
|
-
|
150
|
+
logger.debug " feed.class=#{feed_wild.class.name}"
|
53
151
|
|
54
152
|
if feed_wild.is_a?( RSS::Atom::Feed )
|
55
153
|
feed = AtomFeedBuilder.build( feed_wild )
|
@@ -60,7 +158,12 @@ module FeedUtils
|
|
60
158
|
puts "== #{feed.format} / #{feed.title} =="
|
61
159
|
feed # return new (normalized) feed
|
62
160
|
end
|
63
|
-
|
161
|
+
|
162
|
+
### convenience class/factory method
|
163
|
+
def self.parse( xml, opts={} )
|
164
|
+
self.new( xml ).parse
|
165
|
+
end
|
166
|
+
|
64
167
|
end # class Parser
|
65
168
|
|
66
169
|
|
data/lib/feedutils/version.rb
CHANGED
data/test/helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
|
4
|
+
## minitest setup
|
5
|
+
|
6
|
+
# require 'minitest/unit'
|
7
|
+
require 'minitest/autorun'
|
8
|
+
|
9
|
+
# include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
|
10
|
+
|
11
|
+
require 'logutils'
|
12
|
+
require 'fetcher'
|
13
|
+
|
14
|
+
## our own code
|
15
|
+
|
16
|
+
require 'feedutils'
|
17
|
+
|
18
|
+
|
19
|
+
LogUtils::Logger.root.level = :debug
|
20
|
+
|
21
|
+
def parse_feed( feed_url )
|
22
|
+
xml = Fetcher.read( feed_url )
|
23
|
+
|
24
|
+
FeedUtils::Parser.parse( xml )
|
25
|
+
end
|
data/test/test_atom.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_atom.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
class TestAtom < MiniTest::Unit::TestCase
|
10
|
+
|
11
|
+
def test_rubyonrails
|
12
|
+
feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
|
13
|
+
assert( feed.format == 'atom' )
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
data/test/test_rss.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_rss.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
class TestRss < MiniTest::Unit::TestCase
|
10
|
+
|
11
|
+
|
12
|
+
def test_rubyflow
|
13
|
+
feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
|
14
|
+
assert( feed.format == 'rss 2.0' )
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_sitepointruby
|
18
|
+
feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
|
19
|
+
assert( feed.format == 'rss 2.0' )
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-09-
|
12
|
+
date: 2013-09-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &68865360 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *68865360
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &68864940 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *68864940
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &68864560 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *68864560
|
47
47
|
description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|
@@ -60,6 +60,10 @@ files:
|
|
60
60
|
- lib/feedutils/builder/rss.rb
|
61
61
|
- lib/feedutils/utils.rb
|
62
62
|
- lib/feedutils/version.rb
|
63
|
+
- test/helper.rb
|
64
|
+
- test/test_atom.rb
|
65
|
+
- test/test_rss.rb
|
66
|
+
- .gemtest
|
63
67
|
homepage: https://github.com/rubylibs/feedutils
|
64
68
|
licenses:
|
65
69
|
- Public Domain
|
@@ -87,4 +91,6 @@ rubygems_version: 1.8.17
|
|
87
91
|
signing_key:
|
88
92
|
specification_version: 3
|
89
93
|
summary: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
90
|
-
test_files:
|
94
|
+
test_files:
|
95
|
+
- test/test_atom.rb
|
96
|
+
- test/test_rss.rb
|