feedutils 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/Manifest.txt CHANGED
@@ -7,3 +7,6 @@ lib/feedutils/builder/atom.rb
7
7
  lib/feedutils/builder/rss.rb
8
8
  lib/feedutils/utils.rb
9
9
  lib/feedutils/version.rb
10
+ test/helper.rb
11
+ test/test_atom.rb
12
+ test/test_rss.rb
data/Rakefile CHANGED
@@ -20,6 +20,8 @@ Hoe.spec 'feedutils' do
20
20
  self.extra_deps = [
21
21
  ['logutils', '>= 0.5']
22
22
  ]
23
+
24
+ ### todo: add fetcher dep for testing (e.g. development only)
23
25
 
24
26
  self.licenses = ['Public Domain']
25
27
 
@@ -3,6 +3,8 @@ module FeedUtils
3
3
 
4
4
  class AtomFeedBuilder
5
5
 
6
+ include LogUtils::Logging
7
+
6
8
  def initialize( atom_feed )
7
9
  @feed = build_feed( atom_feed )
8
10
  end
@@ -39,19 +41,36 @@ class AtomFeedBuilder
39
41
  item.title = atom_item.title.content
40
42
  item.url = atom_item.link.href
41
43
 
44
+ logger.debug " atom | item.title.content: >#{atom_item.title.content}< : #{atom_item.title.content.class.name}"
45
+ logger.debug " atom | item.link.href: >#{atom_item.link.href}< : #{atom_item.link.href.class.name}"
46
+
47
+
42
48
  ## todo: check if updated or published present
43
49
  # set
44
- item.updated = atom_item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )
45
- item.published = item.updated # fix: check if publshed set
50
+ item.updated = atom_item.updated.content # .utc.strftime( "%Y-%m-%d %H:%M" )
51
+
52
+
53
+ ## change time to utc if present? why? why not?
54
+
55
+ ### todo: use/try published first? why? why not?
56
+ logger.debug " atom | item.updated >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
57
+
58
+ # - todo/check: does it exist in atom format?
59
+ # item.published = item.updated # fix: check if published set
46
60
 
47
61
  item.guid = atom_item.id.content
48
62
 
63
+ logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
49
64
 
50
65
  # todo: move logic to updater or something
51
66
  # - not part of normalize
52
67
 
68
+
69
+ ## fix/todo:
70
+ # also save/include full content in content
71
+
53
72
  if atom_item.summary
54
- item.content = atom_item.summary.content
73
+ item.summary = atom_item.summary.content
55
74
  else
56
75
  if atom_item.content
57
76
  text = atom_item.content.content.dup
@@ -59,21 +78,10 @@ class AtomFeedBuilder
59
78
  text = text.gsub( /<[^>]+>/, '' )
60
79
  text = text[ 0..400 ] # get first 400 chars
61
80
  ## todo: check for length if > 400 add ... at the end???
62
- item.content = text
81
+ item.summary = text
63
82
  end
64
83
  end
65
84
 
66
- puts "- #{atom_item.title.content}"
67
- puts " link >#{atom_item.link.href}<"
68
- puts " id (~guid) >#{atom_item.id.content}<"
69
-
70
- ### todo: use/try published first? why? why not?
71
- puts " updated (~pubDate) >#{atom_item.updated.content}< >#{atom_item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{atom_item.updated.content.class.name}"
72
- puts
73
-
74
- # puts "*** dump item:"
75
- # pp item
76
-
77
85
  item
78
86
  end # method build_feed_item
79
87
 
@@ -1,8 +1,13 @@
1
1
 
2
2
  module FeedUtils
3
3
 
4
+ ### todo/fix:
5
+ # rename to Rss20FeedBuilder?? or FeedBuilderRss20 ??
6
+
4
7
  class RssFeedBuilder
5
8
 
9
+ include LogUtils::Logging
10
+
6
11
  def initialize( rss_feed )
7
12
  @feed = build_feed( rss_feed )
8
13
  end
@@ -20,9 +25,30 @@ class RssFeedBuilder
20
25
  def build_feed( rss_feed )
21
26
  feed = Feed.new
22
27
  feed.object = rss_feed
23
- feed.title = rss_feed.channel.title
24
28
  feed.format = "rss #{rss_feed.rss_version}"
25
29
 
30
+ feed.title = rss_feed.channel.title # required
31
+ feed.url = rss_feed.channel.link # required
32
+ feed.summary = rss_feed.channel.description # required
33
+
34
+ # NOTE:
35
+ # All date-times in RSS conform
36
+ # to the Date and Time Specification of RFC 822
37
+ # e.g. Sun, 19 May 2012 15:21:36 GMT or
38
+ # Sat, 07 Sep 2013 00:00:01 GMT
39
+
40
+ feed.built = rss_feed.channel.lastBuildDate # optional
41
+ feed.published = rss_feed.channel.pubDate # optional
42
+
43
+ logger.debug " rss | channel.lastBuildDate: >#{rss_feed.channel.lastBuildDate}< : #{rss_feed.channel.lastBuildDate.class.name}"
44
+ logger.debug " rss | channel.pubDate: >#{rss_feed.channel.pubDate}< : #{rss_feed.channel.pubDate.class.name}"
45
+
46
+
47
+ feed.generator = rss_feed.channel.generator # optional
48
+
49
+ logger.debug " rss | channel.generator: >#{rss_feed.channel.generator}< : #{rss_feed.channel.generator.class.name}"
50
+
51
+
26
52
  items = []
27
53
  rss_feed.items.each do |rss_item|
28
54
  items << build_feed_item( rss_item )
@@ -39,30 +65,52 @@ class RssFeedBuilder
39
65
 
40
66
  item.title = rss_item.title
41
67
  item.url = rss_item.link
42
-
43
- ## todo: check if updated or published present
44
- # set
45
- item.published = rss_item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )
46
- item.updated = item.published
47
-
68
+
69
+ ## todo:
70
+ ## check if feedburner:origLink present - if yes, use it for url/link
71
+ ## example: use
72
+ ## - <feedburner:origLink>http://www.rubyflow.com/items/9803-gotta-ruby-s-syntax</feedburner:origLink></item>
73
+ ## instead of
74
+ ## - <link>http://feedproxy.google.com/~r/Rubyflow/~3/Ym9Sltg_2_c/9803-gotta-ruby-s-syntax</link>
75
+
76
+
77
+ item.summary = rss_item.description
78
+
79
+ logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
80
+ logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
81
+
82
+ # NOTE:
83
+ # All date-times in RSS conform
84
+ # to the Date and Time Specification of RFC 822
85
+ # e.g. Sun, 19 May 2012 15:21:36 GMT or
86
+ # Sat, 07 Sep 2013 00:00:01 GMT
87
+
88
+ item.published = rss_item.pubDate # .utc.strftime( "%Y-%m-%d %H:%M" )
89
+
90
+ logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
91
+
92
+ ## fix/todo: add
93
+ ## check for <content:encoded>
94
+ ## full content (example use e.g. in sitepoint/ruby/feed/)
48
95
  # content: item.content_encoded,
49
96
 
50
97
  # if item.content_encoded.nil?
51
98
  # puts " using description for content"
52
-
53
- item.content = rss_item.description
54
99
  # end
55
-
100
+
101
+ ## fix/todo: check if rss_item.guid present? !!!!
56
102
  item.guid = rss_item.guid.content
57
103
 
58
- puts "- #{rss_item.title}"
59
- puts " link (#{rss_item.link})"
60
- puts " guid (#{rss_item.guid.content})"
61
- puts " pubDate >#{rss_item.pubDate}< >#{rss_item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{rss_item.pubDate.class.name}"
62
- puts
63
-
64
- # puts "*** dump item:"
65
- # pp item
104
+ logger.debug " rss | item.guid.content: >#{rss_item.guid.content}< : #{rss_item.guid.content.class.name}"
105
+
106
+ ### todo: add support for authors (incl. dc:creator)
107
+ ## <dc:creator>Dhaivat Pandya</dc:creator>
108
+
109
+ # todo: categories
110
+ # <category><![CDATA[Gems]]></category>
111
+ # <category><![CDATA[Ruby]]></category>
112
+ # <category><![CDATA[Ruby on Rails]]></category>
113
+
66
114
 
67
115
  item
68
116
  end # method build_feed_item
@@ -7,10 +7,67 @@ module FeedUtils
7
7
 
8
8
  attr_accessor :format # e.g. atom|rss 2.0|etc.
9
9
  attr_accessor :title
10
- attr_accessor :title_type # e.g. text|html (optional) -use - why?? why not??
10
+ attr_accessor :title_type # e.g. text|html|html-escaped (optional) -use - why?? why not??
11
+ attr_accessor :url
11
12
 
12
13
  attr_accessor :items
13
14
 
15
+ attr_accessor :summary # e.g. description (rss)
16
+ attr_accessor :summary_type # e.g. text|html|html-escaped
17
+ attr_accessor :title2 # e.g. subtitle (atom)
18
+ attr_accessor :title2_type # e.g. text|html|html-escaped
19
+
20
+ attr_accessor :published
21
+ attr_accessor :updated
22
+ attr_accessor :built
23
+
24
+ attr_accessor :generator
25
+
26
+
27
+ def title2?
28
+ @title2.nil? == false
29
+ end
30
+
31
+ def summary?
32
+ @summary.nil? == false
33
+ end
34
+
35
+ def built?
36
+ @built.nil? == false
37
+ end
38
+
39
+ def updated?
40
+ @updated.nil? == false
41
+ end
42
+
43
+ def published?
44
+ @published.nil? == false
45
+ end
46
+
47
+
48
+ def summary
49
+ # no summary? try/return title2
50
+ if summary?
51
+ @summary
52
+ else
53
+ @title2
54
+ end
55
+ end
56
+
57
+ def published
58
+ # no published date? try/return updated or built
59
+ if published?
60
+ @published
61
+ elsif updated?
62
+ @updated
63
+ else
64
+ @built
65
+ end
66
+ end
67
+
68
+ ## fix:
69
+ # add pretty printer/inspect (exclude object)
70
+
14
71
  end # class Feed
15
72
 
16
73
 
@@ -18,24 +75,65 @@ module FeedUtils
18
75
  attr_accessor :object # original object (e.g. RSS item or ATOM entry etc.)
19
76
 
20
77
  attr_accessor :title
21
- attr_accessor :title_type # optional for now (text|html) - not yet set
78
+ attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
22
79
  attr_accessor :url # todo: rename to link (use alias) ??
23
80
  attr_accessor :content
24
- attr_accessor :content_type # optional for now (text|html) - not yet set
81
+ attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
82
+ attr_accessor :summary
83
+ attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
25
84
 
26
85
  ## todo: add summary (alias description) ???
27
86
  ## todo: add author/authors
28
87
  ## todo: add category/categories
29
88
 
30
- attr_accessor :updated
31
89
  attr_accessor :published
90
+ attr_accessor :updated
32
91
 
33
92
  attr_accessor :guid # todo: rename to id (use alias) ??
93
+
94
+
95
+ def summary?
96
+ @summary.nil? == false
97
+ end
98
+
99
+ def content?
100
+ @content.nil? == false
101
+ end
102
+
103
+ def published?
104
+ @published.nil? == false
105
+ end
106
+
107
+ def updated?
108
+ @updated.nil? == false
109
+ end
110
+
111
+ def content
112
+ # no content? try/return summary
113
+ if content?
114
+ @content
115
+ else
116
+ @summary
117
+ end
118
+ end
119
+
120
+ def published
121
+ # no published date? try/return updated
122
+ if published?
123
+ @published
124
+ else
125
+ @updated
126
+ end
127
+ end
128
+
129
+
34
130
  end # class Item
35
131
 
36
132
 
37
133
  class Parser
38
134
 
135
+ include LogUtils::Logging
136
+
39
137
  ### Note: lets keep/use same API as RSS::Parser for now
40
138
  def initialize( xml )
41
139
  @xml = xml
@@ -49,7 +147,7 @@ module FeedUtils
49
147
  puts "Parsing feed..."
50
148
  feed_wild = parser.parse # not yet normalized
51
149
 
52
- puts " feed.class=#{feed_wild.class.name}"
150
+ logger.debug " feed.class=#{feed_wild.class.name}"
53
151
 
54
152
  if feed_wild.is_a?( RSS::Atom::Feed )
55
153
  feed = AtomFeedBuilder.build( feed_wild )
@@ -60,7 +158,12 @@ module FeedUtils
60
158
  puts "== #{feed.format} / #{feed.title} =="
61
159
  feed # return new (normalized) feed
62
160
  end
63
-
161
+
162
+ ### convenience class/factory method
163
+ def self.parse( xml, opts={} )
164
+ self.new( xml ).parse
165
+ end
166
+
64
167
  end # class Parser
65
168
 
66
169
 
@@ -1,4 +1,4 @@
1
1
 
2
2
  module FeedUtils
3
- VERSION = '0.1.0'
3
+ VERSION = '0.2.0'
4
4
  end
data/test/helper.rb ADDED
@@ -0,0 +1,25 @@
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+
4
+ ## minitest setup
5
+
6
+ # require 'minitest/unit'
7
+ require 'minitest/autorun'
8
+
9
+ # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
+
11
+ require 'logutils'
12
+ require 'fetcher'
13
+
14
+ ## our own code
15
+
16
+ require 'feedutils'
17
+
18
+
19
+ LogUtils::Logger.root.level = :debug
20
+
21
+ def parse_feed( feed_url )
22
+ xml = Fetcher.read( feed_url )
23
+
24
+ FeedUtils::Parser.parse( xml )
25
+ end
data/test/test_atom.rb ADDED
@@ -0,0 +1,16 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_atom.rb
4
+ # or better
5
+ # rake test
6
+
7
+ require 'helper'
8
+
9
+ class TestAtom < MiniTest::Unit::TestCase
10
+
11
+ def test_rubyonrails
12
+ feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
13
+ assert( feed.format == 'atom' )
14
+ end
15
+
16
+ end
data/test/test_rss.rb ADDED
@@ -0,0 +1,23 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_rss.rb
4
+ # or better
5
+ # rake test
6
+
7
+ require 'helper'
8
+
9
+ class TestRss < MiniTest::Unit::TestCase
10
+
11
+
12
+ def test_rubyflow
13
+ feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
14
+ assert( feed.format == 'rss 2.0' )
15
+ end
16
+
17
+ def test_sitepointruby
18
+ feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
19
+ assert( feed.format == 'rss 2.0' )
20
+ end
21
+
22
+
23
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-19 00:00:00.000000000 Z
12
+ date: 2013-09-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &73814860 !ruby/object:Gem::Requirement
16
+ requirement: &68865360 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *73814860
24
+ version_requirements: *68865360
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &73813400 !ruby/object:Gem::Requirement
27
+ requirement: &68864940 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *73813400
35
+ version_requirements: *68864940
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &73812180 !ruby/object:Gem::Requirement
38
+ requirement: &68864560 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *73812180
46
+ version_requirements: *68864560
47
47
  description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -60,6 +60,10 @@ files:
60
60
  - lib/feedutils/builder/rss.rb
61
61
  - lib/feedutils/utils.rb
62
62
  - lib/feedutils/version.rb
63
+ - test/helper.rb
64
+ - test/test_atom.rb
65
+ - test/test_rss.rb
66
+ - .gemtest
63
67
  homepage: https://github.com/rubylibs/feedutils
64
68
  licenses:
65
69
  - Public Domain
@@ -87,4 +91,6 @@ rubygems_version: 1.8.17
87
91
  signing_key:
88
92
  specification_version: 3
89
93
  summary: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
90
- test_files: []
94
+ test_files:
95
+ - test/test_atom.rb
96
+ - test/test_rss.rb