feedutils 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/Manifest.txt CHANGED
@@ -7,3 +7,6 @@ lib/feedutils/builder/atom.rb
7
7
  lib/feedutils/builder/rss.rb
8
8
  lib/feedutils/utils.rb
9
9
  lib/feedutils/version.rb
10
+ test/helper.rb
11
+ test/test_atom.rb
12
+ test/test_rss.rb
data/Rakefile CHANGED
@@ -20,6 +20,8 @@ Hoe.spec 'feedutils' do
20
20
  self.extra_deps = [
21
21
  ['logutils', '>= 0.5']
22
22
  ]
23
+
24
+ ### todo: add fetcher dep for testing (e.g. development only)
23
25
 
24
26
  self.licenses = ['Public Domain']
25
27
 
@@ -3,6 +3,8 @@ module FeedUtils
3
3
 
4
4
  class AtomFeedBuilder
5
5
 
6
+ include LogUtils::Logging
7
+
6
8
  def initialize( atom_feed )
7
9
  @feed = build_feed( atom_feed )
8
10
  end
@@ -39,19 +41,36 @@ class AtomFeedBuilder
39
41
  item.title = atom_item.title.content
40
42
  item.url = atom_item.link.href
41
43
 
44
+ logger.debug " atom | item.title.content: >#{atom_item.title.content}< : #{atom_item.title.content.class.name}"
45
+ logger.debug " atom | item.link.href: >#{atom_item.link.href}< : #{atom_item.link.href.class.name}"
46
+
47
+
42
48
  ## todo: check if updated or published present
43
49
  # set
44
- item.updated = atom_item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )
45
- item.published = item.updated # fix: check if publshed set
50
+ item.updated = atom_item.updated.content # .utc.strftime( "%Y-%m-%d %H:%M" )
51
+
52
+
53
+ ## change time to utc if present? why? why not?
54
+
55
+ ### todo: use/try published first? why? why not?
56
+ logger.debug " atom | item.updated >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
57
+
58
+ # - todo/check: does it exist in atom format?
59
+ # item.published = item.updated # fix: check if published set
46
60
 
47
61
  item.guid = atom_item.id.content
48
62
 
63
+ logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
49
64
 
50
65
  # todo: move logic to updater or something
51
66
  # - not part of normalize
52
67
 
68
+
69
+ ## fix/todo:
70
+ # also save/include full content in content
71
+
53
72
  if atom_item.summary
54
- item.content = atom_item.summary.content
73
+ item.summary = atom_item.summary.content
55
74
  else
56
75
  if atom_item.content
57
76
  text = atom_item.content.content.dup
@@ -59,21 +78,10 @@ class AtomFeedBuilder
59
78
  text = text.gsub( /<[^>]+>/, '' )
60
79
  text = text[ 0..400 ] # get first 400 chars
61
80
  ## todo: check for length if > 400 add ... at the end???
62
- item.content = text
81
+ item.summary = text
63
82
  end
64
83
  end
65
84
 
66
- puts "- #{atom_item.title.content}"
67
- puts " link >#{atom_item.link.href}<"
68
- puts " id (~guid) >#{atom_item.id.content}<"
69
-
70
- ### todo: use/try published first? why? why not?
71
- puts " updated (~pubDate) >#{atom_item.updated.content}< >#{atom_item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{atom_item.updated.content.class.name}"
72
- puts
73
-
74
- # puts "*** dump item:"
75
- # pp item
76
-
77
85
  item
78
86
  end # method build_feed_item
79
87
 
@@ -1,8 +1,13 @@
1
1
 
2
2
  module FeedUtils
3
3
 
4
+ ### todo/fix:
5
+ # rename to Rss20FeedBuilder?? or FeedBuilderRss20 ??
6
+
4
7
  class RssFeedBuilder
5
8
 
9
+ include LogUtils::Logging
10
+
6
11
  def initialize( rss_feed )
7
12
  @feed = build_feed( rss_feed )
8
13
  end
@@ -20,9 +25,30 @@ class RssFeedBuilder
20
25
  def build_feed( rss_feed )
21
26
  feed = Feed.new
22
27
  feed.object = rss_feed
23
- feed.title = rss_feed.channel.title
24
28
  feed.format = "rss #{rss_feed.rss_version}"
25
29
 
30
+ feed.title = rss_feed.channel.title # required
31
+ feed.url = rss_feed.channel.link # required
32
+ feed.summary = rss_feed.channel.description # required
33
+
34
+ # NOTE:
35
+ # All date-times in RSS conform
36
+ # to the Date and Time Specification of RFC 822
37
+ # e.g. Sun, 19 May 2012 15:21:36 GMT or
38
+ # Sat, 07 Sep 2013 00:00:01 GMT
39
+
40
+ feed.built = rss_feed.channel.lastBuildDate # optional
41
+ feed.published = rss_feed.channel.pubDate # optional
42
+
43
+ logger.debug " rss | channel.lastBuildDate: >#{rss_feed.channel.lastBuildDate}< : #{rss_feed.channel.lastBuildDate.class.name}"
44
+ logger.debug " rss | channel.pubDate: >#{rss_feed.channel.pubDate}< : #{rss_feed.channel.pubDate.class.name}"
45
+
46
+
47
+ feed.generator = rss_feed.channel.generator # optional
48
+
49
+ logger.debug " rss | channel.generator: >#{rss_feed.channel.generator}< : #{rss_feed.channel.generator.class.name}"
50
+
51
+
26
52
  items = []
27
53
  rss_feed.items.each do |rss_item|
28
54
  items << build_feed_item( rss_item )
@@ -39,30 +65,52 @@ class RssFeedBuilder
39
65
 
40
66
  item.title = rss_item.title
41
67
  item.url = rss_item.link
42
-
43
- ## todo: check if updated or published present
44
- # set
45
- item.published = rss_item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )
46
- item.updated = item.published
47
-
68
+
69
+ ## todo:
70
+ ## check if feedburner:origLink present - if yes, use it for url/link
71
+ ## example: use
72
+ ## - <feedburner:origLink>http://www.rubyflow.com/items/9803-gotta-ruby-s-syntax</feedburner:origLink></item>
73
+ ## instead of
74
+ ## - <link>http://feedproxy.google.com/~r/Rubyflow/~3/Ym9Sltg_2_c/9803-gotta-ruby-s-syntax</link>
75
+
76
+
77
+ item.summary = rss_item.description
78
+
79
+ logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
80
+ logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
81
+
82
+ # NOTE:
83
+ # All date-times in RSS conform
84
+ # to the Date and Time Specification of RFC 822
85
+ # e.g. Sun, 19 May 2012 15:21:36 GMT or
86
+ # Sat, 07 Sep 2013 00:00:01 GMT
87
+
88
+ item.published = rss_item.pubDate # .utc.strftime( "%Y-%m-%d %H:%M" )
89
+
90
+ logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
91
+
92
+ ## fix/todo: add
93
+ ## check for <content:encoded>
94
+ ## full content (example use e.g. in sitepoint/ruby/feed/)
48
95
  # content: item.content_encoded,
49
96
 
50
97
  # if item.content_encoded.nil?
51
98
  # puts " using description for content"
52
-
53
- item.content = rss_item.description
54
99
  # end
55
-
100
+
101
+ ## fix/todo: check if rss_item.guid present? !!!!
56
102
  item.guid = rss_item.guid.content
57
103
 
58
- puts "- #{rss_item.title}"
59
- puts " link (#{rss_item.link})"
60
- puts " guid (#{rss_item.guid.content})"
61
- puts " pubDate >#{rss_item.pubDate}< >#{rss_item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{rss_item.pubDate.class.name}"
62
- puts
63
-
64
- # puts "*** dump item:"
65
- # pp item
104
+ logger.debug " rss | item.guid.content: >#{rss_item.guid.content}< : #{rss_item.guid.content.class.name}"
105
+
106
+ ### todo: add support for authors (incl. dc:creator)
107
+ ## <dc:creator>Dhaivat Pandya</dc:creator>
108
+
109
+ # todo: categories
110
+ # <category><![CDATA[Gems]]></category>
111
+ # <category><![CDATA[Ruby]]></category>
112
+ # <category><![CDATA[Ruby on Rails]]></category>
113
+
66
114
 
67
115
  item
68
116
  end # method build_feed_item_from_rss
@@ -7,10 +7,67 @@ module FeedUtils
7
7
 
8
8
  attr_accessor :format # e.g. atom|rss 2.0|etc.
9
9
  attr_accessor :title
10
- attr_accessor :title_type # e.g. text|html (optional) -use - why?? why not??
10
+ attr_accessor :title_type # e.g. text|html|html-escaped (optional) -use - why?? why not??
11
+ attr_accessor :url
11
12
 
12
13
  attr_accessor :items
13
14
 
15
+ attr_accessor :summary # e.g. description (rss)
16
+ attr_accessor :summary_type # e.g. text|html|html-escaped
17
+ attr_accessor :title2 # e.g. subtitle (atom)
18
+ attr_accessor :title2_type # e.g. text|html|html-escaped
19
+
20
+ attr_accessor :published
21
+ attr_accessor :updated
22
+ attr_accessor :built
23
+
24
+ attr_accessor :generator
25
+
26
+
27
+ def title2?
28
+ @title2.nil? == false
29
+ end
30
+
31
+ def summary?
32
+ @summary.nil? == false
33
+ end
34
+
35
+ def built?
36
+ @built.nil? == false
37
+ end
38
+
39
+ def updated?
40
+ @updated.nil? == false
41
+ end
42
+
43
+ def published?
44
+ @published.nil? == false
45
+ end
46
+
47
+
48
+ def summary
49
+ # no summary? try/return title2
50
+ if summary?
51
+ @summary
52
+ else
53
+ @title2
54
+ end
55
+ end
56
+
57
+ def published
58
+ # no published date? try/return updated or built
59
+ if published?
60
+ @published
61
+ elsif updated?
62
+ @updated
63
+ else
64
+ @built
65
+ end
66
+ end
67
+
68
+ ## fix:
69
+ # add pretty printer/inspect (exclude object)
70
+
14
71
  end # class Feed
15
72
 
16
73
 
@@ -18,24 +75,65 @@ module FeedUtils
18
75
  attr_accessor :object # orginal object (e.g RSS item or ATOM entry etc.)
19
76
 
20
77
  attr_accessor :title
21
- attr_accessor :title_type # optional for now (text|html) - not yet set
78
+ attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
22
79
  attr_accessor :url # todo: rename to link (use alias) ??
23
80
  attr_accessor :content
24
- attr_accessor :content_type # optional for now (text|html) - not yet set
81
+ attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
82
+ attr_accessor :summary
83
+ attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
25
84
 
26
85
  ## todo: add summary (alias description) ???
27
86
  ## todo: add author/authors
28
87
  ## todo: add category/categories
29
88
 
30
- attr_accessor :updated
31
89
  attr_accessor :published
90
+ attr_accessor :updated
32
91
 
33
92
  attr_accessor :guid # todo: rename to id (use alias) ??
93
+
94
+
95
+ def summary?
96
+ @summary.nil? == false
97
+ end
98
+
99
+ def content?
100
+ @content.nil? == false
101
+ end
102
+
103
+ def published?
104
+ @published.nil? == false
105
+ end
106
+
107
+ def updated?
108
+ @updated.nil? == false
109
+ end
110
+
111
+ def content
112
+ # no content? try/return summary
113
+ if content?
114
+ @content
115
+ else
116
+ @summary
117
+ end
118
+ end
119
+
120
+ def published
121
+ # no published date? try/return updated
122
+ if published?
123
+ @published
124
+ else
125
+ @updated
126
+ end
127
+ end
128
+
129
+
34
130
  end # class Item
35
131
 
36
132
 
37
133
  class Parser
38
134
 
135
+ include LogUtils::Logging
136
+
39
137
  ### Note: lets keep/use same API as RSS::Parser for now
40
138
  def initialize( xml )
41
139
  @xml = xml
@@ -49,7 +147,7 @@ module FeedUtils
49
147
  puts "Parsing feed..."
50
148
  feed_wild = parser.parse # not yet normalized
51
149
 
52
- puts " feed.class=#{feed_wild.class.name}"
150
+ logger.debug " feed.class=#{feed_wild.class.name}"
53
151
 
54
152
  if feed_wild.is_a?( RSS::Atom::Feed )
55
153
  feed = AtomFeedBuilder.build( feed_wild )
@@ -60,7 +158,12 @@ module FeedUtils
60
158
  puts "== #{feed.format} / #{feed.title} =="
61
159
  feed # return new (normalized) feed
62
160
  end
63
-
161
+
162
+ ### convenience class/factory method
163
+ def self.parse( xml, opts={} )
164
+ self.new( xml ).parse
165
+ end
166
+
64
167
  end # class Parser
65
168
 
66
169
 
@@ -1,4 +1,4 @@
1
1
 
2
2
  module FeedUtils
3
- VERSION = '0.1.0'
3
+ VERSION = '0.2.0'
4
4
  end
data/test/helper.rb ADDED
@@ -0,0 +1,25 @@
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+
4
+ ## minitest setup
5
+
6
+ # require 'minitest/unit'
7
+ require 'minitest/autorun'
8
+
9
+ # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
+
11
+ require 'logutils'
12
+ require 'fetcher'
13
+
14
+ ## our own code
15
+
16
+ require 'feedutils'
17
+
18
+
19
+ LogUtils::Logger.root.level = :debug
20
+
21
+ def parse_feed( feed_url )
22
+ xml = Fetcher.read( feed_url )
23
+
24
+ FeedUtils::Parser.parse( xml )
25
+ end
data/test/test_atom.rb ADDED
@@ -0,0 +1,16 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_atom.rb
4
+ # or better
5
+ # rake test
6
+
7
+ require 'helper'
8
+
9
+ class TestAtom < MiniTest::Unit::TestCase
10
+
11
+ def test_rubyonrails
12
+ feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
13
+ assert( feed.format == 'atom' )
14
+ end
15
+
16
+ end
data/test/test_rss.rb ADDED
@@ -0,0 +1,23 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_rss.rb
4
+ # or better
5
+ # rake test
6
+
7
+ require 'helper'
8
+
9
+ class TestRss < MiniTest::Unit::TestCase
10
+
11
+
12
+ def test_rubyflow
13
+ feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
14
+ assert( feed.format == 'rss 2.0' )
15
+ end
16
+
17
+ def test_sitepointruby
18
+ feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
19
+ assert( feed.format == 'rss 2.0' )
20
+ end
21
+
22
+
23
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-19 00:00:00.000000000 Z
12
+ date: 2013-09-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &73814860 !ruby/object:Gem::Requirement
16
+ requirement: &68865360 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *73814860
24
+ version_requirements: *68865360
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &73813400 !ruby/object:Gem::Requirement
27
+ requirement: &68864940 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *73813400
35
+ version_requirements: *68864940
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &73812180 !ruby/object:Gem::Requirement
38
+ requirement: &68864560 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *73812180
46
+ version_requirements: *68864560
47
47
  description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -60,6 +60,10 @@ files:
60
60
  - lib/feedutils/builder/rss.rb
61
61
  - lib/feedutils/utils.rb
62
62
  - lib/feedutils/version.rb
63
+ - test/helper.rb
64
+ - test/test_atom.rb
65
+ - test/test_rss.rb
66
+ - .gemtest
63
67
  homepage: https://github.com/rubylibs/feedutils
64
68
  licenses:
65
69
  - Public Domain
@@ -87,4 +91,6 @@ rubygems_version: 1.8.17
87
91
  signing_key:
88
92
  specification_version: 3
89
93
  summary: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
90
- test_files: []
94
+ test_files:
95
+ - test/test_atom.rb
96
+ - test/test_rss.rb