feedutils 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -6,6 +6,8 @@ feedutils gems - web feed parser and normalizer (RSS 2.0, Atom, etc.)
6
6
  * bugs :: [github.com/rubylibs/feedutils/issues](https://github.com/rubylibs/feedutils/issues)
7
7
  * gem :: [rubygems.org/gems/feedutils](https://rubygems.org/gems/feedutils)
8
8
  * rdoc :: [rubydoc.info/gems/feedutils](http://rubydoc.info/gems/feedutils)
9
+ * forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
10
+
9
11
 
10
12
  ## Usage
11
13
 
@@ -13,8 +15,13 @@ TBD
13
15
 
14
16
  ## Alternatives
15
17
 
18
+ - [`syndication`](http://syndication.rubyforge.org) [(Source)](https://github.com/lpar/syndication) - by Mathew (aka lpar); RSS 1.0, 2.0, Atom, and understands namespaces; optional support for Dublin Core, iTunes/podcast feeds, and RSS 1.0 Syndication and Content modules
19
+ - [`simple-rss`](http://rubyforge.org/projects/simple-rss)
20
+ - [`feedtools`](http://rubyforge.org/projects/feedtools)
21
+
16
22
  TBD
17
23
 
24
+
18
25
  ## License
19
26
 
20
27
  The `feedutils` scripts are dedicated to the public domain.
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'feedutils' do
11
11
  self.urls = ['https://github.com/rubylibs/feedutils']
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'webslideshow@googlegroups.com'
14
+ self.email = 'feedreader@googlegroups.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
17
  self.readme_file = 'README.md'
data/lib/feedutils.rb CHANGED
@@ -23,7 +23,7 @@ require 'feedutils/parser'
23
23
  module FeedUtils
24
24
 
25
25
  def self.banner
26
- "feedutils #{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
26
+ "feedutils/#{VERSION} (with stdlib rss/#{RSS::VERSION}) on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
27
27
  end
28
28
 
29
29
  =begin
@@ -21,10 +21,11 @@ class AtomFeedBuilder
21
21
 
22
22
  def build_feed( atom_feed )
23
23
  feed = Feed.new
24
- feed.object = atom_feed
24
+ ## feed.object = atom_feed # not used for now
25
25
  feed.format = 'atom'
26
26
 
27
27
  feed.title = atom_feed.title.content
28
+ logger.debug " atom | title.content >#{atom_feed.title.content}< : #{atom_feed.title.content.class.name}"
28
29
 
29
30
  if atom_feed.updated
30
31
  # NOTE: empty updated.content e.g. used by google groups feed
@@ -39,6 +40,12 @@ class AtomFeedBuilder
39
40
  if atom_feed.generator
40
41
  feed.generator = atom_feed.generator.content
41
42
  logger.debug " atom | generator.content >#{atom_feed.generator.content}< : #{atom_feed.generator.content.class.name}"
43
+
44
+ # pp atom_feed.generator
45
+ feed.generator_version = atom_feed.generator.version
46
+ feed.generator_uri = atom_feed.generator.uri
47
+ logger.debug " atom | generator.version >#{atom_feed.generator.version}< : #{atom_feed.generator.version.class.name}"
48
+ logger.debug " atom | generator.uri >#{atom_feed.generator.uri}< : #{atom_feed.generator.uri.class.name}"
42
49
  end
43
50
 
44
51
  if atom_feed.subtitle
@@ -58,7 +65,7 @@ class AtomFeedBuilder
58
65
 
59
66
  def build_feed_item( atom_item )
60
67
  item = Item.new # Item.new
61
- item.object = atom_item
68
+ ## item.object = atom_item # not used for now
62
69
 
63
70
  item.title = atom_item.title.content
64
71
  item.url = atom_item.link.href
@@ -68,36 +75,26 @@ class AtomFeedBuilder
68
75
 
69
76
 
70
77
  if atom_item.updated
71
- ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
72
-
73
- item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
74
-
75
78
  ## change time to utc if present? why? why not?
79
+ # -- .utc.strftime( "%Y-%m-%d %H:%M" )
80
+
81
+ ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
76
82
 
83
+ item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime
77
84
  logger.debug " atom | item.updated.content >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
78
85
  end
79
-
86
+
80
87
  if atom_item.published
81
88
  ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
82
89
 
83
- item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
90
+ item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime
84
91
  logger.debug " atom | item.published.content >#{atom_item.published.content}< : #{atom_item.published.content.class.name}"
85
92
  end
86
93
 
87
- # - todo/check: does it exist in atom format?
88
- # item.published = item.updated # fix: check if publshed set
89
94
 
90
95
  item.guid = atom_item.id.content
91
-
92
96
  logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
93
97
 
94
- # todo: move logic to updater or something
95
- # - not part of normalize
96
-
97
-
98
- ## fix/todo:
99
- # also save/include full content in content
100
-
101
98
  if atom_item.content
102
99
  item.content = atom_item.content.content
103
100
  end
@@ -106,14 +103,6 @@ class AtomFeedBuilder
106
103
  item.summary = atom_item.summary.content
107
104
  end
108
105
 
109
- # let client deal w/ missing summary - move to attic - delete
110
- # text = atom_item.content.content
111
- # ## strip all html tags
112
- # text = text.gsub( /<[^>]+>/, '' )
113
- # text = text[ 0..400 ] # get first 400 chars
114
- # ## todo: check for length if > 400 add ... at the end???
115
- # item.summary = text
116
-
117
106
  item
118
107
  end # method build_feed_item
119
108
 
@@ -24,7 +24,7 @@ class RssFeedBuilder
24
24
 
25
25
  def build_feed( rss_feed )
26
26
  feed = Feed.new
27
- feed.object = rss_feed
27
+ ## feed.object = rss_feed # not used for now
28
28
  feed.format = "rss #{rss_feed.rss_version}"
29
29
 
30
30
  feed.title = rss_feed.channel.title # required
@@ -65,11 +65,14 @@ class RssFeedBuilder
65
65
  def build_feed_item( rss_item )
66
66
 
67
67
  item = Item.new
68
- item.object = rss_item
68
+ ## item.object = rss_item # not used for now
69
69
 
70
70
  item.title = rss_item.title
71
71
  item.url = rss_item.link
72
72
 
73
+ logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
74
+ logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
75
+
73
76
  ## todo:
74
77
  ## check if feedburner:origLink present - if yes, use it for url/link
75
78
  ## example: use
@@ -80,8 +83,10 @@ class RssFeedBuilder
80
83
 
81
84
  item.summary = rss_item.description
82
85
 
83
- logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
84
- logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
86
+ # check for <content:encoded>
87
+ # -- using RSS 1.0 content module in RSS 2.0
88
+ item.content = rss_item.content_encoded
89
+ logger.debug " rss | item.content_encoded[0..40]: >#{rss_item.content_encoded ? rss_item.content_encoded[0..40] : ''}< : #{rss_item.content_encoded.class.name}"
85
90
 
86
91
  # NOTE:
87
92
  # All date-times in RSS conform
@@ -94,15 +99,7 @@ class RssFeedBuilder
94
99
  item.published = rss_item.pubDate.nil? ? nil : rss_item.pubDate.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
95
100
 
96
101
  logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
97
-
98
- ## fix/todo: add
99
- ## check for <content:encoded>
100
- ## full content (example use e.g. in sitepoint/ruby/feed/)
101
- # content: item.content_encoded,
102
-
103
- # if item.content_encoded.nil?
104
- # puts " using description for content"
105
- # end
102
+
106
103
 
107
104
  ## fix/todo: check if rss_item.guid present? !!!!
108
105
  ##
@@ -1,7 +1,7 @@
1
1
  module FeedUtils
2
2
 
3
3
  class Feed
4
- attr_accessor :object
4
+ ### attr_accessor :object # not used for now
5
5
 
6
6
  attr_accessor :format # e.g. atom|rss 2.0|etc.
7
7
  attr_accessor :title
@@ -11,29 +11,15 @@ class Feed
11
11
  attr_accessor :items
12
12
 
13
13
  def summary?() @summary.nil? == false; end
14
- # no summary? try/return title2
15
- def summary() summary? ? @summary : @title2; end
16
- attr_writer :summary # e.g. description (rss)
17
-
14
+ attr_accessor :summary # e.g. description (rss)
18
15
  attr_accessor :summary_type # e.g. text|html|html-escaped
19
16
 
20
17
  def title2?() @title2.nil? == false; end
21
18
  attr_accessor :title2 # e.g. subtitle (atom)
22
-
23
19
  attr_accessor :title2_type # e.g. text|html|html-escaped
24
20
 
25
21
  def published?() @published.nil? == false; end
26
- # no published date? try/return updated or built
27
- def published
28
- if published?
29
- @published
30
- elsif updated?
31
- @updated
32
- else
33
- @built
34
- end
35
- end
36
- attr_writer :published
22
+ attr_accessor :published
37
23
 
38
24
  def updated?() @updated.nil? == false; end
39
25
  attr_accessor :updated
@@ -42,11 +28,13 @@ class Feed
42
28
  attr_accessor :built
43
29
 
44
30
  attr_accessor :generator
45
-
31
+ attr_accessor :generator_version # e.g. @version (atom)
32
+ attr_accessor :generator_uri # e.g. @uri (atom) - use alias url/link ???
46
33
 
47
34
  ## fix:
48
35
  # add pretty printer/inspect (exclude object)
49
36
 
37
+
50
38
  end # class Feed
51
39
 
52
40
  end # module FeedUtils
@@ -1,35 +1,27 @@
1
1
  module FeedUtils
2
2
 
3
3
  class Item
4
- attr_accessor :object # orginal object (e.g RSS item or ATOM entry etc.)
4
+ ## attr_accessor :object # not used for now -- original object (e.g. RSS item or ATOM entry etc.)
5
5
 
6
6
  attr_accessor :title
7
7
  attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
8
8
  attr_accessor :url # todo: rename to link (use alias) ??
9
9
 
10
10
 
11
- # no content? try/return summary
12
- def content() content? ? @content : @summary; end
13
11
  def content?() @content.nil? == false; end
14
- attr_writer :content
12
+ attr_accessor :content
13
+ attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
15
14
 
16
- attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
17
-
18
- # no summary? try/return content
19
- def summary() summary? ? @summary : @content; end
20
15
  def summary?() @summary.nil? == false; end
21
- attr_writer :summary
22
-
23
- attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
16
+ attr_accessor :summary
17
+ attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
24
18
 
25
19
  ## todo: add summary (alias description) ???
26
20
  ## todo: add author/authors
27
21
  ## todo: add category/categories
28
22
 
29
- # no published date? try/return updated
30
- def published() published? ? @published : @updated; end
31
23
  def published?() @published.nil? == false; end
32
- attr_writer :published
24
+ attr_accessor :published
33
25
 
34
26
  def updated?() @updated.nil? == false; end
35
27
  attr_accessor :updated
@@ -15,8 +15,10 @@ class Parser
15
15
  def initialize( xml )
16
16
  @xml = xml
17
17
  end
18
-
18
+
19
19
  def parse
20
+ logger.debug "using stdlib rss/#{RSS::VERSION}"
21
+
20
22
  parser = RSS::Parser.new( @xml )
21
23
  parser.do_validate = false
22
24
  parser.ignore_unknown_element = true
@@ -1,4 +1,4 @@
1
1
 
2
2
  module FeedUtils
3
- VERSION = '0.3.2'
3
+ VERSION = '0.4.0'
4
4
  end
data/test/test_atom.rb CHANGED
@@ -10,17 +10,30 @@ class TestAtom < MiniTest::Unit::TestCase
10
10
 
11
11
  def test_rubyonrails
12
12
  feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
13
- assert( feed.format == 'atom' )
13
+
14
+ assert_equal 'atom', feed.format
14
15
  end
15
16
 
16
17
  def test_railstutorial
17
18
  feed = parse_feed( 'http://feeds.feedburner.com/railstutorial?format=xml' )
18
- assert( feed.format == 'atom' )
19
+
20
+ assert_equal 'atom', feed.format
19
21
  end
20
22
 
21
23
  def test_googlegroup
22
24
  feed = parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' )
23
- assert( feed.format == 'atom' )
25
+
26
+ assert_equal 'atom', feed.format
27
+ end
28
+
29
+
30
+ def test_headius
31
+ feed = parse_feed( 'http://blog.headius.com/feeds/posts/default' )
32
+
33
+ assert_equal 'atom', feed.format
34
+ assert_equal 'Blogger', feed.generator
35
+ assert_equal 'Headius', feed.title
36
+ assert_equal 'Helping the JVM Into the 21st Century', feed.title2 # aka subtitle in atom
24
37
  end
25
38
 
26
39
  end
data/test/test_rss.rb CHANGED
@@ -11,18 +11,28 @@ class TestRss < MiniTest::Unit::TestCase
11
11
 
12
12
  def test_rubyflow
13
13
  feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
14
- assert( feed.format == 'rss 2.0' )
14
+
15
+ assert_equal 'rss 2.0', feed.format
15
16
  end
16
17
 
17
18
  def test_sitepointruby
18
19
  feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
19
- assert( feed.format == 'rss 2.0' )
20
+
21
+ assert_equal 'rss 2.0', feed.format
20
22
  end
21
23
 
22
24
  def test_lambdatheultimate
23
25
  ## check - has no item.guid - will use item.link for guid
24
26
  feed = parse_feed( 'http://lambda-the-ultimate.org/rss.xml' )
25
- assert( feed.format == 'rss 2.0' )
27
+
28
+ assert_equal 'rss 2.0', feed.format
29
+ end
30
+
31
+ def test_rubymine
32
+ # includes item/content:encoded
33
+ feed = parse_feed( 'http://feeds.feedburner.com/jetbrains_rubymine?format=xml' )
34
+
35
+ assert_equal 'rss 2.0', feed.format
26
36
  end
27
37
 
28
- end
38
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-28 00:00:00.000000000 Z
12
+ date: 2013-10-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &82615740 !ruby/object:Gem::Requirement
16
+ requirement: &74494180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *82615740
24
+ version_requirements: *74494180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &82615490 !ruby/object:Gem::Requirement
27
+ requirement: &74493890 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *82615490
35
+ version_requirements: *74493890
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &82615190 !ruby/object:Gem::Requirement
38
+ requirement: &74493660 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,9 +43,9 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *82615190
46
+ version_requirements: *74493660
47
47
  description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
48
- email: webslideshow@googlegroups.com
48
+ email: feedreader@googlegroups.com
49
49
  executables: []
50
50
  extensions: []
51
51
  extra_rdoc_files: