feedutils 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -6,6 +6,8 @@ feedutils gems - web feed parser and normalizer (RSS 2.0, Atom, etc.)
6
6
  * bugs :: [github.com/rubylibs/feedutils/issues](https://github.com/rubylibs/feedutils/issues)
7
7
  * gem :: [rubygems.org/gems/feedutils](https://rubygems.org/gems/feedutils)
8
8
  * rdoc :: [rubydoc.info/gems/feedutils](http://rubydoc.info/gems/feedutils)
9
+ * forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
10
+
9
11
 
10
12
  ## Usage
11
13
 
@@ -13,8 +15,13 @@ TBD
13
15
 
14
16
  ## Alternatives
15
17
 
18
+ - [`syndication`](http://syndication.rubyforge.org) [(Source)](https://github.com/lpar/syndication) - by Mathew (aka lpar); RSS 1.0, 2.0, Atom, and understands namespaces; optional support for Dublin Core, iTunes/podcast feeds, and RSS 1.0 Syndication and Content modules
19
+ - [`simple-rss`](http://rubyforge.org/projects/simple-rss)
20
+ - [`feedtools`](http://rubyforge.org/projects/feedtools)
21
+
16
22
  TBD
17
23
 
24
+
18
25
  ## License
19
26
 
20
27
  The `feedutils` scripts are dedicated to the public domain.
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'feedutils' do
11
11
  self.urls = ['https://github.com/rubylibs/feedutils']
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'webslideshow@googlegroups.com'
14
+ self.email = 'feedreader@googlegroups.com'
15
15
 
16
16
  # switch extension to .markdown for gihub formatting
17
17
  self.readme_file = 'README.md'
data/lib/feedutils.rb CHANGED
@@ -23,7 +23,7 @@ require 'feedutils/parser'
23
23
  module FeedUtils
24
24
 
25
25
  def self.banner
26
- "feedutils #{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
26
+ "feedutils/#{VERSION} (with stdlib rss/#{RSS::VERSION}) on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
27
27
  end
28
28
 
29
29
  =begin
@@ -21,10 +21,11 @@ class AtomFeedBuilder
21
21
 
22
22
  def build_feed( atom_feed )
23
23
  feed = Feed.new
24
- feed.object = atom_feed
24
+ ## feed.object = atom_feed # not use for now
25
25
  feed.format = 'atom'
26
26
 
27
27
  feed.title = atom_feed.title.content
28
+ logger.debug " atom | title.content >#{atom_feed.title.content}< : #{atom_feed.title.content.class.name}"
28
29
 
29
30
  if atom_feed.updated
30
31
  # NOTE: empty updated.content e.g. used by google groups feed
@@ -39,6 +40,12 @@ class AtomFeedBuilder
39
40
  if atom_feed.generator
40
41
  feed.generator = atom_feed.generator.content
41
42
  logger.debug " atom | generator.content >#{atom_feed.generator.content}< : #{atom_feed.generator.content.class.name}"
43
+
44
+ # pp atom_feed.generator
45
+ feed.generator_version = atom_feed.generator.version
46
+ feed.generator_uri = atom_feed.generator.uri
47
+ logger.debug " atom | generator.version >#{atom_feed.generator.version}< : #{atom_feed.generator.version.class.name}"
48
+ logger.debug " atom | generator.uri >#{atom_feed.generator.uri}< : #{atom_feed.generator.uri.class.name}"
42
49
  end
43
50
 
44
51
  if atom_feed.subtitle
@@ -58,7 +65,7 @@ class AtomFeedBuilder
58
65
 
59
66
  def build_feed_item( atom_item )
60
67
  item = Item.new # Item.new
61
- item.object = atom_item
68
+ ## item.object = atom_item # not used for now
62
69
 
63
70
  item.title = atom_item.title.content
64
71
  item.url = atom_item.link.href
@@ -68,36 +75,26 @@ class AtomFeedBuilder
68
75
 
69
76
 
70
77
  if atom_item.updated
71
- ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
72
-
73
- item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
74
-
75
78
  ## change time to utc if present? why? why not?
79
+ # -- .utc.strftime( "%Y-%m-%d %H:%M" )
80
+
81
+ ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
76
82
 
83
+ item.updated = atom_item.updated.content.nil? ? nil : atom_item.updated.content.to_datetime
77
84
  logger.debug " atom | item.updated.content >#{atom_item.updated.content}< : #{atom_item.updated.content.class.name}"
78
85
  end
79
-
86
+
80
87
  if atom_item.published
81
88
  ## convert from time to to_datetime (avoid errors on windows w/ builtin rss lib)
82
89
 
83
- item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
90
+ item.published = atom_item.published.content.nil? ? nil : atom_item.published.content.to_datetime
84
91
  logger.debug " atom | item.published.content >#{atom_item.published.content}< : #{atom_item.published.content.class.name}"
85
92
  end
86
93
 
87
- # - todo/check: does it exist in atom format?
88
- # item.published = item.updated # fix: check if publshed set
89
94
 
90
95
  item.guid = atom_item.id.content
91
-
92
96
  logger.debug " atom | item.id.content: >#{atom_item.id.content}< : #{atom_item.id.content.class.name}"
93
97
 
94
- # todo: move logic to updater or something
95
- # - not part of normalize
96
-
97
-
98
- ## fix/todo:
99
- # also save/include full content in content
100
-
101
98
  if atom_item.content
102
99
  item.content = atom_item.content.content
103
100
  end
@@ -106,14 +103,6 @@ class AtomFeedBuilder
106
103
  item.summary = atom_item.summary.content
107
104
  end
108
105
 
109
- # let client deal w/ missing summary - move to attic - delete
110
- # text = atom_item.content.content
111
- # ## strip all html tags
112
- # text = text.gsub( /<[^>]+>/, '' )
113
- # text = text[ 0..400 ] # get first 400 chars
114
- # ## todo: check for length if > 400 add ... at the end???
115
- # item.summary = text
116
-
117
106
  item
118
107
  end # method build_feed_item
119
108
 
@@ -24,7 +24,7 @@ class RssFeedBuilder
24
24
 
25
25
  def build_feed( rss_feed )
26
26
  feed = Feed.new
27
- feed.object = rss_feed
27
+ ## feed.object = rss_feed # not use for now
28
28
  feed.format = "rss #{rss_feed.rss_version}"
29
29
 
30
30
  feed.title = rss_feed.channel.title # required
@@ -65,11 +65,14 @@ class RssFeedBuilder
65
65
  def build_feed_item( rss_item )
66
66
 
67
67
  item = Item.new
68
- item.object = rss_item
68
+ ## item.object = rss_item # not use for now
69
69
 
70
70
  item.title = rss_item.title
71
71
  item.url = rss_item.link
72
72
 
73
+ logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
74
+ logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
75
+
73
76
  ## todo:
74
77
  ## check if feedburner:origLink present - if yes, use it for url/link
75
78
  ## example: use
@@ -80,8 +83,10 @@ class RssFeedBuilder
80
83
 
81
84
  item.summary = rss_item.description
82
85
 
83
- logger.debug " rss | item.title: >#{rss_item.title}< : #{rss_item.title.class.name}"
84
- logger.debug " rss | item.link: >#{rss_item.link}< : #{rss_item.link.class.name}"
86
+ # check for <content:encoded>
87
+ # -- using RSS 1.0 content module in RSS 2.0
88
+ item.content = rss_item.content_encoded
89
+ logger.debug " rss | item.content_encoded[0..40]: >#{rss_item.content_encoded ? rss_item.content_encoded[0..40] : ''}< : #{rss_item.content_encoded.class.name}"
85
90
 
86
91
  # NOTE:
87
92
  # All date-times in RSS conform
@@ -94,15 +99,7 @@ class RssFeedBuilder
94
99
  item.published = rss_item.pubDate.nil? ? nil : rss_item.pubDate.to_datetime # .utc.strftime( "%Y-%m-%d %H:%M" )
95
100
 
96
101
  logger.debug " rss | item.pubDate: >#{rss_item.pubDate}< : #{rss_item.pubDate.class.name}"
97
-
98
- ## fix/todo: add
99
- ## check for <content:encoded>
100
- ## full content (example use e.g. in sitepoint/ruby/feed/)
101
- # content: item.content_encoded,
102
-
103
- # if item.content_encoded.nil?
104
- # puts " using description for content"
105
- # end
102
+
106
103
 
107
104
  ## fix/todo: check if rss_item.guid present? !!!!
108
105
  ##
@@ -1,7 +1,7 @@
1
1
  module FeedUtils
2
2
 
3
3
  class Feed
4
- attr_accessor :object
4
+ ### attr_accessor :object # not use for now
5
5
 
6
6
  attr_accessor :format # e.g. atom|rss 2.0|etc.
7
7
  attr_accessor :title
@@ -11,29 +11,15 @@ class Feed
11
11
  attr_accessor :items
12
12
 
13
13
  def summary?() @summary.nil? == false; end
14
- # no summary? try/return title2
15
- def summary() summary? ? @summary : @title2; end
16
- attr_writer :summary # e.g. description (rss)
17
-
14
+ attr_accessor :summary # e.g. description (rss)
18
15
  attr_accessor :summary_type # e.g. text|html|html-escaped
19
16
 
20
17
  def title2?() @title2.nil? == false; end
21
18
  attr_accessor :title2 # e.g. subtitle (atom)
22
-
23
19
  attr_accessor :title2_type # e.g. text|html|html-escaped
24
20
 
25
21
  def published?() @published.nil? == false; end
26
- # no published date? try/return updated or built
27
- def published
28
- if published?
29
- @published
30
- elsif updated?
31
- @updated
32
- else
33
- @built
34
- end
35
- end
36
- attr_writer :published
22
+ attr_accessor :published
37
23
 
38
24
  def updated?() @updated.nil? == false; end
39
25
  attr_accessor :updated
@@ -42,11 +28,13 @@ class Feed
42
28
  attr_accessor :built
43
29
 
44
30
  attr_accessor :generator
45
-
31
+ attr_accessor :generator_version # e.g. @version (atom)
32
+ attr_accessor :generator_uri # e.g. @uri (atom) - use alias url/link ???
46
33
 
47
34
  ## fix:
48
35
  # add pretty printer/inspect (exclude object)
49
36
 
37
+
50
38
  end # class Feed
51
39
 
52
40
  end # module FeedUtils
@@ -1,35 +1,27 @@
1
1
  module FeedUtils
2
2
 
3
3
  class Item
4
- attr_accessor :object # orginal object (e.g RSS item or ATOM entry etc.)
4
+ ## attr_accessor :object # not used for now -- orginal object (e.g RSS item or ATOM entry etc.)
5
5
 
6
6
  attr_accessor :title
7
7
  attr_accessor :title_type # optional for now (text|html|html-escaped) - not yet set
8
8
  attr_accessor :url # todo: rename to link (use alias) ??
9
9
 
10
10
 
11
- # no content? try/return summary
12
- def content() content? ? @content : @summary; end
13
11
  def content?() @content.nil? == false; end
14
- attr_writer :content
12
+ attr_accessor :content
13
+ attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
15
14
 
16
- attr_accessor :content_type # optional for now (text|html|html-escaped|binary-base64) - not yet set
17
-
18
- # no summary? try/return content
19
- def summary() summary? ? @summary : @content; end
20
15
  def summary?() @summary.nil? == false; end
21
- attr_writer :summary
22
-
23
- attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
16
+ attr_accessor :summary
17
+ attr_accessor :summary_type # optional for now (text|html|html-escaped) - not yet set
24
18
 
25
19
  ## todo: add summary (alias description) ???
26
20
  ## todo: add author/authors
27
21
  ## todo: add category/categories
28
22
 
29
- # no published date? try/return updated
30
- def published() published? ? @published : @updated; end
31
23
  def published?() @published.nil? == false; end
32
- attr_writer :published
24
+ attr_accessor :published
33
25
 
34
26
  def updated?() @updated.nil? == false; end
35
27
  attr_accessor :updated
@@ -15,8 +15,10 @@ class Parser
15
15
  def initialize( xml )
16
16
  @xml = xml
17
17
  end
18
-
18
+
19
19
  def parse
20
+ logger.debug "using stdlib rss/#{RSS::VERSION}"
21
+
20
22
  parser = RSS::Parser.new( @xml )
21
23
  parser.do_validate = false
22
24
  parser.ignore_unknown_element = true
@@ -1,4 +1,4 @@
1
1
 
2
2
  module FeedUtils
3
- VERSION = '0.3.2'
3
+ VERSION = '0.4.0'
4
4
  end
data/test/test_atom.rb CHANGED
@@ -10,17 +10,30 @@ class TestAtom < MiniTest::Unit::TestCase
10
10
 
11
11
  def test_rubyonrails
12
12
  feed = parse_feed( 'http://weblog.rubyonrails.org/feed/atom.xml' )
13
- assert( feed.format == 'atom' )
13
+
14
+ assert_equal 'atom', feed.format
14
15
  end
15
16
 
16
17
  def test_railstutorial
17
18
  feed = parse_feed( 'http://feeds.feedburner.com/railstutorial?format=xml' )
18
- assert( feed.format == 'atom' )
19
+
20
+ assert_equal 'atom', feed.format
19
21
  end
20
22
 
21
23
  def test_googlegroup
22
24
  feed = parse_feed( 'https://groups.google.com/forum/feed/beerdb/topics/atom.xml?num=15' )
23
- assert( feed.format == 'atom' )
25
+
26
+ assert_equal 'atom', feed.format
27
+ end
28
+
29
+
30
+ def test_headius
31
+ feed = parse_feed( 'http://blog.headius.com/feeds/posts/default' )
32
+
33
+ assert_equal 'atom', feed.format
34
+ assert_equal 'Blogger', feed.generator
35
+ assert_equal 'Headius', feed.title
36
+ assert_equal 'Helping the JVM Into the 21st Century', feed.title2 # aka subtitle in atom
24
37
  end
25
38
 
26
39
  end
data/test/test_rss.rb CHANGED
@@ -11,18 +11,28 @@ class TestRss < MiniTest::Unit::TestCase
11
11
 
12
12
  def test_rubyflow
13
13
  feed = parse_feed( 'http://feeds.feedburner.com/Rubyflow?format=xml' )
14
- assert( feed.format == 'rss 2.0' )
14
+
15
+ assert_equal 'rss 2.0', feed.format
15
16
  end
16
17
 
17
18
  def test_sitepointruby
18
19
  feed = parse_feed( 'http://www.sitepoint.com/ruby/feed/' )
19
- assert( feed.format == 'rss 2.0' )
20
+
21
+ assert_equal 'rss 2.0', feed.format
20
22
  end
21
23
 
22
24
  def test_lambdatheultimate
23
25
  ## check - has no item.guid - will use item.link for guid
24
26
  feed = parse_feed( 'http://lambda-the-ultimate.org/rss.xml' )
25
- assert( feed.format == 'rss 2.0' )
27
+
28
+ assert_equal 'rss 2.0', feed.format
29
+ end
30
+
31
+ def test_rubymine
32
+ # includes item/content:encoded
33
+ feed = parse_feed( 'http://feeds.feedburner.com/jetbrains_rubymine?format=xml' )
34
+
35
+ assert_equal 'rss 2.0', feed.format
26
36
  end
27
37
 
28
- end
38
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-28 00:00:00.000000000 Z
12
+ date: 2013-10-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &82615740 !ruby/object:Gem::Requirement
16
+ requirement: &74494180 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *82615740
24
+ version_requirements: *74494180
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &82615490 !ruby/object:Gem::Requirement
27
+ requirement: &74493890 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *82615490
35
+ version_requirements: *74493890
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &82615190 !ruby/object:Gem::Requirement
38
+ requirement: &74493660 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,9 +43,9 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *82615190
46
+ version_requirements: *74493660
47
47
  description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
48
- email: webslideshow@googlegroups.com
48
+ email: feedreader@googlegroups.com
49
49
  executables: []
50
50
  extensions: []
51
51
  extra_rdoc_files: