feed-abstract 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
4
+ Gemfile.lock
5
+ *.swp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in feed-abstract.gemspec
gemspec
data/README.rdoc ADDED
@@ -0,0 +1,23 @@
1
+ = Feed::Abstract
2
+
3
+ Feed::Abstract creates a common object graph for RSS, Atom, and RDF feeds using the classes returned by RSS::Parser.
4
+
5
+ == Installation
6
+
7
+ gem install feed-abstract
8
+
9
+ == Usage
10
+
11
+ See Feed::Abstract::Feed for basic examples. Also see the Feed::Abstract::Channel and Feed::Abstract::Item namespaces.
12
+
13
+ == Author
14
+
15
+ Dan Collis Puro, Berkman Center for Internet & Society
16
+ djcp@cyber.law.harvard.edu
17
+
18
+ == License & Copyright
19
+
20
+ GPLv3
21
+
22
+ 2011 President and Fellows of Harvard College
23
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "feed-abstract/version"
4
+
5
# Gem metadata for feed-abstract. The file lists are taken from whatever git
# currently tracks, so the gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "feed-abstract"
  spec.version  = Feed::Abstract::VERSION
  spec.platform = Gem::Platform::RUBY

  # Ownership
  spec.authors  = ["Daniel Collis-Puro"]
  spec.email    = ["djcp@cyber.law.harvard.edu"]
  spec.homepage = "https://github.com/berkmancenter/feed-abstract"

  # Human-readable blurbs
  spec.summary     = %q{Abstracts RSS/Atom/RDF parsing features from the ruby standard lib into a common object graph.}
  spec.description = %q{This library creates a common object graph for the RSS/Atom/RDF parsing classes in the ruby standard library. This allows you parse different feed formats and get back the same (or at least a very similar) set of results - item authors are accessible under an "author(s)" attribute, categories/tags/subjects are accessible under "category(ies)" attributes, etc. We do our best to make sure the data makes sense, too - RSS items lack an "updated" attribute, so we use "pubDate" to populate it. }

  # Packaged files: everything tracked by git, with tests and executables
  # picked out by path.
  tracked_files     = `git ls-files`.split("\n")
  tracked_tests     = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binaries  = `git ls-files -- bin/*`.split("\n")
  spec.files        = tracked_files
  spec.test_files   = tracked_tests
  spec.executables  = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Documentation
  spec.rdoc_options     = ["--charset=UTF-8"]
  spec.extra_rdoc_files = ["README.rdoc"]

  # Development-only dependencies
  spec.add_development_dependency "rspec", "2.6"
end
@@ -0,0 +1,33 @@
1
+ # encoding: UTF-8
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
4
+
5
+ require 'rss'
6
+
7
+ require "feed-abstract/version"
8
+ require 'feed-abstract/mixins'
9
+
10
+ require 'feed-abstract/channel/atom'
11
+ require 'feed-abstract/channel/rss'
12
+ require 'feed-abstract/channel/rdf'
13
+
14
+ require 'feed-abstract/items/rss'
15
+ require 'feed-abstract/items/atom'
16
+ require 'feed-abstract/items/rdf'
17
+
18
+ require 'feed-abstract/item/rss'
19
+ require 'feed-abstract/item/atom'
20
+ require 'feed-abstract/item/rdf'
21
+
22
+ require 'feed-abstract/feed'
23
+
24
+
25
+ #RSS::Rss::Channel::Item.class_eval{
26
+ # def foobar
27
+ # 'asdfasdf'
28
+ # end
29
+ #}
30
+
31
+ #RSS::Rss::Channel::Item.instance_eval{
32
+ # install_get_attribute('category', '', false )
33
+ #}
@@ -0,0 +1,47 @@
1
+ # encoding: UTF-8
2
+
3
module Feed
  class Abstract

    # You don't want this class. You want Feed::Abstract::Channel::Atom, Feed::Abstract::Channel::RSS or Feed::Abstract::Channel::RDF.
    class Channel

      # Channel-level readers for an Atom feed object. Each reader defined
      # here returns the +content+ of the matching feed element, or an empty
      # string when the feed does not carry that element.
      #
      # See Feed::AbstractMixins::Atom for more instance methods.
      class Atom
        include Feed::AbstractMixins::Atom

        # The wrapped feed object; +feed+ and +source+ return the same object.
        attr_reader :feed, :source

        # === Parameters
        # * feed - the feed object to wrap. The readers below call
        #   +subtitle+, +generator+, +icon+ and +logo+ on it.
        def initialize(feed)
          @feed = @source = feed
        end

        # The feed subtitle as a string, or '' when the feed has no subtitle.
        # The nil guard mirrors the other readers in this class so that a
        # feed without a subtitle does not raise NoMethodError.
        def description
          return '' if @feed.subtitle.nil?
          @feed.subtitle.content
        end
        alias :subtitle :description

        # A string representing the application that created this feed.
        def generator
          return '' if @feed.generator.nil?
          @feed.generator.content
        end

        # A URL (perhaps with domain, depending on input) representing an icon for the feed.
        def icon
          return '' if @feed.icon.nil?
          @feed.icon.content
        end

        # A URL (perhaps with domain, depending on input) representing a logo for the feed.
        def logo
          return '' if @feed.logo.nil?
          @feed.logo.content
        end

      end
    end


  end
end
@@ -0,0 +1,63 @@
1
+ # encoding: UTF-8
2
+
3
module Feed
  class Abstract
    class Channel

      # Channel-level readers for an RDF feed. Inherits from Channel::RSS and
      # overrides the readers below so that they read the Dublin Core (dc:*)
      # elements and RDF attributes of <tt>@feed.channel</tt>.
      class RDF < RSS

        # The authors list (from the dc:publisher elements) as an array of
        # strings. The text of each element is extracted with +content+, the
        # same way #categories unwraps dc:subject.
        def authors
          @feed.channel.dc_publishers.collect{|publisher| publisher.content}
        end

        # The authors list as a string, joined with a comma.
        def author
          return '' if self.authors.empty?
          self.authors.join(', ')
        end

        # The generator of this feed. RDF has no generator element, so this
        # only recognises Connotea by looking at the channel's +about+ value;
        # anything else (including a missing +about+) yields ''.
        def generator
          return '' unless @feed.channel.respond_to?(:about)
          about = @feed.channel.about
          # +about+ can be present as a method yet hold nil.
          return 'Connotea' if !about.nil? && about.match(/connotea/i)
          ''
        end

        # The category list (from the dc:subject elements) as an array.
        def categories
          return [] if @feed.channel.dc_subjects.empty?
          @feed.channel.dc_subjects.collect{|c| c.content}
        end

        # The category list as a string, joined with a comma.
        def category
          return '' if self.categories.empty?
          self.categories.join(', ')
        end

        # A URL (with or without domain depending on input) to a icon representing this feed.
        # Returns '' when the channel has no image.
        def icon
          return '' if @feed.channel.image.nil?
          @feed.channel.image.resource
        end
        alias :logo :icon

        # Copyright info (dc:rights), or '' when absent.
        def rights
          return '' if @feed.channel.dc_rights.nil?
          @feed.channel.dc_rights
        end

        # A Time object representing when this feed was updated, or at least "dated" according to the RDF spec.
        # Returns '' when the channel carries no dc:date.
        def updated
          return '' if @feed.channel.dc_date.nil?
          @feed.channel.dc_date
        end

      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ # encoding: UTF-8
2
+
3
module Feed
  class Abstract
    class Channel

      # Channel-level readers for an RSS feed. Every reader works on
      # <tt>@feed.channel</tt> and, unless noted otherwise, returns an empty
      # string (or empty array) when the underlying element is missing.
      class RSS
        include Feed::AbstractMixins::RSS

        # The wrapped feed object; +feed+ and +source+ return the same object.
        attr_reader :feed, :source

        # === Parameters
        # * feed - the feed object to wrap. It must respond to +channel+.
        def initialize(feed)
          @feed = @source = feed
        end

        # The channel title, returned as-is.
        def title
          @feed.channel.title
        end

        # The channel description, returned as-is.
        def description
          @feed.channel.description
        end
        alias :subtitle :description

        # The generator of this feed as a string. Sometimes a URL, sometimes a string (e.g. the application name).
        # Two producers are normalised to a fixed name: a generator mentioning
        # wordpress.org becomes 'WordPress', and a channel link pointing at
        # www.delicious.com becomes 'Delicious'.
        def generator
          raw_generator = @feed.channel.generator
          channel_link = @feed.channel.link
          return 'WordPress' if !raw_generator.nil? && raw_generator.match(/wordpress\.org/i)
          # The link may be nil (see #link), so guard before matching.
          return 'Delicious' if !channel_link.nil? && channel_link.match(/www\.delicious\.com/i)
          raw_generator.nil? ? '' : raw_generator
        end

        # The channel link, or '' when absent.
        def link
          return '' if @feed.channel.link.nil?
          @feed.channel.link
        end

        # Copyright info: the RSS copyright and dc:rights values joined with a
        # space, skipping whichever is missing.
        def rights
          return '' if @feed.channel.copyright.nil? && @feed.channel.dc_rights.nil?
          [@feed.channel.copyright,@feed.channel.dc_rights].compact.join(' ')
        end

        # A Time object (the channel's lastBuildDate), or '' when absent.
        def updated
          return '' if @feed.channel.lastBuildDate.nil?
          @feed.channel.lastBuildDate
        end

        # A globally unique ID for this feed. A URL in this case.
        def guid
          return '' if @feed.channel.link.nil?
          @feed.channel.link
        end

        # The authors (a merge of the RSS managingEditor and dc:publisher elements) as an array.
        # dc:publisher elements are unwrapped with +content+, a missing
        # managingEditor is dropped, and duplicate names are removed.
        def authors
          publishers = @feed.channel.dc_publishers.collect{|publisher| publisher.content}
          [@feed.channel.managingEditor, publishers].flatten.compact.uniq
        end

        # The author list joined with a comma.
        def author
          return '' if self.authors.empty?
          self.authors.join(', ')
        end

        # The category list (a merge of the RSS category and dc:subject elements) as an array.
        # The text is extracted before de-duplicating so that the same
        # category appearing in both element kinds is listed once.
        def categories
          [@feed.channel.categories, @feed.channel.dc_subjects].flatten.collect{|c| c.content}.uniq
        end

        # The category list as a string, joined with a comma. Built from
        # #categories so both readers always describe the same set.
        def category
          return '' if self.categories.empty?
          self.categories.join(', ')
        end

        # A URL to an icon representing this feed, or '' when the channel has no image.
        def icon
          return '' if @feed.channel.image.nil?
          @feed.channel.image.url
        end
        alias :logo :icon

      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # encoding: UTF-8
2
+
3
module Feed
  # Raised by Feed::Abstract::Feed.new when RSS::Parser returns nil for the
  # given input. Inherits from StandardError so that a plain +rescue+ catches
  # it.
  class ParserError < StandardError
  end

  class Abstract

    # Feed::Abstract::Feed is the main class. It invokes RSS::Parser and negotiates which of the Feed::Abstract::Channel and Feed::Abstract::Item classes get dispatched to normalize the object graph of the feed you're parsing.
    class Feed
      # Option values used for any key the caller does not supply.
      DEFAULT_OPTIONS = {:do_validate => false, :ignore_unknown_element => true}.freeze

      attr_reader :channel, :raw_feed, :items

      # === Parameters
      # * xml - a string or object instance that responds to <b>read</b>
      # * :do_validate - whether or not the feed should be validated. Passed through to RSS::Parser. Defaults to false.
      # * :ignore_unknown_element - passed through to RSS::Parser. Defaults to true.
      #
      # Options that are not given keep their default, so passing only
      # <tt>:do_validate => true</tt> still ignores unknown elements.
      #
      # === Returns
      # An object with three attributes:
      # * channel - an instance of Feed::Abstract::Channel matching the type of feed we recognized
      # * items - an array of items matching the type of feed we recognized.
      # * raw_feed - the raw feed object returned by RSS::Parser, which might include RSS::Atom::Feed, RSS::RDF, or RSS::Rss
      # You will most likely be using the <b>channel</b> and <b>items</b> attributes.
      #
      # === Notes
      # If RSS::Parser returns nil for the input, we'll raise a Feed::ParserError.
      # Exceptions raised by RSS::Parser itself are not wrapped.
      #
      # == Examples
      #
      #  f = Feed::Abstract::Feed.new(File.open('/home/foo/xml/feed.rss2'))
      #  puts f.channel.title
      #  puts f.channel.description
      #
      #  f.items.each do|item|
      #    puts item.title
      #    puts item.link
      #  end
      #
      #  f = Feed::Abstract::Feed.new(File.open('/home/foo/xml/feed.atom'))
      #  puts f.channel.generator
      #
      #  puts "All tags / categories / subjects in this feed: " + f.items.collect{|i| i.categories}.flatten.uniq.sort.join(', ')
      #
      #  f = Feed::Abstract::Feed.new(Net::HTTP.get(URI.parse('http://rss.slashdot.org/Slashdot/slashdot')))
      #  puts f.items.collect{|i| i.link}
      #
      def initialize(xml = nil, options = {})
        options = DEFAULT_OPTIONS.merge(options || {})
        input = xml.respond_to?(:read) ? xml.read : xml
        @raw_feed = RSS::Parser.parse(input, options[:do_validate], options[:ignore_unknown_element])
        # The leading :: is required: inside this class a bare +Feed+ resolves
        # to Feed::Abstract::Feed, which has no ParserError constant.
        raise ::Feed::ParserError if @raw_feed.nil?
        negotiate_channel_class
      end

      private

      #Here's an easy extension point for custom parsers.
      # Picks the Channel and Items wrappers from the exact class of the
      # parsed feed; anything that is neither Atom nor RDF is treated as RSS.
      def negotiate_channel_class
        if @raw_feed.instance_of?(RSS::Atom::Feed)
          @channel = Channel::Atom.new(@raw_feed)
          @items = Items::Atom.new(@raw_feed)
        elsif @raw_feed.instance_of?(RSS::RDF)
          @channel = Channel::RDF.new(@raw_feed)
          @items = Items::RDF.new(@raw_feed)
        else
          @channel = Channel::RSS.new(@raw_feed)
          @items = Items::RSS.new(@raw_feed)
        end
      end

    end
  end
end
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
module Feed
  class Abstract

    # You don't want this class. You want Feed::Abstract::Item::Atom, Feed::Abstract::Item::RSS or Feed::Abstract::Item::RDF.
    class Item

      # Readers for a single Atom entry. Missing elements come back as an
      # empty string (or an empty array for list readers).
      #
      # See Feed::AbstractMixins::Atom for more instance methods.
      class Atom
        include Feed::AbstractMixins::Atom

        # The wrapped entry object; +item+ and +source+ return the same object.
        attr_reader :item, :source

        # === Parameters
        # * item - the entry object to wrap.
        def initialize(item)
          @source = item
          @item = item
        end

        # The full content of the item, most likely html.
        def content
          body = @item.content
          body.nil? ? '' : body.content
        end

        # The contributor list as an array of names.
        def contributors
          people = @item.contributors
          return [] if people.empty?
          people.map { |person| person.name.content }
        end

        # The contributor list as a string joined with a comma.
        def contributor
          people = @item.contributors
          return '' if people.empty?
          names = people.map { |person| person.name.content }
          names.join(', ')
        end

        # The entry summary, or '' when the entry has none.
        def summary
          blurb = @item.summary
          blurb.nil? ? '' : blurb.content
        end

        # A Time object, or '' when the entry has no published element.
        def published
          stamp = @item.published
          stamp.nil? ? '' : stamp.content
        end

      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # encoding: UTF-8
2
+
3
module Feed
  class Abstract
    class Item

      # Readers for a single RDF item. Inherits from Item::RSS and overrides
      # the readers below to use the item's Dublin Core (dc:*) elements and
      # its +about+ value.
      class RDF < RSS

        # The author list (from the dc:creator element) as an array.
        def authors
          creators = @item.dc_creators
          return [] if creators.empty?
          creators.map { |creator| creator.content }
        end

        # The author list as a string, joined with a comma.
        def author
          names = self.authors
          names.empty? ? '' : names.join(', ')
        end

        # The category list (parsed from the dc:subject element) as an array.
        def categories
          subjects = @item.dc_subjects
          subjects.empty? ? [] : subjects.map { |subject| subject.content }
        end

        # The category list as a string, joined with a comma.
        def category
          labels = self.categories
          labels.empty? ? '' : labels.join(', ')
        end

        # A Time object (the item's dc:date), or '' when absent.
        def updated
          stamp = @item.dc_date
          stamp.nil? ? '' : stamp
        end
        alias :published :updated

        # A globally unique id, in this case probably a URL.
        def guid
          identifier = @item.about
          identifier.nil? ? '' : identifier
        end


      end
    end
  end
end