namelessjon-feed_me 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Jonas Nicklas
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ Feed Me
2
+ =======
3
+
4
+ Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me is deliberately minimal: it basically only does translation/cleanup from different feed formats to a consistent API.
5
+
6
+ Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
7
+
8
+ Use it like this:
9
+
10
+ file = File.read('some_feed.atom')
11
+
12
+ feed = FeedMe.parse(file)
13
+
14
+ feed.title
15
+ feed.author.name
16
+ feed.entries.each do |entry|
17
+ entry.title
18
+ entry.content
19
+ end
20
+
21
+ Check out the specs or lib/feed_me/consts.rb for the complete API.
22
+
23
+ DISCLAIMER: This is very much alpha software, use at your own risk!
data/Rakefile ADDED
@@ -0,0 +1,76 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'spec/rake/spectask'
4
+
5
# Spec files exercised by the spec:* tasks below.
file_list = FileList['spec/*_spec.rb']

namespace :spec do
  desc "Run all examples with RCov"
  Spec::Rake::SpecTask.new('rcov') do |t|
    t.spec_files = file_list
    t.rcov = true
    t.rcov_dir = "doc/coverage"
    t.rcov_opts = ['--exclude', 'spec']
  end

  desc "Generate an html report"
  Spec::Rake::SpecTask.new('report') do |t|
    t.spec_files = file_list
    t.rcov_opts = ['--exclude', 'spec']
    t.spec_opts = ["--format", "html:doc/reports/specs.html"]
    t.fail_on_error = false
  end
end

desc 'Default: run specs.'
task :default => 'spec:rcov'

# Gem identity, shared by the gemspec and the install tasks.
PLUGIN = "feed_me"
NAME = "feed_me"
VERSION = "0.0.2"
AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
EMAIL = "jonas.nicklas@gmail.com"
HOMEPAGE = "http://github.com/jnicklas/feed_me"
SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"

spec = Gem::Specification.new do |s|
  s.name = NAME
  s.platform = Gem::Platform::RUBY
  s.version = VERSION
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.authors = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE
  s.require_path = 'lib'
  s.autorequire = PLUGIN
  s.add_dependency('hpricot')
  # The specs live in spec/ (see file_list above); the previous "specs" glob
  # matched nothing, so the specs were never packaged.
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
end

begin
  require 'jeweler'
  Jeweler::Tasks.new(spec)
rescue LoadError
  # LoadError is not a StandardError, so a bare `rescue` would let a missing
  # jeweler abort the whole Rakefile instead of printing this hint.
  puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

# Builds the gem and installs the resulting package file.
task :install => [:package] do
  sh %{sudo gem install pkg/#{NAME}-#{VERSION}.gem}
end

namespace :jruby do

  desc "Run :package and install the resulting .gem with jruby"
  task :install => :package do
    # Uses this Rakefile's own VERSION and a literal sudo, matching the plain
    # :install task; the SUDO and Merb::VERSION constants are not defined here.
    sh %{sudo jruby -S gem install pkg/#{NAME}-#{VERSION}.gem --no-rdoc --no-ri}
  end

end
data/TODO ADDED
@@ -0,0 +1,5 @@
1
+ TODO:
2
+ Fix LICENSE with your name
3
+ Fix Rakefile with your name and contact info
4
+ Add your code to lib/feed_me.rb
5
+ Add your Merb rake tasks to lib/feed_me/merbtasks.rb
@@ -0,0 +1,114 @@
1
module FeedMe

  # Base class for the feed and item parsers.
  #
  # A subclass assigns a Hash of property definitions (keyed by feed format)
  # to its class-level +properties+. On construction, one reader method is
  # defined on the instance for every property of the chosen format that the
  # instance does not already respond to.
  class AbstractParser

    class << self

      attr_accessor :properties, :root_nodes

      # Instantiates the most specific parser for +format+.
      #
      # Looks under FeedMe for a constant named "<CamelizedFormat><ClassName>"
      # (e.g. :rss2 on FeedParser -> Rss2FeedParser). It is used only when it
      # is a class descending from the receiver; otherwise the receiver itself
      # is instantiated. Extra +args+ are passed on to the constructor.
      def build(xml, format, *args)
        # in a world with activesupport this would have been written as
        # format_parser = (format.to_s.camelize + self.to_s).constantize
        camelized_format = format.to_s.split('_').map { |w| w.capitalize }.join('')
        bare_class = self.to_s.split('::').last

        begin
          format_parser = FeedMe.const_get(camelized_format + bare_class)
        rescue NameError
          # no format-specific subclass exists; fall back to the receiver
          format_parser = nil
        end

        if format_parser.is_a?(Class) && format_parser.ancestors.include?(self)
          format_parser.new(xml, format, *args)
        else
          new(xml, format, *args)
        end
      end

    end

    attr_accessor :xml, :format, :properties

    alias_method :root_node, :xml

    # xml::    the node that property selectors are evaluated against
    # format:: key into the class-level properties Hash (e.g. :atom)
    def initialize(xml, format)
      self.xml = xml
      self.format = format
      self.properties = self.class.properties[self.format]

      append_methods
    end

    # Returns a Hash mapping every property name to the value of its reader.
    def to_hash
      hash = {}
      properties.each_key do |method|
        hash[method] = send(method)
      end
      hash
    end

    protected

    # Reads the text at +selector+ and splits it as "email (Name)".
    # Returns a SimpleStruct with +email+, +name+ and +uri+ (always nil);
    # email and name are nil when the element is missing.
    def fetch_rss_person(selector)
      email = name = nil
      item = fetch(selector)
      if item
        email, name = item.split(/\s+/, 2)
        name = name.match(/\((.*?)\)/).to_a[1] if name # strip parentheses
      end
      FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
    end

    # Defines a singleton reader for each property, unless the object already
    # responds to that name (e.g. a subclass implements it by hand).
    def append_methods
      properties.each do |method, p|
        next if respond_to?(method)

        block = get_proc_for_property(method, p)
        # meta programming magic
        (class << self; self; end).module_eval do
          define_method method, &block
        end
      end
    end

    # Maps a property definition to the proc implementing its reader:
    # Array:: [selector, extractor] -> fetch with that extractor
    # Hash::  nested properties -> a FeedStruct over the same root node
    # :undefined:: always nil
    # anything else:: treated as a selector, read with the default extractor
    def get_proc_for_property(method, p)
      case p
      when Array
        caching_proc(method) { fetch("/#{p[0]}", root_node, p[1].to_sym) }
      when Hash
        caching_proc(method) { FeedMe::FeedStruct.new(root_node, p) }
      when :undefined
        proc { nil }
      else
        caching_proc(method) { fetch("/#{p}", root_node) }
      end
    end

    # Wraps +block+ in a proc that memoizes its result in the instance
    # variable "@<name>". The stored value is returned on every call; the
    # previous version only returned a value on the first call and nil
    # afterwards.
    def caching_proc(name, &block)
      ivar = "@#{name}"
      proc do
        unless instance_variable_defined?(ivar)
          instance_variable_set(ivar, block.call)
        end
        instance_variable_get(ivar)
      end
    end

    # Finds the first node matching +selector+ inside +search_in+ and passes
    # it to the "extract_<method>" helper. Returns nil when no node matches or
    # no such extractor exists.
    def fetch(selector, search_in = xml, method = :inner_html)
      item = search_in.at(selector)
      return nil unless item

      extractor = "extract_#{method}"
      # The extractors are protected, so dispatch with send after an explicit
      # respond_to? check instead of relying on Object#try.
      send(extractor, item) if respond_to?(extractor, true)
    end

    def extract_inner_html(item)
      item.inner_html
    end

    def extract_href(item)
      item[:href]
    end

    # Parses the node's text with Time.parse and converts the result to UTC.
    def extract_time(item)
      Time.parse(item.inner_html).utc
    end

  end

end
@@ -0,0 +1,68 @@
1
module FeedMe

  # Selector locating the root node of each supported feed format.
  ROOT_NODES = {
    :atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
    :rss2 => "//rss[@version=2.0]/channel"
  }

  # Feed-level properties per format. A value is a selector, a
  # [selector, extractor] pair, a Hash of nested selectors, or one of the
  # markers :undefined and :special.
  FEED_PROPERTIES = {
    :atom => {
      :title       => :title,
      :updated_at  => [:updated, :time],
      :feed_id     => :id,
      :url         => ["link[@rel=alternate]", :href],
      :href        => ["link[@rel=self]", :href],
      :description => :subtitle,
      :generator   => :generator,
      :author      => {
        :email => 'author/email',
        :name  => 'author/name',
        :uri   => 'author/uri'
      },
      :entries     => :special
    },
    :rss2 => {
      :title       => :title,
      :updated_at  => [:lastBuildDate, :time],
      :feed_id     => :undefined,
      :url         => :link,
      :href        => :undefined,
      :description => :description,
      :generator   => :generator,
      :author      => :special,
      :entries     => :special
    }
  }

  # Entry-level properties per format, in the same notation as above.
  ITEM_PROPERTIES = {
    :atom => {
      :title      => :title,
      :updated_at => [:updated, :time],
      :item_id    => :id,
      :url        => ["link[@rel=alternate]", :href],
      :content    => :content,
      :author     => {
        :email => 'author/email',
        :name  => 'author/name',
        :uri   => 'author/uri'
      }
    },
    :rss2 => {
      :title      => :title,
      :updated_at => [:pubDate, :time],
      :item_id    => :guid,
      :url        => :link,
      :content    => :description,
      :author     => :special
    }
  }

  # Author properties; only defined for Atom.
  AUTHOR_PROPERTIES = {
    :atom => {
      :name  => :name,
      :uri   => :uri,
      :email => :email
    }
  }

end
@@ -0,0 +1,57 @@
1
module FeedMe

  # Feed-level parser and entry point: detects the feed format and builds the
  # matching format-specific parser.
  class FeedParser < AbstractParser

    self.properties = FEED_PROPERTIES

    class << self

      # Reads +file+ via Kernel.open and parses its contents.
      #
      # NOTE(review): Kernel.open may treat some arguments specially (for
      # example a leading "|", depending on the Ruby version), so +file+
      # should not come from untrusted input.
      def open(file)
        # The block form closes the handle once the contents have been read.
        contents = Kernel.open(file) { |io| io.read }
        self.parse(contents)
      end

      # Parses the passed feed, identifies what kind of feed it is, and
      # returns a parser object for it.
      #
      # Raises ArgumentError when the document matches none of the root
      # selectors in FeedMe::ROOT_NODES.
      def parse(feed)
        xml = Hpricot.XML(feed)

        root_node, format = self.identify(xml)
        self.build(root_node, format)
      end

      protected

      # Returns [root_node, format] for the first entry of ROOT_NODES whose
      # selector matches +xml+.
      def identify(xml)
        FeedMe::ROOT_NODES.each do |f, s|
          item = xml.at(s)
          return item, f if item
        end
        # Without this, the ROOT_NODES hash itself was returned and the
        # failure surfaced later as a NoMethodError on nil.
        raise ArgumentError, "unrecognized feed format (supported formats: #{FeedMe::ROOT_NODES.keys.join(', ')})"
      end

    end
  end

  # Atom feed: entries are the <entry> elements under the root node.
  class AtomFeedParser < FeedParser
    self.properties = FEED_PROPERTIES

    def entries
      xml.search('entry').map do |el|
        ItemParser.build(el, self.format, self)
      end
    end
  end

  # RSS2 feed: entries are the <item> elements; the author is read from
  # <managingEditor>.
  class Rss2FeedParser < FeedParser
    self.properties = FEED_PROPERTIES

    def entries
      xml.search('item').map do |el|
        ItemParser.build(el, self.format, self)
      end
    end

    def author
      fetch_rss_person("managingEditor")
    end
  end
end
@@ -0,0 +1,13 @@
1
module FeedMe

  # A parser over an explicit property Hash rather than a per-format table;
  # readers for the given properties are defined on construction.
  class FeedStruct < AbstractParser

    # xml::        node the property selectors are evaluated against
    # properties:: Hash of reader name => property definition
    def initialize(xml, properties)
      self.xml, self.properties = xml, properties
      append_methods
    end

  end

end
@@ -0,0 +1,25 @@
1
module FeedMe

  # Parser for a single feed entry; remembers the feed parser it came from.
  class ItemParser < AbstractParser

    self.properties = ITEM_PROPERTIES

    attr_accessor :feed

    # xml::    the entry's node
    # format:: feed format key
    # feed::   the feed parser that produced this entry
    def initialize(xml, format, feed)
      super(xml, format)
      self.feed = feed
    end

  end

  # RSS2 entry: the author is read from the <author> element.
  class Rss2ItemParser < ItemParser

    self.properties = ITEM_PROPERTIES

    def author
      fetch_rss_person("author")
    end

  end
end
@@ -0,0 +1,6 @@
1
# Placeholder rake tasks for the plugin.
namespace :feed_me do
  desc "Do something for feed_me"
  task(:default) { puts "feed_me doesn't do anything" }
end
@@ -0,0 +1,15 @@
1
module FeedMe

  # Read-only value object: every key of the Hash given to the constructor
  # becomes a reader on that one instance, returning the corresponding value.
  class SimpleStruct

    def initialize(hash = {})
      # Define the readers on the singleton class so that they belong to
      # this instance only.
      metaclass = (class << self; self; end)
      hash.each do |method, result|
        metaclass.send(:define_method, method) { result }
      end
    end

  end

end
data/lib/feed_me.rb ADDED
@@ -0,0 +1,34 @@
1
+ # make sure we're running inside Merb
2
+ if defined?(Merb::Plugins)
3
+ dependency 'hpricot'
4
+ else
5
+ require 'rubygems'
6
+ require 'hpricot'
7
+ end
8
+ require 'time'
9
+
10
# Fallback definition of Object#try, installed only when nothing else has
# provided one.
unless nil.respond_to?(:try)
  # the ultimate duck
  class Object
    # Sends +method+ with +args+ to the receiver and returns the result, or
    # nil if a NoMethodError is raised while doing so.
    def try(method, *args)
      begin
        self.send(method, *args)
      rescue NoMethodError
        nil
      end
    end
  end
end
20
+
21
module FeedMe
  # Parses feed markup given as a String; delegates to FeedParser.parse.
  def self.parse(feed)
    FeedMe::FeedParser.parse(feed)
  end

  # Opens +file+ and parses its contents; delegates to FeedParser.open.
  # (This previously called FeedParser.parse, which treated the file name
  # itself as the feed markup.)
  def self.open(file)
    FeedMe::FeedParser.open(file)
  end
end
30
+
31
# Load the library files in this order; the parser classes reference
# constants and base classes defined by the earlier files.
%w(consts abstract_parser feed_struct simple_struct feed_parser item_parser).each do |f|
  require File.join(File.dirname(__FILE__), 'feed_me', f)
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: namelessjon-feed_me
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Jonas Nicklas
8
+ - Jonathan Stott
9
+ autorequire: feed_me
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2008-11-16 00:00:00 -08:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: hpricot
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Nice and simple RSS and atom feed parsing built on hpricot
26
+ email: jonas.nicklas@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README
33
+ - LICENSE
34
+ - TODO
35
+ files:
36
+ - LICENSE
37
+ - README
38
+ - Rakefile
39
+ - TODO
40
+ - lib/feed_me
41
+ - lib/feed_me/abstract_parser.rb
42
+ - lib/feed_me/consts.rb
43
+ - lib/feed_me/feed_parser.rb
44
+ - lib/feed_me/feed_struct.rb
45
+ - lib/feed_me/item_parser.rb
46
+ - lib/feed_me/merbtasks.rb
47
+ - lib/feed_me/simple_struct.rb
48
+ - lib/feed_me.rb
49
+ has_rdoc: true
50
+ homepage: http://github.com/jnicklas/feed_me
51
+ post_install_message:
52
+ rdoc_options: []
53
+
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.2.0
72
+ signing_key:
73
+ specification_version: 2
74
+ summary: Nice and simple RSS and atom feed parsing built on hpricot
75
+ test_files: []
76
+