namelessjon-feed_me 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Jonas Nicklas
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,23 @@
1
+ Feed Me
2
+ =======
3
+
4
+ Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me basically only does translation/cleanup from different feed formats to a consistent API. It is designed to be minimal.
5
+
6
+ Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
7
+
8
+ Use it like this:
9
+
10
+ file = File.read('some_feed.atom')
11
+
12
+ feed = FeedMe.parse(file)
13
+
14
+ feed.title
15
+ feed.author.name
16
+ feed.entries.each do |entry|
17
+ entry.title
18
+ entry.content
19
+ end
20
+
21
+ Check out the specs or lib/feed_me/consts.rb for the complete API.
22
+
23
+ DISCLAIMER: This is very much alpha software, use at your own risk!
data/Rakefile ADDED
@@ -0,0 +1,76 @@
1
+ require 'rubygems'
2
+ require 'rake/gempackagetask'
3
+ require 'spec/rake/spectask'
4
+
5
+ file_list = FileList['spec/*_spec.rb']
6
+
7
+ namespace :spec do
8
+ desc "Run all examples with RCov"
9
+ Spec::Rake::SpecTask.new('rcov') do |t|
10
+ t.spec_files = file_list
11
+ t.rcov = true
12
+ t.rcov_dir = "doc/coverage"
13
+ t.rcov_opts = ['--exclude', 'spec']
14
+ end
15
+
16
+ desc "Generate an html report"
17
+ Spec::Rake::SpecTask.new('report') do |t|
18
+ t.spec_files = file_list
19
+ t.rcov_opts = ['--exclude', 'spec']
20
+ t.spec_opts = ["--format", "html:doc/reports/specs.html"]
21
+ t.fail_on_error = false
22
+ end
23
+
24
+ end
25
+
26
+ desc 'Default: run specs.'
27
+ task :default => 'spec:rcov'
28
+
29
+ PLUGIN = "feed_me"
30
+ NAME = "feed_me"
31
+ VERSION = "0.0.2"
32
+ AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
33
+ EMAIL = "jonas.nicklas@gmail.com"
34
+ HOMEPAGE = "http://github.com/jnicklas/feed_me"
35
+ SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"
36
+
37
+ spec = Gem::Specification.new do |s|
38
+ s.name = NAME
39
+ s.platform = Gem::Platform::RUBY
40
+ s.version = VERSION
41
+ s.has_rdoc = true
42
+ s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
43
+ s.summary = SUMMARY
44
+ s.description = s.summary
45
+ s.authors = AUTHOR
46
+ s.email = EMAIL
47
+ s.homepage = HOMEPAGE
48
+ s.require_path = 'lib'
49
+ s.autorequire = PLUGIN
50
+ s.add_dependency('hpricot')
51
+ s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,specs}/**/*")
52
+ end
53
+
54
+ begin
55
+ require 'jeweler'
56
+ Jeweler::Tasks.new(spec)
57
+ rescue
58
+ puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
59
+ end
60
+
61
+ Rake::GemPackageTask.new(spec) do |pkg|
62
+ pkg.gem_spec = spec
63
+ end
64
+
65
+ task :install => [:package] do
66
+ sh %{sudo gem install pkg/#{NAME}-#{VERSION}}
67
+ end
68
+
69
+ namespace :jruby do
70
+
71
+ desc "Run :package and install the resulting .gem with jruby"
72
+ task :install => :package do
73
+ sh %{#{SUDO} jruby -S gem install pkg/#{NAME}-#{Merb::VERSION}.gem --no-rdoc --no-ri}
74
+ end
75
+
76
+ end
data/TODO ADDED
@@ -0,0 +1,5 @@
1
+ TODO:
2
+ Fix LICENSE with your name
3
+ Fix Rakefile with your name and contact info
4
+ Add your code to lib/feed_me.rb
5
+ Add your Merb rake tasks to lib/feed_me/merbtasks.rb
@@ -0,0 +1,114 @@
1
+ class FeedMe::AbstractParser
2
+
3
+ class << self
4
+
5
+ attr_accessor :properties, :root_nodes
6
+
7
+ def build(xml, format, *args)
8
+ # in a world with activesupport this would have been written as
9
+ # format_parser = (format.to_s.camelize + self.to_s).constantize
10
+ camelized_format = format.to_s.split('_').map{ |w| w.capitalize }.join('')
11
+ bare_class = self.to_s.split('::').last
12
+
13
+ begin
14
+ format_parser = FeedMe.const_get(camelized_format + bare_class)
15
+ rescue NameError
16
+ end
17
+
18
+ if format_parser.is_a?(Class) and format_parser.ancestors.include?(self)
19
+ return format_parser.new(xml, format, *args)
20
+ else
21
+ return self.new(xml, format, *args)
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ def initialize(xml, format)
29
+ self.xml = xml
30
+ self.format = format
31
+ self.properties = self.class.properties[self.format]
32
+
33
+ append_methods
34
+ end
35
+
36
+ def to_hash
37
+ hash = {}
38
+ self.properties.each do |method, p|
39
+ hash[method] = self.send(method)
40
+ end
41
+ return hash
42
+ end
43
+
44
+ attr_accessor :xml, :format, :properties
45
+
46
+ alias_method :root_node, :xml
47
+
48
+ protected
49
+
50
+ def fetch_rss_person(selector)
51
+ item = fetch(selector)
52
+ if(item)
53
+ email, name = item.split(/\s+/, 2)
54
+ name = name.match( /\((.*?)\)/ ).to_a[1] if name # strip parentheses
55
+ else
56
+ name, email = nil
57
+ end
58
+ FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
59
+ end
60
+
61
+ def append_methods
62
+ self.properties.each do |method, p|
63
+ unless respond_to?(method)
64
+ block = get_proc_for_property(method, p)
65
+ # meta programming magic
66
+ (class << self; self; end).module_eval do
67
+ define_method method, &block
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ def get_proc_for_property(method, p)
74
+ if p.class == Array
75
+ return caching_proc(method, &proc { fetch("/#{p[0]}", root_node, p[1].to_sym) })
76
+ elsif p.class == Hash
77
+ return caching_proc(method, &proc { FeedMe::FeedStruct.new(root_node, p) })
78
+ elsif p != :undefined
79
+ return caching_proc(method, &proc { fetch("/#{p}", root_node) })
80
+ else
81
+ return proc { nil }
82
+ end
83
+ end
84
+
85
+ def caching_proc(name, &block)
86
+ proc do
87
+ ivar = instance_variable_get("@#{name}")
88
+ unless ivar
89
+ result = yield
90
+ instance_variable_set("@#{name}", result)
91
+ return result
92
+ end
93
+ end
94
+ end
95
+
96
+ def fetch(selector, search_in = xml, method = :inner_html)
97
+ item = search_in.at(selector)
98
+
99
+ self.try("extract_" + method.to_s, item) if item
100
+ end
101
+
102
+ def extract_inner_html(item)
103
+ item.inner_html
104
+ end
105
+
106
+ def extract_href(item)
107
+ item[:href]
108
+ end
109
+
110
+ def extract_time(item)
111
+ Time.parse(item.inner_html).utc
112
+ end
113
+
114
+ end
@@ -0,0 +1,68 @@
1
+ module FeedMe
2
+
3
+ ROOT_NODES = {
4
+ :atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
5
+ :rss2 => "//rss[@version=2.0]/channel"
6
+ }
7
+
8
+ FEED_PROPERTIES = {
9
+ :atom => {
10
+ :title => :title,
11
+ :updated_at => [:updated, :time],
12
+ :feed_id => :id,
13
+ :url => ["link[@rel=alternate]", :href],
14
+ :href => ["link[@rel=self]", :href],
15
+ :description => :subtitle,
16
+ :generator => :generator,
17
+ :author => {
18
+ :email => 'author/email',
19
+ :name => 'author/name',
20
+ :uri => 'author/uri'
21
+ },
22
+ :entries => :special
23
+ },
24
+ :rss2 => {
25
+ :title => :title,
26
+ :updated_at => [:lastBuildDate, :time],
27
+ :feed_id => :undefined,
28
+ :url => :link,
29
+ :href => :undefined,
30
+ :description => :description,
31
+ :generator => :generator,
32
+ :author => :special,
33
+ :entries => :special
34
+ }
35
+ }
36
+
37
+ ITEM_PROPERTIES = {
38
+ :atom => {
39
+ :title => :title,
40
+ :updated_at => [:updated, :time],
41
+ :item_id => :id,
42
+ :url => ["link[@rel=alternate]", :href],
43
+ :content => :content,
44
+ :author => {
45
+ :email => 'author/email',
46
+ :name => 'author/name',
47
+ :uri => 'author/uri'
48
+ }
49
+ },
50
+ :rss2 => {
51
+ :title => :title,
52
+ :updated_at => [:pubDate, :time],
53
+ :item_id => :guid,
54
+ :url => :link,
55
+ :content => :description,
56
+ :author => :special
57
+ }
58
+ }
59
+
60
+ AUTHOR_PROPERTIES = {
61
+ :atom => {
62
+ :name => :name,
63
+ :uri => :uri,
64
+ :email => :email
65
+ }
66
+ }
67
+
68
+ end
@@ -0,0 +1,57 @@
1
+ module FeedMe
2
+
3
+ class FeedParser < AbstractParser
4
+
5
+ self.properties = FEED_PROPERTIES
6
+
7
+ class << self
8
+
9
+ def open(file)
10
+ self.parse(Kernel.open(file).read)
11
+ end
12
+
13
+ # parses the passed feed and identifies what kind of feed it is
14
+ # then returns a parser object
15
+ def parse(feed)
16
+ xml = Hpricot.XML(feed)
17
+
18
+ root_node, format = self.identify(xml)
19
+ self.build(root_node, format)
20
+ end
21
+
22
+ protected
23
+
24
+ def identify(xml)
25
+ FeedMe::ROOT_NODES.each do |f, s|
26
+ item = xml.at(s)
27
+ return item, f if item
28
+ end
29
+ end
30
+
31
+ end
32
+ end
33
+
34
+ class AtomFeedParser < FeedParser
35
+ self.properties = FEED_PROPERTIES
36
+
37
+ def entries
38
+ xml.search('entry').map do |el|
39
+ ItemParser.build(el, self.format, self)
40
+ end
41
+ end
42
+ end
43
+
44
+ class Rss2FeedParser < FeedParser
45
+ self.properties = FEED_PROPERTIES
46
+
47
+ def entries
48
+ xml.search('item').map do |el|
49
+ ItemParser.build(el, self.format, self)
50
+ end
51
+ end
52
+
53
+ def author
54
+ fetch_rss_person("managingEditor")
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,13 @@
1
+ module FeedMe
2
+
3
+ class FeedStruct < AbstractParser
4
+
5
+ def initialize(xml, properties)
6
+ self.xml = xml
7
+ self.properties = properties
8
+ append_methods
9
+ end
10
+
11
+ end
12
+
13
+ end
@@ -0,0 +1,25 @@
1
+ module FeedMe
2
+
3
+ class ItemParser < AbstractParser
4
+
5
+ self.properties = ITEM_PROPERTIES
6
+
7
+ attr_accessor :feed
8
+
9
+ def initialize(xml, format, feed)
10
+ super(xml, format)
11
+ self.feed = feed
12
+ end
13
+
14
+ end
15
+
16
+ class Rss2ItemParser < ItemParser
17
+
18
+ self.properties = ITEM_PROPERTIES
19
+
20
+ def author
21
+ fetch_rss_person("author")
22
+ end
23
+
24
+ end
25
+ end
@@ -0,0 +1,6 @@
1
+ namespace :feed_me do
2
+ desc "Do something for feed_me"
3
+ task :default do
4
+ puts "feed_me doesn't do anything"
5
+ end
6
+ end
@@ -0,0 +1,15 @@
1
+ module FeedMe
2
+
3
+ class SimpleStruct
4
+
5
+ def initialize(hash = {})
6
+ (class << self; self; end).module_eval do
7
+ hash.each do |method, result|
8
+ define_method( method ) { result }
9
+ end
10
+ end
11
+ end
12
+
13
+ end
14
+
15
+ end
data/lib/feed_me.rb ADDED
@@ -0,0 +1,34 @@
1
+ # make sure we're running inside Merb
2
+ if defined?(Merb::Plugins)
3
+ dependency 'hpricot'
4
+ else
5
+ require 'rubygems'
6
+ require 'hpricot'
7
+ end
8
+ require 'time'
9
+
10
+ unless nil.respond_to? :try
11
+ # the ultimate duck
12
+ class Object
13
+ def try(method, *args)
14
+ self.send(method, *args)
15
+ rescue NoMethodError
16
+ nil
17
+ end
18
+ end
19
+ end
20
+
21
+ module FeedMe
22
+ def self.parse(feed)
23
+ FeedMe::FeedParser.parse(feed)
24
+ end
25
+
26
+ def self.open(file)
27
+ FeedMe::FeedParser.parse(file)
28
+ end
29
+ end
30
+
31
+ ['consts', 'abstract_parser', 'feed_struct', 'simple_struct',
32
+ 'feed_parser', 'item_parser'].each do |f|
33
+ require File.join(File.dirname(__FILE__), 'feed_me', f)
34
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: namelessjon-feed_me
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Jonas Nicklas
8
+ - Jonathan Stott
9
+ autorequire: feed_me
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2008-11-16 00:00:00 -08:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: hpricot
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Nice and simple RSS and atom feed parsing built on hpricot
26
+ email: jonas.nicklas@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README
33
+ - LICENSE
34
+ - TODO
35
+ files:
36
+ - LICENSE
37
+ - README
38
+ - Rakefile
39
+ - TODO
40
+ - lib/feed_me
41
+ - lib/feed_me/abstract_parser.rb
42
+ - lib/feed_me/consts.rb
43
+ - lib/feed_me/feed_parser.rb
44
+ - lib/feed_me/feed_struct.rb
45
+ - lib/feed_me/item_parser.rb
46
+ - lib/feed_me/merbtasks.rb
47
+ - lib/feed_me/simple_struct.rb
48
+ - lib/feed_me.rb
49
+ has_rdoc: true
50
+ homepage: http://github.com/jnicklas/feed_me
51
+ post_install_message:
52
+ rdoc_options: []
53
+
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.2.0
72
+ signing_key:
73
+ specification_version: 2
74
+ summary: Nice and simple RSS and atom feed parsing built on hpricot
75
+ test_files: []
76
+