namelessjon-feed_me 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README +23 -0
- data/Rakefile +76 -0
- data/TODO +5 -0
- data/lib/feed_me/abstract_parser.rb +114 -0
- data/lib/feed_me/consts.rb +68 -0
- data/lib/feed_me/feed_parser.rb +57 -0
- data/lib/feed_me/feed_struct.rb +13 -0
- data/lib/feed_me/item_parser.rb +25 -0
- data/lib/feed_me/merbtasks.rb +6 -0
- data/lib/feed_me/simple_struct.rb +15 -0
- data/lib/feed_me.rb +34 -0
- metadata +76 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Jonas Nicklas
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Feed Me
|
2
|
+
=======
|
3
|
+
|
4
|
+
Feed Me is a simple parser for RSS2 and Atom feeds; adding other feed formats should be trivial. Feed Me is deliberately minimal: it basically only does translation/cleanup from the different feed formats to one consistent API.
|
5
|
+
|
6
|
+
Feed Me is built on the excellent Hpricot parser written by _why the lucky stiff.
|
7
|
+
|
8
|
+
Use it like this:
|
9
|
+
|
10
|
+
file = File.read('some_feed.atom')
|
11
|
+
|
12
|
+
feed = FeedMe.parse(file)
|
13
|
+
|
14
|
+
feed.title
|
15
|
+
feed.author.name
|
16
|
+
feed.entries.each do |entry|
|
17
|
+
entry.title
|
18
|
+
entry.content
|
19
|
+
end
|
20
|
+
|
21
|
+
Check out the specs or lib/feed_me/consts.rb for the complete API.
|
22
|
+
|
23
|
+
DISCLAIMER: This is very much alpha software, use at your own risk!
|
data/Rakefile
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/gempackagetask'
|
3
|
+
require 'spec/rake/spectask'
|
4
|
+
|
5
|
+
# Spec files that both spec tasks below run.
file_list = FileList['spec/*_spec.rb']

namespace :spec do
  # spec:rcov -- runs the examples under RCov, writing coverage to doc/coverage.
  desc "Run all examples with RCov"
  Spec::Rake::SpecTask.new('rcov') do |coverage|
    coverage.spec_files = file_list
    coverage.rcov       = true
    coverage.rcov_dir   = "doc/coverage"
    coverage.rcov_opts  = ['--exclude', 'spec']
  end

  # spec:report -- writes an HTML report; spec failures do not fail the task.
  desc "Generate an html report"
  Spec::Rake::SpecTask.new('report') do |report|
    report.spec_files    = file_list
    report.rcov_opts     = ['--exclude', 'spec']
    report.spec_opts     = ["--format", "html:doc/reports/specs.html"]
    report.fail_on_error = false
  end
end

desc 'Default: run specs.'
task :default => 'spec:rcov'
|
28
|
+
|
29
|
+
# Gem identity, shared by the gemspec and the install tasks in this Rakefile.
PLUGIN = "feed_me"
NAME = "feed_me"
VERSION = "0.0.2"
AUTHOR = ["Jonas Nicklas", "Jonathan Stott"]
EMAIL = "jonas.nicklas@gmail.com"
HOMEPAGE = "http://github.com/jnicklas/feed_me"
SUMMARY = "Nice and simple RSS and atom feed parsing built on hpricot"

# Gem specification used by the Jeweler and GemPackageTask tasks.
spec = Gem::Specification.new do |s|
  s.name = NAME
  s.platform = Gem::Platform::RUBY
  s.version = VERSION
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.authors = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE
  s.require_path = 'lib'
  s.autorequire = PLUGIN
  s.add_dependency('hpricot')
  # The spec directory is "spec" (see the FileList used by the spec tasks);
  # the previous "{lib,specs}" glob never matched it, so specs were not packaged.
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
end
|
53
|
+
|
54
|
+
# Jeweler is optional: define its tasks when it is installed, otherwise print
# an installation hint and carry on with the remaining tasks.
begin
  require 'jeweler'
  Jeweler::Tasks.new(spec)
rescue LoadError
  # A failed `require` raises LoadError, which is a ScriptError and therefore
  # NOT caught by a bare `rescue` (that only covers StandardError).
  puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install namelessjon-jeweler -s http://gems.github.com"
end
|
60
|
+
|
61
|
+
# Defines the :package task family that builds pkg/<name>-<version>.gem.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

# Builds the gem and installs the packaged file system-wide.
task :install => [:package] do
  # The packaged file carries a ".gem" extension; name it explicitly so the
  # local file is installed.
  sh %{sudo gem install pkg/#{NAME}-#{VERSION}.gem}
end

namespace :jruby do

  desc "Run :package and install the resulting .gem with jruby"
  task :install => :package do
    # SUDO is not defined anywhere in this Rakefile; honour it when some other
    # file provides it and fall back to plain "sudo" otherwise.
    sudo = defined?(SUDO) ? SUDO : 'sudo'
    # The package is named after this gem's VERSION, not Merb::VERSION.
    sh %{#{sudo} jruby -S gem install pkg/#{NAME}-#{VERSION}.gem --no-rdoc --no-ri}
  end

end
|
data/TODO
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
module FeedMe

  # Base class for the format-specific parsers.
  #
  # A subclass assigns a per-format property table to +properties+ at class
  # level. On construction, the table for the instance's format is turned into
  # singleton reader methods (one per property) unless the instance already
  # responds to a method of that name.
  class AbstractParser

    class << self

      attr_accessor :properties, :root_nodes

      # Instantiates the most specific parser for +format+.
      #
      # Looks for a constant named <CamelizedFormat><BareClassName> in FeedMe
      # (e.g. format :rss2 on FeedParser => FeedMe::Rss2FeedParser). When that
      # constant is a class descending from the receiver it is instantiated;
      # otherwise the receiver itself is. Extra +args+ are forwarded to +new+.
      def build(xml, format, *args)
        # in a world with activesupport this would have been written as
        # format_parser = (format.to_s.camelize + self.to_s).constantize
        camelized_format = format.to_s.split('_').map { |w| w.capitalize }.join('')
        bare_class = to_s.split('::').last

        format_parser =
          begin
            FeedMe.const_get(camelized_format + bare_class)
          rescue NameError
            nil # no format-specific parser exists; fall back to the receiver
          end

        if format_parser.is_a?(Class) && format_parser.ancestors.include?(self)
          format_parser.new(xml, format, *args)
        else
          new(xml, format, *args)
        end
      end

    end

    attr_accessor :xml, :format, :properties

    alias_method :root_node, :xml

    # xml::    node that selectors are evaluated against (must respond to +at+)
    # format:: key into the class-level +properties+ table
    def initialize(xml, format)
      self.xml = xml
      self.format = format
      self.properties = self.class.properties[self.format]

      append_methods
    end

    # Returns a hash mapping every property name to the value of its reader.
    def to_hash
      hash = {}
      properties.each_key { |name| hash[name] = send(name) }
      hash
    end

    protected

    # Reads the text at +selector+ and splits it into an e-mail address and a
    # parenthesised name ("me@example.com (My Name)"). Returns a SimpleStruct
    # with :email, :name and :uri (always nil); email and name are nil when the
    # selector matches nothing.
    def fetch_rss_person(selector)
      email = name = nil
      item = fetch(selector)
      if item
        email, name = item.split(/\s+/, 2)
        name = name.match(/\((.*?)\)/).to_a[1] if name # strip parentheses
      end
      FeedMe::SimpleStruct.new(:email => email, :name => name, :uri => nil)
    end

    # Defines a singleton reader for each property that the instance does not
    # already respond to (subclasses may implement a property as a real method).
    def append_methods
      properties.each do |method, p|
        next if respond_to?(method)

        block = get_proc_for_property(method, p)
        # meta programming magic
        (class << self; self; end).module_eval do
          define_method method, &block
        end
      end
    end

    # Maps a property definition to the proc implementing its reader:
    # Array::      [selector, extractor] -> fetch with the named extractor
    # Hash::       nested properties     -> FeedStruct over the root node
    # :undefined:: always nil
    # otherwise::  selector              -> fetch with the default extractor
    def get_proc_for_property(method, p)
      case p
      when Array
        caching_proc(method) { fetch("/#{p[0]}", root_node, p[1].to_sym) }
      when Hash
        caching_proc(method) { FeedMe::FeedStruct.new(root_node, p) }
      when :undefined
        proc { nil }
      else
        caching_proc(method) { fetch("/#{p}", root_node) }
      end
    end

    # Wraps +block+ in a proc that memoizes its result in the instance variable
    # "@<name>" of whatever object the proc is later run against.
    #
    # The cached value is returned on cache hits (previously a hit fell out of
    # the `unless` and returned nil). nil/false results are not cached and are
    # recomputed on every call.
    def caching_proc(name, &block)
      ivar = "@#{name}"
      proc do
        instance_variable_get(ivar) || instance_variable_set(ivar, block.call)
      end
    end

    # Finds the first node matching +selector+ within +search_in+ and converts
    # it with the "extract_<method>" helper. Returns nil when nothing matches
    # or when no such extractor exists.
    #
    # The extractor is dispatched with +send+ rather than Object#try, so the
    # result does not depend on which +try+ implementation is loaded.
    def fetch(selector, search_in = xml, method = :inner_html)
      item = search_in.at(selector)
      return nil unless item

      extractor = "extract_#{method}"
      send(extractor, item) if respond_to?(extractor, true)
    end

    # Extractor: the node's inner markup.
    def extract_inner_html(item)
      item.inner_html
    end

    # Extractor: the node's href attribute.
    def extract_href(item)
      item[:href]
    end

    # Extractor: the node's content parsed as a time, converted to UTC.
    def extract_time(item)
      Time.parse(item.inner_html).utc
    end

  end

end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module FeedMe

  # Selector that locates the root node of each supported feed format.
  ROOT_NODES = {
    :atom => "//feed[@xmlns='http://www.w3.org/2005/Atom']",
    :rss2 => "//rss[@version=2.0]/channel"
  }

  # Feed-level properties per format. Each key is a reader name; each value
  # says where the data comes from:
  #   symbol/string          selector, read with the default extractor
  #   [selector, extractor]  selector read with the named extractor
  #   hash                   nested group of selectors
  #   :undefined             the format has no such field
  #   :special               implemented by a method on the parser class
  FEED_PROPERTIES = {
    :atom => {
      :title       => :title,
      :updated_at  => [:updated, :time],
      :feed_id     => :id,
      :url         => ["link[@rel=alternate]", :href],
      :href        => ["link[@rel=self]", :href],
      :description => :subtitle,
      :generator   => :generator,
      :author      => {
        :email => 'author/email',
        :name  => 'author/name',
        :uri   => 'author/uri'
      },
      :entries     => :special
    },
    :rss2 => {
      :title       => :title,
      :updated_at  => [:lastBuildDate, :time],
      :feed_id     => :undefined,
      :url         => :link,
      :href        => :undefined,
      :description => :description,
      :generator   => :generator,
      :author      => :special,
      :entries     => :special
    }
  }

  # Entry-level properties per format; values follow the same conventions as
  # FEED_PROPERTIES.
  ITEM_PROPERTIES = {
    :atom => {
      :title      => :title,
      :updated_at => [:updated, :time],
      :item_id    => :id,
      :url        => ["link[@rel=alternate]", :href],
      :content    => :content,
      :author     => {
        :email => 'author/email',
        :name  => 'author/name',
        :uri   => 'author/uri'
      }
    },
    :rss2 => {
      :title      => :title,
      :updated_at => [:pubDate, :time],
      :item_id    => :guid,
      :url        => :link,
      :content    => :description,
      :author     => :special
    }
  }

  # Author properties; only the Atom format is described here.
  AUTHOR_PROPERTIES = {
    :atom => {
      :name  => :name,
      :uri   => :uri,
      :email => :email
    }
  }

end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module FeedMe

  # Entry point for parsing a feed: detects the format and builds the matching
  # format-specific parser.
  class FeedParser < AbstractParser

    self.properties = FEED_PROPERTIES

    class << self

      # Reads +file+ and parses its contents. The handle is closed as soon as
      # the content has been read.
      #
      # NOTE(review): Kernel.open treats an argument starting with "|" as a
      # command to run, so +file+ must not come from untrusted input.
      def open(file)
        parse(Kernel.open(file) { |io| io.read })
      end

      # Parses the passed feed and identifies what kind of feed it is,
      # then returns a parser object.
      #
      # Raises ArgumentError when the document matches none of the formats in
      # FeedMe::ROOT_NODES (previously this surfaced as a NoMethodError from
      # deep inside the parser construction).
      def parse(feed)
        xml = Hpricot.XML(feed)

        root_node, format = identify(xml)
        unless root_node
          raise ArgumentError, "unrecognized feed format (supported: #{FeedMe::ROOT_NODES.keys.join(', ')})"
        end

        build(root_node, format)
      end

      protected

      # Returns [root_node, format] for the first entry of ROOT_NODES whose
      # selector matches +xml+, or nil when none does.
      def identify(xml)
        FeedMe::ROOT_NODES.each do |format, selector|
          item = xml.at(selector)
          return item, format if item
        end
        nil
      end

    end
  end

  # Atom feeds: entries are the <entry> elements.
  class AtomFeedParser < FeedParser
    self.properties = FEED_PROPERTIES

    # Returns one item parser per <entry> element.
    def entries
      xml.search('entry').map do |el|
        ItemParser.build(el, self.format, self)
      end
    end
  end

  # RSS 2 feeds: entries are the <item> elements and the author is taken from
  # <managingEditor>.
  class Rss2FeedParser < FeedParser
    self.properties = FEED_PROPERTIES

    # Returns one item parser per <item> element.
    def entries
      xml.search('item').map do |el|
        ItemParser.build(el, self.format, self)
      end
    end

    # Returns the feed author parsed from the managingEditor element.
    def author
      fetch_rss_person("managingEditor")
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module FeedMe

  # Parser for a single feed entry. Besides the properties generated from
  # ITEM_PROPERTIES it keeps a reference to the feed parser that produced it.
  class ItemParser < AbstractParser

    attr_accessor :feed

    self.properties = ITEM_PROPERTIES

    # xml::    the entry node
    # format:: the feed format, used to pick the property table
    # feed::   the parser of the feed this entry belongs to
    def initialize(xml, format, feed)
      super(xml, format)
      self.feed = feed
    end

  end

  # RSS 2 entries: the author is read from the <author> element.
  class Rss2ItemParser < ItemParser

    self.properties = ITEM_PROPERTIES

    # Returns the entry author parsed from the author element.
    def author
      fetch_rss_person("author")
    end

  end
end
|
data/lib/feed_me.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# make sure we're running inside Merb
|
2
|
+
if defined?(Merb::Plugins)
|
3
|
+
dependency 'hpricot'
|
4
|
+
else
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hpricot'
|
7
|
+
end
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
# Provide Object#try only when nothing else has defined it already.
unless nil.respond_to? :try
  # the ultimate duck
  class Object
    # Calls +method+ with +args+ and returns the result, or nil when the
    # receiver has no such method.
    #
    # Only the NoMethodError for +method+ itself is turned into nil; a
    # NoMethodError raised for a different method inside the call is a
    # genuine bug and is re-raised instead of being silently swallowed.
    def try(method, *args)
      self.send(method, *args)
    rescue NoMethodError => e
      raise unless e.name.to_s == method.to_s
      nil
    end
  end
end
|
20
|
+
|
21
|
+
module FeedMe
  # Parses feed content (a string of XML) and returns a parser object.
  def self.parse(feed)
    FeedMe::FeedParser.parse(feed)
  end

  # Opens +file+, reads it and parses its contents.
  #
  # Delegates to FeedParser.open; it previously delegated to FeedParser.parse,
  # which treated the file name itself as the feed content.
  def self.open(file)
    FeedMe::FeedParser.open(file)
  end
end
|
30
|
+
|
31
|
+
# Load the library's components in order (constants and base parser first)
# from the feed_me/ directory that sits next to this file.
%w(consts abstract_parser feed_struct simple_struct feed_parser item_parser).each do |component|
  require File.join(File.dirname(__FILE__), 'feed_me', component)
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: namelessjon-feed_me
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jonas Nicklas
|
8
|
+
- Jonathan Stott
|
9
|
+
autorequire: feed_me
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2008-11-16 00:00:00 -08:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: hpricot
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: Nice and simple RSS and atom feed parsing built on hpricot
|
26
|
+
email: jonas.nicklas@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README
|
33
|
+
- LICENSE
|
34
|
+
- TODO
|
35
|
+
files:
|
36
|
+
- LICENSE
|
37
|
+
- README
|
38
|
+
- Rakefile
|
39
|
+
- TODO
|
40
|
+
- lib/feed_me
|
41
|
+
- lib/feed_me/abstract_parser.rb
|
42
|
+
- lib/feed_me/consts.rb
|
43
|
+
- lib/feed_me/feed_parser.rb
|
44
|
+
- lib/feed_me/feed_struct.rb
|
45
|
+
- lib/feed_me/item_parser.rb
|
46
|
+
- lib/feed_me/merbtasks.rb
|
47
|
+
- lib/feed_me/simple_struct.rb
|
48
|
+
- lib/feed_me.rb
|
49
|
+
has_rdoc: true
|
50
|
+
homepage: http://github.com/jnicklas/feed_me
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
version:
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.2.0
|
72
|
+
signing_key:
|
73
|
+
specification_version: 2
|
74
|
+
summary: Nice and simple RSS and atom feed parsing built on hpricot
|
75
|
+
test_files: []
|
76
|
+
|