pauldix-feedzirra 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +1 -1
- data/Rakefile +39 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra/atom.rb +9 -1
- data/lib/feedzirra/atom_entry.rb +11 -0
- data/lib/feedzirra/atom_feed_burner.rb +9 -1
- data/lib/feedzirra/atom_feed_burner_entry.rb +11 -0
- data/lib/feedzirra/feed.rb +168 -27
- data/lib/feedzirra/feed_entry_utilities.rb +1 -23
- data/lib/feedzirra/itunes_rss.rb +46 -0
- data/lib/feedzirra/itunes_rss_item.rb +28 -0
- data/lib/feedzirra/itunes_rss_owner.rb +8 -0
- data/lib/feedzirra/rdf.rb +9 -1
- data/lib/feedzirra/rdf_entry.rb +10 -0
- data/lib/feedzirra/rss.rb +9 -1
- data/lib/feedzirra/rss_entry.rb +11 -0
- data/lib/feedzirra.rb +5 -0
- data/spec/feedzirra/feed_spec.rb +415 -123
- data/spec/feedzirra/itunes_rss_item_spec.rb +48 -0
- data/spec/feedzirra/itunes_rss_owner_spec.rb +18 -0
- data/spec/feedzirra/itunes_rss_spec.rb +50 -0
- data/spec/spec_helper.rb +21 -9
- metadata +8 -1
data/README.textile
CHANGED
@@ -99,7 +99,7 @@ feeds = Feedzirra::Feed.fetch_and_parse(feeds_urls)
|
|
99
99
|
# there will be a Fixnum of the http response code instead of a feed object
|
100
100
|
|
101
101
|
# updating multiple feeds. it expects a collection of feed objects
|
102
|
-
updated_feeds = Feedzirra::Feed.
|
102
|
+
updated_feeds = Feedzirra::Feed.update(feeds.values)
|
103
103
|
|
104
104
|
# defining custom behavior on failure or success. note that a return status of 304 (not updated) will call the on_success handler
|
105
105
|
feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing",
|
data/Rakefile
CHANGED
@@ -1,12 +1,51 @@
|
|
1
1
|
require "spec"
|
2
2
|
require "spec/rake/spectask"
|
3
|
+
require 'rake/rdoctask'
|
3
4
|
require 'lib/feedzirra.rb'
|
4
5
|
|
6
|
+
# Grab recently touched specs
|
7
|
+
def recent_specs(touched_since)
|
8
|
+
recent_specs = FileList['app/**/*'].map do |path|
|
9
|
+
|
10
|
+
if File.mtime(path) > touched_since
|
11
|
+
spec = File.join('spec', File.dirname(path).split("/")[1..-1].join('/'),
|
12
|
+
"#{File.basename(path, ".*")}_spec.rb")
|
13
|
+
spec if File.exists?(spec)
|
14
|
+
end
|
15
|
+
end.compact
|
16
|
+
|
17
|
+
recent_specs += FileList['spec/**/*_spec.rb'].select do |path|
|
18
|
+
File.mtime(path) > touched_since
|
19
|
+
end
|
20
|
+
recent_specs.uniq
|
21
|
+
end
|
22
|
+
|
23
|
+
# Tasks
|
5
24
|
Spec::Rake::SpecTask.new do |t|
|
6
25
|
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
7
26
|
t.spec_files = FileList['spec/**/*_spec.rb']
|
8
27
|
end
|
9
28
|
|
29
|
+
desc 'Run recent specs'
|
30
|
+
Spec::Rake::SpecTask.new("spec:recent") do |t|
|
31
|
+
t.spec_opts = ["--format","specdoc","--color"]
|
32
|
+
t.spec_files = recent_specs(Time.now - 600) # 10 min.
|
33
|
+
end
|
34
|
+
|
35
|
+
Spec::Rake::SpecTask.new('spec:rcov') do |t|
|
36
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
37
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
38
|
+
t.rcov = true
|
39
|
+
t.rcov_opts = ['--exclude', 'spec,/usr/lib/ruby,/usr/local,/var/lib,/Library', '--text-report']
|
40
|
+
end
|
41
|
+
|
42
|
+
Rake::RDocTask.new do |rd|
|
43
|
+
rd.title = 'Feedzirra'
|
44
|
+
rd.rdoc_dir = 'rdoc'
|
45
|
+
rd.rdoc_files.include('README.rdoc', 'lib/feedzirra.rb', 'lib/feedzirra/**/*.rb')
|
46
|
+
rd.options = ["--quiet", "--opname", "index.html", "--line-numbers", "--inline-source", '--main', 'README.rdoc']
|
47
|
+
end
|
48
|
+
|
10
49
|
task :install do
|
11
50
|
rm_rf "*.gem"
|
12
51
|
puts `gem build feedzirra.gemspec`
|
data/lib/feedzirra/atom.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Atom feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class Atom
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -7,7 +15,7 @@ module Feedzirra
|
|
7
15
|
element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
|
8
16
|
elements :entry, :as => :entries, :class => AtomEntry
|
9
17
|
|
10
|
-
def self.able_to_parse?(xml)
|
18
|
+
def self.able_to_parse?(xml) #:nodoc:
|
11
19
|
xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
|
12
20
|
end
|
13
21
|
end
|
data/lib/feedzirra/atom_entry.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Atom feed entries.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * url
|
8
|
+
# * author
|
9
|
+
# * content
|
10
|
+
# * summary
|
11
|
+
# * published
|
12
|
+
# * categories
|
2
13
|
class AtomEntry
|
3
14
|
include SAXMachine
|
4
15
|
include FeedEntryUtilities
|
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Feedburner Atom feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class AtomFeedBurner
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -7,7 +15,7 @@ module Feedzirra
|
|
7
15
|
element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
|
8
16
|
elements :entry, :as => :entries, :class => AtomFeedBurnerEntry
|
9
17
|
|
10
|
-
def self.able_to_parse?(xml)
|
18
|
+
def self.able_to_parse?(xml) #:nodoc:
|
11
19
|
(xml =~ /Atom/ && xml =~ /feedburner/) || false
|
12
20
|
end
|
13
21
|
end
|
@@ -1,4 +1,15 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Feedburner Atom feed entries.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * url
|
8
|
+
# * author
|
9
|
+
# * content
|
10
|
+
# * summary
|
11
|
+
# * published
|
12
|
+
# * categories
|
2
13
|
class AtomFeedBurnerEntry
|
3
14
|
include SAXMachine
|
4
15
|
include FeedEntryUtilities
|
data/lib/feedzirra/feed.rb
CHANGED
@@ -4,30 +4,93 @@ module Feedzirra
|
|
4
4
|
class Feed
|
5
5
|
USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
|
6
6
|
|
7
|
+
# Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
|
8
|
+
#
|
9
|
+
# === Parameters
|
10
|
+
# [xml<String>] The XML that you would like parsed.
|
11
|
+
# === Returns
|
12
|
+
# An instance of the determined feed type. By default a Feedzirra::Atom, Feedzirra::AtomFeedBurner, Feedzirra::RDF, or Feedzirra::RSS object.
|
13
|
+
# === Raises
|
14
|
+
# Feedzirra::NoParserAvailable : If no valid parser classes could be found for the feed.
|
7
15
|
def self.parse(xml)
|
8
16
|
if parser = determine_feed_parser_for_xml(xml)
|
9
17
|
parser.parse(xml)
|
10
18
|
else
|
11
|
-
raise NoParserAvailable.new("
|
19
|
+
raise NoParserAvailable.new("No valid parser for XML.")
|
12
20
|
end
|
13
21
|
end
|
14
22
|
|
23
|
+
# Determines the correct parser class to use for parsing the feed.
|
24
|
+
#
|
25
|
+
# === Parameters
|
26
|
+
# [xml<String>] The XML that you would like to determine the parser for.
|
27
|
+
# === Returns
|
28
|
+
# The class name of the parser that can handle the XML.
|
15
29
|
def self.determine_feed_parser_for_xml(xml)
|
16
30
|
start_of_doc = xml.slice(0, 1000)
|
17
31
|
feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
|
18
32
|
end
|
19
33
|
|
20
|
-
|
34
|
+
# Adds a new feed parsing class that will be used for parsing.
|
35
|
+
#
|
36
|
+
# === Parameters
|
37
|
+
# [klass<Constant>] The class/constant that you want to register.
|
38
|
+
# === Returns
|
39
|
+
# An updated array of feed parser class names.
|
40
|
+
def self.add_feed_class(klass)
|
21
41
|
feed_classes.unshift klass
|
22
42
|
end
|
23
|
-
|
43
|
+
|
44
|
+
# Provides a list of registered feed parsing classes.
|
45
|
+
#
|
46
|
+
# === Returns
|
47
|
+
# An array of class names.
|
24
48
|
def self.feed_classes
|
25
|
-
@feed_classes ||= [RSS, AtomFeedBurner, Atom]
|
49
|
+
@feed_classes ||= [ITunesRSS, RSS, AtomFeedBurner, Atom]
|
50
|
+
end
|
51
|
+
|
52
|
+
# Makes all entry types look for the passed in element to parse. This is actually just a call to
|
53
|
+
# element (a SAXMachine call) in the class
|
54
|
+
#
|
55
|
+
# === Parameters
|
56
|
+
# [element_tag<String>]
|
57
|
+
# [options<Hash>] Valid keys are same as with SAXMachine
|
58
|
+
def self.add_common_feed_entry_element(element_tag, options = {})
|
59
|
+
# need to think of a better way to do this. will break for people who want this behavior
|
60
|
+
# across their added classes
|
61
|
+
[RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
|
62
|
+
klass.send(:element, element_tag, options)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Makes all entry types look for the passed in elements to parse. This is actually just a call to
|
67
|
+
# elements (a SAXMachine call) in the class
|
68
|
+
#
|
69
|
+
# === Parameters
|
70
|
+
# [element_tag<String>]
|
71
|
+
# [options<Hash>] Valid keys are same as with SAXMachine
|
72
|
+
def self.add_common_feed_entry_elements(element_tag, options = {})
|
73
|
+
# need to think of a better way to do this. will break for people who want this behavior
|
74
|
+
# across their added classes
|
75
|
+
[RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
|
76
|
+
klass.send(:elements, element_tag, options)
|
77
|
+
end
|
26
78
|
end
|
27
79
|
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
80
|
+
# Fetches and returns the raw XML for each URL provided.
|
81
|
+
#
|
82
|
+
# === Parameters
|
83
|
+
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
84
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
85
|
+
# :user_agent - String that overrides the default user agent.
|
86
|
+
# :if_modified_since - Time object representing when the feed was last updated.
|
87
|
+
# :if_none_match - String that's normally an etag for the request that was stored previously.
|
88
|
+
# :on_success - Block that gets executed after a successful request.
|
89
|
+
# :on_failure - Block that gets executed after a failed request.
|
90
|
+
# === Returns
|
91
|
+
# A String of XML if a single URL is passed.
|
92
|
+
#
|
93
|
+
# A Hash if multiple URLs are passed. The key will be the URL, and the value the XML.
|
31
94
|
def self.fetch_raw(urls, options = {})
|
32
95
|
url_queue = [*urls]
|
33
96
|
multi = Curl::Multi.new
|
@@ -39,6 +102,8 @@ module Feedzirra
|
|
39
102
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
40
103
|
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
41
104
|
curl.follow_location = true
|
105
|
+
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
106
|
+
|
42
107
|
curl.on_success do |c|
|
43
108
|
responses[url] = decode_content(c)
|
44
109
|
end
|
@@ -52,14 +117,28 @@ module Feedzirra
|
|
52
117
|
multi.perform
|
53
118
|
return urls.is_a?(String) ? responses.values.first : responses
|
54
119
|
end
|
55
|
-
|
120
|
+
|
121
|
+
# Fetches and returns the parsed XML for each URL provided.
|
122
|
+
#
|
123
|
+
# === Parameters
|
124
|
+
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
125
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
126
|
+
# * :user_agent - String that overrides the default user agent.
|
127
|
+
# * :if_modified_since - Time object representing when the feed was last updated.
|
128
|
+
# * :if_none_match - String, an etag for the request that was stored previously.
|
129
|
+
# * :on_success - Block that gets executed after a successful request.
|
130
|
+
# * :on_failure - Block that gets executed after a failed request.
|
131
|
+
# === Returns
|
132
|
+
# A Feed object if a single URL is passed.
|
133
|
+
#
|
134
|
+
# A Hash if multiple URLs are passed. The key will be the URL, and the value the Feed object.
|
56
135
|
def self.fetch_and_parse(urls, options = {})
|
57
136
|
url_queue = [*urls]
|
58
137
|
multi = Curl::Multi.new
|
59
|
-
|
138
|
+
responses = {}
|
139
|
+
|
60
140
|
# I broke these down so I would only try to do 30 simultaneously because
|
61
141
|
# I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
|
62
|
-
responses = {}
|
63
142
|
url_queue.slice!(0, 30).each do |url|
|
64
143
|
add_url_to_multi(multi, url, url_queue, responses, options)
|
65
144
|
end
|
@@ -67,25 +146,44 @@ module Feedzirra
|
|
67
146
|
multi.perform
|
68
147
|
return urls.is_a?(String) ? responses.values.first : responses
|
69
148
|
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
149
|
+
|
150
|
+
# Decodes the XML document if it was compressed.
|
151
|
+
#
|
152
|
+
# === Parameters
|
153
|
+
# [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
|
154
|
+
# === Returns
|
155
|
+
# A decoded string of XML.
|
156
|
+
def self.decode_content(curl_request)
|
157
|
+
if curl_request.header_str.match(/Content-Encoding: gzip/)
|
158
|
+
gz = Zlib::GzipReader.new(StringIO.new(curl_request.body_str))
|
74
159
|
xml = gz.read
|
75
160
|
gz.close
|
76
|
-
elsif
|
77
|
-
xml = Zlib::Deflate.inflate(
|
161
|
+
elsif curl_request.header_str.match(/Content-Encoding: deflate/)
|
162
|
+
xml = Zlib::Deflate.inflate(curl_request.body_str)
|
78
163
|
else
|
79
|
-
xml =
|
164
|
+
xml = curl_request.body_str
|
80
165
|
end
|
81
|
-
|
166
|
+
|
82
167
|
xml
|
83
168
|
end
|
84
|
-
|
169
|
+
|
170
|
+
# Updates each feed for each Feed object provided.
|
171
|
+
#
|
172
|
+
# === Parameters
|
173
|
+
# [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
|
174
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
175
|
+
# * :user_agent - String that overrides the default user agent.
|
176
|
+
# * :on_success - Block that gets executed after a successful request.
|
177
|
+
# * :on_failure - Block that gets executed after a failed request.
|
178
|
+
# === Returns
|
179
|
+
# An updated Feed object if a single Feed is passed.
|
180
|
+
#
|
181
|
+
# An Array of the updated Feed objects if multiple Feeds are passed.
|
85
182
|
def self.update(feeds, options = {})
|
86
183
|
feed_queue = [*feeds]
|
87
184
|
multi = Curl::Multi.new
|
88
185
|
responses = {}
|
186
|
+
|
89
187
|
feed_queue.slice!(0, 30).each do |feed|
|
90
188
|
add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
91
189
|
end
|
@@ -94,6 +192,20 @@ module Feedzirra
|
|
94
192
|
return responses.size == 1 ? responses.values.first : responses.values
|
95
193
|
end
|
96
194
|
|
195
|
+
# An abstraction for adding a feed by URL to the passed Curb::multi stack.
|
196
|
+
#
|
197
|
+
# === Parameters
|
198
|
+
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
|
199
|
+
# [url<String>] The URL of the feed that you would like to be fetched.
|
200
|
+
# [url_queue<Array>] An array of URLs that are queued for request.
|
201
|
+
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
202
|
+
# [feeds<String> or <Array>] A single feed object, or an array of feed objects.
|
203
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
204
|
+
# * :user_agent - String that overrides the default user agent.
|
205
|
+
# * :on_success - Block that gets executed after a successful request.
|
206
|
+
# * :on_failure - Block that gets executed after a failed request.
|
207
|
+
# === Returns
|
208
|
+
# The updated Curl::Multi object with the request details added to its stack.
|
97
209
|
def self.add_url_to_multi(multi, url, url_queue, responses, options)
|
98
210
|
easy = Curl::Easy.new(url) do |curl|
|
99
211
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
@@ -101,10 +213,13 @@ module Feedzirra
|
|
101
213
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
102
214
|
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
103
215
|
curl.follow_location = true
|
216
|
+
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
217
|
+
|
104
218
|
curl.on_success do |c|
|
105
219
|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
106
220
|
xml = decode_content(c)
|
107
221
|
klass = determine_feed_parser_for_xml(xml)
|
222
|
+
|
108
223
|
if klass
|
109
224
|
feed = klass.parse(xml)
|
110
225
|
feed.feed_url = c.last_effective_url
|
@@ -113,9 +228,10 @@ module Feedzirra
|
|
113
228
|
responses[url] = feed
|
114
229
|
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
115
230
|
else
|
116
|
-
|
231
|
+
raise NoParserAvailable.new("Error determining parser for #{url} - #{c.last_effective_url}.")
|
117
232
|
end
|
118
233
|
end
|
234
|
+
|
119
235
|
curl.on_failure do |c|
|
120
236
|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
121
237
|
responses[url] = c.response_code
|
@@ -125,16 +241,28 @@ module Feedzirra
|
|
125
241
|
multi.add(easy)
|
126
242
|
end
|
127
243
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
244
|
+
# An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
|
245
|
+
#
|
246
|
+
# === Parameters
|
247
|
+
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
|
248
|
+
# [feed<Feed>] A feed object that you would like to be fetched.
|
249
|
+
# [url_queue<Array>] An array of feed objects that are queued for request.
|
250
|
+
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
251
|
+
# [feeds<String>] or <Array> A single feed object, or an array of feed objects.
|
252
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
253
|
+
# * :user_agent - String that overrides the default user agent.
|
254
|
+
# * :on_success - Block that gets executed after a successful request.
|
255
|
+
# * :on_failure - Block that gets executed after a failed request.
|
256
|
+
# === Returns
|
257
|
+
# The updated Curl::Multi object with the request details added to its stack.
|
258
|
+
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
133
259
|
easy = Curl::Easy.new(feed.feed_url) do |curl|
|
134
260
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
135
261
|
curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
|
136
262
|
curl.headers["If-None-Match"] = feed.etag if feed.etag
|
263
|
+
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
137
264
|
curl.follow_location = true
|
265
|
+
|
138
266
|
curl.on_success do |c|
|
139
267
|
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
140
268
|
updated_feed = Feed.parse(c.body_str)
|
@@ -145,6 +273,7 @@ module Feedzirra
|
|
145
273
|
responses[feed.feed_url] = feed
|
146
274
|
options[:on_success].call(feed) if options.has_key?(:on_success)
|
147
275
|
end
|
276
|
+
|
148
277
|
curl.on_failure do |c|
|
149
278
|
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
150
279
|
response_code = c.response_code
|
@@ -159,12 +288,24 @@ module Feedzirra
|
|
159
288
|
end
|
160
289
|
multi.add(easy)
|
161
290
|
end
|
162
|
-
|
291
|
+
|
292
|
+
# Determines the etag from the request headers.
|
293
|
+
#
|
294
|
+
# === Parameters
|
295
|
+
# [header<String>] Raw request header returned from the request
|
296
|
+
# === Returns
|
297
|
+
# A string of the etag or nil if it cannot be found in the headers.
|
163
298
|
def self.etag_from_header(header)
|
164
299
|
header =~ /.*ETag:\s(.*)\r/
|
165
300
|
$1
|
166
301
|
end
|
167
|
-
|
302
|
+
|
303
|
+
# Determines the last modified date from the request headers.
|
304
|
+
#
|
305
|
+
# === Parameters
|
306
|
+
# [header<String>] Raw request header returned from the request
|
307
|
+
# === Returns
|
308
|
+
# A Time object of the last modified date or nil if it cannot be found in the headers.
|
168
309
|
def self.last_modified_from_header(header)
|
169
310
|
header =~ /.*Last-Modified:\s(.*)\r/
|
170
311
|
Time.parse($1) if $1
|
@@ -1,15 +1,5 @@
|
|
1
1
|
module Feedzirra
|
2
2
|
module FeedEntryUtilities
|
3
|
-
module Sanitize
|
4
|
-
def sanitize!
|
5
|
-
self.replace(sanitize)
|
6
|
-
end
|
7
|
-
|
8
|
-
def sanitize
|
9
|
-
Dryopteris.sanitize(self)
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
3
|
attr_reader :published
|
14
4
|
|
15
5
|
def parse_datetime(string)
|
@@ -19,19 +9,7 @@ module Feedzirra
|
|
19
9
|
def published=(val)
|
20
10
|
@published = parse_datetime(val)
|
21
11
|
end
|
22
|
-
|
23
|
-
def content
|
24
|
-
@content.extend(Sanitize)
|
25
|
-
end
|
26
|
-
|
27
|
-
def title
|
28
|
-
@title.extend(Sanitize)
|
29
|
-
end
|
30
|
-
|
31
|
-
def author
|
32
|
-
@author.extend(Sanitize)
|
33
|
-
end
|
34
|
-
|
12
|
+
|
35
13
|
def sanitize!
|
36
14
|
self.title.sanitize!
|
37
15
|
self.author.sanitize!
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
# iTunes is RSS 2.0 + some apple extensions
|
3
|
+
# Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
|
4
|
+
class ITunesRSS
|
5
|
+
include SAXMachine
|
6
|
+
include FeedUtilities
|
7
|
+
|
8
|
+
attr_accessor :feed_url
|
9
|
+
|
10
|
+
# RSS 2.0 elements that need including
|
11
|
+
element :copyright
|
12
|
+
element :description
|
13
|
+
element :language
|
14
|
+
element :managingEditor
|
15
|
+
element :title
|
16
|
+
element :link, :as => :url
|
17
|
+
|
18
|
+
# If author is not present use managingEditor on the channel
|
19
|
+
element :"itunes:author", :as => :itunes_author
|
20
|
+
element :"itunes:block", :as => :itunes_block
|
21
|
+
element :"itunes:image", :value => :href, :as => :itunes_image
|
22
|
+
element :"itunes:explicit", :as => :itunes_explicit
|
23
|
+
element :"itunes:keywords", :as => :itunes_keywords
|
24
|
+
# New URL for the podcast feed
|
25
|
+
element :"itunes:new-feed-url", :as => :itunes_new_feed_url
|
26
|
+
element :"itunes:subtitle", :as => :itunes_subtitle
|
27
|
+
# If summary is not present, use the description tag
|
28
|
+
element :"itunes:summary", :as => :itunes_summary
|
29
|
+
|
30
|
+
# iTunes RSS feeds can have multiple main categories...
|
31
|
+
# ...and multiple sub-categories per category
|
32
|
+
# TODO subcategories not supported correctly - they are at the same level
|
33
|
+
# as the main categories
|
34
|
+
elements :"itunes:category", :as => :itunes_categories, :value => :text
|
35
|
+
|
36
|
+
elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
|
37
|
+
|
38
|
+
elements :item, :as => :entries, :class => ITunesRSSItem
|
39
|
+
|
40
|
+
def self.able_to_parse?(xml)
|
41
|
+
xml =~ /xmlns:itunes=\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
# iTunes extensions to the standard RSS2.0 item
|
3
|
+
# Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
|
4
|
+
class ITunesRSSItem
|
5
|
+
include SAXMachine
|
6
|
+
include FeedUtilities
|
7
|
+
element :author
|
8
|
+
element :guid
|
9
|
+
element :title
|
10
|
+
element :link, :as => :url
|
11
|
+
element :description, :as => :summary
|
12
|
+
element :pubDate, :as => :published
|
13
|
+
|
14
|
+
# If author is not present use author tag on the item
|
15
|
+
element :"itunes:author", :as => :itunes_author
|
16
|
+
element :"itunes:block", :as => :itunes_block
|
17
|
+
element :"itunes:duration", :as => :itunes_duration
|
18
|
+
element :"itunes:explicit", :as => :itunes_explicit
|
19
|
+
element :"itunes:keywords", :as => :itunes_keywords
|
20
|
+
element :"itunes:subtitle", :as => :itunes_subtitle
|
21
|
+
# If summary is not present, use the description tag
|
22
|
+
element :"itunes:summary", :as => :itunes_summary
|
23
|
+
element :enclosure, :value => :length, :as => :enclosure_length
|
24
|
+
element :enclosure, :value => :type, :as => :enclosure_type
|
25
|
+
element :enclosure, :value => :url, :as => :enclosure_url
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
data/lib/feedzirra/rdf.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with RDF feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class RDF
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -8,7 +16,7 @@ module Feedzirra
|
|
8
16
|
|
9
17
|
attr_accessor :feed_url
|
10
18
|
|
11
|
-
def self.able_to_parse?(xml)
|
19
|
+
def self.able_to_parse?(xml) #:nodoc:
|
12
20
|
xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
|
13
21
|
end
|
14
22
|
end
|
data/lib/feedzirra/rdf_entry.rb
CHANGED
data/lib/feedzirra/rss.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with RSS feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class RSS
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -8,7 +16,7 @@ module Feedzirra
|
|
8
16
|
|
9
17
|
attr_accessor :feed_url
|
10
18
|
|
11
|
-
def self.able_to_parse?(xml)
|
19
|
+
def self.able_to_parse?(xml) #:nodoc:
|
12
20
|
xml =~ /\<rss|rdf/
|
13
21
|
end
|
14
22
|
end
|
data/lib/feedzirra/rss_entry.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with RSS feed entries.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * url
|
8
|
+
# * author
|
9
|
+
# * content
|
10
|
+
# * summary
|
11
|
+
# * published
|
12
|
+
# * categories
|
2
13
|
class RSSEntry
|
3
14
|
include SAXMachine
|
4
15
|
include FeedEntryUtilities
|