pauldix-feedzirra 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +1 -1
- data/Rakefile +39 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra/atom.rb +9 -1
- data/lib/feedzirra/atom_entry.rb +11 -0
- data/lib/feedzirra/atom_feed_burner.rb +9 -1
- data/lib/feedzirra/atom_feed_burner_entry.rb +11 -0
- data/lib/feedzirra/feed.rb +168 -27
- data/lib/feedzirra/feed_entry_utilities.rb +1 -23
- data/lib/feedzirra/itunes_rss.rb +46 -0
- data/lib/feedzirra/itunes_rss_item.rb +28 -0
- data/lib/feedzirra/itunes_rss_owner.rb +8 -0
- data/lib/feedzirra/rdf.rb +9 -1
- data/lib/feedzirra/rdf_entry.rb +10 -0
- data/lib/feedzirra/rss.rb +9 -1
- data/lib/feedzirra/rss_entry.rb +11 -0
- data/lib/feedzirra.rb +5 -0
- data/spec/feedzirra/feed_spec.rb +415 -123
- data/spec/feedzirra/itunes_rss_item_spec.rb +48 -0
- data/spec/feedzirra/itunes_rss_owner_spec.rb +18 -0
- data/spec/feedzirra/itunes_rss_spec.rb +50 -0
- data/spec/spec_helper.rb +21 -9
- metadata +8 -1
data/README.textile
CHANGED
@@ -99,7 +99,7 @@ feeds = Feedzirra::Feed.fetch_and_parse(feeds_urls)
|
|
99
99
|
# there will be a Fixnum of the http response code instead of a feed object
|
100
100
|
|
101
101
|
# updating multiple feeds. it expects a collection of feed objects
|
102
|
-
updated_feeds = Feedzirra::Feed.
|
102
|
+
updated_feeds = Feedzirra::Feed.update(feeds.values)
|
103
103
|
|
104
104
|
# defining custom behavior on failure or success. note that a return status of 304 (not updated) will call the on_success handler
|
105
105
|
feed = Feedzirra::Feed.fetch_and_parse("http://feeds.feedburner.com/PaulDixExplainsNothing",
|
data/Rakefile
CHANGED
@@ -1,12 +1,51 @@
|
|
1
1
|
require "spec"
|
2
2
|
require "spec/rake/spectask"
|
3
|
+
require 'rake/rdoctask'
|
3
4
|
require 'lib/feedzirra.rb'
|
4
5
|
|
6
|
+
# Grab recently touched specs
|
7
|
+
def recent_specs(touched_since)
|
8
|
+
recent_specs = FileList['app/**/*'].map do |path|
|
9
|
+
|
10
|
+
if File.mtime(path) > touched_since
|
11
|
+
spec = File.join('spec', File.dirname(path).split("/")[1..-1].join('/'),
|
12
|
+
"#{File.basename(path, ".*")}_spec.rb")
|
13
|
+
spec if File.exists?(spec)
|
14
|
+
end
|
15
|
+
end.compact
|
16
|
+
|
17
|
+
recent_specs += FileList['spec/**/*_spec.rb'].select do |path|
|
18
|
+
File.mtime(path) > touched_since
|
19
|
+
end
|
20
|
+
recent_specs.uniq
|
21
|
+
end
|
22
|
+
|
23
|
+
# Tasks
|
5
24
|
Spec::Rake::SpecTask.new do |t|
|
6
25
|
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
7
26
|
t.spec_files = FileList['spec/**/*_spec.rb']
|
8
27
|
end
|
9
28
|
|
29
|
+
desc 'Run recent specs'
|
30
|
+
Spec::Rake::SpecTask.new("spec:recent") do |t|
|
31
|
+
t.spec_opts = ["--format","specdoc","--color"]
|
32
|
+
t.spec_files = recent_specs(Time.now - 600) # 10 min.
|
33
|
+
end
|
34
|
+
|
35
|
+
Spec::Rake::SpecTask.new('spec:rcov') do |t|
|
36
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
37
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
38
|
+
t.rcov = true
|
39
|
+
t.rcov_opts = ['--exclude', 'spec,/usr/lib/ruby,/usr/local,/var/lib,/Library', '--text-report']
|
40
|
+
end
|
41
|
+
|
42
|
+
Rake::RDocTask.new do |rd|
|
43
|
+
rd.title = 'Feedzirra'
|
44
|
+
rd.rdoc_dir = 'rdoc'
|
45
|
+
rd.rdoc_files.include('README.rdoc', 'lib/feedzirra.rb', 'lib/feedzirra/**/*.rb')
|
46
|
+
rd.options = ["--quiet", "--opname", "index.html", "--line-numbers", "--inline-source", '--main', 'README.rdoc']
|
47
|
+
end
|
48
|
+
|
10
49
|
task :install do
|
11
50
|
rm_rf "*.gem"
|
12
51
|
puts `gem build feedzirra.gemspec`
|
data/lib/feedzirra/atom.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Atom feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class Atom
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -7,7 +15,7 @@ module Feedzirra
|
|
7
15
|
element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
|
8
16
|
elements :entry, :as => :entries, :class => AtomEntry
|
9
17
|
|
10
|
-
def self.able_to_parse?(xml)
|
18
|
+
def self.able_to_parse?(xml) #:nodoc:
|
11
19
|
xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
|
12
20
|
end
|
13
21
|
end
|
data/lib/feedzirra/atom_entry.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Atom feed entries.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * url
|
8
|
+
# * author
|
9
|
+
# * content
|
10
|
+
# * summary
|
11
|
+
# * published
|
12
|
+
# * categories
|
2
13
|
class AtomEntry
|
3
14
|
include SAXMachine
|
4
15
|
include FeedEntryUtilities
|
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Feedburner Atom feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class AtomFeedBurner
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -7,7 +15,7 @@ module Feedzirra
|
|
7
15
|
element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
|
8
16
|
elements :entry, :as => :entries, :class => AtomFeedBurnerEntry
|
9
17
|
|
10
|
-
def self.able_to_parse?(xml)
|
18
|
+
def self.able_to_parse?(xml) #:nodoc:
|
11
19
|
(xml =~ /Atom/ && xml =~ /feedburner/) || false
|
12
20
|
end
|
13
21
|
end
|
@@ -1,4 +1,15 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with Feedburner Atom feed entries.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * url
|
8
|
+
# * author
|
9
|
+
# * content
|
10
|
+
# * summary
|
11
|
+
# * published
|
12
|
+
# * categories
|
2
13
|
class AtomFeedBurnerEntry
|
3
14
|
include SAXMachine
|
4
15
|
include FeedEntryUtilities
|
data/lib/feedzirra/feed.rb
CHANGED
@@ -4,30 +4,93 @@ module Feedzirra
|
|
4
4
|
class Feed
|
5
5
|
USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
|
6
6
|
|
7
|
+
# Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
|
8
|
+
#
|
9
|
+
# === Parameters
|
10
|
+
# [xml<String>] The XML that you would like parsed.
|
11
|
+
# === Returns
|
12
|
+
# An instance of the determined feed type. By default a Feedzirra::Atom, Feedzirra::AtomFeedBurner, Feedzirra::RDF, or Feedzirra::RSS object.
|
13
|
+
# === Raises
|
14
|
+
# Feedzirra::NoParserAvailable : If no valid parser classes could be found for the feed.
|
7
15
|
def self.parse(xml)
|
8
16
|
if parser = determine_feed_parser_for_xml(xml)
|
9
17
|
parser.parse(xml)
|
10
18
|
else
|
11
|
-
raise NoParserAvailable.new("
|
19
|
+
raise NoParserAvailable.new("No valid parser for XML.")
|
12
20
|
end
|
13
21
|
end
|
14
22
|
|
23
|
+
# Determines the correct parser class to use for parsing the feed.
|
24
|
+
#
|
25
|
+
# === Parameters
|
26
|
+
# [xml<String>] The XML that you would like to determine the parser for.
|
27
|
+
# === Returns
|
28
|
+
# The class name of the parser that can handle the XML.
|
15
29
|
def self.determine_feed_parser_for_xml(xml)
|
16
30
|
start_of_doc = xml.slice(0, 1000)
|
17
31
|
feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
|
18
32
|
end
|
19
33
|
|
20
|
-
|
34
|
+
# Adds a new feed parsing class that will be used for parsing.
|
35
|
+
#
|
36
|
+
# === Parameters
|
37
|
+
# [klass<Constant>] The class/constant that you want to register.
|
38
|
+
# === Returns
|
39
|
+
# An updated array of feed parser class names.
|
40
|
+
def self.add_feed_class(klass)
|
21
41
|
feed_classes.unshift klass
|
22
42
|
end
|
23
|
-
|
43
|
+
|
44
|
+
# Provides a list of registered feed parsing classes.
|
45
|
+
#
|
46
|
+
# === Returns
|
47
|
+
# An array of class names.
|
24
48
|
def self.feed_classes
|
25
|
-
@feed_classes ||= [RSS, AtomFeedBurner, Atom]
|
49
|
+
@feed_classes ||= [ITunesRSS, RSS, AtomFeedBurner, Atom]
|
50
|
+
end
|
51
|
+
|
52
|
+
# Makes all entry types look for the passed in element to parse. This is actually just a call to
|
53
|
+
# element (a SAXMachine call) in the class
|
54
|
+
#
|
55
|
+
# === Parameters
|
56
|
+
# [element_tag<String>]
|
57
|
+
# [options<Hash>] Valid keys are same as with SAXMachine
|
58
|
+
def self.add_common_feed_entry_element(element_tag, options = {})
|
59
|
+
# need to think of a better way to do this. will break for people who want this behavior
|
60
|
+
# across their added classes
|
61
|
+
[RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
|
62
|
+
klass.send(:element, element_tag, options)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Makes all entry types look for the passed in elements to parse. This is actually just a call to
|
67
|
+
# elements (a SAXMachine call) in the class
|
68
|
+
#
|
69
|
+
# === Parameters
|
70
|
+
# [element_tag<String>]
|
71
|
+
# [options<Hash>] Valid keys are same as with SAXMachine
|
72
|
+
def self.add_common_feed_entry_elements(element_tag, options = {})
|
73
|
+
# need to think of a better way to do this. will break for people who want this behavior
|
74
|
+
# across their added classes
|
75
|
+
[RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
|
76
|
+
klass.send(:elements, element_tag, options)
|
77
|
+
end
|
26
78
|
end
|
27
79
|
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
80
|
+
# Fetches and returns the raw XML for each URL provided.
|
81
|
+
#
|
82
|
+
# === Parameters
|
83
|
+
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
84
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
85
|
+
# :user_agent - String that overrides the default user agent.
|
86
|
+
# :if_modified_since - Time object representing when the feed was last updated.
|
87
|
+
# :if_none_match - String that's normally an etag for the request that was stored previously.
|
88
|
+
# :on_success - Block that gets executed after a successful request.
|
89
|
+
# :on_failure - Block that gets executed after a failed request.
|
90
|
+
# === Returns
|
91
|
+
# A String of XML if a single URL is passed.
|
92
|
+
#
|
93
|
+
# A Hash if multiple URLs are passed. The key will be the URL, and the value the XML.
|
31
94
|
def self.fetch_raw(urls, options = {})
|
32
95
|
url_queue = [*urls]
|
33
96
|
multi = Curl::Multi.new
|
@@ -39,6 +102,8 @@ module Feedzirra
|
|
39
102
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
40
103
|
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
41
104
|
curl.follow_location = true
|
105
|
+
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
106
|
+
|
42
107
|
curl.on_success do |c|
|
43
108
|
responses[url] = decode_content(c)
|
44
109
|
end
|
@@ -52,14 +117,28 @@ module Feedzirra
|
|
52
117
|
multi.perform
|
53
118
|
return urls.is_a?(String) ? responses.values.first : responses
|
54
119
|
end
|
55
|
-
|
120
|
+
|
121
|
+
# Fetches and returns the parsed XML for each URL provided.
|
122
|
+
#
|
123
|
+
# === Parameters
|
124
|
+
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
125
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
126
|
+
# * :user_agent - String that overrides the default user agent.
|
127
|
+
# * :if_modified_since - Time object representing when the feed was last updated.
|
128
|
+
# * :if_none_match - String, an etag for the request that was stored previously.
|
129
|
+
# * :on_success - Block that gets executed after a successful request.
|
130
|
+
# * :on_failure - Block that gets executed after a failed request.
|
131
|
+
# === Returns
|
132
|
+
# A Feed object if a single URL is passed.
|
133
|
+
#
|
134
|
+
# A Hash if multiple URLs are passed. The key will be the URL, and the value the Feed object.
|
56
135
|
def self.fetch_and_parse(urls, options = {})
|
57
136
|
url_queue = [*urls]
|
58
137
|
multi = Curl::Multi.new
|
59
|
-
|
138
|
+
responses = {}
|
139
|
+
|
60
140
|
# I broke these down so I would only try to do 30 simultaneously because
|
61
141
|
# I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
|
62
|
-
responses = {}
|
63
142
|
url_queue.slice!(0, 30).each do |url|
|
64
143
|
add_url_to_multi(multi, url, url_queue, responses, options)
|
65
144
|
end
|
@@ -67,25 +146,44 @@ module Feedzirra
|
|
67
146
|
multi.perform
|
68
147
|
return urls.is_a?(String) ? responses.values.first : responses
|
69
148
|
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
149
|
+
|
150
|
+
# Decodes the XML document if it was compressed.
|
151
|
+
#
|
152
|
+
# === Parameters
|
153
|
+
# [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
|
154
|
+
# === Returns
|
155
|
+
# A decoded string of XML.
|
156
|
+
def self.decode_content(curl_request)
|
157
|
+
if curl_request.header_str.match(/Content-Encoding: gzip/)
|
158
|
+
gz = Zlib::GzipReader.new(StringIO.new(curl_request.body_str))
|
74
159
|
xml = gz.read
|
75
160
|
gz.close
|
76
|
-
elsif
|
77
|
-
xml = Zlib::Deflate.inflate(
|
161
|
+
elsif curl_request.header_str.match(/Content-Encoding: deflate/)
|
162
|
+
xml = Zlib::Deflate.inflate(curl_request.body_str)
|
78
163
|
else
|
79
|
-
xml =
|
164
|
+
xml = curl_request.body_str
|
80
165
|
end
|
81
|
-
|
166
|
+
|
82
167
|
xml
|
83
168
|
end
|
84
|
-
|
169
|
+
|
170
|
+
# Updates each feed for each Feed object provided.
|
171
|
+
#
|
172
|
+
# === Parameters
|
173
|
+
# [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
|
174
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
175
|
+
# * :user_agent - String that overrides the default user agent.
|
176
|
+
# * :on_success - Block that gets executed after a successful request.
|
177
|
+
# * :on_failure - Block that gets executed after a failed request.
|
178
|
+
# === Returns
|
179
|
+
# An updated Feed object if a single Feed is passed.
|
180
|
+
#
|
181
|
+
# An Array of the updated Feed objects if multiple Feeds are passed.
|
85
182
|
def self.update(feeds, options = {})
|
86
183
|
feed_queue = [*feeds]
|
87
184
|
multi = Curl::Multi.new
|
88
185
|
responses = {}
|
186
|
+
|
89
187
|
feed_queue.slice!(0, 30).each do |feed|
|
90
188
|
add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
91
189
|
end
|
@@ -94,6 +192,20 @@ module Feedzirra
|
|
94
192
|
return responses.size == 1 ? responses.values.first : responses.values
|
95
193
|
end
|
96
194
|
|
195
|
+
# An abstraction for adding a feed by URL to the passed Curb::multi stack.
|
196
|
+
#
|
197
|
+
# === Parameters
|
198
|
+
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
|
199
|
+
# [url<String>] The URL of the feed that you would like to be fetched.
|
200
|
+
# [url_queue<Array>] An array of URLs that are queued for request.
|
201
|
+
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
202
|
+
# [feeds<String> or <Array>] A single feed object, or an array of feed objects.
|
203
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
204
|
+
# * :user_agent - String that overrides the default user agent.
|
205
|
+
# * :on_success - Block that gets executed after a successful request.
|
206
|
+
# * :on_failure - Block that gets executed after a failed request.
|
207
|
+
# === Returns
|
208
|
+
# The updated Curl::Multi object with the request details added to its stack.
|
97
209
|
def self.add_url_to_multi(multi, url, url_queue, responses, options)
|
98
210
|
easy = Curl::Easy.new(url) do |curl|
|
99
211
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
@@ -101,10 +213,13 @@ module Feedzirra
|
|
101
213
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
102
214
|
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
103
215
|
curl.follow_location = true
|
216
|
+
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
217
|
+
|
104
218
|
curl.on_success do |c|
|
105
219
|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
106
220
|
xml = decode_content(c)
|
107
221
|
klass = determine_feed_parser_for_xml(xml)
|
222
|
+
|
108
223
|
if klass
|
109
224
|
feed = klass.parse(xml)
|
110
225
|
feed.feed_url = c.last_effective_url
|
@@ -113,9 +228,10 @@ module Feedzirra
|
|
113
228
|
responses[url] = feed
|
114
229
|
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
115
230
|
else
|
116
|
-
|
231
|
+
raise NoParserAvailable.new("Error determining parser for #{url} - #{c.last_effective_url}.")
|
117
232
|
end
|
118
233
|
end
|
234
|
+
|
119
235
|
curl.on_failure do |c|
|
120
236
|
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
121
237
|
responses[url] = c.response_code
|
@@ -125,16 +241,28 @@ module Feedzirra
|
|
125
241
|
multi.add(easy)
|
126
242
|
end
|
127
243
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
244
|
+
# An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
|
245
|
+
#
|
246
|
+
# === Parameters
|
247
|
+
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
|
248
|
+
# [feed<Feed>] A feed object that you would like to be fetched.
|
249
|
+
# [feed_queue<Array>] An array of feed objects that are queued for request.
|
250
|
+
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
251
|
+
# [feeds<String>] or <Array> A single feed object, or an array of feed objects.
|
252
|
+
# [options<Hash>] Valid keys for this argument are as follows:
|
253
|
+
# * :user_agent - String that overrides the default user agent.
|
254
|
+
# * :on_success - Block that gets executed after a successful request.
|
255
|
+
# * :on_failure - Block that gets executed after a failed request.
|
256
|
+
# === Returns
|
257
|
+
# The updated Curl::Multi object with the request details added to its stack.
|
258
|
+
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
133
259
|
easy = Curl::Easy.new(feed.feed_url) do |curl|
|
134
260
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
135
261
|
curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
|
136
262
|
curl.headers["If-None-Match"] = feed.etag if feed.etag
|
263
|
+
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
137
264
|
curl.follow_location = true
|
265
|
+
|
138
266
|
curl.on_success do |c|
|
139
267
|
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
140
268
|
updated_feed = Feed.parse(c.body_str)
|
@@ -145,6 +273,7 @@ module Feedzirra
|
|
145
273
|
responses[feed.feed_url] = feed
|
146
274
|
options[:on_success].call(feed) if options.has_key?(:on_success)
|
147
275
|
end
|
276
|
+
|
148
277
|
curl.on_failure do |c|
|
149
278
|
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
150
279
|
response_code = c.response_code
|
@@ -159,12 +288,24 @@ module Feedzirra
|
|
159
288
|
end
|
160
289
|
multi.add(easy)
|
161
290
|
end
|
162
|
-
|
291
|
+
|
292
|
+
# Determines the etag from the request headers.
|
293
|
+
#
|
294
|
+
# === Parameters
|
295
|
+
# [header<String>] Raw request header returned from the request
|
296
|
+
# === Returns
|
297
|
+
# A string of the etag or nil if it cannot be found in the headers.
|
163
298
|
def self.etag_from_header(header)
|
164
299
|
header =~ /.*ETag:\s(.*)\r/
|
165
300
|
$1
|
166
301
|
end
|
167
|
-
|
302
|
+
|
303
|
+
# Determines the last modified date from the request headers.
|
304
|
+
#
|
305
|
+
# === Parameters
|
306
|
+
# [header<String>] Raw request header returned from the request
|
307
|
+
# === Returns
|
308
|
+
# A Time object of the last modified date or nil if it cannot be found in the headers.
|
168
309
|
def self.last_modified_from_header(header)
|
169
310
|
header =~ /.*Last-Modified:\s(.*)\r/
|
170
311
|
Time.parse($1) if $1
|
@@ -1,15 +1,5 @@
|
|
1
1
|
module Feedzirra
|
2
2
|
module FeedEntryUtilities
|
3
|
-
module Sanitize
|
4
|
-
def sanitize!
|
5
|
-
self.replace(sanitize)
|
6
|
-
end
|
7
|
-
|
8
|
-
def sanitize
|
9
|
-
Dryopteris.sanitize(self)
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
3
|
attr_reader :published
|
14
4
|
|
15
5
|
def parse_datetime(string)
|
@@ -19,19 +9,7 @@ module Feedzirra
|
|
19
9
|
def published=(val)
|
20
10
|
@published = parse_datetime(val)
|
21
11
|
end
|
22
|
-
|
23
|
-
def content
|
24
|
-
@content.extend(Sanitize)
|
25
|
-
end
|
26
|
-
|
27
|
-
def title
|
28
|
-
@title.extend(Sanitize)
|
29
|
-
end
|
30
|
-
|
31
|
-
def author
|
32
|
-
@author.extend(Sanitize)
|
33
|
-
end
|
34
|
-
|
12
|
+
|
35
13
|
def sanitize!
|
36
14
|
self.title.sanitize!
|
37
15
|
self.author.sanitize!
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
# iTunes is RSS 2.0 + some apple extensions
|
3
|
+
# Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
|
4
|
+
class ITunesRSS
|
5
|
+
include SAXMachine
|
6
|
+
include FeedUtilities
|
7
|
+
|
8
|
+
attr_accessor :feed_url
|
9
|
+
|
10
|
+
# RSS 2.0 elements that need including
|
11
|
+
element :copyright
|
12
|
+
element :description
|
13
|
+
element :language
|
14
|
+
element :managingEditor
|
15
|
+
element :title
|
16
|
+
element :link, :as => :url
|
17
|
+
|
18
|
+
# If author is not present use managingEditor on the channel
|
19
|
+
element :"itunes:author", :as => :itunes_author
|
20
|
+
element :"itunes:block", :as => :itunes_block
|
21
|
+
element :"itunes:image", :value => :href, :as => :itunes_image
|
22
|
+
element :"itunes:explicit", :as => :itunes_explicit
|
23
|
+
element :"itunes:keywords", :as => :itunes_keywords
|
24
|
+
# New URL for the podcast feed
|
25
|
+
element :"itunes:new-feed-url", :as => :itunes_new_feed_url
|
26
|
+
element :"itunes:subtitle", :as => :itunes_subtitle
|
27
|
+
# If summary is not present, use the description tag
|
28
|
+
element :"itunes:summary", :as => :itunes_summary
|
29
|
+
|
30
|
+
# iTunes RSS feeds can have multiple main categories...
|
31
|
+
# ...and multiple sub-categories per category
|
32
|
+
# TODO subcategories not supported correctly - they are at the same level
|
33
|
+
# as the main categories
|
34
|
+
elements :"itunes:category", :as => :itunes_categories, :value => :text
|
35
|
+
|
36
|
+
elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
|
37
|
+
|
38
|
+
elements :item, :as => :entries, :class => ITunesRSSItem
|
39
|
+
|
40
|
+
def self.able_to_parse?(xml)
|
41
|
+
xml =~ /xmlns:itunes=\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
# iTunes extensions to the standard RSS2.0 item
|
3
|
+
# Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
|
4
|
+
class ITunesRSSItem
|
5
|
+
include SAXMachine
|
6
|
+
include FeedUtilities
|
7
|
+
element :author
|
8
|
+
element :guid
|
9
|
+
element :title
|
10
|
+
element :link, :as => :url
|
11
|
+
element :description, :as => :summary
|
12
|
+
element :pubDate, :as => :published
|
13
|
+
|
14
|
+
# If author is not present use author tag on the item
|
15
|
+
element :"itunes:author", :as => :itunes_author
|
16
|
+
element :"itunes:block", :as => :itunes_block
|
17
|
+
element :"itunes:duration", :as => :itunes_duration
|
18
|
+
element :"itunes:explicit", :as => :itunes_explicit
|
19
|
+
element :"itunes:keywords", :as => :itunes_keywords
|
20
|
+
element :"itunes:subtitle", :as => :itunes_subtitle
|
21
|
+
# If summary is not present, use the description tag
|
22
|
+
element :"itunes:summary", :as => :itunes_summary
|
23
|
+
element :enclosure, :value => :length, :as => :enclosure_length
|
24
|
+
element :enclosure, :value => :type, :as => :enclosure_type
|
25
|
+
element :enclosure, :value => :url, :as => :enclosure_url
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
data/lib/feedzirra/rdf.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with RDF feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class RDF
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -8,7 +16,7 @@ module Feedzirra
|
|
8
16
|
|
9
17
|
attr_accessor :feed_url
|
10
18
|
|
11
|
-
def self.able_to_parse?(xml)
|
19
|
+
def self.able_to_parse?(xml) #:nodoc:
|
12
20
|
xml =~ /(rdf\:RDF)|(#{Regexp.escape("http://purl.org/rss/1.0")})|(rss version\=\"0\.9.?\")/ || false
|
13
21
|
end
|
14
22
|
end
|
data/lib/feedzirra/rdf_entry.rb
CHANGED
data/lib/feedzirra/rss.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with RSS feeds.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * feed_url
|
8
|
+
# * url
|
9
|
+
# * entries
|
2
10
|
class RSS
|
3
11
|
include SAXMachine
|
4
12
|
include FeedUtilities
|
@@ -8,7 +16,7 @@ module Feedzirra
|
|
8
16
|
|
9
17
|
attr_accessor :feed_url
|
10
18
|
|
11
|
-
def self.able_to_parse?(xml)
|
19
|
+
def self.able_to_parse?(xml) #:nodoc:
|
12
20
|
xml =~ /\<rss|rdf/
|
13
21
|
end
|
14
22
|
end
|
data/lib/feedzirra/rss_entry.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
1
|
module Feedzirra
|
2
|
+
# == Summary
|
3
|
+
# Parser for dealing with RSS feed entries.
|
4
|
+
#
|
5
|
+
# == Attributes
|
6
|
+
# * title
|
7
|
+
# * url
|
8
|
+
# * author
|
9
|
+
# * content
|
10
|
+
# * summary
|
11
|
+
# * published
|
12
|
+
# * categories
|
2
13
|
class RSSEntry
|
3
14
|
include SAXMachine
|
4
15
|
include FeedEntryUtilities
|