feedtools 0.2.23 → 0.2.24
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +13 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +24 -12
- data/lib/feed_tools/database_feed_cache.rb +8 -5
- data/lib/feed_tools/feed.rb +122 -240
- data/lib/feed_tools/feed_item.rb +31 -13
- data/lib/feed_tools/feed_structures.rb +5 -2
- data/lib/feed_tools/helpers/debug_helper.rb +1 -2
- data/lib/feed_tools/helpers/html_helper.rb +75 -43
- data/lib/feed_tools/helpers/retrieval_helper.rb +204 -6
- data/lib/feed_tools/helpers/uri_helper.rb +4 -1
- data/lib/feed_tools/vendor/htree/parse.rb +3 -1
- data/lib/feed_tools/version.rb +9 -0
- data/rakefile +6 -4
- data/test/unit/atom_test.rb +253 -4
- data/test/unit/cache_test.rb +22 -17
- data/test/unit/helper_test.rb +2 -2
- metadata +4 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
== FeedTools 0.2.24
|
2
|
+
* fixed autodiscovery bugs
|
3
|
+
* updated autodiscovery to handle relative uris
|
4
|
+
* added per feed configuration
|
5
|
+
* rewrote http retrieval code
|
6
|
+
* supports authenticated http proxies
|
7
|
+
* supports basic http auth
|
8
|
+
* fixed relative url resolution issues
|
9
|
+
* corrected db schema files to match migration file
|
10
|
+
* fixed bug in the save method
|
11
|
+
* fixed some major http bugs
|
12
|
+
* updated dependencies
|
13
|
+
* no longer causes problems with frozen rails
|
1
14
|
== FeedTools 0.2.23
|
2
15
|
* autodiscovery implemented
|
3
16
|
* now knows a title from a hole in the ground
|
data/db/schema.mysql.sql
CHANGED
data/db/schema.postgresql.sql
CHANGED
data/db/schema.sqlite.sql
CHANGED
data/lib/feed_tools.rb
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
if Object.const_defined?(:
|
24
|
+
if Object.const_defined?(:FEED_TOOLS_NAMESPACES)
|
25
25
|
warn("FeedTools may have been loaded improperly. This may be caused " +
|
26
26
|
"by the presence of the RUBYOPT environment variable or by using " +
|
27
27
|
"load instead of require. This can also be caused by missing " +
|
@@ -32,8 +32,6 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
32
32
|
ENV['RAILS_ENV'] ||
|
33
33
|
'development' # :nodoc:
|
34
34
|
|
35
|
-
FEED_TOOLS_VERSION = "0.2.23"
|
36
|
-
|
37
35
|
FEED_TOOLS_NAMESPACES = {
|
38
36
|
"admin" => "http://webns.net/mvcb/",
|
39
37
|
"ag" => "http://purl.org/rss/1.0/modules/aggregation/",
|
@@ -58,6 +56,7 @@ FEED_TOOLS_NAMESPACES = {
|
|
58
56
|
"image" => "http://purl.org/rss/1.0/modules/image/",
|
59
57
|
"feedburner" => "http://rssnamespace.org/feedburner/ext/1.0",
|
60
58
|
"foaf" => "http://xmlns.com/foaf/0.1/",
|
59
|
+
"foo" => "http://hsivonen.iki.fi/FooML",
|
61
60
|
"fm" => "http://freshmeat.net/rss/fm/",
|
62
61
|
"itunes" => "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
63
62
|
"l" => "http://purl.org/rss/1.0/modules/link/",
|
@@ -94,6 +93,8 @@ $:.unshift(File.dirname(__FILE__))
|
|
94
93
|
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
95
94
|
|
96
95
|
begin
|
96
|
+
require 'feed_tools/version'
|
97
|
+
|
97
98
|
begin
|
98
99
|
require 'iconv'
|
99
100
|
rescue Object
|
@@ -133,8 +134,12 @@ begin
|
|
133
134
|
require 'yaml'
|
134
135
|
require 'base64'
|
135
136
|
|
136
|
-
|
137
|
-
|
137
|
+
if !defined?(ActiveSupport)
|
138
|
+
require_gem('activesupport', '>= 1.1.1')
|
139
|
+
end
|
140
|
+
if !defined?(ActiveRecord)
|
141
|
+
require_gem('activerecord', '>= 1.11.1')
|
142
|
+
end
|
138
143
|
|
139
144
|
begin
|
140
145
|
require_gem('uuidtools', '>= 0.1.2')
|
@@ -186,13 +191,19 @@ module FeedTools
|
|
186
191
|
config_hash = {}
|
187
192
|
@configurations = {
|
188
193
|
:feed_cache => nil,
|
194
|
+
:disable_update_from_remote => false,
|
189
195
|
:proxy_address => nil,
|
190
196
|
:proxy_port => nil,
|
191
|
-
:
|
197
|
+
:proxy_user => nil,
|
198
|
+
:proxy_password => nil,
|
199
|
+
:user_agent =>
|
200
|
+
"FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING} " +
|
192
201
|
"+http://www.sporkmonger.com/projects/feedtools/",
|
193
|
-
:generator_name =>
|
194
|
-
|
195
|
-
:
|
202
|
+
:generator_name =>
|
203
|
+
"FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING}",
|
204
|
+
:generator_href =>
|
205
|
+
"http://www.sporkmonger.com/projects/feedtools/",
|
206
|
+
:tidy_enabled => false,
|
196
207
|
:tidy_options => {},
|
197
208
|
:idn_enabled => true,
|
198
209
|
:sanitization_enabled => true,
|
@@ -357,7 +368,7 @@ module REXML # :nodoc:
|
|
357
368
|
ns = node.namespace( prefix )
|
358
369
|
end
|
359
370
|
!(node.node_type == :element and
|
360
|
-
node.name.downcase == name and node.namespace == ns )
|
371
|
+
node.name.downcase == name.downcase and node.namespace == ns )
|
361
372
|
end
|
362
373
|
return n
|
363
374
|
|
@@ -420,7 +431,7 @@ module REXML # :nodoc:
|
|
420
431
|
for element in nodeset
|
421
432
|
if element.node_type == :element
|
422
433
|
for attribute_name in element.attributes.keys
|
423
|
-
if attribute_name.downcase == name
|
434
|
+
if attribute_name.downcase == name.downcase
|
424
435
|
attrib = element.attribute( attribute_name,
|
425
436
|
@namespaces[prefix] )
|
426
437
|
new_nodeset << attrib if attrib
|
@@ -438,7 +449,8 @@ module REXML # :nodoc:
|
|
438
449
|
return new_nodeset
|
439
450
|
|
440
451
|
when :parent
|
441
|
-
return internal_parse( path_stack,
|
452
|
+
return internal_parse( path_stack,
|
453
|
+
nodeset.collect{|n| n.parent}.compact )
|
442
454
|
|
443
455
|
when :ancestor
|
444
456
|
new_nodeset = []
|
data/lib/feed_tools/database_feed_cache.rb
CHANGED
@@ -51,14 +51,17 @@ module FeedTools
|
|
51
51
|
begin
|
52
52
|
possible_config_files = [
|
53
53
|
"./config/database.yml",
|
54
|
-
"../config/database.yml",
|
55
54
|
"./database.yml",
|
55
|
+
"../config/database.yml",
|
56
56
|
"../database.yml",
|
57
|
-
"../../database.yml"
|
57
|
+
"../../config/database.yml",
|
58
|
+
"../../database.yml",
|
59
|
+
"../../../config/database.yml",
|
60
|
+
"../../../database.yml"
|
58
61
|
]
|
59
62
|
database_config_file = nil
|
60
63
|
for file in possible_config_files
|
61
|
-
if File.exists?
|
64
|
+
if File.exists?(File.expand_path(file))
|
62
65
|
database_config_file = file
|
63
66
|
break
|
64
67
|
end
|
@@ -109,9 +112,9 @@ module FeedTools
|
|
109
112
|
# True if the appropriate database table already exists
|
110
113
|
def DatabaseFeedCache.table_exists?
|
111
114
|
begin
|
112
|
-
ActiveRecord::Base.connection.execute
|
115
|
+
ActiveRecord::Base.connection.execute("select id, href, title, " +
|
113
116
|
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
114
|
-
"from #{self.table_name()} limit 1"
|
117
|
+
"from #{self.table_name()} limit 1")
|
115
118
|
rescue ActiveRecord::StatementInvalid
|
116
119
|
return false
|
117
120
|
rescue
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -24,6 +24,7 @@
|
|
24
24
|
require 'rexml/document'
|
25
25
|
require 'feed_tools/feed_item'
|
26
26
|
require 'feed_tools/feed_structures'
|
27
|
+
require 'feed_tools/helpers/retrieval_helper'
|
27
28
|
require 'feed_tools/helpers/generic_helper'
|
28
29
|
require 'feed_tools/helpers/xml_helper'
|
29
30
|
require 'feed_tools/helpers/html_helper'
|
@@ -51,19 +52,22 @@ module FeedTools
|
|
51
52
|
@entries = nil
|
52
53
|
@live = false
|
53
54
|
@encoding = nil
|
55
|
+
@options = nil
|
54
56
|
end
|
55
57
|
|
56
58
|
# Loads the feed specified by the url, pulling the data from the
|
57
|
-
# cache if it hasn't expired.
|
58
|
-
#
|
59
|
-
# * <tt>:cache_only</tt> - If set to true, the feed will only be
|
60
|
-
# pulled from the cache.
|
59
|
+
# cache if it hasn't expired. Options supplied will override the
|
60
|
+
# default options.
|
61
61
|
def Feed.open(url, options={})
|
62
|
-
FeedTools::GenericHelper.validate_options(
|
63
|
-
|
64
|
-
|
62
|
+
FeedTools::GenericHelper.validate_options(
|
63
|
+
FeedTools.configurations.keys, options.keys)
|
64
|
+
|
65
|
+
# create the new feed
|
66
|
+
feed = FeedTools::Feed.new
|
67
|
+
|
68
|
+
feed.configurations = FeedTools.configurations.merge(options)
|
65
69
|
|
66
|
-
if
|
70
|
+
if feed.configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
|
67
71
|
raise(ArgumentError, "There is currently no caching mechanism set. " +
|
68
72
|
"Cannot retrieve cached feeds.")
|
69
73
|
end
|
@@ -71,16 +75,32 @@ module FeedTools
|
|
71
75
|
# clean up the url
|
72
76
|
url = FeedTools::UriHelper.normalize_url(url)
|
73
77
|
|
74
|
-
#
|
75
|
-
feed = FeedTools::Feed.new
|
78
|
+
# load the new feed
|
76
79
|
feed.href = url
|
77
|
-
feed.update! unless
|
80
|
+
feed.update! unless feed.configurations[:disable_update_from_remote]
|
78
81
|
return feed
|
79
82
|
end
|
83
|
+
|
84
|
+
# Returns the load options for this feed.
|
85
|
+
def configurations
|
86
|
+
if @configurations.blank?
|
87
|
+
@configurations = FeedTools.configurations.dup
|
88
|
+
end
|
89
|
+
return @configurations
|
90
|
+
end
|
91
|
+
|
92
|
+
# Sets the load options for this feed.
|
93
|
+
def configurations=(new_configurations)
|
94
|
+
@configurations = new_configurations
|
95
|
+
end
|
80
96
|
|
81
97
|
# Loads the feed from the remote url if the feed has expired from the
|
82
98
|
# cache or cannot be retrieved from the cache for some reason.
|
83
99
|
def update!
|
100
|
+
if self.configurations[:disable_update_from_remote]
|
101
|
+
# Don't do anything if this option is set
|
102
|
+
return
|
103
|
+
end
|
84
104
|
if !FeedTools.feed_cache.nil? &&
|
85
105
|
!FeedTools.feed_cache.set_up_correctly?
|
86
106
|
raise "Your feed cache system is incorrectly set up. " +
|
@@ -101,7 +121,7 @@ module FeedTools
|
|
101
121
|
# Handle autodiscovery
|
102
122
|
if self.http_headers['content-type'] =~ /text\/html/ ||
|
103
123
|
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
104
|
-
|
124
|
+
|
105
125
|
autodiscovered_url = nil
|
106
126
|
autodiscovered_url =
|
107
127
|
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
@@ -117,9 +137,19 @@ module FeedTools
|
|
117
137
|
"application/rdf+xml")
|
118
138
|
end
|
119
139
|
unless autodiscovered_url.nil?
|
140
|
+
begin
|
141
|
+
autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri(
|
142
|
+
autodiscovered_url, [self.href])
|
143
|
+
rescue Exception
|
144
|
+
end
|
120
145
|
self.feed_data = nil
|
121
146
|
self.href = autodiscovered_url
|
122
|
-
|
147
|
+
if FeedTools.feed_cache.nil?
|
148
|
+
self.cache_object = nil
|
149
|
+
else
|
150
|
+
self.cache_object =
|
151
|
+
FeedTools.feed_cache.find_by_href(autodiscovered_url)
|
152
|
+
end
|
123
153
|
self.update!
|
124
154
|
end
|
125
155
|
end
|
@@ -170,226 +200,67 @@ module FeedTools
|
|
170
200
|
|
171
201
|
# No need for http headers unless we're actually doing http
|
172
202
|
if retrieval_method == "http"
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
self.http_headers['last-modified'] unless
|
180
|
-
self.http_headers['last-modified'].nil?
|
181
|
-
end
|
182
|
-
unless FeedTools.configurations[:user_agent].nil?
|
183
|
-
headers["User-Agent"] = FeedTools.configurations[:user_agent]
|
184
|
-
end
|
185
|
-
|
186
|
-
# The http feed access method
|
187
|
-
http_fetch = lambda do |feed_url, request_headers, redirect_limit,
|
188
|
-
response_chain, no_headers|
|
189
|
-
raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
|
190
|
-
feed_uri = nil
|
191
|
-
begin
|
192
|
-
feed_uri = URI.parse(feed_url)
|
193
|
-
rescue URI::InvalidURIError
|
194
|
-
# Uh, maybe try to fix it?
|
195
|
-
feed_uri = URI.parse(FeedTools::UriHelper.normalize_url(feed_url))
|
196
|
-
end
|
197
|
-
|
198
|
-
begin
|
199
|
-
proxy_address = (FeedTools.configurations[:proxy_address] || nil)
|
200
|
-
proxy_port = (FeedTools.configurations[:proxy_port].to_i || nil)
|
201
|
-
|
202
|
-
http = Net::HTTP::Proxy(proxy_address, proxy_port).new(
|
203
|
-
feed_uri.host, (feed_uri.port or 80))
|
204
|
-
http.start do
|
205
|
-
final_uri = feed_uri.path
|
206
|
-
final_uri += ('?' + feed_uri.query) if feed_uri.query
|
207
|
-
request_headers = {} if no_headers
|
208
|
-
response = http.request_get(final_uri, request_headers)
|
209
|
-
|
210
|
-
case response
|
211
|
-
when Net::HTTPSuccess
|
212
|
-
# We've reached the final destination, process all previous
|
213
|
-
# redirections, and see if we need to update the url.
|
214
|
-
for redirected_response in response_chain
|
215
|
-
if redirected_response.last.code.to_i == 301
|
216
|
-
# Reset the cache object or we may get duplicate entries
|
217
|
-
self.cache_object = nil
|
218
|
-
self.href = redirected_response.last['location']
|
219
|
-
else
|
220
|
-
# Jump out as soon as we hit anything that isn't a
|
221
|
-
# permanently moved redirection.
|
222
|
-
break
|
223
|
-
end
|
224
|
-
end
|
225
|
-
response
|
226
|
-
when Net::HTTPRedirection
|
227
|
-
if response.code.to_i == 304
|
228
|
-
response.error!
|
229
|
-
else
|
230
|
-
if response['location'].nil?
|
231
|
-
raise FeedAccessError,
|
232
|
-
"No location to redirect to supplied: " + response.code
|
233
|
-
end
|
234
|
-
response_chain << [feed_url, response]
|
235
|
-
new_location = response['location']
|
236
|
-
if response_chain.assoc(new_location) != nil
|
237
|
-
raise FeedAccessError,
|
238
|
-
"Redirection loop detected: #{new_location}"
|
239
|
-
end
|
240
|
-
|
241
|
-
# Find out if we've already seen the url we've been
|
242
|
-
# redirected to.
|
243
|
-
found_redirect = false
|
244
|
-
begin
|
245
|
-
cached_feed = FeedTools::Feed.open(new_location,
|
246
|
-
:cache_only => true)
|
247
|
-
if cached_feed.cache_object != nil &&
|
248
|
-
cached_feed.cache_object.new_record? != true
|
249
|
-
if !cached_feed.expired? &&
|
250
|
-
!cached_feed.http_headers.blank?
|
251
|
-
# Copy the cached state
|
252
|
-
self.href = cached_feed.href
|
253
|
-
|
254
|
-
@feed_data = cached_feed.feed_data
|
255
|
-
@feed_data_type = cached_feed.feed_data_type
|
256
|
-
|
257
|
-
if @feed_data.blank?
|
258
|
-
raise "Invalid cache data."
|
259
|
-
end
|
203
|
+
begin
|
204
|
+
@http_response = (FeedTools::RetrievalHelper.http_get(
|
205
|
+
self.href, :feed_object => self) do |url, response|
|
206
|
+
# Find out if we've already seen the url we've been
|
207
|
+
# redirected to.
|
208
|
+
follow_redirect = true
|
260
209
|
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
210
|
+
begin
|
211
|
+
cached_feed = FeedTools::Feed.open(url,
|
212
|
+
:disable_update_from_remote => true)
|
213
|
+
if cached_feed.cache_object != nil &&
|
214
|
+
cached_feed.cache_object.new_record? != true
|
215
|
+
if !cached_feed.expired? &&
|
216
|
+
!cached_feed.http_headers.blank?
|
217
|
+
# Copy the cached state
|
218
|
+
self.href = cached_feed.href
|
219
|
+
|
220
|
+
@feed_data = cached_feed.feed_data
|
221
|
+
@feed_data_type = cached_feed.feed_data_type
|
222
|
+
|
223
|
+
if @feed_data.blank?
|
224
|
+
raise "Invalid cache data."
|
270
225
|
end
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
else
|
281
|
-
response
|
282
|
-
end
|
283
|
-
end
|
284
|
-
else
|
285
|
-
class << response
|
286
|
-
def response_chain
|
287
|
-
return @response_chain
|
226
|
+
|
227
|
+
@title = nil; self.title
|
228
|
+
@link = nil; self.link
|
229
|
+
|
230
|
+
self.last_retrieved = cached_feed.last_retrieved
|
231
|
+
self.http_headers = cached_feed.http_headers
|
232
|
+
self.cache_object = cached_feed.cache_object
|
233
|
+
@live = false
|
234
|
+
follow_redirect = false
|
288
235
|
end
|
289
236
|
end
|
290
|
-
|
291
|
-
|
292
|
-
response.error!
|
293
|
-
end
|
294
|
-
end
|
295
|
-
rescue SocketError
|
296
|
-
raise FeedAccessError, 'Socket error prevented feed retrieval'
|
297
|
-
rescue Timeout::Error
|
298
|
-
raise FeedAccessError, 'Timeout while attempting to retrieve feed'
|
299
|
-
rescue Errno::ENETUNREACH
|
300
|
-
raise FeedAccessError, 'Network was unreachable'
|
301
|
-
rescue Errno::ECONNRESET
|
302
|
-
raise FeedAccessError, 'Connection was reset by peer'
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
begin
|
307
|
-
begin
|
308
|
-
@http_response = http_fetch.call(self.href, headers, 10, [], false)
|
309
|
-
rescue => error
|
310
|
-
if error.respond_to?(:response)
|
311
|
-
# You might not believe this, but...
|
312
|
-
#
|
313
|
-
# Under certain circumstances, web servers will try to block
|
314
|
-
# based on the User-Agent header. This is *retarded*. But
|
315
|
-
# we won't let their stupid error stop us!
|
316
|
-
#
|
317
|
-
# This is, of course, a quick-n-dirty hack. But at least
|
318
|
-
# we get to blame other people's bad software and/or bad
|
319
|
-
# configuration files.
|
320
|
-
if error.response.code.to_i == 404 &&
|
321
|
-
FeedTools.configurations[:user_agent] != nil
|
322
|
-
@http_response = http_fetch.call(self.href, {}, 10, [], true)
|
323
|
-
if @http_response != nil && @http_response.code.to_i == 200
|
324
|
-
warn("The server appears to be blocking based on the " +
|
325
|
-
"User-Agent header. This is stupid, and you should " +
|
326
|
-
"inform the webmaster of this.")
|
327
|
-
end
|
328
|
-
else
|
329
|
-
raise error
|
237
|
+
rescue
|
238
|
+
# If anything goes wrong, ignore it.
|
330
239
|
end
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
unless @http_response.kind_of? Net::HTTPRedirection
|
240
|
+
follow_redirect
|
241
|
+
end)
|
242
|
+
case @http_response
|
243
|
+
when Net::HTTPSuccess
|
336
244
|
@feed_data = self.http_response.body
|
337
245
|
@http_headers = {}
|
338
246
|
self.http_response.each_header do |key, value|
|
339
247
|
self.http_headers[key.downcase] = value
|
340
248
|
end
|
341
249
|
self.last_retrieved = Time.now.gmtime
|
342
|
-
|
343
|
-
|
344
|
-
@live = false
|
345
|
-
if self.feed_data.nil?
|
346
|
-
raise
|
347
|
-
end
|
348
|
-
rescue Timeout::Error
|
349
|
-
# if we time out, do nothing, it should fall back to the feed_data
|
350
|
-
# stored in the cache.
|
351
|
-
@live = false
|
352
|
-
if self.feed_data.nil?
|
353
|
-
raise
|
354
|
-
end
|
355
|
-
rescue Errno::ECONNRESET
|
356
|
-
# if the connection gets reset by peer, oh well, fall back to the
|
357
|
-
# feed_data stored in the cache
|
358
|
-
@live = false
|
359
|
-
if self.feed_data.nil?
|
360
|
-
raise
|
361
|
-
end
|
362
|
-
rescue => error
|
363
|
-
# heck, if anything at all bad happens, fall back to the feed_data
|
364
|
-
# stored in the cache.
|
365
|
-
|
366
|
-
# If we can, get the HTTPResponse...
|
367
|
-
@http_response = nil
|
368
|
-
if error.respond_to?(:each_header)
|
369
|
-
@http_response = error
|
370
|
-
end
|
371
|
-
if error.respond_to?(:response) &&
|
372
|
-
error.response.respond_to?(:each_header)
|
373
|
-
@http_response = error.response
|
374
|
-
end
|
375
|
-
if @http_response != nil
|
250
|
+
@live = true
|
251
|
+
when Net::HTTPNotModified
|
376
252
|
@http_headers = {}
|
377
253
|
self.http_response.each_header do |key, value|
|
378
254
|
self.http_headers[key.downcase] = value
|
379
255
|
end
|
380
|
-
|
381
|
-
|
382
|
-
|
256
|
+
self.last_retrieved = Time.now.gmtime
|
257
|
+
@live = true
|
258
|
+
else
|
259
|
+
@live = false
|
383
260
|
end
|
261
|
+
rescue Exception => error
|
384
262
|
@live = false
|
385
263
|
if self.feed_data.nil?
|
386
|
-
if error.respond_to?(:response) &&
|
387
|
-
error.response.respond_to?(:response_chain)
|
388
|
-
redirects = error.response.response_chain.map do |pair|
|
389
|
-
pair.first
|
390
|
-
end
|
391
|
-
error.message << (" - Redirects: " + redirects.inspect)
|
392
|
-
end
|
393
264
|
raise error
|
394
265
|
end
|
395
266
|
end
|
@@ -407,7 +278,7 @@ module FeedTools
|
|
407
278
|
# with 'file://', strip the 'file://' off the front of the url.
|
408
279
|
file_name = self.href.gsub(/^file:\/\//, "")
|
409
280
|
if RUBY_PLATFORM =~ /mswin/
|
410
|
-
file_name = file_name[1..-1] if file_name[
|
281
|
+
file_name = file_name[1..-1] if file_name[0..0] == "/"
|
411
282
|
end
|
412
283
|
begin
|
413
284
|
open(file_name) do |file|
|
@@ -885,7 +756,7 @@ module FeedTools
|
|
885
756
|
end
|
886
757
|
rescue
|
887
758
|
end
|
888
|
-
if
|
759
|
+
if self.configurations[:url_normalization_enabled]
|
889
760
|
@href = FeedTools::UriHelper.normalize_url(@href)
|
890
761
|
end
|
891
762
|
@href.strip! unless @href.nil?
|
@@ -925,7 +796,7 @@ module FeedTools
|
|
925
796
|
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
926
797
|
self.feed_type, self.feed_version)
|
927
798
|
if self.feed_type == "atom" ||
|
928
|
-
|
799
|
+
self.configurations[:always_strip_wrapper_elements]
|
929
800
|
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
930
801
|
end
|
931
802
|
@title = nil if @title.blank?
|
@@ -943,7 +814,6 @@ module FeedTools
|
|
943
814
|
# Returns the feed subtitle
|
944
815
|
def subtitle
|
945
816
|
if @subtitle.nil?
|
946
|
-
repair_entities = false
|
947
817
|
subtitle_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
948
818
|
"atom10:subtitle",
|
949
819
|
"subtitle",
|
@@ -968,7 +838,7 @@ module FeedTools
|
|
968
838
|
@subtitle = FeedTools::HtmlHelper.process_text_construct(
|
969
839
|
subtitle_node, self.feed_type, self.feed_version)
|
970
840
|
if self.feed_type == "atom" ||
|
971
|
-
|
841
|
+
self.configurations[:always_strip_wrapper_elements]
|
972
842
|
@subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
|
973
843
|
end
|
974
844
|
if @subtitle.blank?
|
@@ -1125,7 +995,7 @@ module FeedTools
|
|
1125
995
|
end
|
1126
996
|
rescue
|
1127
997
|
end
|
1128
|
-
if
|
998
|
+
if self.configurations[:url_normalization_enabled]
|
1129
999
|
@link = FeedTools::UriHelper.normalize_url(@link)
|
1130
1000
|
end
|
1131
1001
|
unless self.cache_object.nil?
|
@@ -1179,7 +1049,7 @@ module FeedTools
|
|
1179
1049
|
end
|
1180
1050
|
rescue
|
1181
1051
|
end
|
1182
|
-
if
|
1052
|
+
if self.configurations[:url_normalization_enabled]
|
1183
1053
|
link_object.href =
|
1184
1054
|
FeedTools::UriHelper.normalize_url(link_object.href)
|
1185
1055
|
end
|
@@ -1609,12 +1479,12 @@ module FeedTools
|
|
1609
1479
|
unless time_string.blank?
|
1610
1480
|
@time = Time.parse(time_string).gmtime
|
1611
1481
|
else
|
1612
|
-
if
|
1482
|
+
if self.configurations[:timestamp_estimation_enabled]
|
1613
1483
|
@time = Time.now.gmtime
|
1614
1484
|
end
|
1615
1485
|
end
|
1616
1486
|
rescue
|
1617
|
-
if
|
1487
|
+
if self.configurations[:timestamp_estimation_enabled]
|
1618
1488
|
@time = Time.now.gmtime
|
1619
1489
|
end
|
1620
1490
|
end
|
@@ -1750,7 +1620,7 @@ module FeedTools
|
|
1750
1620
|
end
|
1751
1621
|
rescue
|
1752
1622
|
end
|
1753
|
-
if
|
1623
|
+
if self.configurations[:url_normalization_enabled]
|
1754
1624
|
image.href = FeedTools::UriHelper.normalize_url(image.href)
|
1755
1625
|
end
|
1756
1626
|
image.href.strip! unless image.href.nil?
|
@@ -1835,7 +1705,7 @@ module FeedTools
|
|
1835
1705
|
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
1836
1706
|
self.feed_type, self.feed_version)
|
1837
1707
|
if self.feed_type == "atom" ||
|
1838
|
-
|
1708
|
+
self.configurations[:always_strip_wrapper_elements]
|
1839
1709
|
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
1840
1710
|
end
|
1841
1711
|
end
|
@@ -1944,10 +1814,10 @@ module FeedTools
|
|
1944
1814
|
if @time_to_live.nil? || @time_to_live == 0
|
1945
1815
|
# Default to one hour
|
1946
1816
|
@time_to_live = 1.hour
|
1947
|
-
elsif
|
1948
|
-
|
1949
|
-
@time_to_live >=
|
1950
|
-
@time_to_live =
|
1817
|
+
elsif self.configurations[:max_ttl] != nil &&
|
1818
|
+
self.configurations[:max_ttl] != 0 &&
|
1819
|
+
@time_to_live >= self.configurations[:max_ttl].to_i
|
1820
|
+
@time_to_live = self.configurations[:max_ttl].to_i
|
1951
1821
|
end
|
1952
1822
|
@time_to_live = @time_to_live.round
|
1953
1823
|
return @time_to_live
|
@@ -2034,7 +1904,7 @@ module FeedTools
|
|
2034
1904
|
end
|
2035
1905
|
rescue
|
2036
1906
|
end
|
2037
|
-
if
|
1907
|
+
if self.configurations[:url_normalization_enabled]
|
2038
1908
|
@docs = FeedTools::UriHelper.normalize_url(@docs)
|
2039
1909
|
end
|
2040
1910
|
end
|
@@ -2145,17 +2015,17 @@ module FeedTools
|
|
2145
2015
|
end
|
2146
2016
|
|
2147
2017
|
# Sort the items
|
2148
|
-
if
|
2018
|
+
if self.configurations[:entry_sorting_property] == "time"
|
2149
2019
|
@entries = @entries.sort do |a, b|
|
2150
2020
|
(b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
|
2151
2021
|
end
|
2152
|
-
elsif
|
2153
|
-
sorting_property =
|
2022
|
+
elsif self.configurations[:entry_sorting_property] != nil
|
2023
|
+
sorting_property = self.configurations[:entry_sorting_property]
|
2154
2024
|
@entries = @entries.sort do |a, b|
|
2155
2025
|
eval("a.#{sorting_property}") <=> eval("b.#{sorting_property}")
|
2156
2026
|
end
|
2157
2027
|
else
|
2158
|
-
|
2028
|
+
return @entries.reverse
|
2159
2029
|
end
|
2160
2030
|
return @entries
|
2161
2031
|
end
|
@@ -2255,7 +2125,7 @@ module FeedTools
|
|
2255
2125
|
xml_builder=Builder::XmlMarkup.new(
|
2256
2126
|
:indent => 2, :escape_attrs => false))
|
2257
2127
|
xml_builder.instruct! :xml, :version => "1.0",
|
2258
|
-
:encoding => (
|
2128
|
+
:encoding => (self.configurations[:output_encoding] or "utf-8")
|
2259
2129
|
if feed_type.nil?
|
2260
2130
|
feed_type = self.feed_type
|
2261
2131
|
end
|
@@ -2335,7 +2205,7 @@ module FeedTools
|
|
2335
2205
|
end
|
2336
2206
|
xml_builder.tag!(
|
2337
2207
|
"admin:generatorAgent",
|
2338
|
-
"rdf:resource" =>
|
2208
|
+
"rdf:resource" => self.configurations[:generator_href])
|
2339
2209
|
build_xml_hook(feed_type, feed_version, xml_builder)
|
2340
2210
|
end
|
2341
2211
|
unless self.images.blank?
|
@@ -2408,7 +2278,7 @@ module FeedTools
|
|
2408
2278
|
end
|
2409
2279
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2410
2280
|
xml_builder.generator(
|
2411
|
-
|
2281
|
+
self.configurations[:generator_href])
|
2412
2282
|
build_xml_hook(feed_type, feed_version, xml_builder)
|
2413
2283
|
unless items.nil?
|
2414
2284
|
for item in items
|
@@ -2467,8 +2337,8 @@ module FeedTools
|
|
2467
2337
|
unless self.rights.blank?
|
2468
2338
|
xml_builder.rights(self.rights)
|
2469
2339
|
end
|
2470
|
-
xml_builder.generator(
|
2471
|
-
" - " +
|
2340
|
+
xml_builder.generator(self.configurations[:generator_name] +
|
2341
|
+
" - " + self.configurations[:generator_href])
|
2472
2342
|
if self.id != nil
|
2473
2343
|
unless FeedTools::UriHelper.is_uri? self.id
|
2474
2344
|
if self.link != nil
|
@@ -2498,6 +2368,18 @@ module FeedTools
|
|
2498
2368
|
|
2499
2369
|
# Persists the current feed state to the cache.
|
2500
2370
|
def save
|
2371
|
+
if self.configurations[:feed_cache].nil?
|
2372
|
+
# The cache is disabled for this feed, do nothing.
|
2373
|
+
return
|
2374
|
+
end
|
2375
|
+
if self.http_headers['content-type'] =~ /text\/html/ ||
|
2376
|
+
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
2377
|
+
if self.title.nil? && self.link.nil? && self.entries.blank?
|
2378
|
+
# Don't save html pages to the cache, it messes with
|
2379
|
+
# autodiscovery.
|
2380
|
+
return
|
2381
|
+
end
|
2382
|
+
end
|
2501
2383
|
unless self.href =~ /^file:\/\//
|
2502
2384
|
if FeedTools.feed_cache.nil?
|
2503
2385
|
raise "Caching is currently disabled. Cannot save to cache."
|