feedtools 0.2.23 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +24 -12
- data/lib/feed_tools/database_feed_cache.rb +8 -5
- data/lib/feed_tools/feed.rb +122 -240
- data/lib/feed_tools/feed_item.rb +31 -13
- data/lib/feed_tools/feed_structures.rb +5 -2
- data/lib/feed_tools/helpers/debug_helper.rb +1 -2
- data/lib/feed_tools/helpers/html_helper.rb +75 -43
- data/lib/feed_tools/helpers/retrieval_helper.rb +204 -6
- data/lib/feed_tools/helpers/uri_helper.rb +4 -1
- data/lib/feed_tools/vendor/htree/parse.rb +3 -1
- data/lib/feed_tools/version.rb +9 -0
- data/rakefile +6 -4
- data/test/unit/atom_test.rb +253 -4
- data/test/unit/cache_test.rb +22 -17
- data/test/unit/helper_test.rb +2 -2
- metadata +4 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
== FeedTools 0.2.24
|
2
|
+
* fixed autodiscovery bugs
|
3
|
+
* updated autodiscovery to handle relative uris
|
4
|
+
* added per feed configuration
|
5
|
+
* rewrote http retrieval code
|
6
|
+
* supports authenticated http proxies
|
7
|
+
* supports basic http auth
|
8
|
+
* fixed relative url resolution issues
|
9
|
+
* corrected db schema files to match migration file
|
10
|
+
* fixed bug in the save method
|
11
|
+
* fixed some major http bugs
|
12
|
+
* updated dependencies
|
13
|
+
* no longer causes problems with frozen rails
|
1
14
|
== FeedTools 0.2.23
|
2
15
|
* autodiscovery implemented
|
3
16
|
* now knows a title from a hole in the ground
|
data/db/schema.mysql.sql
CHANGED
data/db/schema.postgresql.sql
CHANGED
data/db/schema.sqlite.sql
CHANGED
data/lib/feed_tools.rb
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
if Object.const_defined?(:
|
24
|
+
if Object.const_defined?(:FEED_TOOLS_NAMESPACES)
|
25
25
|
warn("FeedTools may have been loaded improperly. This may be caused " +
|
26
26
|
"by the presence of the RUBYOPT environment variable or by using " +
|
27
27
|
"load instead of require. This can also be caused by missing " +
|
@@ -32,8 +32,6 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
32
32
|
ENV['RAILS_ENV'] ||
|
33
33
|
'development' # :nodoc:
|
34
34
|
|
35
|
-
FEED_TOOLS_VERSION = "0.2.23"
|
36
|
-
|
37
35
|
FEED_TOOLS_NAMESPACES = {
|
38
36
|
"admin" => "http://webns.net/mvcb/",
|
39
37
|
"ag" => "http://purl.org/rss/1.0/modules/aggregation/",
|
@@ -58,6 +56,7 @@ FEED_TOOLS_NAMESPACES = {
|
|
58
56
|
"image" => "http://purl.org/rss/1.0/modules/image/",
|
59
57
|
"feedburner" => "http://rssnamespace.org/feedburner/ext/1.0",
|
60
58
|
"foaf" => "http://xmlns.com/foaf/0.1/",
|
59
|
+
"foo" => "http://hsivonen.iki.fi/FooML",
|
61
60
|
"fm" => "http://freshmeat.net/rss/fm/",
|
62
61
|
"itunes" => "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
63
62
|
"l" => "http://purl.org/rss/1.0/modules/link/",
|
@@ -94,6 +93,8 @@ $:.unshift(File.dirname(__FILE__))
|
|
94
93
|
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
95
94
|
|
96
95
|
begin
|
96
|
+
require 'feed_tools/version'
|
97
|
+
|
97
98
|
begin
|
98
99
|
require 'iconv'
|
99
100
|
rescue Object
|
@@ -133,8 +134,12 @@ begin
|
|
133
134
|
require 'yaml'
|
134
135
|
require 'base64'
|
135
136
|
|
136
|
-
|
137
|
-
|
137
|
+
if !defined?(ActiveSupport)
|
138
|
+
require_gem('activesupport', '>= 1.1.1')
|
139
|
+
end
|
140
|
+
if !defined?(ActiveRecord)
|
141
|
+
require_gem('activerecord', '>= 1.11.1')
|
142
|
+
end
|
138
143
|
|
139
144
|
begin
|
140
145
|
require_gem('uuidtools', '>= 0.1.2')
|
@@ -186,13 +191,19 @@ module FeedTools
|
|
186
191
|
config_hash = {}
|
187
192
|
@configurations = {
|
188
193
|
:feed_cache => nil,
|
194
|
+
:disable_update_from_remote => false,
|
189
195
|
:proxy_address => nil,
|
190
196
|
:proxy_port => nil,
|
191
|
-
:
|
197
|
+
:proxy_user => nil,
|
198
|
+
:proxy_password => nil,
|
199
|
+
:user_agent =>
|
200
|
+
"FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING} " +
|
192
201
|
"+http://www.sporkmonger.com/projects/feedtools/",
|
193
|
-
:generator_name =>
|
194
|
-
|
195
|
-
:
|
202
|
+
:generator_name =>
|
203
|
+
"FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING}",
|
204
|
+
:generator_href =>
|
205
|
+
"http://www.sporkmonger.com/projects/feedtools/",
|
206
|
+
:tidy_enabled => false,
|
196
207
|
:tidy_options => {},
|
197
208
|
:idn_enabled => true,
|
198
209
|
:sanitization_enabled => true,
|
@@ -357,7 +368,7 @@ module REXML # :nodoc:
|
|
357
368
|
ns = node.namespace( prefix )
|
358
369
|
end
|
359
370
|
!(node.node_type == :element and
|
360
|
-
node.name.downcase == name and node.namespace == ns )
|
371
|
+
node.name.downcase == name.downcase and node.namespace == ns )
|
361
372
|
end
|
362
373
|
return n
|
363
374
|
|
@@ -420,7 +431,7 @@ module REXML # :nodoc:
|
|
420
431
|
for element in nodeset
|
421
432
|
if element.node_type == :element
|
422
433
|
for attribute_name in element.attributes.keys
|
423
|
-
if attribute_name.downcase == name
|
434
|
+
if attribute_name.downcase == name.downcase
|
424
435
|
attrib = element.attribute( attribute_name,
|
425
436
|
@namespaces[prefix] )
|
426
437
|
new_nodeset << attrib if attrib
|
@@ -438,7 +449,8 @@ module REXML # :nodoc:
|
|
438
449
|
return new_nodeset
|
439
450
|
|
440
451
|
when :parent
|
441
|
-
return internal_parse( path_stack,
|
452
|
+
return internal_parse( path_stack,
|
453
|
+
nodeset.collect{|n| n.parent}.compact )
|
442
454
|
|
443
455
|
when :ancestor
|
444
456
|
new_nodeset = []
|
@@ -51,14 +51,17 @@ module FeedTools
|
|
51
51
|
begin
|
52
52
|
possible_config_files = [
|
53
53
|
"./config/database.yml",
|
54
|
-
"../config/database.yml",
|
55
54
|
"./database.yml",
|
55
|
+
"../config/database.yml",
|
56
56
|
"../database.yml",
|
57
|
-
"../../database.yml"
|
57
|
+
"../../config/database.yml",
|
58
|
+
"../../database.yml",
|
59
|
+
"../../../config/database.yml",
|
60
|
+
"../../../database.yml"
|
58
61
|
]
|
59
62
|
database_config_file = nil
|
60
63
|
for file in possible_config_files
|
61
|
-
if File.exists?
|
64
|
+
if File.exists?(File.expand_path(file))
|
62
65
|
database_config_file = file
|
63
66
|
break
|
64
67
|
end
|
@@ -109,9 +112,9 @@ module FeedTools
|
|
109
112
|
# True if the appropriate database table already exists
|
110
113
|
def DatabaseFeedCache.table_exists?
|
111
114
|
begin
|
112
|
-
ActiveRecord::Base.connection.execute
|
115
|
+
ActiveRecord::Base.connection.execute("select id, href, title, " +
|
113
116
|
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
114
|
-
"from #{self.table_name()} limit 1"
|
117
|
+
"from #{self.table_name()} limit 1")
|
115
118
|
rescue ActiveRecord::StatementInvalid
|
116
119
|
return false
|
117
120
|
rescue
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -24,6 +24,7 @@
|
|
24
24
|
require 'rexml/document'
|
25
25
|
require 'feed_tools/feed_item'
|
26
26
|
require 'feed_tools/feed_structures'
|
27
|
+
require 'feed_tools/helpers/retrieval_helper'
|
27
28
|
require 'feed_tools/helpers/generic_helper'
|
28
29
|
require 'feed_tools/helpers/xml_helper'
|
29
30
|
require 'feed_tools/helpers/html_helper'
|
@@ -51,19 +52,22 @@ module FeedTools
|
|
51
52
|
@entries = nil
|
52
53
|
@live = false
|
53
54
|
@encoding = nil
|
55
|
+
@options = nil
|
54
56
|
end
|
55
57
|
|
56
58
|
# Loads the feed specified by the url, pulling the data from the
|
57
|
-
# cache if it hasn't expired.
|
58
|
-
#
|
59
|
-
# * <tt>:cache_only</tt> - If set to true, the feed will only be
|
60
|
-
# pulled from the cache.
|
59
|
+
# cache if it hasn't expired. Options supplied will override the
|
60
|
+
# default options.
|
61
61
|
def Feed.open(url, options={})
|
62
|
-
FeedTools::GenericHelper.validate_options(
|
63
|
-
|
64
|
-
|
62
|
+
FeedTools::GenericHelper.validate_options(
|
63
|
+
FeedTools.configurations.keys, options.keys)
|
64
|
+
|
65
|
+
# create the new feed
|
66
|
+
feed = FeedTools::Feed.new
|
67
|
+
|
68
|
+
feed.configurations = FeedTools.configurations.merge(options)
|
65
69
|
|
66
|
-
if
|
70
|
+
if feed.configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
|
67
71
|
raise(ArgumentError, "There is currently no caching mechanism set. " +
|
68
72
|
"Cannot retrieve cached feeds.")
|
69
73
|
end
|
@@ -71,16 +75,32 @@ module FeedTools
|
|
71
75
|
# clean up the url
|
72
76
|
url = FeedTools::UriHelper.normalize_url(url)
|
73
77
|
|
74
|
-
#
|
75
|
-
feed = FeedTools::Feed.new
|
78
|
+
# load the new feed
|
76
79
|
feed.href = url
|
77
|
-
feed.update! unless
|
80
|
+
feed.update! unless feed.configurations[:disable_update_from_remote]
|
78
81
|
return feed
|
79
82
|
end
|
83
|
+
|
84
|
+
# Returns the load options for this feed.
|
85
|
+
def configurations
|
86
|
+
if @configurations.blank?
|
87
|
+
@configurations = FeedTools.configurations.dup
|
88
|
+
end
|
89
|
+
return @configurations
|
90
|
+
end
|
91
|
+
|
92
|
+
# Sets the load options for this feed.
|
93
|
+
def configurations=(new_configurations)
|
94
|
+
@configurations = new_configurations
|
95
|
+
end
|
80
96
|
|
81
97
|
# Loads the feed from the remote url if the feed has expired from the
|
82
98
|
# cache or cannot be retrieved from the cache for some reason.
|
83
99
|
def update!
|
100
|
+
if self.configurations[:disable_update_from_remote]
|
101
|
+
# Don't do anything if this option is set
|
102
|
+
return
|
103
|
+
end
|
84
104
|
if !FeedTools.feed_cache.nil? &&
|
85
105
|
!FeedTools.feed_cache.set_up_correctly?
|
86
106
|
raise "Your feed cache system is incorrectly set up. " +
|
@@ -101,7 +121,7 @@ module FeedTools
|
|
101
121
|
# Handle autodiscovery
|
102
122
|
if self.http_headers['content-type'] =~ /text\/html/ ||
|
103
123
|
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
104
|
-
|
124
|
+
|
105
125
|
autodiscovered_url = nil
|
106
126
|
autodiscovered_url =
|
107
127
|
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
@@ -117,9 +137,19 @@ module FeedTools
|
|
117
137
|
"application/rdf+xml")
|
118
138
|
end
|
119
139
|
unless autodiscovered_url.nil?
|
140
|
+
begin
|
141
|
+
autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri(
|
142
|
+
autodiscovered_url, [self.href])
|
143
|
+
rescue Exception
|
144
|
+
end
|
120
145
|
self.feed_data = nil
|
121
146
|
self.href = autodiscovered_url
|
122
|
-
|
147
|
+
if FeedTools.feed_cache.nil?
|
148
|
+
self.cache_object = nil
|
149
|
+
else
|
150
|
+
self.cache_object =
|
151
|
+
FeedTools.feed_cache.find_by_href(autodiscovered_url)
|
152
|
+
end
|
123
153
|
self.update!
|
124
154
|
end
|
125
155
|
end
|
@@ -170,226 +200,67 @@ module FeedTools
|
|
170
200
|
|
171
201
|
# No need for http headers unless we're actually doing http
|
172
202
|
if retrieval_method == "http"
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
self.http_headers['last-modified'] unless
|
180
|
-
self.http_headers['last-modified'].nil?
|
181
|
-
end
|
182
|
-
unless FeedTools.configurations[:user_agent].nil?
|
183
|
-
headers["User-Agent"] = FeedTools.configurations[:user_agent]
|
184
|
-
end
|
185
|
-
|
186
|
-
# The http feed access method
|
187
|
-
http_fetch = lambda do |feed_url, request_headers, redirect_limit,
|
188
|
-
response_chain, no_headers|
|
189
|
-
raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
|
190
|
-
feed_uri = nil
|
191
|
-
begin
|
192
|
-
feed_uri = URI.parse(feed_url)
|
193
|
-
rescue URI::InvalidURIError
|
194
|
-
# Uh, maybe try to fix it?
|
195
|
-
feed_uri = URI.parse(FeedTools::UriHelper.normalize_url(feed_url))
|
196
|
-
end
|
197
|
-
|
198
|
-
begin
|
199
|
-
proxy_address = (FeedTools.configurations[:proxy_address] || nil)
|
200
|
-
proxy_port = (FeedTools.configurations[:proxy_port].to_i || nil)
|
201
|
-
|
202
|
-
http = Net::HTTP::Proxy(proxy_address, proxy_port).new(
|
203
|
-
feed_uri.host, (feed_uri.port or 80))
|
204
|
-
http.start do
|
205
|
-
final_uri = feed_uri.path
|
206
|
-
final_uri += ('?' + feed_uri.query) if feed_uri.query
|
207
|
-
request_headers = {} if no_headers
|
208
|
-
response = http.request_get(final_uri, request_headers)
|
209
|
-
|
210
|
-
case response
|
211
|
-
when Net::HTTPSuccess
|
212
|
-
# We've reached the final destination, process all previous
|
213
|
-
# redirections, and see if we need to update the url.
|
214
|
-
for redirected_response in response_chain
|
215
|
-
if redirected_response.last.code.to_i == 301
|
216
|
-
# Reset the cache object or we may get duplicate entries
|
217
|
-
self.cache_object = nil
|
218
|
-
self.href = redirected_response.last['location']
|
219
|
-
else
|
220
|
-
# Jump out as soon as we hit anything that isn't a
|
221
|
-
# permanently moved redirection.
|
222
|
-
break
|
223
|
-
end
|
224
|
-
end
|
225
|
-
response
|
226
|
-
when Net::HTTPRedirection
|
227
|
-
if response.code.to_i == 304
|
228
|
-
response.error!
|
229
|
-
else
|
230
|
-
if response['location'].nil?
|
231
|
-
raise FeedAccessError,
|
232
|
-
"No location to redirect to supplied: " + response.code
|
233
|
-
end
|
234
|
-
response_chain << [feed_url, response]
|
235
|
-
new_location = response['location']
|
236
|
-
if response_chain.assoc(new_location) != nil
|
237
|
-
raise FeedAccessError,
|
238
|
-
"Redirection loop detected: #{new_location}"
|
239
|
-
end
|
240
|
-
|
241
|
-
# Find out if we've already seen the url we've been
|
242
|
-
# redirected to.
|
243
|
-
found_redirect = false
|
244
|
-
begin
|
245
|
-
cached_feed = FeedTools::Feed.open(new_location,
|
246
|
-
:cache_only => true)
|
247
|
-
if cached_feed.cache_object != nil &&
|
248
|
-
cached_feed.cache_object.new_record? != true
|
249
|
-
if !cached_feed.expired? &&
|
250
|
-
!cached_feed.http_headers.blank?
|
251
|
-
# Copy the cached state
|
252
|
-
self.href = cached_feed.href
|
253
|
-
|
254
|
-
@feed_data = cached_feed.feed_data
|
255
|
-
@feed_data_type = cached_feed.feed_data_type
|
256
|
-
|
257
|
-
if @feed_data.blank?
|
258
|
-
raise "Invalid cache data."
|
259
|
-
end
|
203
|
+
begin
|
204
|
+
@http_response = (FeedTools::RetrievalHelper.http_get(
|
205
|
+
self.href, :feed_object => self) do |url, response|
|
206
|
+
# Find out if we've already seen the url we've been
|
207
|
+
# redirected to.
|
208
|
+
follow_redirect = true
|
260
209
|
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
210
|
+
begin
|
211
|
+
cached_feed = FeedTools::Feed.open(url,
|
212
|
+
:disable_update_from_remote => true)
|
213
|
+
if cached_feed.cache_object != nil &&
|
214
|
+
cached_feed.cache_object.new_record? != true
|
215
|
+
if !cached_feed.expired? &&
|
216
|
+
!cached_feed.http_headers.blank?
|
217
|
+
# Copy the cached state
|
218
|
+
self.href = cached_feed.href
|
219
|
+
|
220
|
+
@feed_data = cached_feed.feed_data
|
221
|
+
@feed_data_type = cached_feed.feed_data_type
|
222
|
+
|
223
|
+
if @feed_data.blank?
|
224
|
+
raise "Invalid cache data."
|
270
225
|
end
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
else
|
281
|
-
response
|
282
|
-
end
|
283
|
-
end
|
284
|
-
else
|
285
|
-
class << response
|
286
|
-
def response_chain
|
287
|
-
return @response_chain
|
226
|
+
|
227
|
+
@title = nil; self.title
|
228
|
+
@link = nil; self.link
|
229
|
+
|
230
|
+
self.last_retrieved = cached_feed.last_retrieved
|
231
|
+
self.http_headers = cached_feed.http_headers
|
232
|
+
self.cache_object = cached_feed.cache_object
|
233
|
+
@live = false
|
234
|
+
follow_redirect = false
|
288
235
|
end
|
289
236
|
end
|
290
|
-
|
291
|
-
|
292
|
-
response.error!
|
293
|
-
end
|
294
|
-
end
|
295
|
-
rescue SocketError
|
296
|
-
raise FeedAccessError, 'Socket error prevented feed retrieval'
|
297
|
-
rescue Timeout::Error
|
298
|
-
raise FeedAccessError, 'Timeout while attempting to retrieve feed'
|
299
|
-
rescue Errno::ENETUNREACH
|
300
|
-
raise FeedAccessError, 'Network was unreachable'
|
301
|
-
rescue Errno::ECONNRESET
|
302
|
-
raise FeedAccessError, 'Connection was reset by peer'
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
begin
|
307
|
-
begin
|
308
|
-
@http_response = http_fetch.call(self.href, headers, 10, [], false)
|
309
|
-
rescue => error
|
310
|
-
if error.respond_to?(:response)
|
311
|
-
# You might not believe this, but...
|
312
|
-
#
|
313
|
-
# Under certain circumstances, web servers will try to block
|
314
|
-
# based on the User-Agent header. This is *retarded*. But
|
315
|
-
# we won't let their stupid error stop us!
|
316
|
-
#
|
317
|
-
# This is, of course, a quick-n-dirty hack. But at least
|
318
|
-
# we get to blame other people's bad software and/or bad
|
319
|
-
# configuration files.
|
320
|
-
if error.response.code.to_i == 404 &&
|
321
|
-
FeedTools.configurations[:user_agent] != nil
|
322
|
-
@http_response = http_fetch.call(self.href, {}, 10, [], true)
|
323
|
-
if @http_response != nil && @http_response.code.to_i == 200
|
324
|
-
warn("The server appears to be blocking based on the " +
|
325
|
-
"User-Agent header. This is stupid, and you should " +
|
326
|
-
"inform the webmaster of this.")
|
327
|
-
end
|
328
|
-
else
|
329
|
-
raise error
|
237
|
+
rescue
|
238
|
+
# If anything goes wrong, ignore it.
|
330
239
|
end
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
unless @http_response.kind_of? Net::HTTPRedirection
|
240
|
+
follow_redirect
|
241
|
+
end)
|
242
|
+
case @http_response
|
243
|
+
when Net::HTTPSuccess
|
336
244
|
@feed_data = self.http_response.body
|
337
245
|
@http_headers = {}
|
338
246
|
self.http_response.each_header do |key, value|
|
339
247
|
self.http_headers[key.downcase] = value
|
340
248
|
end
|
341
249
|
self.last_retrieved = Time.now.gmtime
|
342
|
-
|
343
|
-
|
344
|
-
@live = false
|
345
|
-
if self.feed_data.nil?
|
346
|
-
raise
|
347
|
-
end
|
348
|
-
rescue Timeout::Error
|
349
|
-
# if we time out, do nothing, it should fall back to the feed_data
|
350
|
-
# stored in the cache.
|
351
|
-
@live = false
|
352
|
-
if self.feed_data.nil?
|
353
|
-
raise
|
354
|
-
end
|
355
|
-
rescue Errno::ECONNRESET
|
356
|
-
# if the connection gets reset by peer, oh well, fall back to the
|
357
|
-
# feed_data stored in the cache
|
358
|
-
@live = false
|
359
|
-
if self.feed_data.nil?
|
360
|
-
raise
|
361
|
-
end
|
362
|
-
rescue => error
|
363
|
-
# heck, if anything at all bad happens, fall back to the feed_data
|
364
|
-
# stored in the cache.
|
365
|
-
|
366
|
-
# If we can, get the HTTPResponse...
|
367
|
-
@http_response = nil
|
368
|
-
if error.respond_to?(:each_header)
|
369
|
-
@http_response = error
|
370
|
-
end
|
371
|
-
if error.respond_to?(:response) &&
|
372
|
-
error.response.respond_to?(:each_header)
|
373
|
-
@http_response = error.response
|
374
|
-
end
|
375
|
-
if @http_response != nil
|
250
|
+
@live = true
|
251
|
+
when Net::HTTPNotModified
|
376
252
|
@http_headers = {}
|
377
253
|
self.http_response.each_header do |key, value|
|
378
254
|
self.http_headers[key.downcase] = value
|
379
255
|
end
|
380
|
-
|
381
|
-
|
382
|
-
|
256
|
+
self.last_retrieved = Time.now.gmtime
|
257
|
+
@live = true
|
258
|
+
else
|
259
|
+
@live = false
|
383
260
|
end
|
261
|
+
rescue Exception => error
|
384
262
|
@live = false
|
385
263
|
if self.feed_data.nil?
|
386
|
-
if error.respond_to?(:response) &&
|
387
|
-
error.response.respond_to?(:response_chain)
|
388
|
-
redirects = error.response.response_chain.map do |pair|
|
389
|
-
pair.first
|
390
|
-
end
|
391
|
-
error.message << (" - Redirects: " + redirects.inspect)
|
392
|
-
end
|
393
264
|
raise error
|
394
265
|
end
|
395
266
|
end
|
@@ -407,7 +278,7 @@ module FeedTools
|
|
407
278
|
# with 'file://', strip the 'file://' off the front of the url.
|
408
279
|
file_name = self.href.gsub(/^file:\/\//, "")
|
409
280
|
if RUBY_PLATFORM =~ /mswin/
|
410
|
-
file_name = file_name[1..-1] if file_name[
|
281
|
+
file_name = file_name[1..-1] if file_name[0..0] == "/"
|
411
282
|
end
|
412
283
|
begin
|
413
284
|
open(file_name) do |file|
|
@@ -885,7 +756,7 @@ module FeedTools
|
|
885
756
|
end
|
886
757
|
rescue
|
887
758
|
end
|
888
|
-
if
|
759
|
+
if self.configurations[:url_normalization_enabled]
|
889
760
|
@href = FeedTools::UriHelper.normalize_url(@href)
|
890
761
|
end
|
891
762
|
@href.strip! unless @href.nil?
|
@@ -925,7 +796,7 @@ module FeedTools
|
|
925
796
|
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
926
797
|
self.feed_type, self.feed_version)
|
927
798
|
if self.feed_type == "atom" ||
|
928
|
-
|
799
|
+
self.configurations[:always_strip_wrapper_elements]
|
929
800
|
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
930
801
|
end
|
931
802
|
@title = nil if @title.blank?
|
@@ -943,7 +814,6 @@ module FeedTools
|
|
943
814
|
# Returns the feed subtitle
|
944
815
|
def subtitle
|
945
816
|
if @subtitle.nil?
|
946
|
-
repair_entities = false
|
947
817
|
subtitle_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
948
818
|
"atom10:subtitle",
|
949
819
|
"subtitle",
|
@@ -968,7 +838,7 @@ module FeedTools
|
|
968
838
|
@subtitle = FeedTools::HtmlHelper.process_text_construct(
|
969
839
|
subtitle_node, self.feed_type, self.feed_version)
|
970
840
|
if self.feed_type == "atom" ||
|
971
|
-
|
841
|
+
self.configurations[:always_strip_wrapper_elements]
|
972
842
|
@subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
|
973
843
|
end
|
974
844
|
if @subtitle.blank?
|
@@ -1125,7 +995,7 @@ module FeedTools
|
|
1125
995
|
end
|
1126
996
|
rescue
|
1127
997
|
end
|
1128
|
-
if
|
998
|
+
if self.configurations[:url_normalization_enabled]
|
1129
999
|
@link = FeedTools::UriHelper.normalize_url(@link)
|
1130
1000
|
end
|
1131
1001
|
unless self.cache_object.nil?
|
@@ -1179,7 +1049,7 @@ module FeedTools
|
|
1179
1049
|
end
|
1180
1050
|
rescue
|
1181
1051
|
end
|
1182
|
-
if
|
1052
|
+
if self.configurations[:url_normalization_enabled]
|
1183
1053
|
link_object.href =
|
1184
1054
|
FeedTools::UriHelper.normalize_url(link_object.href)
|
1185
1055
|
end
|
@@ -1609,12 +1479,12 @@ module FeedTools
|
|
1609
1479
|
unless time_string.blank?
|
1610
1480
|
@time = Time.parse(time_string).gmtime
|
1611
1481
|
else
|
1612
|
-
if
|
1482
|
+
if self.configurations[:timestamp_estimation_enabled]
|
1613
1483
|
@time = Time.now.gmtime
|
1614
1484
|
end
|
1615
1485
|
end
|
1616
1486
|
rescue
|
1617
|
-
if
|
1487
|
+
if self.configurations[:timestamp_estimation_enabled]
|
1618
1488
|
@time = Time.now.gmtime
|
1619
1489
|
end
|
1620
1490
|
end
|
@@ -1750,7 +1620,7 @@ module FeedTools
|
|
1750
1620
|
end
|
1751
1621
|
rescue
|
1752
1622
|
end
|
1753
|
-
if
|
1623
|
+
if self.configurations[:url_normalization_enabled]
|
1754
1624
|
image.href = FeedTools::UriHelper.normalize_url(image.href)
|
1755
1625
|
end
|
1756
1626
|
image.href.strip! unless image.href.nil?
|
@@ -1835,7 +1705,7 @@ module FeedTools
|
|
1835
1705
|
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
1836
1706
|
self.feed_type, self.feed_version)
|
1837
1707
|
if self.feed_type == "atom" ||
|
1838
|
-
|
1708
|
+
self.configurations[:always_strip_wrapper_elements]
|
1839
1709
|
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
1840
1710
|
end
|
1841
1711
|
end
|
@@ -1944,10 +1814,10 @@ module FeedTools
|
|
1944
1814
|
if @time_to_live.nil? || @time_to_live == 0
|
1945
1815
|
# Default to one hour
|
1946
1816
|
@time_to_live = 1.hour
|
1947
|
-
elsif
|
1948
|
-
|
1949
|
-
@time_to_live >=
|
1950
|
-
@time_to_live =
|
1817
|
+
elsif self.configurations[:max_ttl] != nil &&
|
1818
|
+
self.configurations[:max_ttl] != 0 &&
|
1819
|
+
@time_to_live >= self.configurations[:max_ttl].to_i
|
1820
|
+
@time_to_live = self.configurations[:max_ttl].to_i
|
1951
1821
|
end
|
1952
1822
|
@time_to_live = @time_to_live.round
|
1953
1823
|
return @time_to_live
|
@@ -2034,7 +1904,7 @@ module FeedTools
|
|
2034
1904
|
end
|
2035
1905
|
rescue
|
2036
1906
|
end
|
2037
|
-
if
|
1907
|
+
if self.configurations[:url_normalization_enabled]
|
2038
1908
|
@docs = FeedTools::UriHelper.normalize_url(@docs)
|
2039
1909
|
end
|
2040
1910
|
end
|
@@ -2145,17 +2015,17 @@ module FeedTools
|
|
2145
2015
|
end
|
2146
2016
|
|
2147
2017
|
# Sort the items
|
2148
|
-
if
|
2018
|
+
if self.configurations[:entry_sorting_property] == "time"
|
2149
2019
|
@entries = @entries.sort do |a, b|
|
2150
2020
|
(b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
|
2151
2021
|
end
|
2152
|
-
elsif
|
2153
|
-
sorting_property =
|
2022
|
+
elsif self.configurations[:entry_sorting_property] != nil
|
2023
|
+
sorting_property = self.configurations[:entry_sorting_property]
|
2154
2024
|
@entries = @entries.sort do |a, b|
|
2155
2025
|
eval("a.#{sorting_property}") <=> eval("b.#{sorting_property}")
|
2156
2026
|
end
|
2157
2027
|
else
|
2158
|
-
|
2028
|
+
return @entries.reverse
|
2159
2029
|
end
|
2160
2030
|
return @entries
|
2161
2031
|
end
|
@@ -2255,7 +2125,7 @@ module FeedTools
|
|
2255
2125
|
xml_builder=Builder::XmlMarkup.new(
|
2256
2126
|
:indent => 2, :escape_attrs => false))
|
2257
2127
|
xml_builder.instruct! :xml, :version => "1.0",
|
2258
|
-
:encoding => (
|
2128
|
+
:encoding => (self.configurations[:output_encoding] or "utf-8")
|
2259
2129
|
if feed_type.nil?
|
2260
2130
|
feed_type = self.feed_type
|
2261
2131
|
end
|
@@ -2335,7 +2205,7 @@ module FeedTools
|
|
2335
2205
|
end
|
2336
2206
|
xml_builder.tag!(
|
2337
2207
|
"admin:generatorAgent",
|
2338
|
-
"rdf:resource" =>
|
2208
|
+
"rdf:resource" => self.configurations[:generator_href])
|
2339
2209
|
build_xml_hook(feed_type, feed_version, xml_builder)
|
2340
2210
|
end
|
2341
2211
|
unless self.images.blank?
|
@@ -2408,7 +2278,7 @@ module FeedTools
|
|
2408
2278
|
end
|
2409
2279
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2410
2280
|
xml_builder.generator(
|
2411
|
-
|
2281
|
+
self.configurations[:generator_href])
|
2412
2282
|
build_xml_hook(feed_type, feed_version, xml_builder)
|
2413
2283
|
unless items.nil?
|
2414
2284
|
for item in items
|
@@ -2467,8 +2337,8 @@ module FeedTools
|
|
2467
2337
|
unless self.rights.blank?
|
2468
2338
|
xml_builder.rights(self.rights)
|
2469
2339
|
end
|
2470
|
-
xml_builder.generator(
|
2471
|
-
" - " +
|
2340
|
+
xml_builder.generator(self.configurations[:generator_name] +
|
2341
|
+
" - " + self.configurations[:generator_href])
|
2472
2342
|
if self.id != nil
|
2473
2343
|
unless FeedTools::UriHelper.is_uri? self.id
|
2474
2344
|
if self.link != nil
|
@@ -2498,6 +2368,18 @@ module FeedTools
|
|
2498
2368
|
|
2499
2369
|
# Persists the current feed state to the cache.
|
2500
2370
|
def save
|
2371
|
+
if self.configurations[:feed_cache].nil?
|
2372
|
+
# The cache is disabled for this feed, do nothing.
|
2373
|
+
return
|
2374
|
+
end
|
2375
|
+
if self.http_headers['content-type'] =~ /text\/html/ ||
|
2376
|
+
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
2377
|
+
if self.title.nil? && self.link.nil? && self.entries.blank?
|
2378
|
+
# Don't save html pages to the cache, it messes with
|
2379
|
+
# autodiscovery.
|
2380
|
+
return
|
2381
|
+
end
|
2382
|
+
end
|
2501
2383
|
unless self.href =~ /^file:\/\//
|
2502
2384
|
if FeedTools.feed_cache.nil?
|
2503
2385
|
raise "Caching is currently disabled. Cannot save to cache."
|