feedtools 0.2.23 → 0.2.24
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +13 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +24 -12
- data/lib/feed_tools/database_feed_cache.rb +8 -5
- data/lib/feed_tools/feed.rb +122 -240
- data/lib/feed_tools/feed_item.rb +31 -13
- data/lib/feed_tools/feed_structures.rb +5 -2
- data/lib/feed_tools/helpers/debug_helper.rb +1 -2
- data/lib/feed_tools/helpers/html_helper.rb +75 -43
- data/lib/feed_tools/helpers/retrieval_helper.rb +204 -6
- data/lib/feed_tools/helpers/uri_helper.rb +4 -1
- data/lib/feed_tools/vendor/htree/parse.rb +3 -1
- data/lib/feed_tools/version.rb +9 -0
- data/rakefile +6 -4
- data/test/unit/atom_test.rb +253 -4
- data/test/unit/cache_test.rb +22 -17
- data/test/unit/helper_test.rb +2 -2
- metadata +4 -3
data/lib/feed_tools/feed_item.rb
CHANGED
@@ -69,6 +69,24 @@ module FeedTools
|
|
69
69
|
return parent_feed
|
70
70
|
end
|
71
71
|
|
72
|
+
# Returns the configuration hash in effect for this feed item.
#
# The hash is resolved lazily: while @configurations is blank it is
# replaced by a copy (dup) of the parent feed's configurations, or, when
# this item has no parent feed, by a copy of the global
# FeedTools.configurations.  The resolved hash is kept in @configurations
# and returned on subsequent calls.
def configurations
  if @configurations.blank?
    owner = self.feed
    inherited =
      (owner != nil) ? owner.configurations : FeedTools.configurations
    @configurations = inherited.dup
  end
  @configurations
end
|
84
|
+
|
85
|
+
# Replaces the configuration hash used by this feed item.  The given
# object is stored as-is in @configurations (no copy is made).
def configurations=(new_configurations)
  @configurations = new_configurations
end
|
89
|
+
|
72
90
|
# Returns the feed item's encoding.
|
73
91
|
def encoding
|
74
92
|
if @encoding.nil?
|
@@ -202,10 +220,10 @@ module FeedTools
|
|
202
220
|
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
203
221
|
self.feed_type, self.feed_version)
|
204
222
|
if self.feed_type == "atom" ||
|
205
|
-
|
223
|
+
self.configurations[:always_strip_wrapper_elements]
|
206
224
|
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
207
225
|
end
|
208
|
-
if !@title.blank? &&
|
226
|
+
if !@title.blank? && self.configurations[:strip_comment_count]
|
209
227
|
# Some blogging tools include the number of comments in a post
|
210
228
|
# in the title... this is supremely ugly, and breaks any
|
211
229
|
# applications which expect the title to be static, so we're
|
@@ -257,7 +275,7 @@ module FeedTools
|
|
257
275
|
@content = FeedTools::HtmlHelper.process_text_construct(content_node,
|
258
276
|
self.feed_type, self.feed_version)
|
259
277
|
if self.feed_type == "atom" ||
|
260
|
-
|
278
|
+
self.configurations[:always_strip_wrapper_elements]
|
261
279
|
@content = FeedTools::HtmlHelper.strip_wrapper_element(@content)
|
262
280
|
end
|
263
281
|
if @content.blank?
|
@@ -310,7 +328,7 @@ module FeedTools
|
|
310
328
|
@summary = FeedTools::HtmlHelper.process_text_construct(summary_node,
|
311
329
|
self.feed_type, self.feed_version)
|
312
330
|
if self.feed_type == "atom" ||
|
313
|
-
|
331
|
+
self.configurations[:always_strip_wrapper_elements]
|
314
332
|
@summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary)
|
315
333
|
end
|
316
334
|
if @summary.blank?
|
@@ -456,7 +474,7 @@ module FeedTools
|
|
456
474
|
end
|
457
475
|
rescue
|
458
476
|
end
|
459
|
-
if
|
477
|
+
if self.configurations[:url_normalization_enabled]
|
460
478
|
@link = FeedTools::UriHelper.normalize_url(@link)
|
461
479
|
end
|
462
480
|
end
|
@@ -507,7 +525,7 @@ module FeedTools
|
|
507
525
|
end
|
508
526
|
rescue
|
509
527
|
end
|
510
|
-
if
|
528
|
+
if self.configurations[:url_normalization_enabled]
|
511
529
|
link_object.href =
|
512
530
|
FeedTools::UriHelper.normalize_url(link_object.href)
|
513
531
|
end
|
@@ -640,7 +658,7 @@ module FeedTools
|
|
640
658
|
end
|
641
659
|
rescue
|
642
660
|
end
|
643
|
-
if
|
661
|
+
if self.configurations[:url_normalization_enabled]
|
644
662
|
image.href = FeedTools::UriHelper.normalize_url(image.href)
|
645
663
|
end
|
646
664
|
image.href.strip! unless image.href.nil?
|
@@ -688,7 +706,7 @@ module FeedTools
|
|
688
706
|
"itunes:image/@href",
|
689
707
|
"itunes:link[@rel='image']/@href"
|
690
708
|
], :select_result_value => true)
|
691
|
-
if
|
709
|
+
if self.configurations[:url_normalization_enabled]
|
692
710
|
@itunes_image_link = FeedTools::UriHelper.normalize_url(@itunes_image_link)
|
693
711
|
end
|
694
712
|
end
|
@@ -706,7 +724,7 @@ module FeedTools
|
|
706
724
|
@media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
707
725
|
"media:thumbnail/@url"
|
708
726
|
], :select_result_value => true)
|
709
|
-
if
|
727
|
+
if self.configurations[:url_normalization_enabled]
|
710
728
|
@media_thumbnail_link = FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
|
711
729
|
end
|
712
730
|
end
|
@@ -734,7 +752,7 @@ module FeedTools
|
|
734
752
|
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
735
753
|
self.feed_type, self.feed_version)
|
736
754
|
if self.feed_type == "atom" ||
|
737
|
-
|
755
|
+
self.configurations[:always_strip_wrapper_elements]
|
738
756
|
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
739
757
|
end
|
740
758
|
end
|
@@ -1456,14 +1474,14 @@ module FeedTools
|
|
1456
1474
|
begin
|
1457
1475
|
if !time_string.blank?
|
1458
1476
|
@time = Time.parse(time_string).gmtime
|
1459
|
-
elsif
|
1477
|
+
elsif self.configurations[:timestamp_estimation_enabled] &&
|
1460
1478
|
!self.title.nil? &&
|
1461
1479
|
(Time.parse(self.title) - Time.now).abs > 100
|
1462
1480
|
@time = Time.parse(self.title).gmtime
|
1463
1481
|
end
|
1464
1482
|
rescue
|
1465
1483
|
end
|
1466
|
-
if
|
1484
|
+
if self.configurations[:timestamp_estimation_enabled]
|
1467
1485
|
if options[:estimate_timestamp]
|
1468
1486
|
if @time.nil?
|
1469
1487
|
begin
|
@@ -1615,7 +1633,7 @@ module FeedTools
|
|
1615
1633
|
end
|
1616
1634
|
rescue
|
1617
1635
|
end
|
1618
|
-
if
|
1636
|
+
if self.configurations[:url_normalization_enabled]
|
1619
1637
|
@comments = FeedTools::UriHelper.normalize_url(@comments)
|
1620
1638
|
end
|
1621
1639
|
end
|
@@ -170,6 +170,10 @@ module FeedTools
|
|
170
170
|
alias_method :url=, :href=
|
171
171
|
alias_method :link, :href
|
172
172
|
alias_method :link=, :href=
|
173
|
+
|
174
|
+
# Creates the object with its expression preset to 'full', one of the
# values accepted by the expression= writer.
def initialize
  @expression = 'full'
end
|
173
177
|
|
174
178
|
# Returns true if this is the default enclosure
|
175
179
|
def is_default?
|
@@ -202,8 +206,7 @@ module FeedTools
|
|
202
206
|
# Allowed values are 'sample', 'full', 'nonstop'.
|
203
207
|
# Sets the expression.  The value is compared case-insensitively against
# 'sample', 'full' and 'nonstop' and stored in lower case.
#
# Values outside that list are ignored and the current expression is left
# untouched.  The input is normalized with to_s first, so nil is treated
# as just another invalid value (instead of raising NoMethodError on
# nil.downcase) and Symbols such as :full are accepted.
def expression=(new_expression)
  normalized = new_expression.to_s.downcase
  unless ['sample', 'full', 'nonstop'].include?(normalized)
    return @expression
  end
  @expression = normalized
end
|
@@ -24,8 +24,7 @@
|
|
24
24
|
module FeedTools
|
25
25
|
module DebugHelper
|
26
26
|
# Forces a stack_trace without interfering with the program
|
27
|
-
def stack_trace
|
28
|
-
|
27
|
+
def self.stack_trace
|
29
28
|
fork do
|
30
29
|
ObjectSpace.each_object(Thread) do |th|
|
31
30
|
th.raise Exception, "Stack Dump" unless Thread.current == th
|
@@ -232,6 +232,17 @@ module FeedTools
|
|
232
232
|
return tidy_html
|
233
233
|
end
|
234
234
|
|
235
|
+
# Prefixes every line of +text+ with +spaces+ space characters.
#
# The text is split on "\n"; each resulting line is emitted with the
# padding in front and a "\n" behind it, so the result always ends with a
# newline when at least one line was produced, and is "" otherwise.
def self.indent(text, spaces)
  text.split("\n").each_with_object("") do |line, indented|
    indented << (" " * spaces + line) << "\n"
  end
end
|
245
|
+
|
235
246
|
# Unindents a text selection by a specified number of spaces.
|
236
247
|
def self.unindent(text, spaces)
|
237
248
|
lines = text.split("\n")
|
@@ -301,10 +312,11 @@ module FeedTools
|
|
301
312
|
html_node.delete_element(child)
|
302
313
|
end
|
303
314
|
end
|
304
|
-
|
305
|
-
if !(attribute =~ /^xmlns
|
306
|
-
unless acceptable_attributes.include?
|
307
|
-
|
315
|
+
child.attributes.each_attribute do |attribute|
|
316
|
+
if !(attribute.value =~ /^xmlns(:.+)?$/)
|
317
|
+
unless acceptable_attributes.include?(
|
318
|
+
attribute.value.downcase)
|
319
|
+
child.delete_attribute(attribute.value)
|
308
320
|
end
|
309
321
|
end
|
310
322
|
end
|
@@ -364,8 +376,6 @@ module FeedTools
|
|
364
376
|
].include?(type)
|
365
377
|
end
|
366
378
|
|
367
|
-
# can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
|
368
|
-
|
369
379
|
# Resolves all relative uris in a block of html.
|
370
380
|
def self.resolve_relative_uris(html, base_uri_sources=[])
|
371
381
|
relative_uri_attributes = [
|
@@ -398,23 +408,23 @@ module FeedTools
|
|
398
408
|
html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
|
399
409
|
|
400
410
|
resolve_node = lambda do |html_node|
|
401
|
-
if html_node.
|
402
|
-
for
|
403
|
-
if
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
element_attribute_pair[1]).instance_variable_set(
|
413
|
-
"@value", href)
|
414
|
-
end
|
415
|
-
end
|
411
|
+
if html_node.kind_of? REXML::Element
|
412
|
+
for element_attribute_pair in relative_uri_attributes
|
413
|
+
if html_node.name.downcase == element_attribute_pair[0]
|
414
|
+
attribute = html_node.attribute(element_attribute_pair[1])
|
415
|
+
if attribute != nil
|
416
|
+
href = attribute.value
|
417
|
+
href = FeedTools::UriHelper.resolve_relative_uri(
|
418
|
+
href, [html_node.base_uri] | base_uri_sources)
|
419
|
+
html_node.attribute(
|
420
|
+
element_attribute_pair[1]).instance_variable_set(
|
421
|
+
"@value", href)
|
416
422
|
end
|
417
423
|
end
|
424
|
+
end
|
425
|
+
end
|
426
|
+
if html_node.respond_to? :children
|
427
|
+
for child in html_node.children
|
418
428
|
resolve_node.call(child)
|
419
429
|
end
|
420
430
|
end
|
@@ -428,36 +438,55 @@ module FeedTools
|
|
428
438
|
# Returns a string containing normalized xhtml from within a REXML node.
|
429
439
|
def self.extract_xhtml(rexml_node)
|
430
440
|
rexml_node_dup = rexml_node.deep_clone
|
441
|
+
namespace_hash = FEED_TOOLS_NAMESPACES.dup
|
431
442
|
normalize_namespaced_xhtml = lambda do |node, node_dup|
|
432
443
|
if node.kind_of? REXML::Element
|
433
444
|
node_namespace = node.namespace
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
attribute_dup.
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
+
if node_namespace != namespace_hash['atom10'] &&
|
446
|
+
node_namespace != namespace_hash['atom03']
|
447
|
+
# Massive hack, relies on REXML not changing
|
448
|
+
for index in 0...node.attributes.values.size
|
449
|
+
attribute = node.attributes.values[index]
|
450
|
+
attribute_dup = node_dup.attributes.values[index]
|
451
|
+
if attribute.namespace == namespace_hash['xhtml']
|
452
|
+
attribute_dup.instance_variable_set(
|
453
|
+
"@expanded_name", attribute.name)
|
454
|
+
end
|
455
|
+
if node_namespace == namespace_hash['xhtml']
|
456
|
+
if attribute.name == 'xmlns'
|
457
|
+
node_dup.attributes.delete('xmlns')
|
458
|
+
end
|
445
459
|
end
|
446
460
|
end
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
461
|
+
if node_namespace == namespace_hash['xhtml']
|
462
|
+
node_dup.instance_variable_set("@expanded_name", node.name)
|
463
|
+
end
|
464
|
+
if !node_namespace.blank? && node.prefix.blank?
|
465
|
+
if node_namespace != namespace_hash['xhtml']
|
466
|
+
prefix = nil
|
467
|
+
for known_prefix in namespace_hash.keys
|
468
|
+
if namespace_hash[known_prefix] == node_namespace
|
469
|
+
prefix = known_prefix
|
470
|
+
end
|
471
|
+
end
|
472
|
+
if prefix.nil?
|
473
|
+
prefix = "unknown" +
|
474
|
+
Digest::SHA1.new(node_namespace).to_s[0..4]
|
475
|
+
namespace_hash[prefix] = node_namespace
|
476
|
+
end
|
477
|
+
node_dup.instance_variable_set("@expanded_name",
|
478
|
+
"#{prefix}:#{node.name}")
|
479
|
+
node_dup.instance_variable_set("@prefix",
|
480
|
+
prefix)
|
481
|
+
node_dup.add_namespace(prefix, node_namespace)
|
482
|
+
end
|
454
483
|
end
|
455
484
|
end
|
456
485
|
end
|
457
486
|
for index in 0...node.children.size
|
458
487
|
child = node.children[index]
|
459
|
-
child_dup = node_dup.children[index]
|
460
488
|
if child.kind_of? REXML::Element
|
489
|
+
child_dup = node_dup.children[index]
|
461
490
|
normalize_namespaced_xhtml.call(child, child_dup)
|
462
491
|
end
|
463
492
|
end
|
@@ -513,7 +542,9 @@ module FeedTools
|
|
513
542
|
type == "application/xhtml+xml" ||
|
514
543
|
content_node.namespace == FEED_TOOLS_NAMESPACES['xhtml']
|
515
544
|
content = FeedTools::HtmlHelper.extract_xhtml(content_node)
|
516
|
-
elsif type == "escaped" || mode == "escaped"
|
545
|
+
elsif type == "escaped" || mode == "escaped" ||
|
546
|
+
type == "html" || mode == "html" ||
|
547
|
+
type == "text/html" || mode == "text/html"
|
517
548
|
content = FeedTools::HtmlHelper.unescape_entities(
|
518
549
|
content_node.inner_xml.strip)
|
519
550
|
elsif type == "text" || mode == "text" ||
|
@@ -556,7 +587,7 @@ module FeedTools
|
|
556
587
|
doc = REXML::Document.new(xhtml.to_s.strip)
|
557
588
|
if doc.children.size == 1
|
558
589
|
child = doc.children[0]
|
559
|
-
if child.name.downcase == "div"
|
590
|
+
if child.kind_of?(REXML::Element) && child.name.downcase == "div"
|
560
591
|
return child.inner_xml.strip
|
561
592
|
end
|
562
593
|
end
|
@@ -574,7 +605,8 @@ module FeedTools
|
|
574
605
|
# This is technically very, very wrong. But it saves oodles of
|
575
606
|
# clock cycles, and probably works 99.999% of the time.
|
576
607
|
html_document = HTree.parse_xml(
|
577
|
-
FeedTools::HtmlHelper.tidy_html(
|
608
|
+
FeedTools::HtmlHelper.tidy_html(
|
609
|
+
html.gsub(/<body.*?>(.|\n)*<\/body>/, "<body>-</body>"))).to_rexml
|
578
610
|
html_node = nil
|
579
611
|
head_node = nil
|
580
612
|
link_nodes = []
|
@@ -22,20 +22,218 @@
|
|
22
22
|
#++
|
23
23
|
|
24
24
|
require 'feed_tools'
|
25
|
+
require 'feed_tools/helpers/uri_helper'
|
25
26
|
require 'net/http'
|
26
27
|
|
27
28
|
# TODO: Not used yet, don't load since it'll only be a performance hit
|
28
29
|
# require 'net/https'
|
29
30
|
# require 'net/ftp'
|
30
31
|
|
31
|
-
# Stolen from the Universal Feed Parser
|
32
|
-
FEED_TOOLS_ACCEPT_HEADER = "application/atom+xml,application/rdf+xml," +
|
33
|
-
"application/rss+xml,application/x-netcdf,application/xml;" +
|
34
|
-
"q=0.9,text/xml;q=0.2,*/*;q=0.1"
|
35
|
-
|
36
|
-
# TODO: Refactor http_fetch and other methods.
|
37
32
|
module FeedTools
|
38
33
|
# Methods for pulling remote data
|
39
34
|
module RetrievalHelper
|
35
|
+
# Stolen from the Universal Feed Parser
|
36
|
+
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml," +
|
37
|
+
"application/rss+xml,application/x-netcdf,application/xml;" +
|
38
|
+
"q=0.9,text/xml;q=0.2,*/*;q=0.1"
|
39
|
+
|
40
|
+
# Makes an HTTP request and returns the HTTP response.
#
# http_operation:: the Net::HTTP method to invoke, e.g. :get, :post, :head.
# url::            the URL to request.  If URI.parse rejects it, it is
#                  normalized with FeedTools::UriHelper.normalize_url and
#                  parsed again.
# options::        :feed_object, :form_data, :request_headers,
#                  :follow_redirects, :redirect_limit (default 10) and
#                  :response_chain.
#
# Optionally takes a block that determines whether or not to follow a
# redirect.  The block is called with the redirect target and the HTTP
# redirect response; a false/nil result stops the redirect from being
# followed.
#
# The returned response (when not nil) gains a +response_chain+ reader
# holding the [url, response] pairs of every redirect that was seen.
#
# Raises FeedAccessError when the redirect limit is exhausted, a redirect
# carries no location, a redirection loop is detected, or a socket,
# timeout, unreachable-network or connection-reset error occurs.
#
# NOTE(review): :follow_redirects is accepted and defaulted but never
# consulted below; only the block can veto a redirect.
def self.http_request(http_operation, url, options={}, &block)
  response = nil

  options = {
    :feed_object => nil,
    :form_data => nil,
    :request_headers => {},
    :follow_redirects => true,
    :redirect_limit => 10,
    :response_chain => []
  }.merge(options)

  if options[:redirect_limit] == 0
    raise FeedAccessError, 'Redirect too deep'
  end

  if options[:response_chain].blank? ||
      !options[:response_chain].kind_of?(Array)
    options[:response_chain] = []
  end

  if !options[:request_headers].kind_of?(Hash)
    options[:request_headers] = {}
  end
  if !options[:form_data].kind_of?(Hash)
    options[:form_data] = nil
  end

  # When the caller supplied no headers, derive conditional-GET headers
  # and the user agent from the feed object.
  if options[:request_headers].blank? && options[:feed_object] != nil
    options[:request_headers] = {}
    unless options[:feed_object].http_headers.nil?
      unless options[:feed_object].http_headers['etag'].nil?
        options[:request_headers]["If-None-Match"] =
          options[:feed_object].http_headers['etag']
      end
      unless options[:feed_object].http_headers['last-modified'].nil?
        options[:request_headers]["If-Modified-Since"] =
          options[:feed_object].http_headers['last-modified']
      end
    end
    unless options[:feed_object].configurations[:user_agent].nil?
      options[:request_headers]["User-Agent"] =
        options[:feed_object].configurations[:user_agent]
    end
  end
  if options[:request_headers]["Accept"].nil?
    options[:request_headers]["Accept"] =
      FeedTools::RetrievalHelper::ACCEPT_HEADER
  end
  if options[:request_headers]["User-Agent"].nil?
    options[:request_headers]["User-Agent"] =
      FeedTools.configurations[:user_agent]
  end

  uri = nil
  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    # Uh, maybe try to fix it?
    uri = URI.parse(FeedTools::UriHelper.normalize_url(url))
  end

  begin
    proxy_address = nil
    proxy_port = nil
    proxy_user = nil
    proxy_password = nil

    if options[:feed_object] != nil
      proxy_config = options[:feed_object].configurations
      proxy_address = proxy_config[:proxy_address] || nil
      # Only coerce a port that was actually configured: nil.to_i is 0,
      # which would never fall back to nil.
      unless proxy_config[:proxy_port].nil?
        proxy_port = proxy_config[:proxy_port].to_i
      end
      # Credentials are passed through untouched; converting them with
      # to_i turned every user name and password into 0.
      proxy_user = proxy_config[:proxy_user] || nil
      proxy_password = proxy_config[:proxy_password] || nil
    end

    # No need to check for nil
    http = Net::HTTP::Proxy(
      proxy_address, proxy_port, proxy_user, proxy_password).new(
        uri.host, (uri.port or 80))

    path = uri.path
    path += ('?' + uri.query) if uri.query

    request_params = [path, options[:request_headers]]
    if http_operation == :post
      options[:form_data] = {} if options[:form_data].blank?
      request_params << options[:form_data]
    end
    response = http.send(http_operation, *request_params)

    case response
    when Net::HTTPSuccess
      if options[:feed_object] != nil
        # We've reached the final destination, process all previous
        # redirections, and see if we need to update the url.
        for redirected_response in options[:response_chain]
          if redirected_response.last.code.to_i == 301
            # Reset the cache object or we may get duplicate entries

            # TODO: verify this line is necessary!
            options[:feed_object].cache_object = nil

            options[:feed_object].href =
              redirected_response.last['location']
          else
            # Jump out as soon as we hit anything that isn't a
            # permanently moved redirection.
            break
          end
        end
      end
    when Net::HTTPNotModified
      # Do nothing, we just don't want it processed as a redirection
    when Net::HTTPRedirection
      if response['location'].nil?
        raise FeedAccessError,
          "No location to redirect to supplied for " + response.code
      end
      options[:response_chain] << [url, response]

      # Resolve a relative Location header against the full request URI;
      # the bare host name is not a usable base URI.
      redirected_location = response['location']
      redirected_location = FeedTools::UriHelper.resolve_relative_uri(
        redirected_location, [uri.to_s])

      if options[:response_chain].assoc(redirected_location) != nil
        raise FeedAccessError,
          "Redirection loop detected: #{redirected_location}"
      end

      # Let the block handle redirects
      follow_redirect = true
      if block != nil
        follow_redirect = block.call(redirected_location, response)
      end

      if follow_redirect
        response = FeedTools::RetrievalHelper.http_request(
          http_operation,
          redirected_location,
          options.merge(
            {:redirect_limit => (options[:redirect_limit] - 1)}),
          &block)
      end
    end
  rescue SocketError
    raise FeedAccessError, 'Socket error prevented feed retrieval'
  rescue Timeout::Error
    raise FeedAccessError, 'Timeout while attempting to retrieve feed'
  rescue Errno::ENETUNREACH
    raise FeedAccessError, 'Network was unreachable'
  rescue Errno::ECONNRESET
    raise FeedAccessError, 'Connection was reset by peer'
  end

  if response != nil
    # Expose the redirect history on the response object itself.
    class << response
      def response_chain
        return @response_chain
      end
    end
    response.instance_variable_set("@response_chain",
      options[:response_chain])
  end

  return response
end
|
214
|
+
|
215
|
+
# Convenience wrapper: issues an HTTP GET for +url+ by delegating to
# FeedTools::RetrievalHelper.http_request with the :get operation.
# +options+ and the optional block are forwarded unchanged, and the
# result of http_request is returned.
def self.http_get(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:get, url, options, &block)
end
|
222
|
+
|
223
|
+
# Convenience wrapper: issues an HTTP POST to +url+ by delegating to
# FeedTools::RetrievalHelper.http_request with the :post operation.
# +options+ and the optional block are forwarded unchanged, and the
# result of http_request is returned.
def self.http_post(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:post, url, options, &block)
end
|
230
|
+
|
231
|
+
# Makes an HTTP HEAD request and returns the HTTP response by delegating
# to FeedTools::RetrievalHelper.http_request with the :head operation.
# +options+ and the optional block are forwarded unchanged.
#
# Defined on the module itself, like http_get and http_post, so that it
# can be called as FeedTools::RetrievalHelper.http_head.
def self.http_head(url, options={}, &block)
  return FeedTools::RetrievalHelper.http_request(
    :head, url, options, &block)
end

# Instance-level form of http_head, kept so that code which mixed this
# module in and relied on the previous instance method keeps working.
def http_head(url, options={}, &block)
  return FeedTools::RetrievalHelper.http_request(
    :head, url, options, &block)
end
|
40
238
|
end
|
41
239
|
end
|