feedtools 0.2.23 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +24 -12
- data/lib/feed_tools/database_feed_cache.rb +8 -5
- data/lib/feed_tools/feed.rb +122 -240
- data/lib/feed_tools/feed_item.rb +31 -13
- data/lib/feed_tools/feed_structures.rb +5 -2
- data/lib/feed_tools/helpers/debug_helper.rb +1 -2
- data/lib/feed_tools/helpers/html_helper.rb +75 -43
- data/lib/feed_tools/helpers/retrieval_helper.rb +204 -6
- data/lib/feed_tools/helpers/uri_helper.rb +4 -1
- data/lib/feed_tools/vendor/htree/parse.rb +3 -1
- data/lib/feed_tools/version.rb +9 -0
- data/rakefile +6 -4
- data/test/unit/atom_test.rb +253 -4
- data/test/unit/cache_test.rb +22 -17
- data/test/unit/helper_test.rb +2 -2
- metadata +4 -3
data/lib/feed_tools/feed_item.rb
CHANGED
@@ -69,6 +69,24 @@ module FeedTools
|
|
69
69
|
return parent_feed
|
70
70
|
end
|
71
71
|
|
72
|
+
# Returns the configuration options in effect for this feed item.
#
# While @configurations is still blank it is seeded with a copy (dup) of
# the parent feed's configurations, or of FeedTools.configurations when the
# item has no parent feed.  Later calls return the stored object.
def configurations
  return @configurations unless @configurations.blank?
  owner = self.feed
  defaults = (owner != nil) ? owner.configurations : FeedTools.configurations
  @configurations = defaults.dup
  return @configurations
end
|
84
|
+
|
85
|
+
# Replaces this feed item's configuration options with new_configurations.
# The object is stored exactly as given; no copy is made.
def configurations=(new_configurations)
  @configurations = new_configurations
end
|
89
|
+
|
72
90
|
# Returns the feed item's encoding.
|
73
91
|
def encoding
|
74
92
|
if @encoding.nil?
|
@@ -202,10 +220,10 @@ module FeedTools
|
|
202
220
|
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
203
221
|
self.feed_type, self.feed_version)
|
204
222
|
if self.feed_type == "atom" ||
|
205
|
-
|
223
|
+
self.configurations[:always_strip_wrapper_elements]
|
206
224
|
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
207
225
|
end
|
208
|
-
if !@title.blank? &&
|
226
|
+
if !@title.blank? && self.configurations[:strip_comment_count]
|
209
227
|
# Some blogging tools include the number of comments in a post
|
210
228
|
# in the title... this is supremely ugly, and breaks any
|
211
229
|
# applications which expect the title to be static, so we're
|
@@ -257,7 +275,7 @@ module FeedTools
|
|
257
275
|
@content = FeedTools::HtmlHelper.process_text_construct(content_node,
|
258
276
|
self.feed_type, self.feed_version)
|
259
277
|
if self.feed_type == "atom" ||
|
260
|
-
|
278
|
+
self.configurations[:always_strip_wrapper_elements]
|
261
279
|
@content = FeedTools::HtmlHelper.strip_wrapper_element(@content)
|
262
280
|
end
|
263
281
|
if @content.blank?
|
@@ -310,7 +328,7 @@ module FeedTools
|
|
310
328
|
@summary = FeedTools::HtmlHelper.process_text_construct(summary_node,
|
311
329
|
self.feed_type, self.feed_version)
|
312
330
|
if self.feed_type == "atom" ||
|
313
|
-
|
331
|
+
self.configurations[:always_strip_wrapper_elements]
|
314
332
|
@summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary)
|
315
333
|
end
|
316
334
|
if @summary.blank?
|
@@ -456,7 +474,7 @@ module FeedTools
|
|
456
474
|
end
|
457
475
|
rescue
|
458
476
|
end
|
459
|
-
if
|
477
|
+
if self.configurations[:url_normalization_enabled]
|
460
478
|
@link = FeedTools::UriHelper.normalize_url(@link)
|
461
479
|
end
|
462
480
|
end
|
@@ -507,7 +525,7 @@ module FeedTools
|
|
507
525
|
end
|
508
526
|
rescue
|
509
527
|
end
|
510
|
-
if
|
528
|
+
if self.configurations[:url_normalization_enabled]
|
511
529
|
link_object.href =
|
512
530
|
FeedTools::UriHelper.normalize_url(link_object.href)
|
513
531
|
end
|
@@ -640,7 +658,7 @@ module FeedTools
|
|
640
658
|
end
|
641
659
|
rescue
|
642
660
|
end
|
643
|
-
if
|
661
|
+
if self.configurations[:url_normalization_enabled]
|
644
662
|
image.href = FeedTools::UriHelper.normalize_url(image.href)
|
645
663
|
end
|
646
664
|
image.href.strip! unless image.href.nil?
|
@@ -688,7 +706,7 @@ module FeedTools
|
|
688
706
|
"itunes:image/@href",
|
689
707
|
"itunes:link[@rel='image']/@href"
|
690
708
|
], :select_result_value => true)
|
691
|
-
if
|
709
|
+
if self.configurations[:url_normalization_enabled]
|
692
710
|
@itunes_image_link = FeedTools::UriHelper.normalize_url(@itunes_image_link)
|
693
711
|
end
|
694
712
|
end
|
@@ -706,7 +724,7 @@ module FeedTools
|
|
706
724
|
@media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
707
725
|
"media:thumbnail/@url"
|
708
726
|
], :select_result_value => true)
|
709
|
-
if
|
727
|
+
if self.configurations[:url_normalization_enabled]
|
710
728
|
@media_thumbnail_link = FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
|
711
729
|
end
|
712
730
|
end
|
@@ -734,7 +752,7 @@ module FeedTools
|
|
734
752
|
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
735
753
|
self.feed_type, self.feed_version)
|
736
754
|
if self.feed_type == "atom" ||
|
737
|
-
|
755
|
+
self.configurations[:always_strip_wrapper_elements]
|
738
756
|
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
739
757
|
end
|
740
758
|
end
|
@@ -1456,14 +1474,14 @@ module FeedTools
|
|
1456
1474
|
begin
|
1457
1475
|
if !time_string.blank?
|
1458
1476
|
@time = Time.parse(time_string).gmtime
|
1459
|
-
elsif
|
1477
|
+
elsif self.configurations[:timestamp_estimation_enabled] &&
|
1460
1478
|
!self.title.nil? &&
|
1461
1479
|
(Time.parse(self.title) - Time.now).abs > 100
|
1462
1480
|
@time = Time.parse(self.title).gmtime
|
1463
1481
|
end
|
1464
1482
|
rescue
|
1465
1483
|
end
|
1466
|
-
if
|
1484
|
+
if self.configurations[:timestamp_estimation_enabled]
|
1467
1485
|
if options[:estimate_timestamp]
|
1468
1486
|
if @time.nil?
|
1469
1487
|
begin
|
@@ -1615,7 +1633,7 @@ module FeedTools
|
|
1615
1633
|
end
|
1616
1634
|
rescue
|
1617
1635
|
end
|
1618
|
-
if
|
1636
|
+
if self.configurations[:url_normalization_enabled]
|
1619
1637
|
@comments = FeedTools::UriHelper.normalize_url(@comments)
|
1620
1638
|
end
|
1621
1639
|
end
|
@@ -170,6 +170,10 @@ module FeedTools
|
|
170
170
|
alias_method :url=, :href=
|
171
171
|
alias_method :link, :href
|
172
172
|
alias_method :link=, :href=
|
173
|
+
|
174
|
+
# Starts every new instance with its expression set to 'full', one of the
# values accepted by expression= ('sample', 'full', 'nonstop').
def initialize
  @expression = 'full'
end
|
173
177
|
|
174
178
|
# Returns true if this is the default enclosure
|
175
179
|
def is_default?
|
@@ -202,8 +206,7 @@ module FeedTools
|
|
202
206
|
# Allowed values are 'sample', 'full', 'nonstop'.
|
203
207
|
def expression=(new_expression)
|
204
208
|
unless ['sample', 'full', 'nonstop'].include? new_expression.downcase
|
205
|
-
|
206
|
-
"Permitted values are 'sample', 'full', 'nonstop'."
|
209
|
+
return @expression
|
207
210
|
end
|
208
211
|
@expression = new_expression.downcase
|
209
212
|
end
|
@@ -24,8 +24,7 @@
|
|
24
24
|
module FeedTools
|
25
25
|
module DebugHelper
|
26
26
|
# Forces a stack_trace without interfering with the program
|
27
|
-
def stack_trace
|
28
|
-
|
27
|
+
def self.stack_trace
|
29
28
|
fork do
|
30
29
|
ObjectSpace.each_object(Thread) do |th|
|
31
30
|
th.raise Exception, "Stack Dump" unless Thread.current == th
|
@@ -232,6 +232,17 @@ module FeedTools
|
|
232
232
|
return tidy_html
|
233
233
|
end
|
234
234
|
|
235
|
+
# Indents a text selection by a specified number of spaces.
#
# The text is split on "\n"; every resulting line is prefixed with +spaces+
# space characters and terminated with "\n", so the result always ends in a
# newline unless the input produced no lines at all.
def self.indent(text, spaces)
  text.split("\n").inject("") do |indented, row|
    indented << (" " * spaces + row) << "\n"
  end
end
|
245
|
+
|
235
246
|
# Unindents a text selection by a specified number of spaces.
|
236
247
|
def self.unindent(text, spaces)
|
237
248
|
lines = text.split("\n")
|
@@ -301,10 +312,11 @@ module FeedTools
|
|
301
312
|
html_node.delete_element(child)
|
302
313
|
end
|
303
314
|
end
|
304
|
-
|
305
|
-
if !(attribute =~ /^xmlns
|
306
|
-
unless acceptable_attributes.include?
|
307
|
-
|
315
|
+
child.attributes.each_attribute do |attribute|
|
316
|
+
if !(attribute.value =~ /^xmlns(:.+)?$/)
|
317
|
+
unless acceptable_attributes.include?(
|
318
|
+
attribute.value.downcase)
|
319
|
+
child.delete_attribute(attribute.value)
|
308
320
|
end
|
309
321
|
end
|
310
322
|
end
|
@@ -364,8 +376,6 @@ module FeedTools
|
|
364
376
|
].include?(type)
|
365
377
|
end
|
366
378
|
|
367
|
-
# can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
|
368
|
-
|
369
379
|
# Resolves all relative uris in a block of html.
|
370
380
|
def self.resolve_relative_uris(html, base_uri_sources=[])
|
371
381
|
relative_uri_attributes = [
|
@@ -398,23 +408,23 @@ module FeedTools
|
|
398
408
|
html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
|
399
409
|
|
400
410
|
resolve_node = lambda do |html_node|
|
401
|
-
if html_node.
|
402
|
-
for
|
403
|
-
if
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
element_attribute_pair[1]).instance_variable_set(
|
413
|
-
"@value", href)
|
414
|
-
end
|
415
|
-
end
|
411
|
+
if html_node.kind_of? REXML::Element
|
412
|
+
for element_attribute_pair in relative_uri_attributes
|
413
|
+
if html_node.name.downcase == element_attribute_pair[0]
|
414
|
+
attribute = html_node.attribute(element_attribute_pair[1])
|
415
|
+
if attribute != nil
|
416
|
+
href = attribute.value
|
417
|
+
href = FeedTools::UriHelper.resolve_relative_uri(
|
418
|
+
href, [html_node.base_uri] | base_uri_sources)
|
419
|
+
html_node.attribute(
|
420
|
+
element_attribute_pair[1]).instance_variable_set(
|
421
|
+
"@value", href)
|
416
422
|
end
|
417
423
|
end
|
424
|
+
end
|
425
|
+
end
|
426
|
+
if html_node.respond_to? :children
|
427
|
+
for child in html_node.children
|
418
428
|
resolve_node.call(child)
|
419
429
|
end
|
420
430
|
end
|
@@ -428,36 +438,55 @@ module FeedTools
|
|
428
438
|
# Returns a string containing normalized xhtml from within a REXML node.
|
429
439
|
def self.extract_xhtml(rexml_node)
|
430
440
|
rexml_node_dup = rexml_node.deep_clone
|
441
|
+
namespace_hash = FEED_TOOLS_NAMESPACES.dup
|
431
442
|
normalize_namespaced_xhtml = lambda do |node, node_dup|
|
432
443
|
if node.kind_of? REXML::Element
|
433
444
|
node_namespace = node.namespace
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
attribute_dup.
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
+
if node_namespace != namespace_hash['atom10'] &&
|
446
|
+
node_namespace != namespace_hash['atom03']
|
447
|
+
# Massive hack, relies on REXML not changing
|
448
|
+
for index in 0...node.attributes.values.size
|
449
|
+
attribute = node.attributes.values[index]
|
450
|
+
attribute_dup = node_dup.attributes.values[index]
|
451
|
+
if attribute.namespace == namespace_hash['xhtml']
|
452
|
+
attribute_dup.instance_variable_set(
|
453
|
+
"@expanded_name", attribute.name)
|
454
|
+
end
|
455
|
+
if node_namespace == namespace_hash['xhtml']
|
456
|
+
if attribute.name == 'xmlns'
|
457
|
+
node_dup.attributes.delete('xmlns')
|
458
|
+
end
|
445
459
|
end
|
446
460
|
end
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
461
|
+
if node_namespace == namespace_hash['xhtml']
|
462
|
+
node_dup.instance_variable_set("@expanded_name", node.name)
|
463
|
+
end
|
464
|
+
if !node_namespace.blank? && node.prefix.blank?
|
465
|
+
if node_namespace != namespace_hash['xhtml']
|
466
|
+
prefix = nil
|
467
|
+
for known_prefix in namespace_hash.keys
|
468
|
+
if namespace_hash[known_prefix] == node_namespace
|
469
|
+
prefix = known_prefix
|
470
|
+
end
|
471
|
+
end
|
472
|
+
if prefix.nil?
|
473
|
+
prefix = "unknown" +
|
474
|
+
Digest::SHA1.new(node_namespace).to_s[0..4]
|
475
|
+
namespace_hash[prefix] = node_namespace
|
476
|
+
end
|
477
|
+
node_dup.instance_variable_set("@expanded_name",
|
478
|
+
"#{prefix}:#{node.name}")
|
479
|
+
node_dup.instance_variable_set("@prefix",
|
480
|
+
prefix)
|
481
|
+
node_dup.add_namespace(prefix, node_namespace)
|
482
|
+
end
|
454
483
|
end
|
455
484
|
end
|
456
485
|
end
|
457
486
|
for index in 0...node.children.size
|
458
487
|
child = node.children[index]
|
459
|
-
child_dup = node_dup.children[index]
|
460
488
|
if child.kind_of? REXML::Element
|
489
|
+
child_dup = node_dup.children[index]
|
461
490
|
normalize_namespaced_xhtml.call(child, child_dup)
|
462
491
|
end
|
463
492
|
end
|
@@ -513,7 +542,9 @@ module FeedTools
|
|
513
542
|
type == "application/xhtml+xml" ||
|
514
543
|
content_node.namespace == FEED_TOOLS_NAMESPACES['xhtml']
|
515
544
|
content = FeedTools::HtmlHelper.extract_xhtml(content_node)
|
516
|
-
elsif type == "escaped" || mode == "escaped"
|
545
|
+
elsif type == "escaped" || mode == "escaped" ||
|
546
|
+
type == "html" || mode == "html" ||
|
547
|
+
type == "text/html" || mode == "text/html"
|
517
548
|
content = FeedTools::HtmlHelper.unescape_entities(
|
518
549
|
content_node.inner_xml.strip)
|
519
550
|
elsif type == "text" || mode == "text" ||
|
@@ -556,7 +587,7 @@ module FeedTools
|
|
556
587
|
doc = REXML::Document.new(xhtml.to_s.strip)
|
557
588
|
if doc.children.size == 1
|
558
589
|
child = doc.children[0]
|
559
|
-
if child.name.downcase == "div"
|
590
|
+
if child.kind_of?(REXML::Element) && child.name.downcase == "div"
|
560
591
|
return child.inner_xml.strip
|
561
592
|
end
|
562
593
|
end
|
@@ -574,7 +605,8 @@ module FeedTools
|
|
574
605
|
# This is technically very, very wrong. But it saves oodles of
|
575
606
|
# clock cycles, and probably works 99.999% of the time.
|
576
607
|
html_document = HTree.parse_xml(
|
577
|
-
FeedTools::HtmlHelper.tidy_html(
|
608
|
+
FeedTools::HtmlHelper.tidy_html(
|
609
|
+
html.gsub(/<body.*?>(.|\n)*<\/body>/, "<body>-</body>"))).to_rexml
|
578
610
|
html_node = nil
|
579
611
|
head_node = nil
|
580
612
|
link_nodes = []
|
@@ -22,20 +22,218 @@
|
|
22
22
|
#++
|
23
23
|
|
24
24
|
require 'feed_tools'
|
25
|
+
require 'feed_tools/helpers/uri_helper'
|
25
26
|
require 'net/http'
|
26
27
|
|
27
28
|
# TODO: Not used yet, don't load since it'll only be a performance hit
|
28
29
|
# require 'net/https'
|
29
30
|
# require 'net/ftp'
|
30
31
|
|
31
|
-
# Stolen from the Universal Feed Parser
|
32
|
-
FEED_TOOLS_ACCEPT_HEADER = "application/atom+xml,application/rdf+xml," +
|
33
|
-
"application/rss+xml,application/x-netcdf,application/xml;" +
|
34
|
-
"q=0.9,text/xml;q=0.2,*/*;q=0.1"
|
35
|
-
|
36
|
-
# TODO: Refactor http_fetch and other methods.
|
37
32
|
module FeedTools
|
38
33
|
# Methods for pulling remote data
|
39
34
|
module RetrievalHelper
|
35
|
+
# Stolen from the Universal Feed Parser
|
36
|
+
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml," +
|
37
|
+
"application/rss+xml,application/x-netcdf,application/xml;" +
|
38
|
+
"q=0.9,text/xml;q=0.2,*/*;q=0.1"
|
39
|
+
|
40
|
+
# Makes an HTTP request and returns the HTTP response.
#
# http_operation is the lowercase symbol of the request method (:get,
# :post, :head) and url is the address to request.
#
# Recognised options:
#   :feed_object      - when not nil, its http_headers supply the
#                       If-None-Match / If-Modified-Since headers and its
#                       configurations supply the user agent and the proxy
#                       settings.  After a successful response its
#                       cache_object and href are updated for the leading
#                       301 redirects of the response chain.
#   :form_data        - Hash of form fields sent as the body of a :post.
#                       Anything that is not a Hash is discarded.
#   :request_headers  - Hash of request headers.  "Accept" and "User-Agent"
#                       are filled in when missing.
#   :follow_redirects - defaults to true.
#                       NOTE(review): this option is never consulted below;
#                       only the block decides whether a redirect is followed.
#   :redirect_limit   - number of redirects that may still be followed
#                       (default 10); FeedAccessError is raised at 0.
#   :response_chain   - Array of [url, response] pairs for the redirects
#                       seen so far.
#
# Optionally takes a block that determines whether or not to follow a
# redirect.  The block is called with the redirect target and the redirect
# response; a false or nil result returns the redirect response itself.
#
# The returned response answers to response_chain with the accumulated
# [url, response] pairs.
#
# Raises FeedAccessError when the redirect limit is exhausted, when a
# redirect has no location, when a redirect loop is detected, and in place
# of SocketError, Timeout::Error, Errno::ENETUNREACH and Errno::ECONNRESET.
def self.http_request(http_operation, url, options={}, &block)
  response = nil

  options = {
    :feed_object => nil,
    :form_data => nil,
    :request_headers => {},
    :follow_redirects => true,
    :redirect_limit => 10,
    :response_chain => []
  }.merge(options)

  if options[:redirect_limit] == 0
    raise FeedAccessError, 'Redirect too deep'
  end

  if options[:response_chain].blank? ||
      !options[:response_chain].kind_of?(Array)
    options[:response_chain] = []
  end

  if !options[:request_headers].kind_of?(Hash)
    options[:request_headers] = {}
  end
  if !options[:form_data].kind_of?(Hash)
    options[:form_data] = nil
  end

  # Headers derived from the feed are only used when the caller supplied
  # no headers of their own.
  if options[:request_headers].blank? && options[:feed_object] != nil
    options[:request_headers] = {}
    unless options[:feed_object].http_headers.nil?
      unless options[:feed_object].http_headers['etag'].nil?
        options[:request_headers]["If-None-Match"] =
          options[:feed_object].http_headers['etag']
      end
      unless options[:feed_object].http_headers['last-modified'].nil?
        options[:request_headers]["If-Modified-Since"] =
          options[:feed_object].http_headers['last-modified']
      end
    end
    unless options[:feed_object].configurations[:user_agent].nil?
      options[:request_headers]["User-Agent"] =
        options[:feed_object].configurations[:user_agent]
    end
  end
  if options[:request_headers]["Accept"].nil?
    options[:request_headers]["Accept"] =
      FeedTools::RetrievalHelper::ACCEPT_HEADER
  end
  if options[:request_headers]["User-Agent"].nil?
    options[:request_headers]["User-Agent"] =
      FeedTools.configurations[:user_agent]
  end

  uri = nil
  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    # The url could not be parsed as given; retry with its normalized form.
    uri = URI.parse(FeedTools::UriHelper.normalize_url(url))
  end

  begin
    proxy_address = nil
    proxy_port = nil
    proxy_user = nil
    proxy_password = nil

    if options[:feed_object] != nil
      proxy_settings = options[:feed_object].configurations
      proxy_address = proxy_settings[:proxy_address] || nil
      # Only the port is numeric.  An unset port stays nil instead of
      # becoming 0 through nil.to_i.
      unless proxy_settings[:proxy_port].to_s.strip.empty?
        proxy_port = proxy_settings[:proxy_port].to_i
      end
      # The credentials are strings and must not be converted with to_i.
      proxy_user = proxy_settings[:proxy_user] || nil
      proxy_password = proxy_settings[:proxy_password] || nil
    end

    # No need to check for nil
    http = Net::HTTP::Proxy(
      proxy_address, proxy_port, proxy_user, proxy_password).new(
        uri.host, (uri.port or 80))

    path = uri.path
    # A url without a path component still needs a request path.
    path = '/' if path.blank?
    path += ('?' + uri.query) if uri.query

    if http_operation == :post
      # Net::HTTP#post takes (path, data, headers) with a string body, so
      # the form fields are encoded onto an explicit request object.
      post_request = Net::HTTP::Post.new(path, options[:request_headers])
      post_request.set_form_data(options[:form_data] || {})
      response = http.request(post_request)
    else
      response = http.send(http_operation, path, options[:request_headers])
    end

    case response
    when Net::HTTPSuccess
      if options[:feed_object] != nil
        # We've reached the final destination, process all previous
        # redirections, and see if we need to update the url.
        for redirected_response in options[:response_chain]
          if redirected_response.last.code.to_i == 301
            # Reset the cache object or we may get duplicate entries

            # TODO: verify this line is necessary!
            options[:feed_object].cache_object = nil

            options[:feed_object].href =
              redirected_response.last['location']
          else
            # Jump out as soon as we hit anything that isn't a
            # permanently moved redirection.
            break
          end
        end
      end
    when Net::HTTPNotModified
      # Do nothing, we just don't want it processed as a redirection
    when Net::HTTPRedirection
      if response['location'].nil?
        raise FeedAccessError,
          "No location to redirect to supplied for " + response.code
      end
      options[:response_chain] << [url, response]

      redirected_location = response['location']
      # NOTE(review): the base handed to resolve_relative_uri is only the
      # host name, not the full request url -- confirm this is intended.
      redirected_location = FeedTools::UriHelper.resolve_relative_uri(
        redirected_location, [uri.host])

      if options[:response_chain].assoc(redirected_location) != nil
        raise FeedAccessError,
          "Redirection loop detected: #{redirected_location}"
      end

      # Let the block handle redirects
      follow_redirect = true
      if block != nil
        follow_redirect = block.call(redirected_location, response)
      end

      if follow_redirect
        response = FeedTools::RetrievalHelper.http_request(
          http_operation,
          redirected_location,
          options.merge(
            {:redirect_limit => (options[:redirect_limit] - 1)}),
          &block)
      end
    end
  rescue SocketError
    raise FeedAccessError, 'Socket error prevented feed retrieval'
  rescue Timeout::Error
    raise FeedAccessError, 'Timeout while attempting to retrieve feed'
  rescue Errno::ENETUNREACH
    raise FeedAccessError, 'Network was unreachable'
  rescue Errno::ECONNRESET
    raise FeedAccessError, 'Connection was reset by peer'
  end

  if response != nil
    # Expose the redirect history on the response object itself.
    class << response
      def response_chain
        return @response_chain
      end
    end
    response.instance_variable_set("@response_chain",
      options[:response_chain])
  end

  return response
end
|
214
|
+
|
215
|
+
# Issues an HTTP GET for url and returns the HTTP response.
#
# This is a thin wrapper that delegates to http_request with the :get
# operation; options and the optional redirect-deciding block are handed
# through unchanged.
def self.http_get(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:get, url, options, &block)
end
|
222
|
+
|
223
|
+
# Issues an HTTP POST to url and returns the HTTP response.
#
# This is a thin wrapper that delegates to http_request with the :post
# operation; options and the optional redirect-deciding block are handed
# through unchanged.
def self.http_post(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:post, url, options, &block)
end
|
230
|
+
|
231
|
+
# Makes an HTTP HEAD request and returns the HTTP response.  Optionally
# takes a block that determines whether or not to follow a redirect; the
# block, like options, is handed through to http_request unchanged.
#
# Defined on the module itself so that it can be called as
# FeedTools::RetrievalHelper.http_head, in the same way as http_get and
# http_post.
def self.http_head(url, options={}, &block)
  return FeedTools::RetrievalHelper.http_request(
    :head, url, options, &block)
end

# Instance-level form of http_head.  Kept so that code which mixes this
# module in and calls http_head on its own instances keeps working.
def http_head(url, options={}, &block)
  return FeedTools::RetrievalHelper.http_request(
    :head, url, options, &block)
end
|
40
238
|
end
|
41
239
|
end
|