spix_parser 1.6.7 → 1.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,7 +64,9 @@ module Spix
|
|
64
64
|
|
65
65
|
def uris_from_links &block
|
66
66
|
from_links.map { |node|
|
67
|
-
|
67
|
+
path = parse_uri node.get_attribute('href')
|
68
|
+
path = @uri.merge path unless path.absolute?
|
69
|
+
Feed.new(path).tap do |item|
|
68
70
|
block.call item if block_given?
|
69
71
|
end
|
70
72
|
}
|
@@ -88,7 +90,9 @@ module Spix
|
|
88
90
|
|
89
91
|
def uris_from_anchors &block
|
90
92
|
from_anchors.map { |node|
|
91
|
-
|
93
|
+
path = parse_uri node.get_attribute('href')
|
94
|
+
path = @uri.merge path unless path.absolute?
|
95
|
+
Feed.new(path).tap do |item|
|
92
96
|
block.call item if block_given?
|
93
97
|
end
|
94
98
|
}
|
@@ -131,7 +135,7 @@ module Spix
|
|
131
135
|
def rss_or_atom_content_type_in? anchor
|
132
136
|
connection, path = connection_and_path_using address_from anchor
|
133
137
|
response = connection.request_head path
|
134
|
-
response['content-type'] =~ /rss|atom/
|
138
|
+
response['content-type'] =~ /rss|atom|xml/
|
135
139
|
rescue
|
136
140
|
true
|
137
141
|
end
|
@@ -47,6 +47,7 @@ module Spix
|
|
47
47
|
def find_shortcut_in doc
|
48
48
|
doc.xpath(
|
49
49
|
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
|
50
|
+
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "icon")]',
|
50
51
|
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
|
51
52
|
).map { |node| node.get_attribute "href" }
|
52
53
|
end
|
@@ -56,7 +57,7 @@ module Spix
|
|
56
57
|
resp = Net::HTTP.get_response uri
|
57
58
|
if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
|
58
59
|
path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
|
59
|
-
from_redirect =
|
60
|
+
from_redirect = base_uri.merge path
|
60
61
|
self.url = from_redirect.to_s
|
61
62
|
fetch from_redirect, limit - 1
|
62
63
|
else
|
data/lib/spix_parser/version.rb
CHANGED