spix_parser 1.6.7 → 1.6.8
Sign up to get free protection for your applications and to get access to all the features.
@@ -64,7 +64,9 @@ module Spix
|
|
64
64
|
|
65
65
|
def uris_from_links &block
|
66
66
|
from_links.map { |node|
|
67
|
-
|
67
|
+
path = parse_uri node.get_attribute('href')
|
68
|
+
path = @uri.merge path unless path.absolute?
|
69
|
+
Feed.new(path).tap do |item|
|
68
70
|
block.call item if block_given?
|
69
71
|
end
|
70
72
|
}
|
@@ -88,7 +90,9 @@ module Spix
|
|
88
90
|
|
89
91
|
def uris_from_anchors &block
|
90
92
|
from_anchors.map { |node|
|
91
|
-
|
93
|
+
path = parse_uri node.get_attribute('href')
|
94
|
+
path = @uri.merge path unless path.absolute?
|
95
|
+
Feed.new(path).tap do |item|
|
92
96
|
block.call item if block_given?
|
93
97
|
end
|
94
98
|
}
|
@@ -131,7 +135,7 @@ module Spix
|
|
131
135
|
def rss_or_atom_content_type_in? anchor
|
132
136
|
connection, path = connection_and_path_using address_from anchor
|
133
137
|
response = connection.request_head path
|
134
|
-
response['content-type'] =~ /rss|atom/
|
138
|
+
response['content-type'] =~ /rss|atom|xml/
|
135
139
|
rescue
|
136
140
|
true
|
137
141
|
end
|
@@ -47,6 +47,7 @@ module Spix
|
|
47
47
|
def find_shortcut_in doc
|
48
48
|
doc.xpath(
|
49
49
|
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
|
50
|
+
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "icon")]',
|
50
51
|
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
|
51
52
|
).map { |node| node.get_attribute "href" }
|
52
53
|
end
|
@@ -56,7 +57,7 @@ module Spix
|
|
56
57
|
resp = Net::HTTP.get_response uri
|
57
58
|
if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
|
58
59
|
path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
|
59
|
-
from_redirect =
|
60
|
+
from_redirect = base_uri.merge path
|
60
61
|
self.url = from_redirect.to_s
|
61
62
|
fetch from_redirect, limit - 1
|
62
63
|
else
|
data/lib/spix_parser/version.rb
CHANGED