spix_parser 1.6.4 → 1.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'ruby-debug'
|
1
2
|
module Spix
|
2
3
|
module FeedDiscovery
|
3
4
|
class Document
|
@@ -51,7 +52,7 @@ module Spix
|
|
51
52
|
|
52
53
|
def feed_unsing_address uri, &block
|
53
54
|
begin
|
54
|
-
Feed.new(uri
|
55
|
+
Feed.new(uri).tap do |feed|
|
55
56
|
block.call feed if block_given?
|
56
57
|
end
|
57
58
|
rescue => error
|
@@ -101,71 +102,6 @@ module Spix
|
|
101
102
|
def feed?
|
102
103
|
%w[rss feed].include? @document.root.name
|
103
104
|
end
|
104
|
-
|
105
|
-
def favicon
|
106
|
-
shortcut_in_document or shortcut_from_original_page or shortcut_from(base_uri) or default_favico_if_exist
|
107
|
-
end
|
108
|
-
|
109
|
-
def shortcut_in_document
|
110
|
-
shortcuts = find_shortcut_in @document
|
111
|
-
shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
|
112
|
-
end
|
113
|
-
private :shortcut_in_document
|
114
|
-
|
115
|
-
def shortcut_from_original_page
|
116
|
-
if feed?
|
117
|
-
if node = @document.search('link').first
|
118
|
-
path = URI.parse node.content.strip
|
119
|
-
shortcut_from URI.parse path.select(:scheme, :host).join("://") rescue nil
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
123
|
-
private :shortcut_from_original_page
|
124
|
-
|
125
|
-
def shortcut_from base_uri
|
126
|
-
doc = get base_uri
|
127
|
-
shortcuts = find_shortcut_in doc
|
128
|
-
shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
|
129
|
-
|
130
|
-
rescue Net::HTTPError, Net::HTTPFatalError
|
131
|
-
logger.warn "error opening favicon: #{$!}"
|
132
|
-
nil
|
133
|
-
end
|
134
|
-
private :shortcut_from
|
135
|
-
|
136
|
-
def find_shortcut_in doc
|
137
|
-
doc.xpath(
|
138
|
-
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
|
139
|
-
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
|
140
|
-
).map { |node| node.get_attribute "href" }
|
141
|
-
end
|
142
|
-
|
143
|
-
def default_favico_if_exist
|
144
|
-
http = Net::HTTP.new base_uri.host, base_uri.port
|
145
|
-
resp = http.request_head 'favicon.ico'
|
146
|
-
base_uri.merge('favicon.ico').to_s unless resp.kind_of? Net::HTTPError
|
147
|
-
rescue
|
148
|
-
nil
|
149
|
-
end
|
150
|
-
private :default_favico_if_exist
|
151
|
-
|
152
|
-
def get uri
|
153
|
-
resp = Net::HTTP.get_response uri
|
154
|
-
doc = Nokogiri::HTML(resp.body)
|
155
|
-
if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
|
156
|
-
path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
|
157
|
-
get URI.parse path
|
158
|
-
else
|
159
|
-
doc
|
160
|
-
end
|
161
|
-
rescue
|
162
|
-
Nokogiri::HTML('')
|
163
|
-
end
|
164
|
-
|
165
|
-
def base_uri
|
166
|
-
@base_uri ||= URI.parse @uri.select(:scheme, :host).join("://")
|
167
|
-
end
|
168
|
-
private :base_uri
|
169
105
|
|
170
106
|
end
|
171
107
|
end
|
@@ -2,9 +2,9 @@ module Spix
|
|
2
2
|
module FeedDiscovery
|
3
3
|
class Feed < Hash
|
4
4
|
|
5
|
-
def initialize url
|
5
|
+
def initialize url
|
6
6
|
self.url = url.to_s
|
7
|
-
self.favicon =
|
7
|
+
self.favicon = get_favicon
|
8
8
|
self.title = get_title
|
9
9
|
end
|
10
10
|
|
@@ -24,13 +24,61 @@ module Spix
|
|
24
24
|
end
|
25
25
|
private :get_title
|
26
26
|
|
27
|
+
def get_favicon
|
28
|
+
if node = content.search('link').first
|
29
|
+
path = URI.parse node.content.strip
|
30
|
+
shortcut_from URI.parse path.select(:scheme, :host).join("://") rescue nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def shortcut_from base_uri
|
35
|
+
doc = get base_uri
|
36
|
+
shortcuts = find_shortcut_in doc
|
37
|
+
shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
|
38
|
+
|
39
|
+
rescue Net::HTTPError, Net::HTTPFatalError
|
40
|
+
logger.warn "error opening favicon: #{$!}"
|
41
|
+
nil
|
42
|
+
end
|
43
|
+
private :shortcut_from
|
44
|
+
|
45
|
+
def find_shortcut_in doc
|
46
|
+
doc.xpath(
|
47
|
+
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
|
48
|
+
'//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
|
49
|
+
).map { |node| node.get_attribute "href" }
|
50
|
+
end
|
51
|
+
|
52
|
+
def get uri
|
53
|
+
resp = Net::HTTP.get_response uri
|
54
|
+
doc = Nokogiri::HTML(resp.body)
|
55
|
+
if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
|
56
|
+
path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
|
57
|
+
get URI.parse path
|
58
|
+
else
|
59
|
+
doc
|
60
|
+
end
|
61
|
+
rescue
|
62
|
+
Nokogiri::HTML('')
|
63
|
+
end
|
64
|
+
|
65
|
+
def base_uri
|
66
|
+
@base_uri ||= URI.parse uri.select(:scheme, :host).join("://")
|
67
|
+
end
|
68
|
+
private :base_uri
|
69
|
+
|
27
70
|
def content
|
71
|
+
@content ||= load_content
|
72
|
+
end
|
73
|
+
private :content
|
74
|
+
|
75
|
+
def load_content
|
28
76
|
req = Net::HTTP.new uri.host, uri.port
|
29
77
|
path = uri - uri.select(:scheme, :host).join("://")
|
30
78
|
resp = req.request_get path.to_s
|
31
79
|
Nokogiri::XML(resp.body)
|
32
80
|
end
|
33
|
-
private :content
|
81
|
+
private :load_content
|
34
82
|
|
35
83
|
def uri
|
36
84
|
@uri ||= URI.parse url
|
data/lib/spix_parser/version.rb
CHANGED