spix_parser 1.6.4 → 1.6.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
1
+ require 'ruby-debug'
1
2
  module Spix
2
3
  module FeedDiscovery
3
4
  class Document
@@ -51,7 +52,7 @@ module Spix
51
52
 
52
53
  def feed_unsing_address uri, &block
53
54
  begin
54
- Feed.new(uri, favicon).tap do |feed|
55
+ Feed.new(uri).tap do |feed|
55
56
  block.call feed if block_given?
56
57
  end
57
58
  rescue => error
@@ -101,71 +102,6 @@ module Spix
101
102
  def feed?
102
103
  %w[rss feed].include? @document.root.name
103
104
  end
104
-
105
- def favicon
106
- shortcut_in_document or shortcut_from_original_page or shortcut_from(base_uri) or default_favico_if_exist
107
- end
108
-
109
- def shortcut_in_document
110
- shortcuts = find_shortcut_in @document
111
- shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
112
- end
113
- private :shortcut_in_document
114
-
115
- def shortcut_from_original_page
116
- if feed?
117
- if node = @document.search('link').first
118
- path = URI.parse node.content.strip
119
- shortcut_from URI.parse path.select(:scheme, :host).join("://") rescue nil
120
- end
121
- end
122
- end
123
- private :shortcut_from_original_page
124
-
125
- def shortcut_from base_uri
126
- doc = get base_uri
127
- shortcuts = find_shortcut_in doc
128
- shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
129
-
130
- rescue Net::HTTPError, Net::HTTPFatalError
131
- logger.warn "error opening favicon: #{$!}"
132
- nil
133
- end
134
- private :shortcut_from
135
-
136
- def find_shortcut_in doc
137
- doc.xpath(
138
- '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
139
- '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
140
- ).map { |node| node.get_attribute "href" }
141
- end
142
-
143
- def default_favico_if_exist
144
- http = Net::HTTP.new base_uri.host, base_uri.port
145
- resp = http.request_head 'favicon.ico'
146
- base_uri.merge('favicon.ico').to_s unless resp.kind_of? Net::HTTPError
147
- rescue
148
- nil
149
- end
150
- private :default_favico_if_exist
151
-
152
- def get uri
153
- resp = Net::HTTP.get_response uri
154
- doc = Nokogiri::HTML(resp.body)
155
- if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
156
- path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
157
- get URI.parse path
158
- else
159
- doc
160
- end
161
- rescue
162
- Nokogiri::HTML('')
163
- end
164
-
165
- def base_uri
166
- @base_uri ||= URI.parse @uri.select(:scheme, :host).join("://")
167
- end
168
- private :base_uri
169
105
 
170
106
  end
171
107
  end
@@ -2,9 +2,9 @@ module Spix
2
2
  module FeedDiscovery
3
3
  class Feed < Hash
4
4
 
5
- def initialize url, favicon
5
+ def initialize url
6
6
  self.url = url.to_s
7
- self.favicon = favicon
7
+ self.favicon = get_favicon
8
8
  self.title = get_title
9
9
  end
10
10
 
@@ -24,13 +24,61 @@ module Spix
24
24
  end
25
25
  private :get_title
26
26
 
27
+ def get_favicon
28
+ if node = content.search('link').first
29
+ path = URI.parse node.content.strip
30
+ shortcut_from URI.parse path.select(:scheme, :host).join("://") rescue nil
31
+ end
32
+ end
33
+
34
+ def shortcut_from base_uri
35
+ doc = get base_uri
36
+ shortcuts = find_shortcut_in doc
37
+ shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
38
+
39
+ rescue Net::HTTPError, Net::HTTPFatalError
40
+ logger.warn "error opening favicon: #{$!}"
41
+ nil
42
+ end
43
+ private :shortcut_from
44
+
45
+ def find_shortcut_in doc
46
+ doc.xpath(
47
+ '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
48
+ '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
49
+ ).map { |node| node.get_attribute "href" }
50
+ end
51
+
52
+ def get uri
53
+ resp = Net::HTTP.get_response uri
54
+ doc = Nokogiri::HTML(resp.body)
55
+ if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
56
+ path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
57
+ get URI.parse path
58
+ else
59
+ doc
60
+ end
61
+ rescue
62
+ Nokogiri::HTML('')
63
+ end
64
+
65
+ def base_uri
66
+ @base_uri ||= URI.parse uri.select(:scheme, :host).join("://")
67
+ end
68
+ private :base_uri
69
+
27
70
  def content
71
+ @content ||= load_content
72
+ end
73
+ private :content
74
+
75
+ def load_content
28
76
  req = Net::HTTP.new uri.host, uri.port
29
77
  path = uri - uri.select(:scheme, :host).join("://")
30
78
  resp = req.request_get path.to_s
31
79
  Nokogiri::XML(resp.body)
32
80
  end
33
- private :content
81
+ private :load_content
34
82
 
35
83
  def uri
36
84
  @uri ||= URI.parse url
@@ -4,7 +4,7 @@ module Spix
4
4
  module Version
5
5
  MAJOR = 1
6
6
  MINOR = 6
7
- TINY = 4
7
+ TINY = 5
8
8
 
9
9
  def self.current_version
10
10
  "#{MAJOR}.#{MINOR}.#{TINY}"
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: spix_parser
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.6.4
5
+ version: 1.6.5
6
6
  platform: ruby
7
7
  authors:
8
8
  - Marcio Lopes de Faria