spix_parser 1.6.4 → 1.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ require 'ruby-debug'
1
2
  module Spix
2
3
  module FeedDiscovery
3
4
  class Document
@@ -51,7 +52,7 @@ module Spix
51
52
 
52
53
  def feed_unsing_address uri, &block
53
54
  begin
54
- Feed.new(uri, favicon).tap do |feed|
55
+ Feed.new(uri).tap do |feed|
55
56
  block.call feed if block_given?
56
57
  end
57
58
  rescue => error
@@ -101,71 +102,6 @@ module Spix
101
102
  def feed?
102
103
  %w[rss feed].include? @document.root.name
103
104
  end
104
-
105
- def favicon
106
- shortcut_in_document or shortcut_from_original_page or shortcut_from(base_uri) or default_favico_if_exist
107
- end
108
-
109
- def shortcut_in_document
110
- shortcuts = find_shortcut_in @document
111
- shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
112
- end
113
- private :shortcut_in_document
114
-
115
- def shortcut_from_original_page
116
- if feed?
117
- if node = @document.search('link').first
118
- path = URI.parse node.content.strip
119
- shortcut_from URI.parse path.select(:scheme, :host).join("://") rescue nil
120
- end
121
- end
122
- end
123
- private :shortcut_from_original_page
124
-
125
- def shortcut_from base_uri
126
- doc = get base_uri
127
- shortcuts = find_shortcut_in doc
128
- shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
129
-
130
- rescue Net::HTTPError, Net::HTTPFatalError
131
- logger.warn "error opening favicon: #{$!}"
132
- nil
133
- end
134
- private :shortcut_from
135
-
136
- def find_shortcut_in doc
137
- doc.xpath(
138
- '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
139
- '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
140
- ).map { |node| node.get_attribute "href" }
141
- end
142
-
143
- def default_favico_if_exist
144
- http = Net::HTTP.new base_uri.host, base_uri.port
145
- resp = http.request_head 'favicon.ico'
146
- base_uri.merge('favicon.ico').to_s unless resp.kind_of? Net::HTTPError
147
- rescue
148
- nil
149
- end
150
- private :default_favico_if_exist
151
-
152
- def get uri
153
- resp = Net::HTTP.get_response uri
154
- doc = Nokogiri::HTML(resp.body)
155
- if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
156
- path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
157
- get URI.parse path
158
- else
159
- doc
160
- end
161
- rescue
162
- Nokogiri::HTML('')
163
- end
164
-
165
- def base_uri
166
- @base_uri ||= URI.parse @uri.select(:scheme, :host).join("://")
167
- end
168
- private :base_uri
169
105
 
170
106
  end
171
107
  end
@@ -2,9 +2,9 @@ module Spix
2
2
  module FeedDiscovery
3
3
  class Feed < Hash
4
4
 
5
- def initialize url, favicon
5
+ def initialize url
6
6
  self.url = url.to_s
7
- self.favicon = favicon
7
+ self.favicon = get_favicon
8
8
  self.title = get_title
9
9
  end
10
10
 
@@ -24,13 +24,61 @@ module Spix
24
24
  end
25
25
  private :get_title
26
26
 
27
+ def get_favicon
28
+ if node = content.search('link').first
29
+ path = URI.parse node.content.strip
30
+ shortcut_from URI.parse path.select(:scheme, :host).join("://") rescue nil
31
+ end
32
+ end
33
+
34
+ def shortcut_from base_uri
35
+ doc = get base_uri
36
+ shortcuts = find_shortcut_in doc
37
+ shortcuts.any? ? base_uri.merge(shortcuts.first.to_s).to_s : nil
38
+
39
+ rescue Net::HTTPError, Net::HTTPFatalError
40
+ logger.warn "error opening favicon: #{$!}"
41
+ nil
42
+ end
43
+ private :shortcut_from
44
+
45
+ def find_shortcut_in doc
46
+ doc.xpath(
47
+ '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]',
48
+ '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
49
+ ).map { |node| node.get_attribute "href" }
50
+ end
51
+
52
+ def get uri
53
+ resp = Net::HTTP.get_response uri
54
+ doc = Nokogiri::HTML(resp.body)
55
+ if resp.kind_of?(Net::HTTPRedirection) or (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
56
+ path = resp['location'] || refresh_metatags.first.get_attribute('content')[/http:\/\/.*/]
57
+ get URI.parse path
58
+ else
59
+ doc
60
+ end
61
+ rescue
62
+ Nokogiri::HTML('')
63
+ end
64
+
65
+ def base_uri
66
+ @base_uri ||= URI.parse uri.select(:scheme, :host).join("://")
67
+ end
68
+ private :base_uri
69
+
27
70
  def content
71
+ @content ||= load_content
72
+ end
73
+ private :content
74
+
75
+ def load_content
28
76
  req = Net::HTTP.new uri.host, uri.port
29
77
  path = uri - uri.select(:scheme, :host).join("://")
30
78
  resp = req.request_get path.to_s
31
79
  Nokogiri::XML(resp.body)
32
80
  end
33
- private :content
81
+ private :load_content
34
82
 
35
83
  def uri
36
84
  @uri ||= URI.parse url
@@ -4,7 +4,7 @@ module Spix
4
4
  module Version
5
5
  MAJOR = 1
6
6
  MINOR = 6
7
- TINY = 4
7
+ TINY = 5
8
8
 
9
9
  def self.current_version
10
10
  "#{MAJOR}.#{MINOR}.#{TINY}"
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: spix_parser
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.6.4
5
+ version: 1.6.5
6
6
  platform: ruby
7
7
  authors:
8
8
  - Marcio Lopes de Faria