ficon 0.6 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ficon/version.rb +1 -1
- data/lib/ficon.rb +48 -39
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 84e88d986bdad619358041cc478cf34418278ebdf74cb6ef12df7b653734d9dd
|
|
4
|
+
data.tar.gz: 1ed0fbcd8c01895246099fea8e26f35e358e9f4a46ecc5996add3ed6f8d193d6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 00d5013559e4a203ced2c780582a6f705a3883966d59c4f28c4a5bd69aac9a9a425cd3420adca17dec86993cca81825e80ba40ca1c4d633c40423c6c659bf4df
|
|
7
|
+
data.tar.gz: bb7311ea20869d3394789da06cfb759a64713dfc82eadb419e55561b98c49fac3797b741d8d4ab529fbae63cfbc6ed58ae22fa759404410d027f2a506a8538a2
|
data/lib/ficon/version.rb
CHANGED
data/lib/ficon.rb
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
3
|
-
require
|
|
4
|
-
require
|
|
5
|
-
require
|
|
6
|
-
require "debug"
|
|
1
|
+
require 'net/http'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
require 'uri'
|
|
4
|
+
require 'addressable/uri'
|
|
5
|
+
require 'resolv'
|
|
7
6
|
|
|
8
|
-
require_relative
|
|
9
|
-
require_relative
|
|
10
|
-
require_relative
|
|
7
|
+
require_relative 'ficon/version'
|
|
8
|
+
require_relative 'ficon/image'
|
|
9
|
+
require_relative 'ficon/cache'
|
|
11
10
|
|
|
12
11
|
class Ficon
|
|
13
12
|
attr_reader :site, :final_uri, :url_status
|
|
14
13
|
attr_accessor :user_agent
|
|
15
|
-
|
|
14
|
+
|
|
16
15
|
# URL health status constants
|
|
17
16
|
ALIVE = 'alive'
|
|
18
17
|
DEAD = 'dead'
|
|
@@ -39,10 +38,10 @@ class Ficon
|
|
|
39
38
|
@data ||= cache.data
|
|
40
39
|
|
|
41
40
|
if @data.nil? && response
|
|
42
|
-
@data = response.body.force_encoding(
|
|
41
|
+
@data = response.body.force_encoding('UTF-8')
|
|
43
42
|
cache.data = @data
|
|
44
|
-
cache.etag = response[
|
|
45
|
-
cache.not_before = response[
|
|
43
|
+
cache.etag = response['etag'] if response['etag']
|
|
44
|
+
cache.not_before = response['last-modified'] if response['last-modified']
|
|
46
45
|
end
|
|
47
46
|
|
|
48
47
|
@doc ||= Nokogiri::HTML(@data)
|
|
@@ -55,7 +54,7 @@ class Ficon
|
|
|
55
54
|
puts "#{e.inspect}"
|
|
56
55
|
puts "#{e.backtrace.join('\n')}"
|
|
57
56
|
else
|
|
58
|
-
puts
|
|
57
|
+
puts 'Please prepend http:// or https:// to the URL'
|
|
59
58
|
end
|
|
60
59
|
nil
|
|
61
60
|
rescue RuntimeError => e
|
|
@@ -101,7 +100,8 @@ class Ficon
|
|
|
101
100
|
end
|
|
102
101
|
|
|
103
102
|
def other_page_data(document)
|
|
104
|
-
@site[:title] =
|
|
103
|
+
@site[:title] =
|
|
104
|
+
document.at_xpath("//meta[@property='og:title']/@content")&.value || document.at_xpath('//title')&.text&.strip
|
|
105
105
|
@site[:description] = document.at_xpath("//meta[@property='og:description']/@content")&.value
|
|
106
106
|
canonical = document.at_xpath("//link[@rel='canonical']/@href")&.value
|
|
107
107
|
@site[:canonical] = canonical unless canonical == @uri.to_s
|
|
@@ -114,27 +114,36 @@ class Ficon
|
|
|
114
114
|
tile_color = doc.at_xpath("//meta[@name='msapplication-TileColor']/@content")&.value
|
|
115
115
|
|
|
116
116
|
paths = "//meta[@name='msapplication-TileImage']|//link[@type='image/ico' or @type='image/vnd.microsoft.icon']|//link[@rel='icon' or @rel='shortcut icon' or @rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon']"
|
|
117
|
-
results += doc.xpath(paths).collect
|
|
117
|
+
results += doc.xpath(paths).collect do |e|
|
|
118
|
+
e.values.select do |v|
|
|
119
|
+
v =~ /\.png$|\.jpg$|\.gif$|\.ico$|\.svg$|\.ico\?\d*$/
|
|
120
|
+
end
|
|
121
|
+
end.flatten.collect { |v| v[/^http/] || v[%r{^/}] ? v : '/' + v }
|
|
118
122
|
|
|
119
123
|
results.collect { |result| normalise(uri, result) }.uniq.collect do |url|
|
|
120
124
|
# Check if this is a tile image to pass the color
|
|
121
|
-
is_tile = doc.at_xpath("//meta[@name='msapplication-TileImage' and @content='#{url}' or @content='#{url.sub(
|
|
125
|
+
is_tile = doc.at_xpath("//meta[@name='msapplication-TileImage' and @content='#{url}' or @content='#{url.sub(
|
|
126
|
+
uri.to_s, ''
|
|
127
|
+
)}']")
|
|
122
128
|
Image.new(url, is_tile ? tile_color : nil)
|
|
123
129
|
end.sort_by(&:area).reverse
|
|
124
130
|
end
|
|
125
131
|
|
|
126
132
|
def self.page_images(uri, doc)
|
|
127
133
|
doc.xpath("//meta[@property='og:image']")
|
|
128
|
-
|
|
129
|
-
|
|
134
|
+
.collect { |e| e.values.reject(&:empty?) }.flatten
|
|
135
|
+
.collect { |v| v[/^http/] || v[%r{^/}] ? v : '/' + v }.collect do |result|
|
|
136
|
+
normalise(uri,
|
|
137
|
+
result)
|
|
138
|
+
end.uniq.collect { |i| Image.new(i) }.sort_by(&:area).reverse
|
|
130
139
|
end
|
|
131
140
|
|
|
132
141
|
def self.normalise(base, candidate)
|
|
133
142
|
parsed_candidate = URI(candidate)
|
|
134
143
|
base = URI(base) unless base.is_a? URI
|
|
135
144
|
|
|
136
|
-
parsed_candidate.host = base.host if parsed_candidate.host.nil?
|
|
137
|
-
parsed_candidate.scheme = base.scheme if parsed_candidate.scheme.nil?
|
|
145
|
+
parsed_candidate.host = base.host if parsed_candidate.host.nil? # Set relative URLs to absolute
|
|
146
|
+
parsed_candidate.scheme = base.scheme if parsed_candidate.scheme.nil? # Set the schema if missing
|
|
138
147
|
|
|
139
148
|
parsed_candidate.to_s
|
|
140
149
|
end
|
|
@@ -171,25 +180,25 @@ class Ficon
|
|
|
171
180
|
|
|
172
181
|
def fetch_url(uri, redirect_limit = 5)
|
|
173
182
|
uri = URI(uri) unless uri.is_a?(URI)
|
|
174
|
-
|
|
183
|
+
|
|
175
184
|
if redirect_limit <= 0
|
|
176
185
|
@url_status = DEAD
|
|
177
|
-
raise
|
|
186
|
+
raise 'Too many redirects'
|
|
178
187
|
end
|
|
179
188
|
|
|
180
|
-
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme ==
|
|
189
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
|
|
181
190
|
http.read_timeout = 10
|
|
182
191
|
http.open_timeout = 5
|
|
183
192
|
request = Net::HTTP::Get.new(uri)
|
|
184
|
-
request[
|
|
193
|
+
request['User-Agent'] = @user_agent
|
|
185
194
|
response = http.request(request)
|
|
186
|
-
|
|
195
|
+
|
|
187
196
|
# Set status based on response
|
|
188
197
|
@url_status = classify_response_status(response)
|
|
189
|
-
|
|
198
|
+
|
|
190
199
|
case response
|
|
191
200
|
when Net::HTTPRedirection
|
|
192
|
-
location = response[
|
|
201
|
+
location = response['location']
|
|
193
202
|
if location
|
|
194
203
|
new_uri = URI.join(uri.to_s, location)
|
|
195
204
|
@final_uri = Addressable::URI.parse(new_uri.to_s)
|
|
@@ -198,32 +207,32 @@ class Ficon
|
|
|
198
207
|
else
|
|
199
208
|
@final_uri = Addressable::URI.parse(uri.to_s)
|
|
200
209
|
end
|
|
201
|
-
|
|
210
|
+
|
|
202
211
|
response
|
|
203
212
|
end
|
|
204
|
-
rescue => e
|
|
213
|
+
rescue StandardError => e
|
|
205
214
|
@url_status = classify_exception_status(e)
|
|
206
|
-
|
|
215
|
+
|
|
207
216
|
# If HTTP request failed and we're using HTTP, try HTTPS automatically
|
|
208
|
-
if uri.scheme ==
|
|
209
|
-
|
|
210
|
-
|
|
217
|
+
if uri.scheme == 'http' &&
|
|
218
|
+
!uri.to_s.include?('://localhost') &&
|
|
219
|
+
!uri.host.match?(/^\d+\.\d+\.\d+\.\d+$/)
|
|
211
220
|
puts "HTTP request failed, trying HTTPS for #{uri}"
|
|
212
221
|
https_uri = uri.dup
|
|
213
|
-
https_uri.scheme =
|
|
222
|
+
https_uri.scheme = 'https'
|
|
214
223
|
https_uri.port = 443 if https_uri.port == 80
|
|
215
|
-
|
|
224
|
+
|
|
216
225
|
begin
|
|
217
226
|
https_response = fetch_url(https_uri, redirect_limit)
|
|
218
227
|
if https_response
|
|
219
|
-
puts
|
|
228
|
+
puts 'HTTPS request succeeded, using HTTPS URL'
|
|
220
229
|
return https_response
|
|
221
230
|
end
|
|
222
|
-
rescue => https_error
|
|
231
|
+
rescue StandardError => https_error
|
|
223
232
|
puts "HTTPS fallback also failed: #{https_error.inspect}"
|
|
224
233
|
end
|
|
225
234
|
end
|
|
226
|
-
|
|
235
|
+
|
|
227
236
|
puts "Failed to fetch #{uri}: #{e.inspect}"
|
|
228
237
|
nil
|
|
229
238
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ficon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.
|
|
4
|
+
version: '0.7'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dan Milne
|
|
@@ -159,7 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
159
159
|
- !ruby/object:Gem::Version
|
|
160
160
|
version: '0'
|
|
161
161
|
requirements: []
|
|
162
|
-
rubygems_version:
|
|
162
|
+
rubygems_version: 4.0.3
|
|
163
163
|
specification_version: 4
|
|
164
164
|
summary: Find website icons
|
|
165
165
|
test_files:
|