ficon 0.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 97c762adf3cdb126d047b66c426c3f388a3a8c61e2c6c009cff2f0dca3c7f219
4
- data.tar.gz: 53a177388f199f7133991f1067ad1a71d747c5d84c99b88d379d7878a3196eb5
3
+ metadata.gz: 4e8577eb04ba2dceefb974b436520ff73b8c37f6beb04c85ac62f02e774bbc94
4
+ data.tar.gz: 4196a3eb41905e40f0285eb132b3ae4d9e9c2c00f002584f6dc0b667893842db
5
5
  SHA512:
6
- metadata.gz: 2af1238cf43c03e84081ddfeeb69a545613e0348b33dab4aa3670d8e152e9ce4089f9b8c892764ecb143cf2aaa90b6afa4fd7b844bb611e98a8b18b27ce161b6
7
- data.tar.gz: 6eac859ed1529c3f22fcdaf5634a8e66ae1c625da7fbc0a1bce80afe53fc2a2a15145ac827cf65de414e68b7ea86be6cf25b808102606ab44f77def6a20124c4
6
+ metadata.gz: afbea646a672f8654bd8ba76a628c373ca1c40dbf051af8937656f755f65872031a6fc88e6a223914469a5baf10e303fc47e770ddb3abbf70305c1e08ebe1714
7
+ data.tar.gz: b08f3e0d30b5f93f503d03b5222938ab37e7c5fb423b5cbcf2df3fe217ed791aec4dc8cb8a32e540e41cd9bf06d7ea62b3bc062b159aa64452caa992c069de29
data/lib/ficon/cache.rb CHANGED
@@ -40,6 +40,33 @@ class Ficon
40
40
  db.execute("UPDATE urls SET not_before=? WHERE url=?", [_value, @url])
41
41
  end
42
42
 
43
# Reads the stored status column for this entry's URL.
#
# @return [Object, nil] first column of the first matching row, or nil when
#   the query yields no rows
def status
  row = db.execute("select status from urls where url=? limit 1", @url).first
  row&.first
end
46
+
47
# Writes the status column for this entry's URL.
#
# Issues an INSERT OR IGNORE followed by an UPDATE, so the value is stored
# whether or not a row for the URL already existed.
# NOTE(review): presumably relies on a uniqueness constraint on `url` for the
# INSERT to be ignored on existing rows — confirm against the schema.
#
# @param _value [Object] value to store in the status column
def status=(_value)
  insert_sql = "INSERT OR IGNORE INTO urls (url, status) VALUES (?, ?)"
  update_sql = "UPDATE urls SET status=? WHERE url=?"
  db.execute(insert_sql, [@url, _value])
  db.execute(update_sql, [_value, @url])
end
51
+
52
# Reads the stored retry_count column for this entry's URL.
#
# @return [Object] the stored value, or 0 when there is no row or the stored
#   value is nil
def retry_count
  row = db.execute("select retry_count from urls where url=? limit 1", @url).first
  row&.first || 0
end
55
+
56
# Writes the retry_count column for this entry's URL.
#
# Issues an INSERT OR IGNORE followed by an UPDATE, so the value is stored
# whether or not a row for the URL already existed.
#
# @param _value [Object] value to store in the retry_count column
def retry_count=(_value)
  insert_sql = "INSERT OR IGNORE INTO urls (url, retry_count) VALUES (?, ?)"
  update_sql = "UPDATE urls SET retry_count=? WHERE url=?"
  db.execute(insert_sql, [@url, _value])
  db.execute(update_sql, [_value, @url])
end
60
+
61
# Reads the stored last_attempt column for this entry's URL.
#
# @return [Object, nil] first column of the first matching row, or nil when
#   the query yields no rows
def last_attempt
  row = db.execute("select last_attempt from urls where url=? limit 1", @url).first
  row&.first
end
64
+
65
# Writes the last_attempt column for this entry's URL.
#
# Issues an INSERT OR IGNORE followed by an UPDATE, so the value is stored
# whether or not a row for the URL already existed.
#
# @param _value [Object] value to store in the last_attempt column
def last_attempt=(_value)
  insert_sql = "INSERT OR IGNORE INTO urls (url, last_attempt) VALUES (?, ?)"
  update_sql = "UPDATE urls SET last_attempt=? WHERE url=?"
  db.execute(insert_sql, [@url, _value])
  db.execute(update_sql, [_value, @url])
end
69
+
43
70
  def self.db_file
44
71
  if ENV["FICON_DB"].nil?
45
72
  File.expand_path("~/.ficon.db")
@@ -49,8 +76,12 @@ class Ficon
49
76
  end
50
77
 
51
78
  def self.setup_cache(db)
52
- db.execute("CREATE TABLE urls(url, etag, not_before, data)")
79
+ db.execute("CREATE TABLE urls(url, etag, not_before, data, status, retry_count, last_attempt)")
53
80
  db.execute("CREATE UNIQUE INDEX `url` ON `urls` (`url`)")
54
81
  end
82
+
83
# Removes the cache database file if it exists.
#
# Deletes directly and treats "file not found" as success, rather than
# checking File.exist? first: the check-then-delete sequence could raise
# Errno::ENOENT if the file vanished between the two calls, and it evaluated
# db_file twice.
#
# @return [Integer, nil] 1 when a file was deleted, nil when there was none
def self.clear_cache
  File.delete(db_file)
rescue Errno::ENOENT
  # Nothing to clear.
  nil
end
55
86
  end
56
87
  end
data/lib/ficon/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Ficon
2
- VERSION = "0.2"
2
+ VERSION = "0.4"
3
3
  end
data/lib/ficon.rb CHANGED
@@ -2,6 +2,7 @@ require "net/http"
2
2
  require "nokogiri"
3
3
  require "uri"
4
4
  require "addressable/uri"
5
+ require "resolv"
5
6
  require "debug"
6
7
 
7
8
  require_relative "ficon/version"
@@ -9,25 +10,35 @@ require_relative "ficon/image"
9
10
  require_relative "ficon/cache"
10
11
 
11
12
  class Ficon
12
- attr_reader :site
13
+ attr_reader :site, :final_uri, :url_status
13
14
  attr_accessor :user_agent
15
+
16
# URL health status constants (frozen so the shared strings cannot be
# mutated through @url_status).
#   ALIVE   - 2xx response
#   DEAD    - 404/410 response, DNS/socket resolution failure, or redirect limit hit
#   SICK    - any other response code or exception; treated as retryable
#   BLOCKED - 401/403/429 response
ALIVE = 'alive'.freeze
DEAD = 'dead'.freeze
SICK = 'sick'.freeze
BLOCKED = 'blocked'.freeze
14
21
 
15
22
  def initialize(uri, user_agent: nil)
16
23
  @uri = Addressable::URI.heuristic_parse(uri)
24
+ @final_uri = @uri
17
25
  @site = {}
18
- @user_agent = user_agent || "Ficon/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"
26
+ @url_status = nil
27
+ @user_agent = user_agent || "FiconBot/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"
19
28
  process
20
29
  end
21
30
 
22
31
  def doc
23
- cache = Cache.new(@uri)
32
+ # First try to fetch to determine final URL
33
+ response = fetch_url(@uri) unless @data
34
+ return nil if response.nil? && @data.nil?
24
35
 
25
- @data ||= cache.data
36
+ # Use final URL for caching
37
+ cache = Cache.new(@final_uri)
26
38
 
27
- if @data.nil?
28
- response = fetch_url(@uri)
29
- return nil unless response
39
+ @data ||= cache.data
30
40
 
41
+ if @data.nil? && response
31
42
  @data = response.body.force_encoding("UTF-8")
32
43
  cache.data = @data
33
44
  cache.etag = response["etag"] if response["etag"]
@@ -60,29 +71,31 @@ class Ficon
60
71
  end
61
72
 
62
73
  def report
63
- <<~REPORT
64
- Site icon: #{@site[:images].first}
65
- Page icon: #{@site[:page_images].first}
66
- Page title: #{@site[:title]}
67
- Page description: #{@site[:description]}
68
- Canonical URL: #{@site[:canonical]}
69
- REPORT
74
+ report_lines = []
75
+ report_lines << "Site icon: #{@site[:images].first}"
76
+ report_lines << "Page icon: #{@site[:page_images].first}"
77
+ report_lines << "Page title: #{@site[:title]}"
78
+ report_lines << "Page description: #{@site[:description]}"
79
+ report_lines << "Final URL: #{@final_uri}" if @final_uri.to_s != @uri.to_s
80
+ report_lines << "Canonical URL: #{@site[:canonical]}" if @site[:canonical]
81
+ report_lines << "URL Status: #{@url_status}" if @url_status
82
+ report_lines.join("\n") + "\n"
70
83
  end
71
84
 
72
- def site_icons
73
- @site[:images]
74
- end
85
# Returns the value stored under :images in @site (nil when unset).
def site_icons
  @site[:images]
end
75
86
 
76
- def page_images
77
- @site[:page_images]
78
- end
87
# Returns the first entry of #site_icons, or nil when #site_icons is nil.
def site_icon
  icons = site_icons
  icons&.first
end
79
88
 
80
- def title
81
- @site[:title]
82
- end
89
# Returns the value stored under :page_images in @site (nil when unset).
def page_images
  @site[:page_images]
end
83
90
 
84
- def description
85
- @site[:description]
91
# Returns the first entry of #page_images, or nil when #page_images is nil.
def page_image
  images = page_images
  images&.first
end
92
+
93
# Returns the value stored under :title in @site (nil when unset).
def title
  @site[:title]
end
94
+
95
# Returns the value stored under :description in @site (nil when unset).
def description
  @site[:description]
end
96
+
97
# Class-level convenience that forwards to Cache.clear_cache and returns its
# result.
def self.clear_cache = Cache.clear_cache
87
100
 
88
101
  def other_page_data
@@ -126,18 +139,69 @@ class Ficon
126
139
 
127
140
  private
128
141
 
129
# Performs an HTTP GET for +uri+, following redirects.
#
# Side effects:
# * @url_status is set from each response (via classify_response_status),
#   from any rescued exception (via classify_exception_status), or to DEAD
#   when the redirect limit is exhausted.
# * @final_uri is set to the redirect target while following a redirect and
#   to the requested URI for a non-redirect response.
#
# @param uri [URI, String] address to fetch; non-URI values go through URI()
# @param redirect_limit [Integer] how many more requests may be issued
# @return [Net::HTTPResponse, nil] the last response received, or nil when
#   the request failed or the redirect limit was reached
def fetch_url(uri, redirect_limit = 5)
  uri = URI(uri) unless uri.is_a?(URI)

  if redirect_limit <= 0
    # Report and bail out directly. Raising here would be caught by this
    # method's own rescue, which re-classifies the failure and overwrites
    # DEAD with the generic exception status.
    @url_status = DEAD
    puts "Failed to fetch #{uri}: Too many redirects"
    return nil
  end

  request = Net::HTTP::Get.new(uri)
  request["User-Agent"] = @user_agent

  # Timeouts are passed as start options: the connection is opened before
  # the block is yielded, so assigning open_timeout inside the block would
  # be too late to affect the connect.
  response = Net::HTTP.start(
    uri.host, uri.port,
    use_ssl: uri.scheme == "https", open_timeout: 5, read_timeout: 10
  ) { |http| http.request(request) }

  @url_status = classify_response_status(response)

  unless response.is_a?(Net::HTTPRedirection)
    @final_uri = Addressable::URI.parse(uri.to_s)
    return response
  end

  location = response["location"]
  # A redirect response without a Location header cannot be followed;
  # hand it back as-is and leave @final_uri untouched.
  return response unless location

  # Follow the redirect only after the connection above has been closed, so
  # a redirect chain does not hold one open connection per hop.
  new_uri = URI.join(uri.to_s, location)
  @final_uri = Addressable::URI.parse(new_uri.to_s)
  fetch_url(new_uri, redirect_limit - 1)
rescue => e
  @url_status = classify_exception_status(e)
  puts "Failed to fetch #{uri}: #{e.inspect}"
  nil
end
179
+
180
# Maps an HTTP response to one of the URL health status constants based on
# its numeric status code.
#
# @param response [#code] response whose code is converted with to_i
# @return [String] ALIVE for 2xx, DEAD for 404/410, BLOCKED for 401/403/429,
#   SICK for everything else (including all 5xx codes)
def classify_response_status(response)
  case response.code.to_i
  when 200..299 then ALIVE
  when 404, 410 then DEAD
  when 401, 403, 429 then BLOCKED
  else SICK # 5xx and any code not listed above
  end
end
194
+
195
# Maps an exception raised while fetching to a URL health status constant.
#
# Name-resolution failures are DEAD; everything else (HTTP errors, timeouts,
# refused connections, SSL errors, and any unknown error) is SICK, i.e.
# treated as retryable.
#
# The resolver error class is Resolv::ResolvError. The previous
# `Resolv::ResolutionError` is not a constant provided by the resolv library,
# so evaluating that `when` clause for any non-SocketError exception would
# itself raise NameError.
#
# @param exception [Exception] the rescued error
# @return [String] DEAD or SICK
def classify_exception_status(exception)
  case exception
  when SocketError, Resolv::ResolvError
    DEAD # DNS resolution failures
  else
    SICK # Network/SSL/unknown errors: worth retrying
  end
end
143
207
  end
data/test/ficon_test.rb CHANGED
@@ -58,7 +58,7 @@ class FiconTest < Minitest::Test
58
58
  def test_custom_user_agent
59
59
  # Test default user agent
60
60
  ficon_default = Ficon.new('https://example.com')
61
- assert_match(/^Ficon\/0\.2/, ficon_default.user_agent)
61
+ assert_match(/^FiconBot\/0\.\d+/, ficon_default.user_agent)
62
62
 
63
63
  # Test custom user agent
64
64
  custom_agent = 'MyApp/1.0 (Custom Bot)'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ficon
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Milne