ficon 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b300144548c846bb2d309ca04ca78198f9e37682e50f08ac2f60e95f54e84f5
4
- data.tar.gz: 90e5b7804b8e6c44e8c361c585e03a2553724322cebf84846c66c73c1652a234
3
+ metadata.gz: 4e8577eb04ba2dceefb974b436520ff73b8c37f6beb04c85ac62f02e774bbc94
4
+ data.tar.gz: 4196a3eb41905e40f0285eb132b3ae4d9e9c2c00f002584f6dc0b667893842db
5
5
  SHA512:
6
- metadata.gz: 6781246417b9d5cc5e1791acae5232dcd13ea9a934f0f977929fd57a5fa348dff96143b2acf327bd3ea7626e0eea65c92b585f4bd544c3fcd7764de76eee121b
7
- data.tar.gz: 0a2b83a50bdbc340e8877028f77f99de6ca23e9bdfa15b237f0d47390394ba3f9165ebfdf2a625bffdbebca40789cac8ce2cc3b6d9c6cf11c79ae2f23f8aca19
6
+ metadata.gz: afbea646a672f8654bd8ba76a628c373ca1c40dbf051af8937656f755f65872031a6fc88e6a223914469a5baf10e303fc47e770ddb3abbf70305c1e08ebe1714
7
+ data.tar.gz: b08f3e0d30b5f93f503d03b5222938ab37e7c5fb423b5cbcf2df3fe217ed791aec4dc8cb8a32e540e41cd9bf06d7ea62b3bc062b159aa64452caa992c069de29
data/lib/ficon/cache.rb CHANGED
@@ -40,6 +40,33 @@ class Ficon
40
40
  db.execute("UPDATE urls SET not_before=? WHERE url=?", [_value, @url])
41
41
  end
42
42
 
43
+ def status
44
+ db.execute("select status from urls where url=? limit 1", @url).first&.first
45
+ end
46
+
47
+ def status=(_value)
48
+ db.execute("INSERT OR IGNORE INTO urls (url, status) VALUES (?, ?)", [@url, _value])
49
+ db.execute("UPDATE urls SET status=? WHERE url=?", [_value, @url])
50
+ end
51
+
52
+ def retry_count
53
+ db.execute("select retry_count from urls where url=? limit 1", @url).first&.first || 0
54
+ end
55
+
56
+ def retry_count=(_value)
57
+ db.execute("INSERT OR IGNORE INTO urls (url, retry_count) VALUES (?, ?)", [@url, _value])
58
+ db.execute("UPDATE urls SET retry_count=? WHERE url=?", [_value, @url])
59
+ end
60
+
61
+ def last_attempt
62
+ db.execute("select last_attempt from urls where url=? limit 1", @url).first&.first
63
+ end
64
+
65
+ def last_attempt=(_value)
66
+ db.execute("INSERT OR IGNORE INTO urls (url, last_attempt) VALUES (?, ?)", [@url, _value])
67
+ db.execute("UPDATE urls SET last_attempt=? WHERE url=?", [_value, @url])
68
+ end
69
+
43
70
  def self.db_file
44
71
  if ENV["FICON_DB"].nil?
45
72
  File.expand_path("~/.ficon.db")
@@ -49,8 +76,12 @@ class Ficon
49
76
  end
50
77
 
51
78
  def self.setup_cache(db)
52
- db.execute("CREATE TABLE urls(url, etag, not_before, data)")
79
+ db.execute("CREATE TABLE urls(url, etag, not_before, data, status, retry_count, last_attempt)")
53
80
  db.execute("CREATE UNIQUE INDEX `url` ON `urls` (`url`)")
54
81
  end
82
+
83
+ def self.clear_cache
84
+ File.delete(db_file) if File.exist?(db_file)
85
+ end
55
86
  end
56
87
  end
data/lib/ficon/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Ficon
2
- VERSION = "0.3"
2
+ VERSION = "0.4"
3
3
  end
data/lib/ficon.rb CHANGED
@@ -2,6 +2,7 @@ require "net/http"
2
2
  require "nokogiri"
3
3
  require "uri"
4
4
  require "addressable/uri"
5
+ require "resolv"
5
6
  require "debug"
6
7
 
7
8
  require_relative "ficon/version"
@@ -9,14 +10,21 @@ require_relative "ficon/image"
9
10
  require_relative "ficon/cache"
10
11
 
11
12
  class Ficon
12
- attr_reader :site, :final_uri
13
+ attr_reader :site, :final_uri, :url_status
13
14
  attr_accessor :user_agent
15
+
16
+ # URL health status constants
17
+ ALIVE = 'alive'
18
+ DEAD = 'dead'
19
+ SICK = 'sick'
20
+ BLOCKED = 'blocked'
14
21
 
15
22
  def initialize(uri, user_agent: nil)
16
23
  @uri = Addressable::URI.heuristic_parse(uri)
17
24
  @final_uri = @uri
18
25
  @site = {}
19
- @user_agent = user_agent || "Ficon/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"
26
+ @url_status = nil
27
+ @user_agent = user_agent || "FiconBot/#{VERSION} (Ruby icon finder; https://github.com/dkam/ficon)"
20
28
  process
21
29
  end
22
30
 
@@ -70,6 +78,7 @@ class Ficon
70
78
  report_lines << "Page description: #{@site[:description]}"
71
79
  report_lines << "Final URL: #{@final_uri}" if @final_uri.to_s != @uri.to_s
72
80
  report_lines << "Canonical URL: #{@site[:canonical]}" if @site[:canonical]
81
+ report_lines << "URL Status: #{@url_status}" if @url_status
73
82
  report_lines.join("\n") + "\n"
74
83
  end
75
84
 
@@ -85,6 +94,10 @@ class Ficon
85
94
 
86
95
  def description = @site[:description]
87
96
 
97
+ def self.clear_cache
98
+ Cache.clear_cache
99
+ end
100
+
88
101
  def other_page_data
89
102
  @site[:title] = doc.at_xpath("//meta[@property='og:title']/@content")&.value || @doc.at_xpath("//title")&.text&.strip
90
103
  @site[:description] = doc.at_xpath("//meta[@property='og:description']/@content")&.value
@@ -128,8 +141,11 @@ class Ficon
128
141
 
129
142
  def fetch_url(uri, redirect_limit = 5)
130
143
  uri = URI(uri) unless uri.is_a?(URI)
131
-
132
- raise "Too many redirects" if redirect_limit <= 0
144
+
145
+ if redirect_limit <= 0
146
+ @url_status = DEAD
147
+ raise "Too many redirects"
148
+ end
133
149
 
134
150
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
135
151
  http.read_timeout = 10
@@ -137,7 +153,10 @@ class Ficon
137
153
  request = Net::HTTP::Get.new(uri)
138
154
  request["User-Agent"] = @user_agent
139
155
  response = http.request(request)
140
-
156
+
157
+ # Set status based on response
158
+ @url_status = classify_response_status(response)
159
+
141
160
  case response
142
161
  when Net::HTTPRedirection
143
162
  location = response["location"]
@@ -149,11 +168,40 @@ class Ficon
149
168
  else
150
169
  @final_uri = Addressable::URI.parse(uri.to_s)
151
170
  end
152
-
171
+
153
172
  response
154
173
  end
155
- rescue Net::HTTPError, SocketError, Timeout::Error => e
174
+ rescue => e
175
+ @url_status = classify_exception_status(e)
156
176
  puts "Failed to fetch #{uri}: #{e.inspect}"
157
177
  nil
158
178
  end
179
+
180
+ def classify_response_status(response)
181
+ case response.code.to_i
182
+ when 200..299
183
+ ALIVE
184
+ when 404, 410
185
+ DEAD
186
+ when 401, 403, 429
187
+ BLOCKED
188
+ when 500..599
189
+ SICK
190
+ else
191
+ SICK
192
+ end
193
+ end
194
+
195
+ def classify_exception_status(exception)
196
+ case exception
197
+ when SocketError, Resolv::ResolutionError
198
+ DEAD # DNS resolution failures
199
+ when Net::HTTPError, Timeout::Error, Errno::ECONNREFUSED
200
+ SICK # Network issues worth retrying
201
+ when OpenSSL::SSL::SSLError
202
+ SICK # SSL certificate errors
203
+ else
204
+ SICK # Default to retryable for unknown errors
205
+ end
206
+ end
159
207
  end
data/test/ficon_test.rb CHANGED
@@ -58,7 +58,7 @@ class FiconTest < Minitest::Test
58
58
  def test_custom_user_agent
59
59
  # Test default user agent
60
60
  ficon_default = Ficon.new('https://example.com')
61
- assert_match(/^Ficon\/0\.2/, ficon_default.user_agent)
61
+ assert_match(/^FiconBot\/0\.\d+/, ficon_default.user_agent)
62
62
 
63
63
  # Test custom user agent
64
64
  custom_agent = 'MyApp/1.0 (Custom Bot)'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ficon
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Milne