ficon 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ficon/version.rb +1 -1
- data/lib/ficon.rb +44 -42
- data/test/ficon_test.rb +50 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9f5f2c6b36b5360a88f4eac14c07c96f7fefb9674fb6bed0bdf4a7be63ad2d0
|
4
|
+
data.tar.gz: f85066461f2384ed8e2ed3d302e5e3ffe537178dfaf40c128e056dd16c1e3027
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ac7f80ea66b97249dccbfcdd4c7adf82b741125c23dbd92339346fb63e772bcfcea893279cca1b05b100946d567ee4c82bdfc9ba85dcd4fd10c881bef987ae9
|
7
|
+
data.tar.gz: 8c4421c19189b779f19aca1699521e600c25f4a17807dc22a546913a28e0d230263642d8610ef68f81d548f66be4cff1654dc9a12a644319c281432555428b4e
|
data/lib/ficon/version.rb
CHANGED
data/lib/ficon.rb
CHANGED
@@ -64,9 +64,15 @@ class Ficon
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def process
|
67
|
-
|
68
|
-
|
69
|
-
|
67
|
+
document = doc
|
68
|
+
if document
|
69
|
+
@site[:images] = self.class.site_images(@uri, document) || []
|
70
|
+
@site[:page_images] = self.class.page_images(@uri, document) || []
|
71
|
+
other_page_data(document)
|
72
|
+
else
|
73
|
+
@site[:images] = []
|
74
|
+
@site[:page_images] = []
|
75
|
+
end
|
70
76
|
nil
|
71
77
|
end
|
72
78
|
|
@@ -82,13 +88,9 @@ class Ficon
|
|
82
88
|
report_lines.join("\n") + "\n"
|
83
89
|
end
|
84
90
|
|
85
|
-
def site_icons = @site[:images]
|
86
|
-
|
87
|
-
def site_icon = site_icons&.first
|
88
|
-
|
89
|
-
def page_images = @site[:page_images]
|
91
|
+
def site_icons = @site[:images] || []
|
90
92
|
|
91
|
-
def
|
93
|
+
def page_images = @site[:page_images] || []
|
92
94
|
|
93
95
|
def title = @site[:title]
|
94
96
|
|
@@ -98,11 +100,11 @@ class Ficon
|
|
98
100
|
Cache.clear_cache
|
99
101
|
end
|
100
102
|
|
101
|
-
def other_page_data
|
102
|
-
@site[:title] =
|
103
|
-
@site[:description] =
|
104
|
-
canonical =
|
105
|
-
@site[:canonical] = canonical unless canonical == @
|
103
|
+
def other_page_data(document)
|
104
|
+
@site[:title] = document.at_xpath("//meta[@property='og:title']/@content")&.value || document.at_xpath("//title")&.text&.strip
|
105
|
+
@site[:description] = document.at_xpath("//meta[@property='og:description']/@content")&.value
|
106
|
+
canonical = document.at_xpath("//link[@rel='canonical']/@href")&.value
|
107
|
+
@site[:canonical] = canonical unless canonical == @uri.to_s
|
106
108
|
end
|
107
109
|
|
108
110
|
def self.site_images(uri, doc)
|
@@ -137,6 +139,34 @@ class Ficon
|
|
137
139
|
parsed_candidate.to_s
|
138
140
|
end
|
139
141
|
|
142
|
+
def classify_response_status(response)
|
143
|
+
case response.code.to_i
|
144
|
+
when 200..299
|
145
|
+
ALIVE
|
146
|
+
when 404, 410
|
147
|
+
DEAD
|
148
|
+
when 401, 403, 429
|
149
|
+
BLOCKED
|
150
|
+
when 500..599
|
151
|
+
SICK
|
152
|
+
else
|
153
|
+
SICK
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
def classify_exception_status(exception)
|
158
|
+
case exception
|
159
|
+
when SocketError, Resolv::ResolvError
|
160
|
+
DEAD # DNS resolution failures
|
161
|
+
when Net::HTTPError, Timeout::Error, Errno::ECONNREFUSED
|
162
|
+
SICK # Network issues worth retrying
|
163
|
+
when OpenSSL::SSL::SSLError
|
164
|
+
SICK # SSL certificate errors
|
165
|
+
else
|
166
|
+
SICK # Default to retryable for unknown errors
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
140
170
|
private
|
141
171
|
|
142
172
|
def fetch_url(uri, redirect_limit = 5)
|
@@ -176,32 +206,4 @@ class Ficon
|
|
176
206
|
puts "Failed to fetch #{uri}: #{e.inspect}"
|
177
207
|
nil
|
178
208
|
end
|
179
|
-
|
180
|
-
def classify_response_status(response)
|
181
|
-
case response.code.to_i
|
182
|
-
when 200..299
|
183
|
-
ALIVE
|
184
|
-
when 404, 410
|
185
|
-
DEAD
|
186
|
-
when 401, 403, 429
|
187
|
-
BLOCKED
|
188
|
-
when 500..599
|
189
|
-
SICK
|
190
|
-
else
|
191
|
-
SICK
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
def classify_exception_status(exception)
|
196
|
-
case exception
|
197
|
-
when SocketError, Resolv::ResolutionError
|
198
|
-
DEAD # DNS resolution failures
|
199
|
-
when Net::HTTPError, Timeout::Error, Errno::ECONNREFUSED
|
200
|
-
SICK # Network issues worth retrying
|
201
|
-
when OpenSSL::SSL::SSLError
|
202
|
-
SICK # SSL certificate errors
|
203
|
-
else
|
204
|
-
SICK # Default to retryable for unknown errors
|
205
|
-
end
|
206
|
-
end
|
207
209
|
end
|
data/test/ficon_test.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#require 'rubygems'
|
2
2
|
require 'debug'
|
3
|
+
require 'resolv'
|
3
4
|
|
4
5
|
require "minitest/autorun"
|
5
6
|
|
@@ -69,4 +70,53 @@ class FiconTest < Minitest::Test
|
|
69
70
|
ficon_custom.user_agent = 'Changed/2.0'
|
70
71
|
assert_equal 'Changed/2.0', ficon_custom.user_agent
|
71
72
|
end
|
73
|
+
|
74
|
+
def test_response_status_classification
|
75
|
+
ficon = Ficon.new('https://example.com')
|
76
|
+
|
77
|
+
# Test ALIVE status (2xx)
|
78
|
+
assert_equal Ficon::ALIVE, ficon.classify_response_status(mock_response(200))
|
79
|
+
assert_equal Ficon::ALIVE, ficon.classify_response_status(mock_response(201))
|
80
|
+
assert_equal Ficon::ALIVE, ficon.classify_response_status(mock_response(299))
|
81
|
+
|
82
|
+
# Test DEAD status (404, 410)
|
83
|
+
assert_equal Ficon::DEAD, ficon.classify_response_status(mock_response(404))
|
84
|
+
assert_equal Ficon::DEAD, ficon.classify_response_status(mock_response(410))
|
85
|
+
|
86
|
+
# Test BLOCKED status (401, 403, 429)
|
87
|
+
assert_equal Ficon::BLOCKED, ficon.classify_response_status(mock_response(401))
|
88
|
+
assert_equal Ficon::BLOCKED, ficon.classify_response_status(mock_response(403))
|
89
|
+
assert_equal Ficon::BLOCKED, ficon.classify_response_status(mock_response(429))
|
90
|
+
|
91
|
+
# Test SICK status (5xx and others)
|
92
|
+
assert_equal Ficon::SICK, ficon.classify_response_status(mock_response(500))
|
93
|
+
assert_equal Ficon::SICK, ficon.classify_response_status(mock_response(502))
|
94
|
+
assert_equal Ficon::SICK, ficon.classify_response_status(mock_response(503))
|
95
|
+
assert_equal Ficon::SICK, ficon.classify_response_status(mock_response(300)) # Other codes default to SICK
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_exception_status_classification
|
99
|
+
ficon = Ficon.new('https://example.com')
|
100
|
+
|
101
|
+
# Test DEAD status (DNS and resolution errors)
|
102
|
+
assert_equal Ficon::DEAD, ficon.classify_exception_status(SocketError.new)
|
103
|
+
assert_equal Ficon::DEAD, ficon.classify_exception_status(Resolv::ResolvError.new)
|
104
|
+
|
105
|
+
# Test SICK status (network and timeout errors)
|
106
|
+
assert_equal Ficon::SICK, ficon.classify_exception_status(Timeout::Error.new)
|
107
|
+
assert_equal Ficon::SICK, ficon.classify_exception_status(Errno::ECONNREFUSED.new)
|
108
|
+
assert_equal Ficon::SICK, ficon.classify_exception_status(OpenSSL::SSL::SSLError.new)
|
109
|
+
assert_equal Ficon::SICK, ficon.classify_exception_status(Net::HTTPError.new('error', nil))
|
110
|
+
|
111
|
+
# Test default to SICK for unknown exceptions
|
112
|
+
assert_equal Ficon::SICK, ficon.classify_exception_status(StandardError.new)
|
113
|
+
end
|
114
|
+
|
115
|
+
private
|
116
|
+
|
117
|
+
def mock_response(code)
|
118
|
+
response = Object.new
|
119
|
+
response.define_singleton_method(:code) { code }
|
120
|
+
response
|
121
|
+
end
|
72
122
|
end
|