source_monitor 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vbw-planning/milestones/default/STATE.md +0 -1
- data/CHANGELOG.md +15 -0
- data/Gemfile.lock +1 -1
- data/VERSION +1 -1
- data/config/brakeman.ignore +17 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +22 -1
- data/lib/source_monitor/http/aia_resolver.rb +128 -0
- data/lib/source_monitor/http.rb +9 -6
- data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +29 -2
- data/lib/source_monitor/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4710a57d0da9bd7c73f4f7b3a48a4bbfd31fa1927cf79423c9e8955f4b433378
|
|
4
|
+
data.tar.gz: e0847be423ba3022b3c853902de2061ed7b5b476801dc9f83aefdb957df239f4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ccccf11d1adb294ca141cb4efb8459f1948ad339d09feb2b55140d7983fd04ffdb772d5a75cfc3b73c7ec40c085e80026c8791357837f8184fee59df06efe5d8
|
|
7
|
+
data.tar.gz: 56224653d622b47a309f36bda5b3860118e4743fbafee7d657e09b7f6876377631d17d9a118d803e9c37bed69724852f5706e2cbabff5ef438a58febf0231831
|
|
data/.vbw-planning/milestones/default/STATE.md
CHANGED
|
@@ -50,7 +50,6 @@ Progress: [##########] 100%
|
|
|
50
50
|
- [phase-4]: Fix-everything approach for public API convention violations
|
|
51
51
|
- [phase-4]: 3 files slightly exceed 300 lines (entry_parser 390, queries 356, application_helper 346) -- all single-responsibility, cannot be split further
|
|
52
52
|
|
|
53
|
-
### Pending Todos
|
|
54
53
|
|
|
55
54
|
None
|
|
56
55
|
|
data/CHANGELOG.md
CHANGED
|
@@ -15,6 +15,21 @@ All notable changes to this project are documented below. The format follows [Ke
|
|
|
15
15
|
|
|
16
16
|
- No unreleased changes yet.
|
|
17
17
|
|
|
18
|
+
## [0.6.0] - 2026-02-17
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
|
|
22
|
+
- AIA (Authority Information Access) certificate resolution for SSL failures. When feed fetching or scraping encounters `certificate verify failed` errors due to missing intermediate certificates, the engine now automatically fetches the missing intermediate via AIA URLs and retries the request. This fixes feeds hosted on servers with incomplete certificate chains (e.g., Medium/Netflix Tech Blog on AWS).
|
|
23
|
+
- `SourceMonitor::HTTP::AIAResolver` module with thread-safe hostname-keyed cache (1-hour TTL), SNI support, and DER/PEM certificate parsing.
|
|
24
|
+
- `cert_store:` parameter on `SourceMonitor::HTTP.client` for passing custom certificate stores.
|
|
25
|
+
- Brakeman ignore configuration (`config/brakeman.ignore`) for the intentional `VERIFY_NONE` in the AIA resolver's leaf certificate fetch.
|
|
26
|
+
|
|
27
|
+
### Testing
|
|
28
|
+
|
|
29
|
+
- 1,028 tests, 0 failures (up from 1,003 in 0.5.x).
|
|
30
|
+
- RuboCop: 0 offenses.
|
|
31
|
+
- Brakeman: 0 warnings (1 intentional ignore).
|
|
32
|
+
|
|
18
33
|
## [0.5.3] - 2026-02-16
|
|
19
34
|
|
|
20
35
|
### Fixed
|
data/Gemfile.lock
CHANGED
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.5.3
|
|
1
|
+
0.6.0
|
|
data/config/brakeman.ignore
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ignored_warnings": [
|
|
3
|
+
{
|
|
4
|
+
"warning_type": "SSL Verification Bypass",
|
|
5
|
+
"warning_code": 71,
|
|
6
|
+
"fingerprint": "17da2beb8f8ecf05b0ca1e3da89ee27d593395c711197f2f7dd38df759ea3720",
|
|
7
|
+
"check_name": "SSLVerify",
|
|
8
|
+
"message": "SSL certificate verification was bypassed",
|
|
9
|
+
"file": "lib/source_monitor/http/aia_resolver.rb",
|
|
10
|
+
"line": 77,
|
|
11
|
+
"code": "OpenSSL::SSL::SSLContext.new.verify_mode = OpenSSL::SSL::VERIFY_NONE",
|
|
12
|
+
"note": "Intentional: AIA resolver must connect without verification to fetch the leaf certificate from servers with broken certificate chains. This is the core purpose of the module -- it only uses VERIFY_NONE to read the cert, never to transmit data."
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"updated": "2026-02-17",
|
|
16
|
+
"brakeman_version": "8.0.2"
|
|
17
|
+
}
|
|
data/lib/source_monitor/fetching/feed_fetcher.rb
CHANGED
|
@@ -81,8 +81,11 @@ module SourceMonitor
|
|
|
81
81
|
raise error
|
|
82
82
|
rescue Faraday::TimeoutError => error
|
|
83
83
|
raise TimeoutError.new(error.message, original_error: error)
|
|
84
|
-
rescue Faraday::ConnectionFailed
|
|
84
|
+
rescue Faraday::ConnectionFailed => error
|
|
85
85
|
raise ConnectionError.new(error.message, original_error: error)
|
|
86
|
+
rescue Faraday::SSLError => error
|
|
87
|
+
attempt_aia_recovery(error, started_at, instrumentation_payload) ||
|
|
88
|
+
raise(ConnectionError.new(error.message, original_error: error))
|
|
86
89
|
rescue Faraday::ClientError => error
|
|
87
90
|
raise build_http_error_from_faraday(error)
|
|
88
91
|
rescue Faraday::Error => error
|
|
@@ -236,6 +239,24 @@ module SourceMonitor
|
|
|
236
239
|
)
|
|
237
240
|
end
|
|
238
241
|
|
|
242
|
+
def attempt_aia_recovery(_error, started_at, instrumentation_payload)
|
|
243
|
+
return if @aia_attempted
|
|
244
|
+
|
|
245
|
+
@aia_attempted = true
|
|
246
|
+
hostname = URI.parse(source.feed_url).host
|
|
247
|
+
intermediate = SourceMonitor::HTTP::AIAResolver.resolve(hostname)
|
|
248
|
+
return unless intermediate
|
|
249
|
+
|
|
250
|
+
store = SourceMonitor::HTTP::AIAResolver.enhanced_cert_store([ intermediate ])
|
|
251
|
+
@connection = SourceMonitor::HTTP.client(cert_store: store, headers: request_headers)
|
|
252
|
+
instrumentation_payload[:aia_resolved] = true
|
|
253
|
+
|
|
254
|
+
response = perform_request
|
|
255
|
+
handle_response(response, started_at, instrumentation_payload)
|
|
256
|
+
rescue StandardError
|
|
257
|
+
nil
|
|
258
|
+
end
|
|
259
|
+
|
|
239
260
|
def build_http_error_from_faraday(error)
|
|
240
261
|
response_hash = error.response || {}
|
|
241
262
|
headers = response_hash[:headers] || response_hash[:response_headers] || {}
|
|
data/lib/source_monitor/http/aia_resolver.rb
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "openssl"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "socket"
|
|
6
|
+
|
|
7
|
+
module SourceMonitor
|
|
8
|
+
module HTTP
|
|
9
|
+
module AIAResolver
|
|
10
|
+
CONNECT_TIMEOUT = 5
|
|
11
|
+
DOWNLOAD_TIMEOUT = 5
|
|
12
|
+
CACHE_TTL = 3600 # 1 hour
|
|
13
|
+
|
|
14
|
+
class << self
|
|
15
|
+
def resolve(hostname, port: 443)
|
|
16
|
+
cached = cache_lookup(hostname)
|
|
17
|
+
return cached if cached
|
|
18
|
+
|
|
19
|
+
cert = fetch_leaf_certificate(hostname, port)
|
|
20
|
+
return unless cert
|
|
21
|
+
|
|
22
|
+
url = extract_aia_url(cert)
|
|
23
|
+
return unless url
|
|
24
|
+
|
|
25
|
+
intermediate = download_certificate(url)
|
|
26
|
+
return unless intermediate
|
|
27
|
+
|
|
28
|
+
cache_store(hostname, intermediate)
|
|
29
|
+
intermediate
|
|
30
|
+
rescue StandardError
|
|
31
|
+
nil
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def enhanced_cert_store(additional_certs)
|
|
35
|
+
store = OpenSSL::X509::Store.new
|
|
36
|
+
store.set_default_paths
|
|
37
|
+
|
|
38
|
+
Array(additional_certs).each do |cert|
|
|
39
|
+
store.add_cert(cert)
|
|
40
|
+
rescue OpenSSL::X509::StoreError
|
|
41
|
+
# Already in store or invalid -- skip
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
store
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def clear_cache!
|
|
48
|
+
@mutex.synchronize { @cache.clear }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def cache_size
|
|
52
|
+
@mutex.synchronize { @cache.size }
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
private
|
|
56
|
+
|
|
57
|
+
def cache_lookup(hostname)
|
|
58
|
+
@mutex.synchronize do
|
|
59
|
+
entry = @cache[hostname]
|
|
60
|
+
return unless entry
|
|
61
|
+
return entry[:cert] if entry[:expires_at] > Time.now
|
|
62
|
+
|
|
63
|
+
@cache.delete(hostname)
|
|
64
|
+
nil
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def cache_store(hostname, cert)
|
|
69
|
+
@mutex.synchronize do
|
|
70
|
+
@cache[hostname] = { cert: cert, expires_at: Time.now + CACHE_TTL }
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def fetch_leaf_certificate(hostname, port)
|
|
75
|
+
tcp = Socket.tcp(hostname, port, connect_timeout: CONNECT_TIMEOUT)
|
|
76
|
+
ssl_context = OpenSSL::SSL::SSLContext.new
|
|
77
|
+
ssl_context.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
|
78
|
+
|
|
79
|
+
ssl = OpenSSL::SSL::SSLSocket.new(tcp, ssl_context)
|
|
80
|
+
ssl.hostname = hostname
|
|
81
|
+
ssl.connect
|
|
82
|
+
|
|
83
|
+
ssl.peer_cert
|
|
84
|
+
rescue StandardError
|
|
85
|
+
nil
|
|
86
|
+
ensure
|
|
87
|
+
ssl&.close rescue nil # rubocop:disable Style/RescueModifier
|
|
88
|
+
tcp&.close rescue nil # rubocop:disable Style/RescueModifier
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def extract_aia_url(cert)
|
|
92
|
+
return unless cert.respond_to?(:ca_issuer_uris)
|
|
93
|
+
|
|
94
|
+
uris = cert.ca_issuer_uris
|
|
95
|
+
return if uris.nil? || uris.empty?
|
|
96
|
+
|
|
97
|
+
uris.first.to_s
|
|
98
|
+
rescue StandardError
|
|
99
|
+
nil
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def download_certificate(url)
|
|
103
|
+
uri = URI.parse(url)
|
|
104
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
105
|
+
http.open_timeout = DOWNLOAD_TIMEOUT
|
|
106
|
+
http.read_timeout = DOWNLOAD_TIMEOUT
|
|
107
|
+
|
|
108
|
+
response = http.get(uri.request_uri)
|
|
109
|
+
return unless response.is_a?(Net::HTTPSuccess)
|
|
110
|
+
|
|
111
|
+
body = response.body
|
|
112
|
+
parse_certificate(body)
|
|
113
|
+
rescue StandardError
|
|
114
|
+
nil
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def parse_certificate(body)
|
|
118
|
+
OpenSSL::X509::Certificate.new(body) # tries DER first, then PEM
|
|
119
|
+
rescue OpenSSL::X509::CertificateError
|
|
120
|
+
nil
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
@mutex = Mutex.new
|
|
125
|
+
@cache = {}
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
data/lib/source_monitor/http.rb
CHANGED
|
@@ -9,6 +9,8 @@ require "active_support/core_ext/object/blank"
|
|
|
9
9
|
|
|
10
10
|
module SourceMonitor
|
|
11
11
|
module HTTP
|
|
12
|
+
autoload :AIAResolver, "source_monitor/http/aia_resolver"
|
|
13
|
+
|
|
12
14
|
DEFAULT_TIMEOUT = 15
|
|
13
15
|
DEFAULT_OPEN_TIMEOUT = 5
|
|
14
16
|
DEFAULT_MAX_REDIRECTS = 5
|
|
@@ -16,7 +18,7 @@ module SourceMonitor
|
|
|
16
18
|
RETRY_STATUSES = [ 429, 500, 502, 503, 504 ].freeze
|
|
17
19
|
|
|
18
20
|
class << self
|
|
19
|
-
def client(proxy: nil, headers: {}, timeout: nil, open_timeout: nil, retry_requests: true)
|
|
21
|
+
def client(proxy: nil, headers: {}, timeout: nil, open_timeout: nil, retry_requests: true, cert_store: nil)
|
|
20
22
|
settings = SourceMonitor.config.http
|
|
21
23
|
|
|
22
24
|
effective_proxy = resolve_proxy(proxy, settings)
|
|
@@ -30,14 +32,15 @@ module SourceMonitor
|
|
|
30
32
|
timeout: effective_timeout,
|
|
31
33
|
open_timeout: effective_open_timeout,
|
|
32
34
|
settings: settings,
|
|
33
|
-
enable_retry: retry_requests
|
|
35
|
+
enable_retry: retry_requests,
|
|
36
|
+
cert_store: cert_store
|
|
34
37
|
)
|
|
35
38
|
end
|
|
36
39
|
end
|
|
37
40
|
|
|
38
41
|
private
|
|
39
42
|
|
|
40
|
-
def configure_request(connection, headers, timeout:, open_timeout:, settings:, enable_retry:) # rubocop:disable Metrics/MethodLength
|
|
43
|
+
def configure_request(connection, headers, timeout:, open_timeout:, settings:, enable_retry:, cert_store: nil) # rubocop:disable Metrics/MethodLength
|
|
41
44
|
if enable_retry
|
|
42
45
|
connection.request :retry,
|
|
43
46
|
max: settings.retry_max || 4,
|
|
@@ -58,7 +61,7 @@ module SourceMonitor
|
|
|
58
61
|
connection.headers[key] = value
|
|
59
62
|
end
|
|
60
63
|
|
|
61
|
-
configure_ssl(connection, settings)
|
|
64
|
+
configure_ssl(connection, settings, cert_store: cert_store)
|
|
62
65
|
|
|
63
66
|
connection.adapter Faraday.default_adapter
|
|
64
67
|
end
|
|
@@ -67,7 +70,7 @@ module SourceMonitor
|
|
|
67
70
|
# fail to verify certificate chains that depend on intermediate CAs
|
|
68
71
|
# (e.g., Medium/Netflix on AWS). OpenSSL::X509::Store#set_default_paths
|
|
69
72
|
# loads all system-trusted CAs including intermediates.
|
|
70
|
-
def configure_ssl(connection, settings)
|
|
73
|
+
def configure_ssl(connection, settings, cert_store: nil)
|
|
71
74
|
connection.ssl.verify = settings.ssl_verify != false
|
|
72
75
|
|
|
73
76
|
if settings.ssl_ca_file
|
|
@@ -75,7 +78,7 @@ module SourceMonitor
|
|
|
75
78
|
elsif settings.ssl_ca_path
|
|
76
79
|
connection.ssl.ca_path = settings.ssl_ca_path
|
|
77
80
|
else
|
|
78
|
-
connection.ssl.cert_store = default_cert_store
|
|
81
|
+
connection.ssl.cert_store = cert_store || default_cert_store
|
|
79
82
|
end
|
|
80
83
|
end
|
|
81
84
|
|
|
data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb
CHANGED
|
@@ -10,6 +10,7 @@ module SourceMonitor
|
|
|
10
10
|
|
|
11
11
|
def initialize(http: SourceMonitor::HTTP)
|
|
12
12
|
@http = http
|
|
13
|
+
@aia_attempted = false
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def fetch(url:, settings: nil)
|
|
@@ -25,6 +26,11 @@ module SourceMonitor
|
|
|
25
26
|
message: "Non-success HTTP status"
|
|
26
27
|
)
|
|
27
28
|
end
|
|
29
|
+
rescue Faraday::SSLError => error
|
|
30
|
+
result = attempt_aia_recovery(url, settings)
|
|
31
|
+
return result if result
|
|
32
|
+
|
|
33
|
+
Result.new(status: :failed, error: error.class.name, message: error.message)
|
|
28
34
|
rescue Faraday::ClientError => error
|
|
29
35
|
Result.new(
|
|
30
36
|
status: :failed,
|
|
@@ -40,13 +46,34 @@ module SourceMonitor
|
|
|
40
46
|
|
|
41
47
|
attr_reader :http
|
|
42
48
|
|
|
43
|
-
def connection(settings)
|
|
49
|
+
def attempt_aia_recovery(url, settings)
|
|
50
|
+
return if @aia_attempted
|
|
51
|
+
|
|
52
|
+
@aia_attempted = true
|
|
53
|
+
hostname = URI.parse(url).host
|
|
54
|
+
intermediate = SourceMonitor::HTTP::AIAResolver.resolve(hostname)
|
|
55
|
+
return unless intermediate
|
|
56
|
+
|
|
57
|
+
store = SourceMonitor::HTTP::AIAResolver.enhanced_cert_store([ intermediate ])
|
|
58
|
+
response = connection(settings, cert_store: store).get(url)
|
|
59
|
+
|
|
60
|
+
if success_status?(response.status)
|
|
61
|
+
Result.new(status: :success, body: response.body, headers: response.headers, http_status: response.status)
|
|
62
|
+
else
|
|
63
|
+
Result.new(status: :failed, http_status: response.status, error: "http_error", message: "Non-success HTTP status")
|
|
64
|
+
end
|
|
65
|
+
rescue StandardError
|
|
66
|
+
nil
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def connection(settings, cert_store: nil)
|
|
44
70
|
normalized = normalize_settings(settings)
|
|
45
71
|
http.client(
|
|
46
72
|
proxy: normalized[:proxy],
|
|
47
73
|
headers: normalized[:headers],
|
|
48
74
|
timeout: normalized[:timeout] || SourceMonitor::HTTP::DEFAULT_TIMEOUT,
|
|
49
|
-
open_timeout: normalized[:open_timeout] || SourceMonitor::HTTP::DEFAULT_OPEN_TIMEOUT
|
|
75
|
+
open_timeout: normalized[:open_timeout] || SourceMonitor::HTTP::DEFAULT_OPEN_TIMEOUT,
|
|
76
|
+
cert_store: cert_store
|
|
50
77
|
)
|
|
51
78
|
end
|
|
52
79
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: source_monitor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.3
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- dchuk
|
|
@@ -540,6 +540,7 @@ files:
|
|
|
540
540
|
- app/views/source_monitor/sources/index.html.erb
|
|
541
541
|
- app/views/source_monitor/sources/new.html.erb
|
|
542
542
|
- app/views/source_monitor/sources/show.html.erb
|
|
543
|
+
- config/brakeman.ignore
|
|
543
544
|
- config/coverage_baseline.json
|
|
544
545
|
- config/initializers/feedjira.rb
|
|
545
546
|
- config/routes.rb
|
|
@@ -630,6 +631,7 @@ files:
|
|
|
630
631
|
- lib/source_monitor/health/source_health_monitor.rb
|
|
631
632
|
- lib/source_monitor/health/source_health_reset.rb
|
|
632
633
|
- lib/source_monitor/http.rb
|
|
634
|
+
- lib/source_monitor/http/aia_resolver.rb
|
|
633
635
|
- lib/source_monitor/images/content_rewriter.rb
|
|
634
636
|
- lib/source_monitor/images/downloader.rb
|
|
635
637
|
- lib/source_monitor/import_sessions/entry_normalizer.rb
|