source_monitor 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +15 -0
- data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +3 -3
- data/.claude/skills/sm-configure/reference/configuration-reference.md +3 -3
- data/.claude/skills/sm-domain-model/SKILL.md +2 -2
- data/.claude/skills/sm-domain-model/reference/table-structure.md +3 -1
- data/.claude/skills/sm-engine-migration/SKILL.md +1 -1
- data/.claude/skills/sm-engine-migration/reference/migration-conventions.md +1 -1
- data/.claude/skills/sm-health-rule/SKILL.md +18 -21
- data/.claude/skills/sm-health-rule/reference/health-system.md +1 -1
- data/.claude/skills/sm-host-setup/reference/initializer-template.md +2 -2
- data/.claude/skills/sm-upgrade/reference/version-history.md +17 -12
- data/CHANGELOG.md +42 -0
- data/CLAUDE.md +2 -2
- data/Gemfile +1 -0
- data/Gemfile.lock +4 -1
- data/README.md +3 -3
- data/VERSION +1 -1
- data/app/assets/builds/source_monitor/application.css +132 -12
- data/app/assets/builds/source_monitor/application.js +25 -1
- data/app/assets/builds/source_monitor/application.js.map +2 -2
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +8 -0
- data/app/assets/javascripts/source_monitor/controllers/select_all_controller.js +22 -2
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +1 -1
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +57 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +10 -1
- data/app/controllers/source_monitor/import_history_dismissals_controller.rb +20 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +10 -2
- data/app/controllers/source_monitor/source_scrape_tests_controller.rb +73 -0
- data/app/controllers/source_monitor/sources_controller.rb +51 -9
- data/app/helpers/source_monitor/application_helper.rb +24 -0
- data/app/helpers/source_monitor/health_badge_helper.rb +7 -20
- data/app/jobs/source_monitor/fetch_feed_job.rb +32 -3
- data/app/jobs/source_monitor/source_health_check_job.rb +1 -1
- data/app/models/source_monitor/fetch_log.rb +4 -0
- data/app/models/source_monitor/import_history.rb +2 -0
- data/app/models/source_monitor/source.rb +47 -2
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +94 -68
- data/app/views/source_monitor/dashboard/_scrape_recommendations.html.erb +17 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +19 -0
- data/app/views/source_monitor/dashboard/index.html.erb +7 -1
- data/app/views/source_monitor/import_sessions/health_check/_row.html.erb +2 -2
- data/app/views/source_monitor/shared/_pagination.html.erb +74 -0
- data/app/views/source_monitor/source_scrape_tests/_result.html.erb +81 -0
- data/app/views/source_monitor/source_scrape_tests/show.html.erb +60 -0
- data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +29 -0
- data/app/views/source_monitor/sources/_details.html.erb +19 -1
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +1 -1
- data/app/views/source_monitor/sources/_import_history_panel.html.erb +12 -5
- data/app/views/source_monitor/sources/_row.html.erb +34 -6
- data/app/views/source_monitor/sources/index.html.erb +184 -132
- data/config/brakeman.ignore +11 -1
- data/config/routes.rb +5 -0
- data/db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb +7 -0
- data/db/migrate/20260306233004_add_error_category_to_fetch_logs.rb +8 -0
- data/db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb +11 -0
- data/db/migrate/20260312120000_simplify_health_status_values.rb +20 -0
- data/docs/configuration.md +9 -1
- data/docs/troubleshooting.md +9 -0
- data/docs/upgrade.md +31 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +2 -3
- data/lib/source_monitor/analytics/scrape_recommendations.rb +27 -0
- data/lib/source_monitor/configuration/health_settings.rb +0 -2
- data/lib/source_monitor/configuration/scraping_settings.rb +8 -1
- data/lib/source_monitor/dashboard/queries/stats_query.rb +12 -1
- data/lib/source_monitor/dashboard/queries.rb +6 -3
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +6 -5
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +40 -54
- data/lib/source_monitor/favicons/discoverer.rb +16 -0
- data/lib/source_monitor/favicons/svg_converter.rb +60 -0
- data/lib/source_monitor/fetching/cloudflare_bypass.rb +79 -0
- data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +82 -2
- data/lib/source_monitor/fetching/feed_fetcher.rb +55 -1
- data/lib/source_monitor/fetching/fetch_error.rb +27 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +4 -0
- data/lib/source_monitor/fetching/retry_policy.rb +4 -0
- data/lib/source_monitor/health/import_source_health_check.rb +3 -3
- data/lib/source_monitor/health/source_health_monitor.rb +9 -14
- data/lib/source_monitor/health/source_health_reset.rb +1 -1
- data/lib/source_monitor/pagination/paginator.rb +18 -1
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +3 -0
- metadata +17 -1
|
@@ -85,10 +85,9 @@ SourceMonitor.configure do |config|
|
|
|
85
85
|
|
|
86
86
|
# ---- Source health monitoring ---------------------------------------
|
|
87
87
|
# Tune how many fetches SourceMonitor evaluates when determining health
|
|
88
|
-
# status, as well as thresholds for
|
|
88
|
+
# status, as well as thresholds for automatic pauses.
|
|
89
89
|
config.health.window_size = 20
|
|
90
|
-
config.health.healthy_threshold = 0.8
|
|
91
|
-
config.health.warning_threshold = 0.5
|
|
90
|
+
config.health.healthy_threshold = 0.8 # Ratio for "working" status
|
|
92
91
|
config.health.auto_pause_threshold = 0.2
|
|
93
92
|
config.health.auto_resume_threshold = 0.6
|
|
94
93
|
config.health.auto_pause_cooldown_minutes = 60
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"

module SourceMonitor
  module Analytics
    # Answers questions about which sources Source.scrape_candidates selects
    # for a given threshold.
    #
    # Every query result is memoized on the instance, so build a new instance
    # to observe fresh data.
    class ScrapeRecommendations
      # @param threshold [#to_i] value forwarded to Source.scrape_candidates;
      #   defaults to the configured scrape_recommendation_threshold.
      def initialize(threshold: SourceMonitor.config.scraping.scrape_recommendation_threshold)
        @threshold = threshold.to_i
      end

      # @return [Integer] number of candidate sources
      def candidates_count
        @candidates_count ||= candidate_scope.count
      end

      # @return [Array] ids of the candidate sources
      def candidate_ids
        @candidate_ids ||= candidate_scope.pluck(:id)
      end

      # @param source_id [Object] id to look up (compared as-is, no coercion)
      # @return [Boolean] whether the id belongs to a candidate source
      def candidate?(source_id)
        candidate_id_set.include?(source_id)
      end

      private

      attr_reader :threshold

      # Relation shared by every public query.
      def candidate_scope
        Source.scrape_candidates(threshold: threshold)
      end

      # Set view of candidate_ids so repeated candidate? calls (for example
      # one per rendered row) do not rescan the whole id array each time.
      def candidate_id_set
        @candidate_id_set ||= candidate_ids.to_set
      end
    end
  end
end
|
|
@@ -5,7 +5,6 @@ module SourceMonitor
|
|
|
5
5
|
class HealthSettings
|
|
6
6
|
attr_accessor :window_size,
|
|
7
7
|
:healthy_threshold,
|
|
8
|
-
:warning_threshold,
|
|
9
8
|
:auto_pause_threshold,
|
|
10
9
|
:auto_resume_threshold,
|
|
11
10
|
:auto_pause_cooldown_minutes
|
|
@@ -17,7 +16,6 @@ module SourceMonitor
|
|
|
17
16
|
def reset!
|
|
18
17
|
@window_size = 20
|
|
19
18
|
@healthy_threshold = 0.8
|
|
20
|
-
@warning_threshold = 0.5
|
|
21
19
|
@auto_pause_threshold = 0.2
|
|
22
20
|
@auto_resume_threshold = 0.6
|
|
23
21
|
@auto_pause_cooldown_minutes = 60
|
|
@@ -3,11 +3,13 @@
|
|
|
3
3
|
module SourceMonitor
|
|
4
4
|
class Configuration
|
|
5
5
|
class ScrapingSettings
|
|
6
|
-
attr_accessor :max_in_flight_per_source, :max_bulk_batch_size, :min_scrape_interval
|
|
6
|
+
attr_accessor :max_in_flight_per_source, :max_bulk_batch_size, :min_scrape_interval,
|
|
7
|
+
:scrape_recommendation_threshold
|
|
7
8
|
|
|
8
9
|
DEFAULT_MAX_IN_FLIGHT = nil
|
|
9
10
|
DEFAULT_MAX_BULK_BATCH_SIZE = 100
|
|
10
11
|
DEFAULT_MIN_SCRAPE_INTERVAL = 1.0
|
|
12
|
+
DEFAULT_SCRAPE_RECOMMENDATION_THRESHOLD = 200
|
|
11
13
|
|
|
12
14
|
def initialize
|
|
13
15
|
reset!
|
|
@@ -17,6 +19,7 @@ module SourceMonitor
|
|
|
17
19
|
@max_in_flight_per_source = DEFAULT_MAX_IN_FLIGHT
|
|
18
20
|
@max_bulk_batch_size = DEFAULT_MAX_BULK_BATCH_SIZE
|
|
19
21
|
@min_scrape_interval = DEFAULT_MIN_SCRAPE_INTERVAL
|
|
22
|
+
@scrape_recommendation_threshold = DEFAULT_SCRAPE_RECOMMENDATION_THRESHOLD
|
|
20
23
|
end
|
|
21
24
|
|
|
22
25
|
def max_in_flight_per_source=(value)
|
|
@@ -31,6 +34,10 @@ module SourceMonitor
|
|
|
31
34
|
@min_scrape_interval = normalize_numeric_float(value)
|
|
32
35
|
end
|
|
33
36
|
|
|
37
|
+
# Stores the scrape recommendation threshold after passing the given value
# through normalize_numeric.
def scrape_recommendation_threshold=(value)
  normalized = normalize_numeric(value)
  @scrape_recommendation_threshold = normalized
end
|
|
40
|
+
|
|
34
41
|
private
|
|
35
42
|
|
|
36
43
|
def normalize_numeric(value)
|
|
@@ -14,7 +14,9 @@ module SourceMonitor
|
|
|
14
14
|
active_sources: integer_value(source_counts["active_sources"]),
|
|
15
15
|
failed_sources: integer_value(source_counts["failed_sources"]),
|
|
16
16
|
total_items: total_items_count,
|
|
17
|
-
fetches_today: fetches_today_count
|
|
17
|
+
fetches_today: fetches_today_count,
|
|
18
|
+
health_distribution: health_distribution,
|
|
19
|
+
scrape_candidates_count: scrape_candidates_count
|
|
18
20
|
}
|
|
19
21
|
end
|
|
20
22
|
|
|
@@ -62,6 +64,15 @@ module SourceMonitor
|
|
|
62
64
|
reference_time.in_time_zone.beginning_of_day
|
|
63
65
|
end
|
|
64
66
|
|
|
67
|
+
# Counts active sources per health status.
#
# Always returns the four keys below, in this order, with 0 for a status
# that has no sources; any other status present in the data is left out.
def health_distribution
  counts_by_status = SourceMonitor::Source.active.group(:health_status).count

  %w[working declining improving failing].to_h do |status|
    [ status, counts_by_status.fetch(status, 0) ]
  end
end
|
|
71
|
+
|
|
72
|
+
# Number of scrape candidates reported by a ScrapeRecommendations instance
# built with its default threshold.
def scrape_candidates_count
  recommendations = SourceMonitor::Analytics::ScrapeRecommendations.new
  recommendations.candidates_count
end
|
|
75
|
+
|
|
65
76
|
def integer_value(value)
|
|
66
77
|
value.to_i
|
|
67
78
|
end
|
|
@@ -60,10 +60,10 @@ module SourceMonitor
|
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
-
def upcoming_fetch_schedule
|
|
64
|
-
cache.fetch(:upcoming_fetch_schedule) do
|
|
63
|
+
def upcoming_fetch_schedule(pages: {})
|
|
64
|
+
cache.fetch([ :upcoming_fetch_schedule, pages ]) do
|
|
65
65
|
measure(:upcoming_fetch_schedule) do
|
|
66
|
-
SourceMonitor::Dashboard::UpcomingFetchSchedule.new(scope: SourceMonitor::Source.active)
|
|
66
|
+
SourceMonitor::Dashboard::UpcomingFetchSchedule.new(scope: SourceMonitor::Source.active, pages: pages)
|
|
67
67
|
end
|
|
68
68
|
end
|
|
69
69
|
end
|
|
@@ -110,6 +110,9 @@ module SourceMonitor
|
|
|
110
110
|
SourceMonitor::Metrics.gauge(:dashboard_stats_failed_sources, stats[:failed_sources])
|
|
111
111
|
SourceMonitor::Metrics.gauge(:dashboard_stats_total_items, stats[:total_items])
|
|
112
112
|
SourceMonitor::Metrics.gauge(:dashboard_stats_fetches_today, stats[:fetches_today])
|
|
113
|
+
stats[:health_distribution]&.each do |status, count|
|
|
114
|
+
SourceMonitor::Metrics.gauge(:"dashboard_stats_health_#{status}", count)
|
|
115
|
+
end
|
|
113
116
|
end
|
|
114
117
|
|
|
115
118
|
def queue_name_map
|
|
@@ -31,21 +31,22 @@ module SourceMonitor
|
|
|
31
31
|
|
|
32
32
|
def fetch_event(event)
|
|
33
33
|
domain = source_domain(event.source_feed_url)
|
|
34
|
+
label = domain ? "#{domain} \u2014 Fetch ##{event.id}" : "Fetch ##{event.id}"
|
|
34
35
|
{
|
|
35
|
-
label:
|
|
36
|
+
label: label,
|
|
36
37
|
description: "#{event.items_created.to_i} created / #{event.items_updated.to_i} updated",
|
|
37
38
|
status: event.success? ? :success : :failure,
|
|
38
39
|
type: :fetch,
|
|
39
40
|
time: event.occurred_at,
|
|
40
|
-
path: url_helpers.fetch_log_path(event.id)
|
|
41
|
-
url_display: domain,
|
|
42
|
-
url_href: event.source_feed_url
|
|
41
|
+
path: url_helpers.fetch_log_path(event.id)
|
|
43
42
|
}
|
|
44
43
|
end
|
|
45
44
|
|
|
46
45
|
def scrape_event(event)
|
|
46
|
+
name = event.source_name.presence
|
|
47
|
+
label = name ? "#{name} \u2014 Scrape ##{event.id}" : "Scrape ##{event.id}"
|
|
47
48
|
{
|
|
48
|
-
label:
|
|
49
|
+
label: label,
|
|
49
50
|
description: (event.scraper_adapter.presence || "Scraper"),
|
|
50
51
|
status: event.success? ? :success : :failure,
|
|
51
52
|
type: :scrape,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "source_monitor/pagination/paginator"
|
|
4
|
+
|
|
3
5
|
module SourceMonitor
|
|
4
6
|
module Dashboard
|
|
5
7
|
class UpcomingFetchSchedule
|
|
@@ -12,6 +14,9 @@ module SourceMonitor
|
|
|
12
14
|
:window_end,
|
|
13
15
|
:include_unscheduled,
|
|
14
16
|
:sources,
|
|
17
|
+
:page,
|
|
18
|
+
:has_next_page,
|
|
19
|
+
:has_previous_page,
|
|
15
20
|
keyword_init: true
|
|
16
21
|
) do
|
|
17
22
|
def empty?
|
|
@@ -27,11 +32,15 @@ module SourceMonitor
|
|
|
27
32
|
{ key: "240+", label: "240 minutes +", min_minutes: 240, max_minutes: nil, include_unscheduled: true }
|
|
28
33
|
].freeze
|
|
29
34
|
|
|
35
|
+
DEFAULT_PER_PAGE = 10
|
|
36
|
+
|
|
30
37
|
attr_reader :scope, :reference_time
|
|
31
38
|
|
|
32
|
-
def initialize(scope: SourceMonitor::Source.active, reference_time: Time.current)
|
|
39
|
+
def initialize(scope: SourceMonitor::Source.active, reference_time: Time.current, pages: {}, per_page: DEFAULT_PER_PAGE)
|
|
33
40
|
@scope = scope
|
|
34
41
|
@reference_time = reference_time
|
|
42
|
+
@pages = pages
|
|
43
|
+
@per_page = per_page
|
|
35
44
|
end
|
|
36
45
|
|
|
37
46
|
def groups
|
|
@@ -40,21 +49,22 @@ module SourceMonitor
|
|
|
40
49
|
|
|
41
50
|
private
|
|
42
51
|
|
|
52
|
+
attr_reader :pages, :per_page
|
|
53
|
+
|
|
43
54
|
def build_groups
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
definitions[definition[:key]][:sources] << source if definition
|
|
48
|
-
end
|
|
55
|
+
INTERVAL_DEFINITIONS.filter_map do |definition|
|
|
56
|
+
bucket_scope = scope_for_bucket(definition)
|
|
57
|
+
next unless bucket_scope.exists?
|
|
49
58
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
next unless definition
|
|
59
|
+
page_number = pages.fetch(definition[:key], 1).to_i
|
|
60
|
+
page_number = 1 if page_number < 1
|
|
53
61
|
|
|
54
|
-
|
|
55
|
-
|
|
62
|
+
result = SourceMonitor::Pagination::Paginator.new(
|
|
63
|
+
scope: bucket_scope.order(:next_fetch_at, :name),
|
|
64
|
+
page: page_number,
|
|
65
|
+
per_page: per_page
|
|
66
|
+
).paginate
|
|
56
67
|
|
|
57
|
-
definitions.values.map do |definition|
|
|
58
68
|
Group.new(
|
|
59
69
|
key: definition[:key],
|
|
60
70
|
label: definition[:label],
|
|
@@ -63,64 +73,40 @@ module SourceMonitor
|
|
|
63
73
|
window_start: window_start_for(definition[:min_minutes]),
|
|
64
74
|
window_end: window_end_for(definition[:max_minutes]),
|
|
65
75
|
include_unscheduled: definition[:include_unscheduled],
|
|
66
|
-
sources:
|
|
76
|
+
sources: result.records,
|
|
77
|
+
page: result.page,
|
|
78
|
+
has_next_page: result.has_next_page,
|
|
79
|
+
has_previous_page: result.has_previous_page
|
|
67
80
|
)
|
|
68
81
|
end
|
|
69
82
|
end
|
|
70
83
|
|
|
71
|
-
def
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def definition_for(next_fetch_at)
|
|
86
|
-
minutes = minutes_until(next_fetch_at)
|
|
87
|
-
|
|
88
|
-
INTERVAL_DEFINITIONS.find do |definition|
|
|
89
|
-
min = definition[:min_minutes]
|
|
90
|
-
max = definition[:max_minutes]
|
|
91
|
-
|
|
92
|
-
minutes >= min && (max.nil? || minutes < max)
|
|
84
|
+
# Narrows `scope` to the sources whose next_fetch_at falls inside the bucket
# described by `definition` (:min_minutes / :max_minutes, relative to
# reference_time).
#
# * max_minutes nil    -> next_fetch_at >= window start, OR unscheduled (nil).
# * min_minutes <= 0   -> everything before the window end, including
#   overdue sources whose next_fetch_at is already in the past. The previous
#   in-memory bucketing clamped negative minutes to 0, so overdue sources
#   belonged to the bucket starting at 0; a closed lower bound at
#   reference_time would drop them from every bucket.
# * otherwise          -> window start (inclusive) to window end (exclusive).
def scope_for_bucket(definition)
  min_minutes = definition[:min_minutes]
  max_minutes = definition[:max_minutes]
  window_start = reference_time + min_minutes.minutes

  if max_minutes.nil?
    # Last bucket: 240+ minutes OR unscheduled (nil next_fetch_at)
    scheduled = scope.where(next_fetch_at: window_start..)
    unscheduled = scope.where(next_fetch_at: nil)
    return scheduled.or(unscheduled)
  end

  window_end = reference_time + max_minutes.minutes
  # nil lower bound builds a beginless range (next_fetch_at < window_end).
  lower_bound = min_minutes.positive? ? window_start : nil
  scope.where(next_fetch_at: Range.new(lower_bound, window_end, true))
end
|
|
95
98
|
|
|
96
|
-
def minutes_until(timestamp)
|
|
97
|
-
return Float::INFINITY if timestamp.blank?
|
|
98
|
-
|
|
99
|
-
minutes = (timestamp - reference_time) / 60.0
|
|
100
|
-
return 0 if minutes.negative?
|
|
101
|
-
|
|
102
|
-
minutes
|
|
103
|
-
end
|
|
104
|
-
|
|
105
99
|
def window_start_for(min_minutes)
|
|
106
|
-
return nil if min_minutes.nil?
|
|
100
|
+
return nil if min_minutes.nil?
|
|
107
101
|
|
|
108
102
|
reference_time + min_minutes.minutes
|
|
109
103
|
end
|
|
110
104
|
|
|
111
105
|
def window_end_for(max_minutes)
|
|
112
|
-
return nil if max_minutes.nil?
|
|
106
|
+
return nil if max_minutes.nil?
|
|
113
107
|
|
|
114
108
|
reference_time + max_minutes.minutes
|
|
115
109
|
end
|
|
116
|
-
|
|
117
|
-
def sort_sources(sources)
|
|
118
|
-
future_cap = reference_time + 100.years
|
|
119
|
-
|
|
120
|
-
sources.sort_by do |source|
|
|
121
|
-
[ source.next_fetch_at || future_cap, source.name.to_s ]
|
|
122
|
-
end
|
|
123
|
-
end
|
|
124
110
|
end
|
|
125
111
|
end
|
|
126
112
|
end
|
|
@@ -144,6 +144,10 @@ module SourceMonitor
|
|
|
144
144
|
return unless body && body.bytesize > 0
|
|
145
145
|
return if body.bytesize > settings.max_download_size
|
|
146
146
|
|
|
147
|
+
if content_type == "image/svg+xml"
|
|
148
|
+
return convert_svg_to_result(body, url)
|
|
149
|
+
end
|
|
150
|
+
|
|
147
151
|
filename = derive_filename(url, content_type)
|
|
148
152
|
|
|
149
153
|
Result.new(
|
|
@@ -156,6 +160,18 @@ module SourceMonitor
|
|
|
156
160
|
nil
|
|
157
161
|
end
|
|
158
162
|
|
|
163
|
+
# Hands an SVG body to SvgConverter and wraps the converted payload in a
# Result for `url`. Returns nil when the converter yields nothing.
def convert_svg_to_result(svg_body, url)
  svg_filename = derive_filename(url, "image/svg+xml")
  png = SvgConverter.call(svg_body, filename: svg_filename)
  return nil unless png

  Result.new(
    io: png[:io],
    filename: png[:filename],
    content_type: png[:content_type],
    url: url
  )
end
|
|
174
|
+
|
|
159
175
|
def derive_filename(favicon_url, content_type)
|
|
160
176
|
uri = URI.parse(favicon_url)
|
|
161
177
|
basename = File.basename(uri.path) if uri.path.present?
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module Favicons
    # Converts SVG favicon data to PNG through MiniMagick.
    class SvgConverter
      PNG_CONTENT_TYPE = "image/png"
      DEFAULT_SIZE = 64
      DEFAULT_FILENAME = "favicon.png"

      # Converts an SVG string to PNG bytes using MiniMagick.
      #
      # @param svg_body [String] SVG document
      # @param filename [String, nil] name to derive the PNG filename from
      # @param size [Integer] used for the "SIZExSIZE" resize geometry
      # @return [Hash, nil] Hash with :io, :content_type, :filename, or nil
      #   when MiniMagick is not loaded or the conversion fails.
      def self.call(svg_body, filename: DEFAULT_FILENAME, size: DEFAULT_SIZE)
        return nil unless defined?(MiniMagick)

        new(svg_body, filename: filename, size: size).call
      end

      def initialize(svg_body, filename:, size:)
        @svg_body = svg_body
        @filename = png_filename_for(filename)
        @size = size
      end

      # Runs the conversion; any StandardError is logged and turned into nil.
      def call
        convert_svg_to_png
      rescue StandardError => e
        log_conversion_failure(e)
        nil
      end

      private

      # The payload is always PNG, so the name must always end in .png:
      # a trailing .svg is swapped, any other extension is replaced, a missing
      # extension is added, and a nil/empty name falls back to the default.
      def png_filename_for(filename)
        name = filename.to_s.sub(/\.svg\z/i, ".png")
        return DEFAULT_FILENAME if name.empty?
        return name if name.match?(/\.png\z/i)

        "#{name.delete_suffix(File.extname(name))}.png"
      end

      def convert_svg_to_png
        image = MiniMagick::Image.read(@svg_body, ".svg")
        image.format("png")
        image.resize("#{@size}x#{@size}")

        png_bytes = image.to_blob

        return nil if png_bytes.nil? || png_bytes.empty?

        {
          io: StringIO.new(png_bytes),
          content_type: PNG_CONTENT_TYPE,
          filename: @filename
        }
      ensure
        # image is nil when read itself raised.
        image&.destroy!
      end

      # Best-effort warning; a logging failure must not mask the nil result.
      def log_conversion_failure(error)
        return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

        Rails.logger.warn(
          "[SourceMonitor::Favicons::SvgConverter] SVG conversion failed: #{error.message}"
        )
      rescue StandardError # rubocop:disable Lint/SuppressedException
      end
    end
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module Fetching
    # Re-requests a feed URL with alternative headers after a response that
    # looked like a Cloudflare challenge page.
    #
    # #call first replays the cookies set by the given response, then cycles
    # through USER_AGENTS, and returns the first response that is not
    # considered blocked, or nil when every attempt is blocked or fails.
    class CloudflareBypass
      USER_AGENTS = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
      ].freeze

      CLOUDFLARE_MARKERS = FeedFetcher::CLOUDFLARE_MARKERS
      SNIFF_LIMIT = FeedFetcher::SNIFF_LIMIT

      attr_reader :response, :feed_url

      # @param response the blocked response whose cookies may be replayed
      # @param feed_url [String] URL to request again
      def initialize(response:, feed_url:)
        @response = response
        @feed_url = feed_url
      end

      # @return the first unblocked response, or nil
      def call
        attempt_cookie_replay || attempt_ua_rotation
      end

      private

      # Single retry carrying the cookies from the original response.
      def attempt_cookie_replay
        cookies = extract_cookies(response)
        return if cookies.blank?

        headers = { "Cookie" => cookies, "Cache-Control" => "no-cache", "Pragma" => "no-cache" }
        result = fetch_with_headers(headers)
        result unless cloudflare_blocked?(result)
      end

      # One request per entry in USER_AGENTS, stopping at the first success.
      def attempt_ua_rotation
        USER_AGENTS.each do |ua|
          headers = {
            "User-Agent" => ua,
            "Cache-Control" => "no-cache",
            "Pragma" => "no-cache"
          }
          result = fetch_with_headers(headers)
          return result unless cloudflare_blocked?(result)
        end

        nil
      end

      # Performs one GET without request retries; any error counts as a failed
      # attempt (nil) so the next strategy can run.
      def fetch_with_headers(headers)
        client = SourceMonitor::HTTP.client(headers: headers, retry_requests: false)
        client.get(feed_url)
      rescue StandardError
        nil
      end

      # A missing response, an empty body, or a body whose first SNIFF_LIMIT
      # characters contain a Cloudflare marker all count as blocked.
      def cloudflare_blocked?(response)
        return true if response.nil?

        body = response.body
        return true if body.blank?

        # scrub first: downcase raises ArgumentError on invalid byte
        # sequences, and nothing rescues errors raised here.
        snippet = body[0, SNIFF_LIMIT].scrub.downcase
        CLOUDFLARE_MARKERS.any? { |marker| snippet.include?(marker.downcase) }
      end

      # Builds a Cookie header value ("a=1; b=2") from the name=value part of
      # each set-cookie entry; nil when there is nothing to replay.
      def extract_cookies(resp)
        set_cookie = resp&.headers&.dig("set-cookie")
        return if set_cookie.blank?

        Array(set_cookie).filter_map { |cookie|
          cookie.to_s.split(";").first.presence
        }.join("; ").presence
      end
    end
  end
end
|
|
@@ -4,6 +4,17 @@ module SourceMonitor
|
|
|
4
4
|
module Fetching
|
|
5
5
|
class FeedFetcher
|
|
6
6
|
class SourceUpdater
|
|
7
|
+
CONSECUTIVE_FAILURE_PAUSE_THRESHOLD = 5
|
|
8
|
+
|
|
9
|
+
ERROR_CATEGORY_MAP = {
|
|
10
|
+
SourceMonitor::Fetching::TimeoutError => "network",
|
|
11
|
+
SourceMonitor::Fetching::ConnectionError => "network",
|
|
12
|
+
SourceMonitor::Fetching::ParsingError => "parse",
|
|
13
|
+
SourceMonitor::Fetching::BlockedError => "blocked",
|
|
14
|
+
SourceMonitor::Fetching::AuthenticationError => "auth",
|
|
15
|
+
SourceMonitor::Fetching::UnexpectedResponseError => "unknown"
|
|
16
|
+
}.freeze
|
|
17
|
+
|
|
7
18
|
attr_reader :source, :adaptive_interval
|
|
8
19
|
|
|
9
20
|
def initialize(source:, adaptive_interval:)
|
|
@@ -19,6 +30,7 @@ module SourceMonitor
|
|
|
19
30
|
last_error: nil,
|
|
20
31
|
last_error_at: nil,
|
|
21
32
|
failure_count: 0,
|
|
33
|
+
consecutive_fetch_failures: 0,
|
|
22
34
|
feed_format: derive_feed_format(feed)
|
|
23
35
|
}
|
|
24
36
|
|
|
@@ -47,7 +59,8 @@ module SourceMonitor
|
|
|
47
59
|
last_http_status: response.status,
|
|
48
60
|
last_error: nil,
|
|
49
61
|
last_error_at: nil,
|
|
50
|
-
failure_count: 0
|
|
62
|
+
failure_count: 0,
|
|
63
|
+
consecutive_fetch_failures: 0
|
|
51
64
|
}
|
|
52
65
|
|
|
53
66
|
if (etag = response.headers["etag"] || response.headers["ETag"])
|
|
@@ -74,13 +87,15 @@ module SourceMonitor
|
|
|
74
87
|
last_http_status: error.http_status,
|
|
75
88
|
last_error: error.message,
|
|
76
89
|
last_error_at: now,
|
|
77
|
-
failure_count: source.failure_count.to_i + 1
|
|
90
|
+
failure_count: source.failure_count.to_i + 1,
|
|
91
|
+
consecutive_fetch_failures: source.consecutive_fetch_failures.to_i + 1
|
|
78
92
|
}
|
|
79
93
|
|
|
80
94
|
adaptive_interval.apply_adaptive_interval!(attrs, content_changed: false, failure: true)
|
|
81
95
|
attrs[:metadata] = updated_metadata
|
|
82
96
|
decision = apply_retry_strategy!(attrs, error, now)
|
|
83
97
|
source.update!(attrs)
|
|
98
|
+
check_consecutive_failure_auto_pause!
|
|
84
99
|
decision
|
|
85
100
|
end
|
|
86
101
|
|
|
@@ -101,6 +116,7 @@ module SourceMonitor
|
|
|
101
116
|
error_class: error&.class&.name,
|
|
102
117
|
error_message: error&.message,
|
|
103
118
|
error_backtrace: error_backtrace(error),
|
|
119
|
+
error_category: categorize_error(error),
|
|
104
120
|
metadata: feed_metadata(feed, error: error, feed_signature: feed_signature, item_errors: item_errors)
|
|
105
121
|
)
|
|
106
122
|
end
|
|
@@ -139,6 +155,56 @@ module SourceMonitor
|
|
|
139
155
|
attributes[:fetch_circuit_until] = nil
|
|
140
156
|
end
|
|
141
157
|
|
|
158
|
+
# Auto-pauses the source once it has reached
# CONSECUTIVE_FAILURE_PAUSE_THRESHOLD consecutive fetch failures, unless an
# auto-pause is already in effect.
#
# The pause lasts health.auto_pause_cooldown_minutes (at least 1 minute) and
# also pushes backoff_until / next_fetch_at to the same time. Best-effort:
# any error is logged and swallowed so the surrounding failure handling can
# continue.
def check_consecutive_failure_auto_pause!
  # to_i mirrors the increment site: a nil counter means "no failures" and
  # should not end up in the rescue below as a NoMethodError.
  return if source.consecutive_fetch_failures.to_i < CONSECUTIVE_FAILURE_PAUSE_THRESHOLD
  return if source.auto_paused_until&.future?

  now = Time.current
  cooldown = [ SourceMonitor.config.health.auto_pause_cooldown_minutes.to_i, 1 ].max
  pause_until = now + cooldown.minutes

  # update_columns writes straight to the database (no validations or
  # callbacks).
  source.update_columns(
    auto_paused_until: pause_until,
    auto_paused_at: now,
    health_status: "failing",
    health_status_changed_at: now,
    backoff_until: pause_until,
    next_fetch_at: pause_until
  )

  notify_auto_pause(now)
rescue StandardError => error
  Rails.logger.error(
    "[SourceMonitor::SourceUpdater] Auto-pause check failed for source #{source.id}: #{error.message}"
  ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
end
|
|
181
|
+
|
|
182
|
+
# Records and announces an auto-pause: writes a failed fetch log entry
# stamped with `timestamp`, then broadcasts a warning toast and the source
# itself. Any error along the way is logged as a warning and swallowed.
def notify_auto_pause(timestamp)
  summary = "Source '#{source.name}' auto-paused after #{CONSECUTIVE_FAILURE_PAUSE_THRESHOLD} consecutive fetch failures"

  log_attributes = {
    success: false,
    started_at: timestamp,
    completed_at: timestamp,
    duration_ms: 0,
    http_status: nil,
    error_class: "SourceMonitor::AutoPause",
    error_message: summary,
    metadata: { event: "auto_pause", consecutive_failures: source.consecutive_fetch_failures }
  }
  source.fetch_logs.create!(log_attributes)

  SourceMonitor::Realtime.broadcast_toast(message: "#{summary}.", level: :warning, delay_ms: 8000)
  SourceMonitor::Realtime.broadcast_source(source)
rescue StandardError => error
  return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

  Rails.logger.warn(
    "[SourceMonitor::SourceUpdater] Auto-pause notification failed for source #{source.id}: #{error.message}"
  )
end
|
|
207
|
+
|
|
142
208
|
def enqueue_favicon_fetch_if_needed
|
|
143
209
|
return unless defined?(ActiveStorage)
|
|
144
210
|
return unless SourceMonitor.config.favicons.enabled?
|
|
@@ -192,6 +258,20 @@ module SourceMonitor
|
|
|
192
258
|
nil
|
|
193
259
|
end
|
|
194
260
|
|
|
261
|
+
# Maps a fetch error to the category string stored on the fetch log.
#
# * nil error                -> nil
# * HTTPError                -> "auth" for status 401/403, otherwise "network"
# * class in ERROR_CATEGORY_MAP, or a subclass of one -> the mapped category
# * any other FetchError     -> "unknown"
# * anything else            -> nil
def categorize_error(error)
  return if error.nil?

  if error.is_a?(SourceMonitor::Fetching::HTTPError)
    return [ 401, 403 ].include?(error.status.to_i) ? "auth" : "network"
  end

  # An exact class match wins; otherwise fall back to the first mapped
  # ancestor so subclasses of mapped errors are not reported as "unknown".
  ERROR_CATEGORY_MAP.fetch(error.class) do
    _klass, inherited = ERROR_CATEGORY_MAP.find { |klass, _category| error.is_a?(klass) }
    inherited || ("unknown" if error.is_a?(SourceMonitor::Fetching::FetchError))
  end
end
|
|
274
|
+
|
|
195
275
|
def derive_feed_format(feed)
|
|
196
276
|
return unless feed
|
|
197
277
|
|