source_monitor 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +15 -0
  3. data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +3 -3
  4. data/.claude/skills/sm-configure/reference/configuration-reference.md +3 -3
  5. data/.claude/skills/sm-domain-model/SKILL.md +2 -2
  6. data/.claude/skills/sm-domain-model/reference/table-structure.md +3 -1
  7. data/.claude/skills/sm-engine-migration/SKILL.md +1 -1
  8. data/.claude/skills/sm-engine-migration/reference/migration-conventions.md +1 -1
  9. data/.claude/skills/sm-health-rule/SKILL.md +18 -21
  10. data/.claude/skills/sm-health-rule/reference/health-system.md +1 -1
  11. data/.claude/skills/sm-host-setup/reference/initializer-template.md +2 -2
  12. data/.claude/skills/sm-upgrade/reference/version-history.md +17 -12
  13. data/CHANGELOG.md +42 -0
  14. data/CLAUDE.md +2 -2
  15. data/Gemfile +1 -0
  16. data/Gemfile.lock +4 -1
  17. data/README.md +3 -3
  18. data/VERSION +1 -1
  19. data/app/assets/builds/source_monitor/application.css +132 -12
  20. data/app/assets/builds/source_monitor/application.js +25 -1
  21. data/app/assets/builds/source_monitor/application.js.map +2 -2
  22. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +8 -0
  23. data/app/assets/javascripts/source_monitor/controllers/select_all_controller.js +22 -2
  24. data/app/assets/stylesheets/source_monitor/application.tailwind.css +1 -1
  25. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +57 -0
  26. data/app/controllers/source_monitor/dashboard_controller.rb +10 -1
  27. data/app/controllers/source_monitor/import_history_dismissals_controller.rb +20 -0
  28. data/app/controllers/source_monitor/source_retries_controller.rb +10 -2
  29. data/app/controllers/source_monitor/source_scrape_tests_controller.rb +73 -0
  30. data/app/controllers/source_monitor/sources_controller.rb +51 -9
  31. data/app/helpers/source_monitor/application_helper.rb +24 -0
  32. data/app/helpers/source_monitor/health_badge_helper.rb +7 -20
  33. data/app/jobs/source_monitor/fetch_feed_job.rb +32 -3
  34. data/app/jobs/source_monitor/source_health_check_job.rb +1 -1
  35. data/app/models/source_monitor/fetch_log.rb +4 -0
  36. data/app/models/source_monitor/import_history.rb +2 -0
  37. data/app/models/source_monitor/source.rb +47 -2
  38. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +94 -68
  39. data/app/views/source_monitor/dashboard/_scrape_recommendations.html.erb +17 -0
  40. data/app/views/source_monitor/dashboard/_stats.html.erb +19 -0
  41. data/app/views/source_monitor/dashboard/index.html.erb +7 -1
  42. data/app/views/source_monitor/import_sessions/health_check/_row.html.erb +2 -2
  43. data/app/views/source_monitor/shared/_pagination.html.erb +74 -0
  44. data/app/views/source_monitor/source_scrape_tests/_result.html.erb +81 -0
  45. data/app/views/source_monitor/source_scrape_tests/show.html.erb +60 -0
  46. data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +29 -0
  47. data/app/views/source_monitor/sources/_details.html.erb +19 -1
  48. data/app/views/source_monitor/sources/_empty_state_row.html.erb +1 -1
  49. data/app/views/source_monitor/sources/_import_history_panel.html.erb +12 -5
  50. data/app/views/source_monitor/sources/_row.html.erb +34 -6
  51. data/app/views/source_monitor/sources/index.html.erb +184 -132
  52. data/config/brakeman.ignore +11 -1
  53. data/config/routes.rb +5 -0
  54. data/db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb +7 -0
  55. data/db/migrate/20260306233004_add_error_category_to_fetch_logs.rb +8 -0
  56. data/db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb +11 -0
  57. data/db/migrate/20260312120000_simplify_health_status_values.rb +20 -0
  58. data/docs/configuration.md +9 -1
  59. data/docs/troubleshooting.md +9 -0
  60. data/docs/upgrade.md +31 -0
  61. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +2 -3
  62. data/lib/source_monitor/analytics/scrape_recommendations.rb +27 -0
  63. data/lib/source_monitor/configuration/health_settings.rb +0 -2
  64. data/lib/source_monitor/configuration/scraping_settings.rb +8 -1
  65. data/lib/source_monitor/dashboard/queries/stats_query.rb +12 -1
  66. data/lib/source_monitor/dashboard/queries.rb +6 -3
  67. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +6 -5
  68. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +40 -54
  69. data/lib/source_monitor/favicons/discoverer.rb +16 -0
  70. data/lib/source_monitor/favicons/svg_converter.rb +60 -0
  71. data/lib/source_monitor/fetching/cloudflare_bypass.rb +79 -0
  72. data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +82 -2
  73. data/lib/source_monitor/fetching/feed_fetcher.rb +55 -1
  74. data/lib/source_monitor/fetching/fetch_error.rb +27 -0
  75. data/lib/source_monitor/fetching/fetch_runner.rb +4 -0
  76. data/lib/source_monitor/fetching/retry_policy.rb +4 -0
  77. data/lib/source_monitor/health/import_source_health_check.rb +3 -3
  78. data/lib/source_monitor/health/source_health_monitor.rb +9 -14
  79. data/lib/source_monitor/health/source_health_reset.rb +1 -1
  80. data/lib/source_monitor/pagination/paginator.rb +18 -1
  81. data/lib/source_monitor/version.rb +1 -1
  82. data/lib/source_monitor.rb +3 -0
  83. metadata +17 -1
@@ -85,10 +85,9 @@ SourceMonitor.configure do |config|
85
85
 
86
86
  # ---- Source health monitoring ---------------------------------------
87
87
  # Tune how many fetches SourceMonitor evaluates when determining health
88
- # status, as well as thresholds for warnings and automatic pauses.
88
+ # status, as well as thresholds for automatic pauses.
89
89
  config.health.window_size = 20
90
- config.health.healthy_threshold = 0.8
91
- config.health.warning_threshold = 0.5
90
+ config.health.healthy_threshold = 0.8 # Ratio for "working" status
92
91
  config.health.auto_pause_threshold = 0.2
93
92
  config.health.auto_resume_threshold = 0.6
94
93
  config.health.auto_pause_cooldown_minutes = 60
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ module Analytics
5
+ class ScrapeRecommendations
6
+ def initialize(threshold: SourceMonitor.config.scraping.scrape_recommendation_threshold)
7
+ @threshold = threshold.to_i
8
+ end
9
+
10
+ def candidates_count
11
+ @candidates_count ||= Source.scrape_candidates(threshold: @threshold).count
12
+ end
13
+
14
+ def candidate_ids
15
+ @candidate_ids ||= Source.scrape_candidates(threshold: @threshold).pluck(:id)
16
+ end
17
+
18
+ def candidate?(source_id)
19
+ candidate_ids.include?(source_id)
20
+ end
21
+
22
+ private
23
+
24
+ attr_reader :threshold
25
+ end
26
+ end
27
+ end
@@ -5,7 +5,6 @@ module SourceMonitor
5
5
  class HealthSettings
6
6
  attr_accessor :window_size,
7
7
  :healthy_threshold,
8
- :warning_threshold,
9
8
  :auto_pause_threshold,
10
9
  :auto_resume_threshold,
11
10
  :auto_pause_cooldown_minutes
@@ -17,7 +16,6 @@ module SourceMonitor
17
16
  def reset!
18
17
  @window_size = 20
19
18
  @healthy_threshold = 0.8
20
- @warning_threshold = 0.5
21
19
  @auto_pause_threshold = 0.2
22
20
  @auto_resume_threshold = 0.6
23
21
  @auto_pause_cooldown_minutes = 60
@@ -3,11 +3,13 @@
3
3
  module SourceMonitor
4
4
  class Configuration
5
5
  class ScrapingSettings
6
- attr_accessor :max_in_flight_per_source, :max_bulk_batch_size, :min_scrape_interval
6
+ attr_accessor :max_in_flight_per_source, :max_bulk_batch_size, :min_scrape_interval,
7
+ :scrape_recommendation_threshold
7
8
 
8
9
  DEFAULT_MAX_IN_FLIGHT = nil
9
10
  DEFAULT_MAX_BULK_BATCH_SIZE = 100
10
11
  DEFAULT_MIN_SCRAPE_INTERVAL = 1.0
12
+ DEFAULT_SCRAPE_RECOMMENDATION_THRESHOLD = 200
11
13
 
12
14
  def initialize
13
15
  reset!
@@ -17,6 +19,7 @@ module SourceMonitor
17
19
  @max_in_flight_per_source = DEFAULT_MAX_IN_FLIGHT
18
20
  @max_bulk_batch_size = DEFAULT_MAX_BULK_BATCH_SIZE
19
21
  @min_scrape_interval = DEFAULT_MIN_SCRAPE_INTERVAL
22
+ @scrape_recommendation_threshold = DEFAULT_SCRAPE_RECOMMENDATION_THRESHOLD
20
23
  end
21
24
 
22
25
  def max_in_flight_per_source=(value)
@@ -31,6 +34,10 @@ module SourceMonitor
31
34
  @min_scrape_interval = normalize_numeric_float(value)
32
35
  end
33
36
 
37
+ def scrape_recommendation_threshold=(value)
38
+ @scrape_recommendation_threshold = normalize_numeric(value)
39
+ end
40
+
34
41
  private
35
42
 
36
43
  def normalize_numeric(value)
@@ -14,7 +14,9 @@ module SourceMonitor
14
14
  active_sources: integer_value(source_counts["active_sources"]),
15
15
  failed_sources: integer_value(source_counts["failed_sources"]),
16
16
  total_items: total_items_count,
17
- fetches_today: fetches_today_count
17
+ fetches_today: fetches_today_count,
18
+ health_distribution: health_distribution,
19
+ scrape_candidates_count: scrape_candidates_count
18
20
  }
19
21
  end
20
22
 
@@ -62,6 +64,15 @@ module SourceMonitor
62
64
  reference_time.in_time_zone.beginning_of_day
63
65
  end
64
66
 
67
+ def health_distribution
68
+ raw_counts = SourceMonitor::Source.active.group(:health_status).count
69
+ %w[working declining improving failing].each_with_object({}) { |s, h| h[s] = raw_counts.fetch(s, 0) }
70
+ end
71
+
72
+ def scrape_candidates_count
73
+ SourceMonitor::Analytics::ScrapeRecommendations.new.candidates_count
74
+ end
75
+
65
76
  def integer_value(value)
66
77
  value.to_i
67
78
  end
@@ -60,10 +60,10 @@ module SourceMonitor
60
60
  end
61
61
  end
62
62
 
63
- def upcoming_fetch_schedule
64
- cache.fetch(:upcoming_fetch_schedule) do
63
+ def upcoming_fetch_schedule(pages: {})
64
+ cache.fetch([ :upcoming_fetch_schedule, pages ]) do
65
65
  measure(:upcoming_fetch_schedule) do
66
- SourceMonitor::Dashboard::UpcomingFetchSchedule.new(scope: SourceMonitor::Source.active)
66
+ SourceMonitor::Dashboard::UpcomingFetchSchedule.new(scope: SourceMonitor::Source.active, pages: pages)
67
67
  end
68
68
  end
69
69
  end
@@ -110,6 +110,9 @@ module SourceMonitor
110
110
  SourceMonitor::Metrics.gauge(:dashboard_stats_failed_sources, stats[:failed_sources])
111
111
  SourceMonitor::Metrics.gauge(:dashboard_stats_total_items, stats[:total_items])
112
112
  SourceMonitor::Metrics.gauge(:dashboard_stats_fetches_today, stats[:fetches_today])
113
+ stats[:health_distribution]&.each do |status, count|
114
+ SourceMonitor::Metrics.gauge(:"dashboard_stats_health_#{status}", count)
115
+ end
113
116
  end
114
117
 
115
118
  def queue_name_map
@@ -31,21 +31,22 @@ module SourceMonitor
31
31
 
32
32
  def fetch_event(event)
33
33
  domain = source_domain(event.source_feed_url)
34
+ label = domain ? "#{domain} \u2014 Fetch ##{event.id}" : "Fetch ##{event.id}"
34
35
  {
35
- label: "Fetch ##{event.id}",
36
+ label: label,
36
37
  description: "#{event.items_created.to_i} created / #{event.items_updated.to_i} updated",
37
38
  status: event.success? ? :success : :failure,
38
39
  type: :fetch,
39
40
  time: event.occurred_at,
40
- path: url_helpers.fetch_log_path(event.id),
41
- url_display: domain,
42
- url_href: event.source_feed_url
41
+ path: url_helpers.fetch_log_path(event.id)
43
42
  }
44
43
  end
45
44
 
46
45
  def scrape_event(event)
46
+ name = event.source_name.presence
47
+ label = name ? "#{name} \u2014 Scrape ##{event.id}" : "Scrape ##{event.id}"
47
48
  {
48
- label: "Scrape ##{event.id}",
49
+ label: label,
49
50
  description: (event.scraper_adapter.presence || "Scraper"),
50
51
  status: event.success? ? :success : :failure,
51
52
  type: :scrape,
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "source_monitor/pagination/paginator"
4
+
3
5
  module SourceMonitor
4
6
  module Dashboard
5
7
  class UpcomingFetchSchedule
@@ -12,6 +14,9 @@ module SourceMonitor
12
14
  :window_end,
13
15
  :include_unscheduled,
14
16
  :sources,
17
+ :page,
18
+ :has_next_page,
19
+ :has_previous_page,
15
20
  keyword_init: true
16
21
  ) do
17
22
  def empty?
@@ -27,11 +32,15 @@ module SourceMonitor
27
32
  { key: "240+", label: "240 minutes +", min_minutes: 240, max_minutes: nil, include_unscheduled: true }
28
33
  ].freeze
29
34
 
35
+ DEFAULT_PER_PAGE = 10
36
+
30
37
  attr_reader :scope, :reference_time
31
38
 
32
- def initialize(scope: SourceMonitor::Source.active, reference_time: Time.current)
39
+ def initialize(scope: SourceMonitor::Source.active, reference_time: Time.current, pages: {}, per_page: DEFAULT_PER_PAGE)
33
40
  @scope = scope
34
41
  @reference_time = reference_time
42
+ @pages = pages
43
+ @per_page = per_page
35
44
  end
36
45
 
37
46
  def groups
@@ -40,21 +49,22 @@ module SourceMonitor
40
49
 
41
50
  private
42
51
 
52
+ attr_reader :pages, :per_page
53
+
43
54
  def build_groups
44
- definitions = build_definitions
45
- scheduled_sources.each do |source|
46
- definition = definition_for(source.next_fetch_at)
47
- definitions[definition[:key]][:sources] << source if definition
48
- end
55
+ INTERVAL_DEFINITIONS.filter_map do |definition|
56
+ bucket_scope = scope_for_bucket(definition)
57
+ next unless bucket_scope.exists?
49
58
 
50
- unscheduled_sources.each do |source|
51
- definition = definitions.values.find { |value| value[:include_unscheduled] }
52
- next unless definition
59
+ page_number = pages.fetch(definition[:key], 1).to_i
60
+ page_number = 1 if page_number < 1
53
61
 
54
- definition[:sources] << source
55
- end
62
+ result = SourceMonitor::Pagination::Paginator.new(
63
+ scope: bucket_scope.order(:next_fetch_at, :name),
64
+ page: page_number,
65
+ per_page: per_page
66
+ ).paginate
56
67
 
57
- definitions.values.map do |definition|
58
68
  Group.new(
59
69
  key: definition[:key],
60
70
  label: definition[:label],
@@ -63,64 +73,40 @@ module SourceMonitor
63
73
  window_start: window_start_for(definition[:min_minutes]),
64
74
  window_end: window_end_for(definition[:max_minutes]),
65
75
  include_unscheduled: definition[:include_unscheduled],
66
- sources: sort_sources(definition[:sources])
76
+ sources: result.records,
77
+ page: result.page,
78
+ has_next_page: result.has_next_page,
79
+ has_previous_page: result.has_previous_page
67
80
  )
68
81
  end
69
82
  end
70
83
 
71
- def build_definitions
72
- INTERVAL_DEFINITIONS.each_with_object({}) do |definition, memo|
73
- memo[definition[:key]] = definition.merge(sources: [])
74
- end
75
- end
76
-
77
- def scheduled_sources
78
- scope.where.not(next_fetch_at: nil).order(:next_fetch_at)
79
- end
80
-
81
- def unscheduled_sources
82
- scope.where(next_fetch_at: nil).order(:name)
83
- end
84
-
85
- def definition_for(next_fetch_at)
86
- minutes = minutes_until(next_fetch_at)
87
-
88
- INTERVAL_DEFINITIONS.find do |definition|
89
- min = definition[:min_minutes]
90
- max = definition[:max_minutes]
91
-
92
- minutes >= min && (max.nil? || minutes < max)
84
+ def scope_for_bucket(definition)
85
+ window_start = reference_time + definition[:min_minutes].minutes
86
+ max_minutes = definition[:max_minutes]
87
+
88
+ if max_minutes.nil?
89
+ # Last bucket: 240+ minutes OR unscheduled (nil next_fetch_at)
90
+ scheduled = scope.where(next_fetch_at: window_start..)
91
+ unscheduled = scope.where(next_fetch_at: nil)
92
+ scheduled.or(unscheduled)
93
+ else
94
+ window_end = reference_time + max_minutes.minutes
95
+ scope.where(next_fetch_at: window_start...window_end)
93
96
  end
94
97
  end
95
98
 
96
- def minutes_until(timestamp)
97
- return Float::INFINITY if timestamp.blank?
98
-
99
- minutes = (timestamp - reference_time) / 60.0
100
- return 0 if minutes.negative?
101
-
102
- minutes
103
- end
104
-
105
99
  def window_start_for(min_minutes)
106
- return nil if min_minutes.nil? || min_minutes.infinite?
100
+ return nil if min_minutes.nil?
107
101
 
108
102
  reference_time + min_minutes.minutes
109
103
  end
110
104
 
111
105
  def window_end_for(max_minutes)
112
- return nil if max_minutes.nil? || max_minutes.infinite?
106
+ return nil if max_minutes.nil?
113
107
 
114
108
  reference_time + max_minutes.minutes
115
109
  end
116
-
117
- def sort_sources(sources)
118
- future_cap = reference_time + 100.years
119
-
120
- sources.sort_by do |source|
121
- [ source.next_fetch_at || future_cap, source.name.to_s ]
122
- end
123
- end
124
110
  end
125
111
  end
126
112
  end
@@ -144,6 +144,10 @@ module SourceMonitor
144
144
  return unless body && body.bytesize > 0
145
145
  return if body.bytesize > settings.max_download_size
146
146
 
147
+ if content_type == "image/svg+xml"
148
+ return convert_svg_to_result(body, url)
149
+ end
150
+
147
151
  filename = derive_filename(url, content_type)
148
152
 
149
153
  Result.new(
@@ -156,6 +160,18 @@ module SourceMonitor
156
160
  nil
157
161
  end
158
162
 
163
+ def convert_svg_to_result(svg_body, url)
164
+ converted = SvgConverter.call(svg_body, filename: derive_filename(url, "image/svg+xml"))
165
+ return nil unless converted
166
+
167
+ Result.new(
168
+ io: converted[:io],
169
+ filename: converted[:filename],
170
+ content_type: converted[:content_type],
171
+ url: url
172
+ )
173
+ end
174
+
159
175
  def derive_filename(favicon_url, content_type)
160
176
  uri = URI.parse(favicon_url)
161
177
  basename = File.basename(uri.path) if uri.path.present?
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ module Favicons
5
+ class SvgConverter
6
+ PNG_CONTENT_TYPE = "image/png"
7
+ DEFAULT_SIZE = 64
8
+
9
+ # Converts an SVG string to PNG bytes using MiniMagick.
10
+ # Returns a Hash with :io, :content_type, :filename or nil on failure.
11
+ def self.call(svg_body, filename: "favicon.png", size: DEFAULT_SIZE)
12
+ return nil unless defined?(MiniMagick)
13
+
14
+ new(svg_body, filename: filename, size: size).call
15
+ end
16
+
17
+ def initialize(svg_body, filename:, size:)
18
+ @svg_body = svg_body
19
+ @filename = filename.sub(/\.svg\z/i, ".png")
20
+ @size = size
21
+ end
22
+
23
+ def call
24
+ convert_svg_to_png
25
+ rescue StandardError => e
26
+ log_conversion_failure(e)
27
+ nil
28
+ end
29
+
30
+ private
31
+
32
+ def convert_svg_to_png
33
+ image = MiniMagick::Image.read(@svg_body, ".svg")
34
+ image.format("png")
35
+ image.resize("#{@size}x#{@size}")
36
+
37
+ png_bytes = image.to_blob
38
+
39
+ return nil if png_bytes.nil? || png_bytes.empty?
40
+
41
+ {
42
+ io: StringIO.new(png_bytes),
43
+ content_type: PNG_CONTENT_TYPE,
44
+ filename: @filename
45
+ }
46
+ ensure
47
+ image&.destroy!
48
+ end
49
+
50
+ def log_conversion_failure(error)
51
+ return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
52
+
53
+ Rails.logger.warn(
54
+ "[SourceMonitor::Favicons::SvgConverter] SVG conversion failed: #{error.message}"
55
+ )
56
+ rescue StandardError # rubocop:disable Lint/SuppressedException
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ module Fetching
5
+ class CloudflareBypass
6
+ USER_AGENTS = [
7
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
8
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15",
9
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
10
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
11
+ ].freeze
12
+
13
+ CLOUDFLARE_MARKERS = FeedFetcher::CLOUDFLARE_MARKERS
14
+ SNIFF_LIMIT = FeedFetcher::SNIFF_LIMIT
15
+
16
+ attr_reader :response, :feed_url
17
+
18
+ def initialize(response:, feed_url:)
19
+ @response = response
20
+ @feed_url = feed_url
21
+ end
22
+
23
+ def call
24
+ attempt_cookie_replay || attempt_ua_rotation
25
+ end
26
+
27
+ private
28
+
29
+ def attempt_cookie_replay
30
+ cookies = extract_cookies(response)
31
+ return if cookies.blank?
32
+
33
+ headers = { "Cookie" => cookies, "Cache-Control" => "no-cache", "Pragma" => "no-cache" }
34
+ result = fetch_with_headers(headers)
35
+ result unless cloudflare_blocked?(result)
36
+ end
37
+
38
+ def attempt_ua_rotation
39
+ USER_AGENTS.each do |ua|
40
+ headers = {
41
+ "User-Agent" => ua,
42
+ "Cache-Control" => "no-cache",
43
+ "Pragma" => "no-cache"
44
+ }
45
+ result = fetch_with_headers(headers)
46
+ return result unless cloudflare_blocked?(result)
47
+ end
48
+
49
+ nil
50
+ end
51
+
52
+ def fetch_with_headers(headers)
53
+ client = SourceMonitor::HTTP.client(headers: headers, retry_requests: false)
54
+ client.get(feed_url)
55
+ rescue StandardError
56
+ nil
57
+ end
58
+
59
+ def cloudflare_blocked?(response)
60
+ return true if response.nil?
61
+
62
+ body = response.body
63
+ return true if body.blank?
64
+
65
+ snippet = body[0, SNIFF_LIMIT].downcase
66
+ CLOUDFLARE_MARKERS.any? { |marker| snippet.include?(marker.downcase) }
67
+ end
68
+
69
+ def extract_cookies(resp)
70
+ set_cookie = resp&.headers&.dig("set-cookie")
71
+ return if set_cookie.blank?
72
+
73
+ Array(set_cookie).filter_map { |cookie|
74
+ cookie.to_s.split(";").first.presence
75
+ }.join("; ").presence
76
+ end
77
+ end
78
+ end
79
+ end
@@ -4,6 +4,17 @@ module SourceMonitor
4
4
  module Fetching
5
5
  class FeedFetcher
6
6
  class SourceUpdater
7
+ CONSECUTIVE_FAILURE_PAUSE_THRESHOLD = 5
8
+
9
+ ERROR_CATEGORY_MAP = {
10
+ SourceMonitor::Fetching::TimeoutError => "network",
11
+ SourceMonitor::Fetching::ConnectionError => "network",
12
+ SourceMonitor::Fetching::ParsingError => "parse",
13
+ SourceMonitor::Fetching::BlockedError => "blocked",
14
+ SourceMonitor::Fetching::AuthenticationError => "auth",
15
+ SourceMonitor::Fetching::UnexpectedResponseError => "unknown"
16
+ }.freeze
17
+
7
18
  attr_reader :source, :adaptive_interval
8
19
 
9
20
  def initialize(source:, adaptive_interval:)
@@ -19,6 +30,7 @@ module SourceMonitor
19
30
  last_error: nil,
20
31
  last_error_at: nil,
21
32
  failure_count: 0,
33
+ consecutive_fetch_failures: 0,
22
34
  feed_format: derive_feed_format(feed)
23
35
  }
24
36
 
@@ -47,7 +59,8 @@ module SourceMonitor
47
59
  last_http_status: response.status,
48
60
  last_error: nil,
49
61
  last_error_at: nil,
50
- failure_count: 0
62
+ failure_count: 0,
63
+ consecutive_fetch_failures: 0
51
64
  }
52
65
 
53
66
  if (etag = response.headers["etag"] || response.headers["ETag"])
@@ -74,13 +87,15 @@ module SourceMonitor
74
87
  last_http_status: error.http_status,
75
88
  last_error: error.message,
76
89
  last_error_at: now,
77
- failure_count: source.failure_count.to_i + 1
90
+ failure_count: source.failure_count.to_i + 1,
91
+ consecutive_fetch_failures: source.consecutive_fetch_failures.to_i + 1
78
92
  }
79
93
 
80
94
  adaptive_interval.apply_adaptive_interval!(attrs, content_changed: false, failure: true)
81
95
  attrs[:metadata] = updated_metadata
82
96
  decision = apply_retry_strategy!(attrs, error, now)
83
97
  source.update!(attrs)
98
+ check_consecutive_failure_auto_pause!
84
99
  decision
85
100
  end
86
101
 
@@ -101,6 +116,7 @@ module SourceMonitor
101
116
  error_class: error&.class&.name,
102
117
  error_message: error&.message,
103
118
  error_backtrace: error_backtrace(error),
119
+ error_category: categorize_error(error),
104
120
  metadata: feed_metadata(feed, error: error, feed_signature: feed_signature, item_errors: item_errors)
105
121
  )
106
122
  end
@@ -139,6 +155,56 @@ module SourceMonitor
139
155
  attributes[:fetch_circuit_until] = nil
140
156
  end
141
157
 
158
+ def check_consecutive_failure_auto_pause!
159
+ return if source.consecutive_fetch_failures < CONSECUTIVE_FAILURE_PAUSE_THRESHOLD
160
+ return if source.auto_paused_until.present? && source.auto_paused_until.future?
161
+
162
+ now = Time.current
163
+ cooldown = [ SourceMonitor.config.health.auto_pause_cooldown_minutes.to_i, 1 ].max
164
+ pause_until = now + cooldown.minutes
165
+
166
+ source.update_columns(
167
+ auto_paused_until: pause_until,
168
+ auto_paused_at: now,
169
+ health_status: "failing",
170
+ health_status_changed_at: now,
171
+ backoff_until: pause_until,
172
+ next_fetch_at: pause_until
173
+ )
174
+
175
+ notify_auto_pause(now)
176
+ rescue StandardError => error
177
+ Rails.logger.error(
178
+ "[SourceMonitor::SourceUpdater] Auto-pause check failed for source #{source.id}: #{error.message}"
179
+ ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
180
+ end
181
+
182
+ def notify_auto_pause(timestamp)
183
+ message = "Source '#{source.name}' auto-paused after #{CONSECUTIVE_FAILURE_PAUSE_THRESHOLD} consecutive fetch failures"
184
+
185
+ source.fetch_logs.create!(
186
+ success: false,
187
+ started_at: timestamp,
188
+ completed_at: timestamp,
189
+ duration_ms: 0,
190
+ http_status: nil,
191
+ error_class: "SourceMonitor::AutoPause",
192
+ error_message: message,
193
+ metadata: { event: "auto_pause", consecutive_failures: source.consecutive_fetch_failures }
194
+ )
195
+
196
+ SourceMonitor::Realtime.broadcast_toast(
197
+ message: "#{message}.",
198
+ level: :warning,
199
+ delay_ms: 8000
200
+ )
201
+ SourceMonitor::Realtime.broadcast_source(source)
202
+ rescue StandardError => error
203
+ Rails.logger.warn(
204
+ "[SourceMonitor::SourceUpdater] Auto-pause notification failed for source #{source.id}: #{error.message}"
205
+ ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
206
+ end
207
+
142
208
  def enqueue_favicon_fetch_if_needed
143
209
  return unless defined?(ActiveStorage)
144
210
  return unless SourceMonitor.config.favicons.enabled?
@@ -192,6 +258,20 @@ module SourceMonitor
192
258
  nil
193
259
  end
194
260
 
261
+ def categorize_error(error)
262
+ return if error.nil?
263
+
264
+ if error.is_a?(SourceMonitor::Fetching::HTTPError)
265
+ status = error.status.to_i
266
+ return "auth" if status == 401 || status == 403
267
+ return "network"
268
+ end
269
+
270
+ ERROR_CATEGORY_MAP.fetch(error.class) do
271
+ error.is_a?(SourceMonitor::Fetching::FetchError) ? "unknown" : nil
272
+ end
273
+ end
274
+
195
275
  def derive_feed_format(feed)
196
276
  return unless feed
197
277