source_monitor 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.claude/commands/rails-audit.md +77 -0
  3. data/CHANGELOG.md +50 -0
  4. data/CLAUDE.md +2 -2
  5. data/Gemfile.lock +7 -20
  6. data/RAILS_AUDIT.md +424 -0
  7. data/VERSION +1 -1
  8. data/app/assets/builds/source_monitor/application.css +4 -24
  9. data/app/assets/builds/source_monitor/application.js +57 -89
  10. data/app/assets/builds/source_monitor/application.js.map +4 -4
  11. data/app/assets/javascripts/source_monitor/application.js +3 -6
  12. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +6 -86
  13. data/app/assets/javascripts/source_monitor/controllers/filter_submit_controller.js +13 -0
  14. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  15. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +3 -13
  16. data/app/components/source_monitor/application_component.rb +10 -0
  17. data/app/components/source_monitor/filter_dropdown_component.rb +62 -0
  18. data/app/components/source_monitor/icon_component.rb +140 -0
  19. data/app/components/source_monitor/status_badge_component.html.erb +8 -0
  20. data/app/components/source_monitor/status_badge_component.rb +96 -0
  21. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +4 -0
  22. data/app/controllers/concerns/source_monitor/set_source.rb +13 -0
  23. data/app/controllers/source_monitor/application_controller.rb +17 -0
  24. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +6 -10
  25. data/app/controllers/source_monitor/dashboard_controller.rb +5 -1
  26. data/app/controllers/source_monitor/import_history_dismissals_controller.rb +1 -1
  27. data/app/controllers/source_monitor/import_sessions_controller.rb +30 -9
  28. data/app/controllers/source_monitor/item_scrapes_controller.rb +70 -0
  29. data/app/controllers/source_monitor/items_controller.rb +2 -69
  30. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +1 -4
  31. data/app/controllers/source_monitor/source_favicon_fetches_controller.rb +2 -12
  32. data/app/controllers/source_monitor/source_fetches_controller.rb +1 -6
  33. data/app/controllers/source_monitor/source_health_checks_controller.rb +9 -16
  34. data/app/controllers/source_monitor/source_health_resets_controller.rb +1 -6
  35. data/app/controllers/source_monitor/source_retries_controller.rb +1 -6
  36. data/app/controllers/source_monitor/source_scrape_tests_controller.rb +2 -4
  37. data/app/controllers/source_monitor/source_turbo_responses.rb +1 -3
  38. data/app/controllers/source_monitor/sources_controller.rb +15 -20
  39. data/app/helpers/source_monitor/application_helper.rb +15 -31
  40. data/app/helpers/source_monitor/health_badge_helper.rb +8 -0
  41. data/app/jobs/source_monitor/download_content_images_job.rb +1 -59
  42. data/app/jobs/source_monitor/favicon_fetch_job.rb +1 -58
  43. data/app/jobs/source_monitor/fetch_feed_job.rb +2 -52
  44. data/app/jobs/source_monitor/import_opml_job.rb +6 -145
  45. data/app/jobs/source_monitor/import_session_health_check_job.rb +15 -76
  46. data/app/jobs/source_monitor/item_cleanup_job.rb +5 -0
  47. data/app/jobs/source_monitor/log_cleanup_job.rb +13 -2
  48. data/app/jobs/source_monitor/schedule_fetches_job.rb +8 -0
  49. data/app/jobs/source_monitor/scrape_item_job.rb +6 -52
  50. data/app/jobs/source_monitor/source_health_check_job.rb +1 -72
  51. data/app/models/concerns/source_monitor/loggable.rb +12 -0
  52. data/app/models/source_monitor/fetch_log.rb +0 -8
  53. data/app/models/source_monitor/health_check_log.rb +0 -8
  54. data/app/models/source_monitor/import_history.rb +14 -0
  55. data/app/models/source_monitor/import_session.rb +2 -0
  56. data/app/models/source_monitor/item.rb +15 -0
  57. data/app/models/source_monitor/item_content.rb +4 -3
  58. data/app/models/source_monitor/scrape_log.rb +4 -6
  59. data/app/models/source_monitor/source.rb +28 -19
  60. data/app/presenters/source_monitor/base_presenter.rb +19 -0
  61. data/app/presenters/source_monitor/source_details_presenter.rb +61 -0
  62. data/app/presenters/source_monitor/sources_filter_presenter.rb +61 -0
  63. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +3 -3
  64. data/app/views/source_monitor/dashboard/_stat_card.html.erb +2 -1
  65. data/app/views/source_monitor/dashboard/_stats.html.erb +5 -7
  66. data/app/views/source_monitor/items/_details.html.erb +11 -14
  67. data/app/views/source_monitor/items/index.html.erb +10 -35
  68. data/app/views/source_monitor/logs/index.html.erb +20 -41
  69. data/app/views/source_monitor/shared/_form_errors.html.erb +14 -0
  70. data/app/views/source_monitor/source_scrape_tests/_result.html.erb +1 -29
  71. data/app/views/source_monitor/source_scrape_tests/_result_content.html.erb +33 -0
  72. data/app/views/source_monitor/source_scrape_tests/show.html.erb +1 -29
  73. data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +2 -2
  74. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +7 -5
  75. data/app/views/source_monitor/sources/_details.html.erb +24 -52
  76. data/app/views/source_monitor/sources/_health_status_badge.html.erb +4 -6
  77. data/app/views/source_monitor/sources/_row.html.erb +7 -18
  78. data/app/views/source_monitor/sources/edit.html.erb +1 -10
  79. data/app/views/source_monitor/sources/index.html.erb +26 -46
  80. data/app/views/source_monitor/sources/new.html.erb +1 -10
  81. data/config/routes.rb +1 -1
  82. data/db/migrate/20260313120000_add_composite_indexes_to_log_tables.rb +14 -0
  83. data/db/migrate/20260314120000_align_health_status_default.rb +11 -0
  84. data/lib/source_monitor/analytics/sources_index_metrics.rb +15 -0
  85. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +10 -4
  86. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +21 -5
  87. data/lib/source_monitor/favicons/fetcher.rb +86 -0
  88. data/lib/source_monitor/fetching/cloudflare_bypass.rb +14 -5
  89. data/lib/source_monitor/fetching/completion/event_publisher.rb +12 -0
  90. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +15 -2
  91. data/lib/source_monitor/fetching/completion/retention_handler.rb +11 -3
  92. data/lib/source_monitor/fetching/feed_fetcher.rb +2 -21
  93. data/lib/source_monitor/fetching/fetch_runner.rb +12 -3
  94. data/lib/source_monitor/fetching/retry_orchestrator.rb +102 -0
  95. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +9 -0
  96. data/lib/source_monitor/health/source_health_check_orchestrator.rb +95 -0
  97. data/lib/source_monitor/health.rb +1 -0
  98. data/lib/source_monitor/images/downloader.rb +6 -7
  99. data/lib/source_monitor/images/processor.rb +98 -0
  100. data/lib/source_monitor/import_sessions/health_check_updater.rb +95 -0
  101. data/lib/source_monitor/import_sessions/opml_importer.rb +163 -0
  102. data/lib/source_monitor/items/item_creator.rb +0 -21
  103. data/lib/source_monitor/logs/query.rb +20 -0
  104. data/lib/source_monitor/queries/scrape_candidates_query.rb +30 -0
  105. data/lib/source_monitor/queries.rb +7 -0
  106. data/lib/source_monitor/scheduler.rb +5 -0
  107. data/lib/source_monitor/scraping/bulk_result_presenter.rb +11 -8
  108. data/lib/source_monitor/scraping/runner.rb +52 -0
  109. data/lib/source_monitor/scraping/scheduler.rb +5 -0
  110. data/lib/source_monitor/scraping/state.rb +4 -2
  111. data/lib/source_monitor/security/parameter_sanitizer.rb +7 -0
  112. data/lib/source_monitor/version.rb +1 -1
  113. data/lib/source_monitor.rb +7 -0
  114. data/source_monitor.gemspec +1 -0
  115. metadata +47 -1
@@ -1,16 +1,7 @@
1
1
  <div class="mx-auto max-w-2xl py-10">
2
2
  <h1 class="text-3xl font-semibold">New Source</h1>
3
3
 
4
- <% if @source.errors.any? %>
5
- <div class="mt-4 rounded border border-red-300 bg-red-50 p-4">
6
- <h2 class="font-medium text-red-700">Please fix the following:</h2>
7
- <ul class="mt-2 list-disc space-y-1 pl-5 text-red-700">
8
- <% @source.errors.full_messages.each do |message| %>
9
- <li><%= message %></li>
10
- <% end %>
11
- </ul>
12
- </div>
13
- <% end %>
4
+ <%= render "source_monitor/shared/form_errors", record: @source %>
14
5
 
15
6
  <div class="mt-6">
16
7
  <%= render "form", source: @source %>
data/config/routes.rb CHANGED
@@ -17,7 +17,7 @@ SourceMonitor::Engine.routes.draw do
17
17
  end
18
18
  end
19
19
  resources :items, only: %i[index show] do
20
- post :scrape, on: :member
20
+ resource :scrape, only: :create, controller: "item_scrapes"
21
21
  end
22
22
  resources :bulk_scrape_enablements, only: :create
23
23
  resources :sources do
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddCompositeIndexesToLogTables < ActiveRecord::Migration[8.0]
4
+ def change
5
+ add_index :sourcemon_fetch_logs, [ :source_id, :started_at ],
6
+ name: "index_fetch_logs_on_source_id_and_started_at"
7
+ add_index :sourcemon_scrape_logs, [ :source_id, :started_at ],
8
+ name: "index_scrape_logs_on_source_id_and_started_at"
9
+ add_index :sourcemon_scrape_logs, [ :item_id, :started_at ],
10
+ name: "index_scrape_logs_on_item_id_and_started_at"
11
+ add_index :sourcemon_health_check_logs, [ :source_id, :started_at ],
12
+ name: "index_health_check_logs_on_source_id_and_started_at"
13
+ end
14
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AlignHealthStatusDefault < ActiveRecord::Migration[8.1]
4
+ def up
5
+ change_column_default :sourcemon_sources, :health_status, from: "healthy", to: "working"
6
+ end
7
+
8
+ def down
9
+ change_column_default :sourcemon_sources, :health_status, from: "working", to: "healthy"
10
+ end
11
+ end
@@ -43,6 +43,21 @@ module SourceMonitor
43
43
  @item_activity_rates ||= SourceActivityRates.new(scope: result_scope, lookback:, now:).per_source_rates
44
44
  end
45
45
 
46
+ def word_count_averages(source_ids)
47
+ if source_ids.any?
48
+ base = SourceMonitor::ItemContent.joins(:item).where(sourcemon_items: { source_id: source_ids })
49
+ feed = base.where.not(feed_word_count: nil)
50
+ .group("sourcemon_items.source_id")
51
+ .average(:feed_word_count)
52
+ scraped = base.where.not(scraped_word_count: nil)
53
+ .group("sourcemon_items.source_id")
54
+ .average(:scraped_word_count)
55
+ { feed:, scraped: }
56
+ else
57
+ { feed: {}, scraped: {} }
58
+ end
59
+ end
60
+
46
61
  def fetch_interval_filter
47
62
  min = integer_param(search_params["fetch_interval_minutes_gteq"])
48
63
  max = integer_param(search_params["fetch_interval_minutes_lt"]) || integer_param(search_params["fetch_interval_minutes_lteq"])
@@ -43,7 +43,7 @@ module SourceMonitor
43
43
  end
44
44
 
45
45
  def sanitized_sql
46
- ActiveRecord::Base.send(:sanitize_sql_array, [ unified_sql_template, limit ])
46
+ ActiveRecord::Base.send(:sanitize_sql_array, [ unified_sql_template, limit, limit, limit, limit ])
47
47
  end
48
48
 
49
49
  def unified_sql_template
@@ -61,11 +61,11 @@ module SourceMonitor
61
61
  source_id,
62
62
  source_feed_url
63
63
  FROM (
64
- #{fetch_log_sql}
64
+ (#{fetch_log_sql})
65
65
  UNION ALL
66
- #{scrape_log_sql}
66
+ (#{scrape_log_sql})
67
67
  UNION ALL
68
- #{item_sql}
68
+ (#{item_sql})
69
69
  ) AS dashboard_events
70
70
  WHERE occurred_at IS NOT NULL
71
71
  ORDER BY occurred_at DESC
@@ -91,6 +91,8 @@ module SourceMonitor
91
91
  FROM #{SourceMonitor::FetchLog.quoted_table_name}
92
92
  LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
93
93
  ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::FetchLog.quoted_table_name}.source_id
94
+ ORDER BY #{SourceMonitor::FetchLog.quoted_table_name}.started_at DESC NULLS LAST
95
+ LIMIT ?
94
96
  SQL
95
97
  end
96
98
 
@@ -114,6 +116,8 @@ module SourceMonitor
114
116
  ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::ScrapeLog.quoted_table_name}.source_id
115
117
  LEFT JOIN #{SourceMonitor::Item.quoted_table_name}
116
118
  ON #{SourceMonitor::Item.quoted_table_name}.id = #{SourceMonitor::ScrapeLog.quoted_table_name}.item_id
119
+ ORDER BY #{SourceMonitor::ScrapeLog.quoted_table_name}.started_at DESC NULLS LAST
120
+ LIMIT ?
117
121
  SQL
118
122
  end
119
123
 
@@ -135,6 +139,8 @@ module SourceMonitor
135
139
  FROM #{SourceMonitor::Item.quoted_table_name}
136
140
  LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
137
141
  ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::Item.quoted_table_name}.source_id
142
+ ORDER BY #{SourceMonitor::Item.quoted_table_name}.created_at DESC NULLS LAST
143
+ LIMIT ?
138
144
  SQL
139
145
  end
140
146
 
@@ -22,17 +22,33 @@ module SourceMonitor
22
22
  @item_callback ||= lambda { |_event| broadcast_dashboard_updates }
23
23
  end
24
24
 
25
+ STAT_CARDS = [
26
+ { key: "total_sources", label: "Sources", stat: :total_sources, caption: "Total registered" },
27
+ { key: "active_sources", label: "Active", stat: :active_sources, caption: "Fetching on schedule" },
28
+ { key: "failed_sources", label: "Failures", stat: :failed_sources, caption: "Require attention" },
29
+ { key: "total_items", label: "Items", stat: :total_items, caption: "Stored entries" },
30
+ { key: "fetches_today", label: "Fetches Today", stat: :fetches_today, caption: "Completed runs" }
31
+ ].freeze
32
+
25
33
  def broadcast_dashboard_updates
26
34
  return unless turbo_streams_available?
27
35
 
28
36
  queries = SourceMonitor::Dashboard::Queries.new
29
37
  url_helpers = SourceMonitor::Engine.routes.url_helpers
38
+ stats = queries.stats
30
39
 
31
- Turbo::StreamsChannel.broadcast_replace_to(
32
- STREAM_NAME,
33
- target: "source_monitor_dashboard_stats",
34
- html: render_partial("source_monitor/dashboard/stats", stats: queries.stats)
35
- )
40
+ STAT_CARDS.each do |card|
41
+ Turbo::StreamsChannel.broadcast_replace_to(
42
+ STREAM_NAME,
43
+ target: "source_monitor_stat_#{card[:key]}",
44
+ html: render_partial("source_monitor/dashboard/stat_card", stat_card: {
45
+ key: card[:key],
46
+ label: card[:label],
47
+ value: stats[card[:stat]],
48
+ caption: card[:caption]
49
+ })
50
+ )
51
+ end
36
52
 
37
53
  Turbo::StreamsChannel.broadcast_replace_to(
38
54
  STREAM_NAME,
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ module Favicons
5
+ # Coordinates favicon fetching for a source: checks prerequisites
6
+ # (ActiveStorage, config, cooldown), delegates to Discoverer, and
7
+ # handles attachment or failure recording. Extracted from FaviconFetchJob.
8
+ class Fetcher
9
+ TRANSIENT_ERRORS = [
10
+ Timeout::Error, Errno::ETIMEDOUT,
11
+ Faraday::TimeoutError, Faraday::ConnectionFailed,
12
+ Net::OpenTimeout, Net::ReadTimeout
13
+ ].freeze
14
+
15
+ def initialize(source)
16
+ @source = source
17
+ end
18
+
19
+ def call
20
+ return unless defined?(ActiveStorage)
21
+ return unless SourceMonitor.config.favicons.enabled?
22
+ return if source.website_url.blank?
23
+ return if source.favicon.attached?
24
+ return if within_cooldown?
25
+
26
+ result = SourceMonitor::Favicons::Discoverer.new(source.website_url).call
27
+
28
+ if result
29
+ attach_favicon(result)
30
+ else
31
+ record_failed_attempt
32
+ end
33
+ rescue ActiveRecord::Deadlocked
34
+ raise
35
+ rescue *TRANSIENT_ERRORS => error
36
+ log_error("Transient error", error)
37
+ raise
38
+ rescue StandardError => error
39
+ record_failed_attempt
40
+ log_error("Failed", error)
41
+ end
42
+
43
+ private
44
+
45
+ attr_reader :source
46
+
47
+ def within_cooldown?
48
+ last_attempt = source.metadata&.dig("favicon_last_attempted_at")
49
+ return false if last_attempt.blank?
50
+
51
+ cooldown_days = SourceMonitor.config.favicons.retry_cooldown_days
52
+ Time.parse(last_attempt) > cooldown_days.days.ago
53
+ rescue ArgumentError, TypeError
54
+ false
55
+ end
56
+
57
+ def attach_favicon(result)
58
+ blob = ActiveStorage::Blob.create_and_upload!(
59
+ io: result.io,
60
+ filename: result.filename,
61
+ content_type: result.content_type
62
+ )
63
+ source.favicon.attach(blob)
64
+ end
65
+
66
+ def record_failed_attempt
67
+ metadata = (source.metadata || {}).merge(
68
+ "favicon_last_attempted_at" => Time.current.iso8601
69
+ )
70
+ source.update_column(:metadata, metadata)
71
+ rescue StandardError
72
+ nil
73
+ end
74
+
75
+ def log_error(prefix, error)
76
+ return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
77
+
78
+ Rails.logger.warn(
79
+ "[SourceMonitor::Favicons::Fetcher] #{prefix} for source #{source&.id}: #{error.class} - #{error.message}"
80
+ )
81
+ rescue StandardError
82
+ nil
83
+ end
84
+ end
85
+ end
86
+ end
@@ -12,12 +12,15 @@ module SourceMonitor
12
12
 
13
13
  CLOUDFLARE_MARKERS = FeedFetcher::CLOUDFLARE_MARKERS
14
14
  SNIFF_LIMIT = FeedFetcher::SNIFF_LIMIT
15
+ DEFAULT_MAX_ATTEMPTS = 2
16
+ BYPASS_TIMEOUT = 10
15
17
 
16
- attr_reader :response, :feed_url
18
+ attr_reader :response, :feed_url, :max_attempts
17
19
 
18
- def initialize(response:, feed_url:)
20
+ def initialize(response:, feed_url:, max_attempts: DEFAULT_MAX_ATTEMPTS)
19
21
  @response = response
20
22
  @feed_url = feed_url
23
+ @max_attempts = max_attempts
21
24
  end
22
25
 
23
26
  def call
@@ -36,7 +39,7 @@ module SourceMonitor
36
39
  end
37
40
 
38
41
  def attempt_ua_rotation
39
- USER_AGENTS.each do |ua|
42
+ USER_AGENTS.first(max_attempts).each do |ua|
40
43
  headers = {
41
44
  "User-Agent" => ua,
42
45
  "Cache-Control" => "no-cache",
@@ -50,9 +53,15 @@ module SourceMonitor
50
53
  end
51
54
 
52
55
  def fetch_with_headers(headers)
53
- client = SourceMonitor::HTTP.client(headers: headers, retry_requests: false)
56
+ client = SourceMonitor::HTTP.client(
57
+ headers: headers,
58
+ timeout: BYPASS_TIMEOUT,
59
+ open_timeout: [ BYPASS_TIMEOUT / 2, 5 ].min,
60
+ retry_requests: false
61
+ )
54
62
  client.get(feed_url)
55
- rescue StandardError
63
+ rescue StandardError => e
64
+ Rails.logger.warn("[SourceMonitor] CloudflareBypass request failed for #{feed_url}: #{e.class}: #{e.message}") if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
56
65
  nil
57
66
  end
58
67
 
@@ -5,12 +5,24 @@ module SourceMonitor
5
5
  module Completion
6
6
  # Publishes fetch completion events to the configured event dispatcher.
7
7
  class EventPublisher
8
+ Result = Struct.new(:status, :error, keyword_init: true) do
9
+ def success?
10
+ status != :failed
11
+ end
12
+ end
13
+
8
14
  def initialize(dispatcher: SourceMonitor::Events)
9
15
  @dispatcher = dispatcher
10
16
  end
11
17
 
12
18
  def call(source:, result:)
13
19
  dispatcher.after_fetch_completed(source: source, result: result)
20
+ Result.new(status: :published)
21
+ rescue StandardError => error
22
+ Rails.logger.error(
23
+ "[SourceMonitor::Fetching::Completion::EventPublisher] Event dispatch failed for source #{source.id}: #{error.class} - #{error.message}"
24
+ ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
25
+ Result.new(status: :failed, error: error)
14
26
  end
15
27
 
16
28
  private
@@ -5,25 +5,38 @@ module SourceMonitor
5
5
  module Completion
6
6
  # Enqueues follow-up scraping work for items created during a fetch.
7
7
  class FollowUpHandler
8
+ Result = Struct.new(:status, :enqueued_count, :errors, keyword_init: true) do
9
+ def success?
10
+ status != :failed
11
+ end
12
+ end
13
+
8
14
  def initialize(enqueuer_class: SourceMonitor::Scraping::Enqueuer, job_class: SourceMonitor::ScrapeItemJob)
9
15
  @enqueuer_class = enqueuer_class
10
16
  @job_class = job_class
11
17
  end
12
18
 
13
19
  def call(source:, result:)
14
- return unless should_enqueue?(source:, result:)
20
+ return Result.new(status: :skipped, enqueued_count: 0, errors: []) unless should_enqueue?(source:, result:)
21
+
22
+ enqueued = 0
23
+ errors = []
15
24
 
16
25
  Array(result.item_processing&.created_items).each do |item|
17
26
  next unless item.present? && item.scraped_at.nil?
18
27
 
19
28
  begin
20
29
  enqueuer_class.enqueue(item:, source:, job_class:, reason: :auto)
30
+ enqueued += 1
21
31
  rescue StandardError => error
32
+ errors << error
22
33
  Rails.logger.error(
23
- "[SourceMonitor] FollowUpHandler: failed to enqueue scrape for item #{item.id}: #{error.class}: #{error.message}"
34
+ "[SourceMonitor::Fetching::Completion::FollowUpHandler] Failed to enqueue scrape for item #{item.id}: #{error.class}: #{error.message}"
24
35
  ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
25
36
  end
26
37
  end
38
+
39
+ Result.new(status: :applied, enqueued_count: enqueued, errors: errors)
27
40
  end
28
41
 
29
42
  private
@@ -5,20 +5,28 @@ module SourceMonitor
5
5
  module Completion
6
6
  # Applies item retention after a fetch completes.
7
7
  class RetentionHandler
8
+ Result = Struct.new(:status, :removed_total, :error, keyword_init: true) do
9
+ def success?
10
+ status != :failed
11
+ end
12
+ end
13
+
8
14
  def initialize(pruner: SourceMonitor::Items::RetentionPruner)
9
15
  @pruner = pruner
10
16
  end
11
17
 
12
18
  def call(source:, result:) # rubocop:disable Lint/UnusedMethodArgument
13
- pruner.call(
19
+ pruner_result = pruner.call(
14
20
  source: source,
15
21
  strategy: SourceMonitor.config.retention.strategy
16
22
  )
23
+ removed = pruner_result.respond_to?(:removed_total) ? pruner_result.removed_total : 0
24
+ Result.new(status: :applied, removed_total: removed)
17
25
  rescue StandardError => error
18
26
  Rails.logger.error(
19
- "[SourceMonitor] Retention pruning failed for source #{source.id}: #{error.class} - #{error.message}"
27
+ "[SourceMonitor::Fetching::Completion::RetentionHandler] Retention pruning failed for source #{source.id}: #{error.class} - #{error.message}"
20
28
  )
21
- nil
29
+ Result.new(status: :failed, removed_total: 0, error: error)
22
30
  end
23
31
 
24
32
  private
@@ -27,13 +27,6 @@ module SourceMonitor
27
27
  )
28
28
  ResponseWrapper = Struct.new(:status, :headers, :body, keyword_init: true)
29
29
 
30
- MIN_FETCH_INTERVAL = AdaptiveInterval::MIN_FETCH_INTERVAL
31
- MAX_FETCH_INTERVAL = AdaptiveInterval::MAX_FETCH_INTERVAL
32
- INCREASE_FACTOR = AdaptiveInterval::INCREASE_FACTOR
33
- DECREASE_FACTOR = AdaptiveInterval::DECREASE_FACTOR
34
- FAILURE_INCREASE_FACTOR = AdaptiveInterval::FAILURE_INCREASE_FACTOR
35
- JITTER_PERCENT = AdaptiveInterval::JITTER_PERCENT
36
-
37
30
  attr_reader :source, :client, :jitter_proc
38
31
 
39
32
  def initialize(source:, client: nil, jitter: nil)
@@ -328,7 +321,8 @@ module SourceMonitor
328
321
 
329
322
  response = perform_request
330
323
  handle_response(response, started_at, instrumentation_payload)
331
- rescue StandardError
324
+ rescue StandardError => e
325
+ Rails.logger.warn("[SourceMonitor] AIA recovery failed for #{source.feed_url}: #{e.class}: #{e.message}") if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
332
326
  nil
333
327
  end
334
328
 
@@ -389,19 +383,6 @@ module SourceMonitor
389
383
  def entry_processor
390
384
  @entry_processor ||= EntryProcessor.new(source: source)
391
385
  end
392
-
393
- # Forwarding methods for backward compatibility with tests
394
- def process_feed_entries(feed) = entry_processor.process_feed_entries(feed)
395
- def jitter_offset(interval_seconds) = adaptive_interval.jitter_offset(interval_seconds)
396
- def adjusted_interval_with_jitter(interval_seconds) = adaptive_interval.adjusted_interval_with_jitter(interval_seconds)
397
- def updated_metadata(feed_signature: nil) = source_updater.updated_metadata(feed_signature: feed_signature)
398
- def feed_signature_changed?(feed_signature) = source_updater.feed_signature_changed?(feed_signature)
399
- def configured_seconds(minutes_value, default) = adaptive_interval.configured_seconds(minutes_value, default)
400
- def configured_positive(value, default) = adaptive_interval.configured_positive(value, default)
401
- def configured_non_negative(value, default) = adaptive_interval.configured_non_negative(value, default)
402
- def interval_minutes_for(interval_seconds) = adaptive_interval.interval_minutes_for(interval_seconds)
403
- def parse_http_time(value) = source_updater.parse_http_time(value)
404
- def extract_numeric(value) = adaptive_interval.extract_numeric(value)
405
386
  end
406
387
  end
407
388
  end
@@ -59,13 +59,13 @@ module SourceMonitor
59
59
  lock.with_lock do
60
60
  mark_fetching!
61
61
  result = fetcher_class.new(source: source).call
62
- retention_handler.call(source:, result:)
63
- follow_up_handler.call(source:, result:)
62
+ log_handler_result("RetentionHandler", retention_handler.call(source:, result:))
63
+ log_handler_result("FollowUpHandler", follow_up_handler.call(source:, result:))
64
64
  schedule_retry_if_needed(result)
65
65
  mark_complete!(result)
66
66
  end
67
67
 
68
- event_publisher.call(source:, result:)
68
+ log_handler_result("EventPublisher", event_publisher.call(source:, result:))
69
69
  result
70
70
  rescue SourceMonitor::Fetching::AdvisoryLock::NotAcquiredError => error
71
71
  raise ConcurrencyError, error.message
@@ -138,6 +138,15 @@ module SourceMonitor
138
138
  self.class.send(:update_source_state!, source, attrs)
139
139
  end
140
140
 
141
+ def log_handler_result(handler_name, handler_result)
142
+ return unless handler_result.respond_to?(:success?) && !handler_result.success?
143
+
144
+ error_detail = handler_result.respond_to?(:error) && handler_result.error ? ": #{handler_result.error.message}" : ""
145
+ Rails.logger.warn(
146
+ "[SourceMonitor::Fetching::FetchRunner] #{handler_name} failed for source #{source.id}#{error_detail}"
147
+ ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
148
+ end
149
+
141
150
  def schedule_retry_if_needed(result)
142
151
  decision = result&.retry_decision
143
152
  return unless decision&.retry?
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ module Fetching
5
+ # Executes retry/circuit-breaker decisions produced by RetryPolicy.
6
+ #
7
+ # Accepts a source, the original fetch error, and a RetryPolicy::Decision,
8
+ # then either enqueues a retry job, opens the circuit, or resets retry state.
9
+ #
10
+ # Returns a Result struct indicating which path was taken.
11
+ class RetryOrchestrator
12
+ Result = Struct.new(:status, :source, :error, :decision, keyword_init: true) do
13
+ def retry_enqueued?
14
+ status == :retry_enqueued
15
+ end
16
+
17
+ def circuit_opened?
18
+ status == :circuit_opened
19
+ end
20
+
21
+ def exhausted?
22
+ status == :exhausted
23
+ end
24
+ end
25
+
26
+ def self.call(source:, error:, decision:, job_class: SourceMonitor::FetchFeedJob, now: Time.current)
27
+ new(source:, error:, decision:, job_class:, now:).call
28
+ end
29
+
30
+ def initialize(source:, error:, decision:, job_class:, now:)
31
+ @source = source
32
+ @error = error
33
+ @decision = decision
34
+ @job_class = job_class
35
+ @now = now
36
+ end
37
+
38
+ def call
39
+ if decision.retry?
40
+ enqueue_retry!
41
+ elsif decision.open_circuit?
42
+ open_circuit!
43
+ else
44
+ reset_retry_state!
45
+ end
46
+ end
47
+
48
+ private
49
+
50
+ attr_reader :source, :error, :decision, :job_class, :now
51
+
52
+ def enqueue_retry!
53
+ retry_at = now + (decision.wait || 0)
54
+
55
+ source.with_lock do
56
+ source.reload
57
+ source.update!(
58
+ fetch_retry_attempt: decision.next_attempt,
59
+ fetch_circuit_opened_at: nil,
60
+ fetch_circuit_until: nil,
61
+ next_fetch_at: retry_at,
62
+ backoff_until: retry_at,
63
+ fetch_status: "queued"
64
+ )
65
+ end
66
+
67
+ job_class.set(wait: decision.wait || 0).perform_later(source.id)
68
+
69
+ Result.new(status: :retry_enqueued, source: source, error: error, decision: decision)
70
+ end
71
+
72
+ def open_circuit!
73
+ source.with_lock do
74
+ source.reload
75
+ source.update!(
76
+ fetch_retry_attempt: 0,
77
+ fetch_circuit_opened_at: now,
78
+ fetch_circuit_until: decision.circuit_until,
79
+ next_fetch_at: decision.circuit_until,
80
+ backoff_until: decision.circuit_until,
81
+ fetch_status: "failed"
82
+ )
83
+ end
84
+
85
+ Result.new(status: :circuit_opened, source: source, error: error, decision: decision)
86
+ end
87
+
88
+ def reset_retry_state!
89
+ source.with_lock do
90
+ source.reload
91
+ source.update!(
92
+ fetch_retry_attempt: 0,
93
+ fetch_circuit_opened_at: nil,
94
+ fetch_circuit_until: nil
95
+ )
96
+ end
97
+
98
+ Result.new(status: :exhausted, source: source, error: error, decision: decision)
99
+ end
100
+ end
101
+ end
102
+ end
@@ -102,6 +102,15 @@ module SourceMonitor
102
102
  return ::SolidQueue::Job.none unless jobs_supported?
103
103
 
104
104
  queue_name = SourceMonitor.queue_name(:fetch)
105
+ # SolidQueue stores job arguments as JSON in the `arguments` text column.
106
+ # The format is: {"job_class":"...", "arguments":[source_id, ...], ...}
107
+ # We cast to jsonb and extract the first positional argument to match
108
+ # jobs targeting this source.
109
+ #
110
+ # Tested against: SolidQueue 1.1.x (Rails 8.x). The serialization format
111
+ # is part of ActiveJob's serialize/deserialize contract. If SolidQueue
112
+ # changes its storage format, this query will silently return no matches
113
+ # (safe failure). Re-verify on SolidQueue major version upgrades.
105
114
  ::SolidQueue::Job.
106
115
  where(queue_name: queue_name).
107
116
  where("arguments::jsonb -> 'arguments' ->> 0 = ?", source.id.to_s)