source_monitor 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.claude/commands/rails-audit.md +77 -0
  3. data/CHANGELOG.md +50 -0
  4. data/CLAUDE.md +2 -2
  5. data/Gemfile.lock +7 -20
  6. data/RAILS_AUDIT.md +424 -0
  7. data/VERSION +1 -1
  8. data/app/assets/builds/source_monitor/application.css +4 -24
  9. data/app/assets/builds/source_monitor/application.js +57 -89
  10. data/app/assets/builds/source_monitor/application.js.map +4 -4
  11. data/app/assets/javascripts/source_monitor/application.js +3 -6
  12. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +6 -86
  13. data/app/assets/javascripts/source_monitor/controllers/filter_submit_controller.js +13 -0
  14. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  15. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +3 -13
  16. data/app/components/source_monitor/application_component.rb +10 -0
  17. data/app/components/source_monitor/filter_dropdown_component.rb +62 -0
  18. data/app/components/source_monitor/icon_component.rb +140 -0
  19. data/app/components/source_monitor/status_badge_component.html.erb +8 -0
  20. data/app/components/source_monitor/status_badge_component.rb +96 -0
  21. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +4 -0
  22. data/app/controllers/concerns/source_monitor/set_source.rb +13 -0
  23. data/app/controllers/source_monitor/application_controller.rb +17 -0
  24. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +6 -10
  25. data/app/controllers/source_monitor/dashboard_controller.rb +5 -1
  26. data/app/controllers/source_monitor/import_history_dismissals_controller.rb +1 -1
  27. data/app/controllers/source_monitor/import_sessions_controller.rb +30 -9
  28. data/app/controllers/source_monitor/item_scrapes_controller.rb +70 -0
  29. data/app/controllers/source_monitor/items_controller.rb +2 -69
  30. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +1 -4
  31. data/app/controllers/source_monitor/source_favicon_fetches_controller.rb +2 -12
  32. data/app/controllers/source_monitor/source_fetches_controller.rb +1 -6
  33. data/app/controllers/source_monitor/source_health_checks_controller.rb +9 -16
  34. data/app/controllers/source_monitor/source_health_resets_controller.rb +1 -6
  35. data/app/controllers/source_monitor/source_retries_controller.rb +1 -6
  36. data/app/controllers/source_monitor/source_scrape_tests_controller.rb +2 -4
  37. data/app/controllers/source_monitor/source_turbo_responses.rb +1 -3
  38. data/app/controllers/source_monitor/sources_controller.rb +15 -20
  39. data/app/helpers/source_monitor/application_helper.rb +15 -31
  40. data/app/helpers/source_monitor/health_badge_helper.rb +8 -0
  41. data/app/jobs/source_monitor/download_content_images_job.rb +1 -59
  42. data/app/jobs/source_monitor/favicon_fetch_job.rb +1 -58
  43. data/app/jobs/source_monitor/fetch_feed_job.rb +2 -52
  44. data/app/jobs/source_monitor/import_opml_job.rb +6 -145
  45. data/app/jobs/source_monitor/import_session_health_check_job.rb +15 -76
  46. data/app/jobs/source_monitor/item_cleanup_job.rb +5 -0
  47. data/app/jobs/source_monitor/log_cleanup_job.rb +13 -2
  48. data/app/jobs/source_monitor/schedule_fetches_job.rb +8 -0
  49. data/app/jobs/source_monitor/scrape_item_job.rb +6 -52
  50. data/app/jobs/source_monitor/source_health_check_job.rb +1 -72
  51. data/app/models/concerns/source_monitor/loggable.rb +12 -0
  52. data/app/models/source_monitor/fetch_log.rb +0 -8
  53. data/app/models/source_monitor/health_check_log.rb +0 -8
  54. data/app/models/source_monitor/import_history.rb +14 -0
  55. data/app/models/source_monitor/import_session.rb +2 -0
  56. data/app/models/source_monitor/item.rb +15 -0
  57. data/app/models/source_monitor/item_content.rb +4 -3
  58. data/app/models/source_monitor/scrape_log.rb +4 -6
  59. data/app/models/source_monitor/source.rb +28 -19
  60. data/app/presenters/source_monitor/base_presenter.rb +19 -0
  61. data/app/presenters/source_monitor/source_details_presenter.rb +61 -0
  62. data/app/presenters/source_monitor/sources_filter_presenter.rb +61 -0
  63. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +3 -3
  64. data/app/views/source_monitor/dashboard/_stat_card.html.erb +2 -1
  65. data/app/views/source_monitor/dashboard/_stats.html.erb +5 -7
  66. data/app/views/source_monitor/items/_details.html.erb +11 -14
  67. data/app/views/source_monitor/items/index.html.erb +10 -35
  68. data/app/views/source_monitor/logs/index.html.erb +20 -41
  69. data/app/views/source_monitor/shared/_form_errors.html.erb +14 -0
  70. data/app/views/source_monitor/source_scrape_tests/_result.html.erb +1 -29
  71. data/app/views/source_monitor/source_scrape_tests/_result_content.html.erb +33 -0
  72. data/app/views/source_monitor/source_scrape_tests/show.html.erb +1 -29
  73. data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +2 -2
  74. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +7 -5
  75. data/app/views/source_monitor/sources/_details.html.erb +24 -52
  76. data/app/views/source_monitor/sources/_health_status_badge.html.erb +4 -6
  77. data/app/views/source_monitor/sources/_row.html.erb +7 -18
  78. data/app/views/source_monitor/sources/edit.html.erb +1 -10
  79. data/app/views/source_monitor/sources/index.html.erb +26 -46
  80. data/app/views/source_monitor/sources/new.html.erb +1 -10
  81. data/config/routes.rb +1 -1
  82. data/db/migrate/20260313120000_add_composite_indexes_to_log_tables.rb +14 -0
  83. data/db/migrate/20260314120000_align_health_status_default.rb +11 -0
  84. data/lib/source_monitor/analytics/sources_index_metrics.rb +15 -0
  85. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +10 -4
  86. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +21 -5
  87. data/lib/source_monitor/favicons/fetcher.rb +86 -0
  88. data/lib/source_monitor/fetching/cloudflare_bypass.rb +14 -5
  89. data/lib/source_monitor/fetching/completion/event_publisher.rb +12 -0
  90. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +15 -2
  91. data/lib/source_monitor/fetching/completion/retention_handler.rb +11 -3
  92. data/lib/source_monitor/fetching/feed_fetcher.rb +2 -21
  93. data/lib/source_monitor/fetching/fetch_runner.rb +12 -3
  94. data/lib/source_monitor/fetching/retry_orchestrator.rb +102 -0
  95. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +9 -0
  96. data/lib/source_monitor/health/source_health_check_orchestrator.rb +95 -0
  97. data/lib/source_monitor/health.rb +1 -0
  98. data/lib/source_monitor/images/downloader.rb +6 -7
  99. data/lib/source_monitor/images/processor.rb +98 -0
  100. data/lib/source_monitor/import_sessions/health_check_updater.rb +95 -0
  101. data/lib/source_monitor/import_sessions/opml_importer.rb +163 -0
  102. data/lib/source_monitor/items/item_creator.rb +0 -21
  103. data/lib/source_monitor/logs/query.rb +20 -0
  104. data/lib/source_monitor/queries/scrape_candidates_query.rb +30 -0
  105. data/lib/source_monitor/queries.rb +7 -0
  106. data/lib/source_monitor/scheduler.rb +5 -0
  107. data/lib/source_monitor/scraping/bulk_result_presenter.rb +11 -8
  108. data/lib/source_monitor/scraping/runner.rb +52 -0
  109. data/lib/source_monitor/scraping/scheduler.rb +5 -0
  110. data/lib/source_monitor/scraping/state.rb +4 -2
  111. data/lib/source_monitor/security/parameter_sanitizer.rb +7 -0
  112. data/lib/source_monitor/version.rb +1 -1
  113. data/lib/source_monitor.rb +7 -0
  114. data/source_monitor.gemspec +1 -0
  115. metadata +47 -1
@@ -9,66 +9,8 @@ module SourceMonitor
9
9
  def perform(item_id)
10
10
  item = SourceMonitor::Item.find_by(id: item_id)
11
11
  return unless item
12
- return unless SourceMonitor.config.images.download_enabled?
13
12
 
14
- html = item.content
15
- return if html.blank?
16
-
17
- # Build or find item_content for attachment storage
18
- item_content = item.item_content || item.build_item_content
19
-
20
- # Skip if images already attached (idempotency)
21
- return if item_content.persisted? && item_content.images.attached?
22
-
23
- base_url = item.url
24
- rewriter = SourceMonitor::Images::ContentRewriter.new(html, base_url: base_url)
25
- image_urls = rewriter.image_urls
26
- return if image_urls.empty?
27
-
28
- # Save item_content first so we can attach blobs to it
29
- item_content.save! unless item_content.persisted?
30
-
31
- # Download images and build URL mapping
32
- url_mapping = download_images(item_content, image_urls)
33
- return if url_mapping.empty?
34
-
35
- # Rewrite HTML with Active Storage URLs
36
- rewritten_html = rewriter.rewrite do |original_url|
37
- url_mapping[original_url]
38
- end
39
-
40
- # Update the item content with rewritten HTML
41
- item.update!(content: rewritten_html)
42
- end
43
-
44
- private
45
-
46
- def download_images(item_content, image_urls)
47
- url_mapping = {}
48
- settings = SourceMonitor.config.images
49
-
50
- image_urls.each do |image_url|
51
- result = SourceMonitor::Images::Downloader.new(image_url, settings: settings).call
52
- next unless result
53
-
54
- blob = ActiveStorage::Blob.create_and_upload!(
55
- io: result.io,
56
- filename: result.filename,
57
- content_type: result.content_type
58
- )
59
- item_content.images.attach(blob)
60
-
61
- # Generate a serving URL for the blob
62
- url_mapping[image_url] = Rails.application.routes.url_helpers.rails_blob_path(blob, only_path: true)
63
- rescue ActiveRecord::Deadlocked
64
- raise # let job framework retry on database deadlock
65
- rescue StandardError
66
- # Individual image failure should not block others.
67
- # Original URL will be preserved (graceful fallback).
68
- next
69
- end
70
-
71
- url_mapping
13
+ SourceMonitor::Images::Processor.new(item).call
72
14
  end
73
15
  end
74
16
  end
@@ -7,67 +7,10 @@ module SourceMonitor
7
7
  discard_on ActiveJob::DeserializationError
8
8
 
9
9
  def perform(source_id)
10
- return unless defined?(ActiveStorage)
11
-
12
10
  source = SourceMonitor::Source.find_by(id: source_id)
13
11
  return unless source
14
- return unless SourceMonitor.config.favicons.enabled?
15
- return if source.website_url.blank?
16
- return if source.favicon.attached?
17
- return if within_cooldown?(source)
18
-
19
- result = SourceMonitor::Favicons::Discoverer.new(source.website_url).call
20
-
21
- if result
22
- attach_favicon(source, result)
23
- else
24
- record_failed_attempt(source)
25
- end
26
- rescue ActiveRecord::Deadlocked
27
- raise # let job framework retry on database deadlock
28
- rescue StandardError => error
29
- record_failed_attempt(source) if source
30
- log_error(source, error)
31
- end
32
-
33
- private
34
-
35
- def within_cooldown?(source)
36
- last_attempt = source.metadata&.dig("favicon_last_attempted_at")
37
- return false if last_attempt.blank?
38
-
39
- cooldown_days = SourceMonitor.config.favicons.retry_cooldown_days
40
- Time.parse(last_attempt) > cooldown_days.days.ago
41
- rescue ArgumentError, TypeError
42
- false
43
- end
44
-
45
- def attach_favicon(source, result)
46
- blob = ActiveStorage::Blob.create_and_upload!(
47
- io: result.io,
48
- filename: result.filename,
49
- content_type: result.content_type
50
- )
51
- source.favicon.attach(blob)
52
- end
53
-
54
- def record_failed_attempt(source)
55
- metadata = (source.metadata || {}).merge(
56
- "favicon_last_attempted_at" => Time.current.iso8601
57
- )
58
- source.update_column(:metadata, metadata)
59
- rescue StandardError
60
- nil
61
- end
62
-
63
- def log_error(source, error)
64
- return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
65
12
 
66
- Rails.logger.warn(
67
- "[SourceMonitor::FaviconFetchJob] Failed for source #{source&.id}: #{error.class} - #{error.message}"
68
- )
69
- rescue StandardError
70
- nil
13
+ SourceMonitor::Favicons::Fetcher.new(source).call
71
14
  end
72
15
  end
73
16
  end
@@ -72,63 +72,13 @@ module SourceMonitor
72
72
  decision = SourceMonitor::Fetching::RetryPolicy.new(source:, error:, now: Time.current).decision
73
73
  return raise error unless decision
74
74
 
75
- if decision.retry?
76
- enqueue_retry!(source, decision)
77
- elsif decision.open_circuit?
78
- open_circuit!(source, decision)
79
- raise error
80
- else
81
- reset_retry_state!(source)
82
- raise error
83
- end
75
+ result = SourceMonitor::Fetching::RetryOrchestrator.call(source:, error:, decision:)
76
+ raise error unless result.retry_enqueued?
84
77
  rescue StandardError => policy_error
85
78
  log_retry_failure(source, error, policy_error)
86
79
  raise error
87
80
  end
88
81
 
89
- def enqueue_retry!(source, decision)
90
- retry_at = Time.current + (decision.wait || 0)
91
-
92
- source.with_lock do
93
- source.reload
94
- source.update!(
95
- fetch_retry_attempt: decision.next_attempt,
96
- fetch_circuit_opened_at: nil,
97
- fetch_circuit_until: nil,
98
- next_fetch_at: retry_at,
99
- backoff_until: retry_at,
100
- fetch_status: "queued"
101
- )
102
- end
103
-
104
- retry_job wait: decision.wait || 0
105
- end
106
-
107
- def open_circuit!(source, decision)
108
- source.with_lock do
109
- source.reload
110
- source.update!(
111
- fetch_retry_attempt: 0,
112
- fetch_circuit_opened_at: Time.current,
113
- fetch_circuit_until: decision.circuit_until,
114
- next_fetch_at: decision.circuit_until,
115
- backoff_until: decision.circuit_until,
116
- fetch_status: "failed"
117
- )
118
- end
119
- end
120
-
121
- def reset_retry_state!(source)
122
- source.with_lock do
123
- source.reload
124
- source.update!(
125
- fetch_retry_attempt: 0,
126
- fetch_circuit_opened_at: nil,
127
- fetch_circuit_until: nil
128
- )
129
- end
130
- end
131
-
132
82
  def transient_error?(error)
133
83
  error.is_a?(SourceMonitor::Fetching::FetchError)
134
84
  end
@@ -1,10 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "set"
4
- require "source_monitor/import_sessions/entry_normalizer"
5
- require "source_monitor/realtime/broadcaster"
6
- require "source_monitor/sources/params"
7
-
8
3
  module SourceMonitor
9
4
  class ImportOpmlJob < ApplicationJob
10
5
  source_monitor_queue :maintenance
@@ -12,148 +7,14 @@ module SourceMonitor
12
7
  discard_on ActiveJob::DeserializationError
13
8
 
14
9
  def perform(import_session_id, import_history_id)
15
- @import_session = SourceMonitor::ImportSession.find_by(id: import_session_id)
16
- @import_history = SourceMonitor::ImportHistory.find_by(id: import_history_id)
10
+ import_session = SourceMonitor::ImportSession.find_by(id: import_session_id)
11
+ import_history = SourceMonitor::ImportHistory.find_by(id: import_history_id)
17
12
  return unless import_session && import_history
18
13
 
19
- import_history.update_columns(started_at: Time.current) unless import_history.started_at
20
-
21
- processed = Set.new
22
-
23
- selected_entries.each do |entry|
24
- process_entry(entry, processed)
25
- end
26
-
27
- import_history.update!(
28
- imported_sources: imported_sources,
29
- failed_sources: failed_sources,
30
- skipped_duplicates: skipped_duplicates,
31
- bulk_settings: import_session.bulk_settings.presence || {},
32
- completed_at: Time.current
33
- )
34
-
35
- broadcast_completion(import_history)
36
- end
37
-
38
- private
39
-
40
- attr_reader :import_session, :import_history
41
-
42
- def selected_entries
43
- ids = Array(import_session.selected_source_ids).map(&:to_s)
44
-
45
- Array(import_session.parsed_sources)
46
- .map { |entry| SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry) }
47
- .select { |entry| ids.include?(entry[:id]) }
48
- end
49
-
50
- def process_entry(entry, processed)
51
- feed_url = entry[:feed_url].to_s
52
-
53
- if feed_url.blank?
54
- failed_sources << failure_payload(feed_url, "MissingFeedURL", "Feed URL is missing")
55
- return
56
- end
57
-
58
- normalized_url = feed_url.downcase
59
-
60
- if processed.include?(normalized_url)
61
- skipped_duplicates << skipped_payload(feed_url, "duplicate in import selection")
62
- return
63
- end
64
-
65
- if duplicate_source?(normalized_url)
66
- skipped_duplicates << skipped_payload(feed_url, "already exists")
67
- processed << normalized_url
68
- return
69
- end
70
-
71
- source = SourceMonitor::Source.new(build_attributes(entry))
72
-
73
- if source.save
74
- imported_sources << { id: source.id, feed_url: source.feed_url, name: source.name }
75
- SourceMonitor::FaviconFetchJob.perform_later(source.id) if should_fetch_favicon?(source)
76
- processed << normalized_url
77
- else
78
- failed_sources << failure_payload(feed_url, "ValidationFailed", source.errors.full_messages.to_sentence)
79
- end
80
- rescue ActiveRecord::RecordNotUnique
81
- skipped_duplicates << skipped_payload(feed_url, "already exists")
82
- processed << normalized_url
83
- rescue StandardError => error
84
- failed_sources << failure_payload(feed_url, error.class.name, error.message)
85
- end
86
-
87
- def duplicate_source?(normalized_feed_url)
88
- SourceMonitor::Source.where("LOWER(feed_url) = ?", normalized_feed_url).exists?
89
- end
90
-
91
- def build_attributes(entry)
92
- defaults = SourceMonitor::Sources::Params.default_attributes.deep_dup
93
- settings = SourceMonitor::Security::ParameterSanitizer.sanitize(import_session.bulk_settings.presence || {})
94
- settings = settings.deep_symbolize_keys
95
-
96
- defaults.merge(settings).merge(identity_attributes(entry))
97
- end
98
-
99
- def identity_attributes(entry)
100
- {
101
- name: entry[:title].presence || entry[:feed_url],
102
- feed_url: entry[:feed_url],
103
- website_url: entry[:website_url]
104
- }
105
- end
106
-
107
- def imported_sources
108
- @imported_sources ||= []
109
- end
110
-
111
- def failed_sources
112
- @failed_sources ||= []
113
- end
114
-
115
- def skipped_duplicates
116
- @skipped_duplicates ||= []
117
- end
118
-
119
- def failure_payload(feed_url, error_class, message)
120
- {
121
- feed_url: feed_url,
122
- error_class: error_class,
123
- error_message: message
124
- }
125
- end
126
-
127
- def skipped_payload(feed_url, reason)
128
- {
129
- feed_url: feed_url,
130
- reason: reason
131
- }
132
- end
133
-
134
- def should_fetch_favicon?(source)
135
- defined?(ActiveStorage) &&
136
- SourceMonitor.config.favicons.enabled? &&
137
- source.website_url.present?
138
- rescue StandardError
139
- false
140
- end
141
-
142
- def broadcast_completion(history)
143
- return unless defined?(Turbo::StreamsChannel)
144
-
145
- histories = SourceMonitor::ImportHistory.recent_for(history.user_id).limit(5)
146
-
147
- Turbo::StreamsChannel.broadcast_replace_to(
148
- SourceMonitor::Realtime::Broadcaster::SOURCE_INDEX_STREAM,
149
- target: "source_monitor_import_history_panel",
150
- html: SourceMonitor::SourcesController.render(
151
- partial: "source_monitor/sources/import_history_panel",
152
- locals: { import_histories: histories }
153
- )
154
- )
155
- rescue StandardError => error
156
- Rails.logger.error("[SourceMonitor::ImportOpmlJob] broadcast failed: #{error.class}: #{error.message}") if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
14
+ SourceMonitor::ImportSessions::OPMLImporter.new(
15
+ import_session: import_session,
16
+ import_history: import_history
17
+ ).call
157
18
  end
158
19
  end
159
20
  end
@@ -4,90 +4,29 @@ module SourceMonitor
4
4
  class ImportSessionHealthCheckJob < ApplicationJob
5
5
  source_monitor_queue :maintenance
6
6
 
7
- require "source_monitor/health/import_source_health_check"
8
- require "source_monitor/import_sessions/entry_normalizer"
9
- require "source_monitor/import_sessions/health_check_broadcaster"
10
-
11
7
  discard_on ActiveJob::DeserializationError
12
8
 
9
+ rescue_from ActiveRecord::Deadlocked do |error|
10
+ Rails.logger&.warn("[SourceMonitor::ImportSessionHealthCheckJob] Deadlock: #{error.message}")
11
+ retry_job(wait: 2.seconds + rand(3).seconds)
12
+ end
13
+
13
14
  def perform(import_session_id, entry_id)
14
15
  import_session = SourceMonitor::ImportSession.find_by(id: import_session_id)
15
16
  return unless import_session
16
- return unless active_for?(import_session)
17
-
18
- result = perform_health_check(import_session, entry_id)
19
- return unless result
20
-
21
- updated_entry = nil
22
-
23
- import_session.with_lock do
24
- import_session.reload
25
- return unless active_for?(import_session)
26
-
27
- entries = Array(import_session.parsed_sources).map(&:to_h)
28
- index = entries.index { |candidate| entry_id_for(candidate) == entry_id.to_s }
29
- return unless index
30
17
 
31
- entries[index] = entries[index].merge(
32
- "health_status" => result.status,
33
- "health_error" => result.error_message
18
+ SourceMonitor::ImportSessions::HealthCheckUpdater.new(
19
+ import_session: import_session,
20
+ entry_id: entry_id
21
+ ).call
22
+ rescue ActiveRecord::Deadlocked
23
+ raise # re-raise so rescue_from handler catches it
24
+ rescue StandardError => error
25
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
26
+ Rails.logger.error(
27
+ "[SourceMonitor::ImportSessionHealthCheckJob] #{error.class}: #{error.message}"
34
28
  )
35
-
36
- selected_ids = Array(import_session.selected_source_ids).map(&:to_s)
37
- selected_ids -= [ entry_id.to_s ] if result.status == "unhealthy"
38
-
39
- attrs = {
40
- parsed_sources: entries,
41
- selected_source_ids: selected_ids,
42
- health_check_completed_at: completion_time(entries, import_session.health_check_targets)
43
- }.compact
44
-
45
- import_session.update!(attrs)
46
- normalized_entry = SourceMonitor::ImportSessions::EntryNormalizer.normalize(entries[index])
47
- updated_entry = normalized_entry.merge(selected: selected_ids.include?(entry_id.to_s))
48
29
  end
49
-
50
- broadcaster = SourceMonitor::ImportSessions::HealthCheckBroadcaster.new(import_session)
51
- broadcaster.broadcast_row(updated_entry) if updated_entry
52
- broadcaster.broadcast_progress
53
- rescue StandardError => error
54
- Rails.logger.error(
55
- "[SourceMonitor::ImportSessionHealthCheckJob] #{error.class}: #{error.message}"
56
- ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
57
- end
58
-
59
- private
60
-
61
- def active_for?(import_session)
62
- import_session.current_step == "health_check" && import_session.health_checks_active?
63
- end
64
-
65
- def perform_health_check(import_session, entry_id)
66
- entry = find_entry(import_session, entry_id)
67
- return unless entry
68
-
69
- SourceMonitor::Health::ImportSourceHealthCheck.new(feed_url: entry_feed_url(entry)).call
70
- end
71
-
72
- def find_entry(import_session, entry_id)
73
- Array(import_session.parsed_sources).find { |entry| entry_id_for(entry) == entry_id.to_s }
74
- end
75
-
76
- def entry_id_for(entry)
77
- entry.to_h["id"].presence || entry.to_h[:id].presence || entry.to_h["feed_url"].to_s
78
- end
79
-
80
- def entry_feed_url(entry)
81
- entry.to_h["feed_url"] || entry.to_h[:feed_url]
82
- end
83
-
84
- def completion_time(entries, targets)
85
- normalized = Array(entries).map { |entry| SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry) }
86
- filtered = normalized.select { |entry| targets.include?(entry[:id]) }
87
- return nil if filtered.empty?
88
-
89
- completed = filtered.count { |entry| %w[healthy unhealthy].include?(entry[:health_status].to_s) }
90
- completed >= filtered.size ? Time.current : nil
91
30
  end
92
31
  end
93
32
  end
@@ -6,6 +6,11 @@ module SourceMonitor
6
6
 
7
7
  source_monitor_queue :maintenance
8
8
 
9
+ rescue_from ActiveRecord::Deadlocked do |error|
10
+ Rails.logger&.warn("[SourceMonitor::ItemCleanupJob] Deadlock: #{error.message}")
11
+ retry_job(wait: 2.seconds + rand(3).seconds)
12
+ end
13
+
9
14
  def perform(options = nil)
10
15
  options = SourceMonitor::Jobs::CleanupOptions.normalize(options)
11
16
 
@@ -7,6 +7,11 @@ module SourceMonitor
7
7
 
8
8
  source_monitor_queue :maintenance
9
9
 
10
+ rescue_from ActiveRecord::Deadlocked do |error|
11
+ Rails.logger&.warn("[SourceMonitor::LogCleanupJob] Deadlock: #{error.message}")
12
+ retry_job(wait: 2.seconds + rand(3).seconds)
13
+ end
14
+
10
15
  def perform(options = nil)
11
16
  options = SourceMonitor::Jobs::CleanupOptions.normalize(options)
12
17
 
@@ -36,12 +41,18 @@ module SourceMonitor
36
41
 
37
42
  def prune_fetch_logs(cutoff)
38
43
  SourceMonitor::FetchLog.where(SourceMonitor::FetchLog.arel_table[:started_at].lt(cutoff))
39
- .in_batches(of: 500) { |batch| batch.delete_all }
44
+ .in_batches(of: 500) do |batch|
45
+ SourceMonitor::LogEntry.where(loggable_type: "SourceMonitor::FetchLog", loggable_id: batch.select(:id)).delete_all
46
+ batch.delete_all
47
+ end
40
48
  end
41
49
 
42
50
  def prune_scrape_logs(cutoff)
43
51
  SourceMonitor::ScrapeLog.where(SourceMonitor::ScrapeLog.arel_table[:started_at].lt(cutoff))
44
- .in_batches(of: 500) { |batch| batch.delete_all }
52
+ .in_batches(of: 500) do |batch|
53
+ SourceMonitor::LogEntry.where(loggable_type: "SourceMonitor::ScrapeLog", loggable_id: batch.select(:id)).delete_all
54
+ batch.delete_all
55
+ end
45
56
  end
46
57
  end
47
58
  end
@@ -4,9 +4,17 @@ module SourceMonitor
4
4
  class ScheduleFetchesJob < ApplicationJob
5
5
  source_monitor_queue :fetch
6
6
 
7
+ rescue_from ActiveRecord::Deadlocked do |error|
8
+ Rails.logger&.warn("[SourceMonitor::ScheduleFetchesJob] Deadlock: #{error.message}")
9
+ retry_job(wait: 2.seconds + rand(3).seconds)
10
+ end
11
+
7
12
  def perform(options = nil)
8
13
  limit = extract_limit(options)
9
14
  SourceMonitor::Scheduler.run(limit:)
15
+ rescue StandardError => error
16
+ Rails.logger&.error("[SourceMonitor::ScheduleFetchesJob] #{error.class}: #{error.message}")
17
+ raise
10
18
  end
11
19
 
12
20
  private
@@ -6,62 +6,16 @@ module SourceMonitor
6
6
 
7
7
  discard_on ActiveJob::DeserializationError
8
8
 
9
+ rescue_from ActiveRecord::Deadlocked do |error|
10
+ Rails.logger&.warn("[SourceMonitor::ScrapeItemJob] Deadlock: #{error.message}")
11
+ retry_job(wait: 2.seconds + rand(3).seconds)
12
+ end
13
+
9
14
  def perform(item_id)
10
- log("job:start", item_id: item_id)
11
15
  item = SourceMonitor::Item.includes(:source).find_by(id: item_id)
12
16
  return unless item
13
17
 
14
- source = item.source
15
- unless source&.scraping_enabled?
16
- log("job:skipped_scraping_disabled", item: item)
17
- SourceMonitor::Scraping::State.clear_inflight!(item)
18
- return
19
- end
20
-
21
- remaining = time_until_scrape_allowed(source)
22
- if remaining&.positive?
23
- SourceMonitor::Scraping::State.clear_inflight!(item)
24
- self.class.set(wait: remaining.seconds).perform_later(item_id)
25
- log("job:deferred", item: item, wait_seconds: remaining)
26
- return
27
- end
28
-
29
- SourceMonitor::Scraping::State.mark_processing!(item)
30
- SourceMonitor::Scraping::ItemScraper.new(item:, source:).call
31
- log("job:completed", item: item, status: item.scrape_status)
32
- rescue StandardError => error
33
- log("job:error", item: item, error: error.message)
34
- SourceMonitor::Scraping::State.mark_failed!(item)
35
- raise
36
- ensure
37
- SourceMonitor::Scraping::State.clear_inflight!(item) if item
38
- end
39
-
40
- private
41
-
42
- def time_until_scrape_allowed(source)
43
- interval = source.min_scrape_interval || SourceMonitor.config.scraping.min_scrape_interval
44
- return nil if interval.nil? || interval <= 0
45
-
46
- last_scrape_at = source.scrape_logs.maximum(:started_at)
47
- return nil unless last_scrape_at
48
-
49
- elapsed = Time.current - last_scrape_at
50
- remaining = interval - elapsed
51
- remaining.positive? ? remaining.ceil : nil
52
- end
53
-
54
- def log(stage, item: nil, item_id: nil, **extra)
55
- return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
56
-
57
- payload = {
58
- stage: "SourceMonitor::ScrapeItemJob##{stage}",
59
- item_id: item&.id || item_id,
60
- source_id: item&.source_id
61
- }.merge(extra.compact)
62
- Rails.logger.info("[SourceMonitor::ManualScrape] #{payload.to_json}")
63
- rescue StandardError
64
- nil
18
+ SourceMonitor::Scraping::Runner.new(item).call
65
19
  end
66
20
  end
67
21
  end
@@ -10,78 +10,7 @@ module SourceMonitor
10
10
  source = SourceMonitor::Source.find_by(id: source_id)
11
11
  return unless source
12
12
 
13
- result = SourceMonitor::Health::SourceHealthCheck.new(source: source).call
14
- broadcast_outcome(source, result)
15
- trigger_fetch_if_degraded(source, result)
16
- result
17
- rescue StandardError => error
18
- Rails.logger&.error(
19
- "[SourceMonitor::SourceHealthCheckJob] error for source #{source_id}: #{error.class}: #{error.message}"
20
- ) if defined?(Rails) && Rails.respond_to?(:logger)
21
-
22
- record_unexpected_failure(source, error) if source
23
- broadcast_outcome(source, nil, error) if source
24
- nil
25
- end
26
-
27
- DEGRADED_STATUSES = %w[declining failing].freeze
28
-
29
- private
30
-
31
- def trigger_fetch_if_degraded(source, result)
32
- return unless result&.success?
33
- return unless DEGRADED_STATUSES.include?(source.health_status.to_s)
34
-
35
- SourceMonitor::FetchFeedJob.perform_later(source.id, force: true)
36
- end
37
-
38
- def record_unexpected_failure(source, error)
39
- SourceMonitor::HealthCheckLog.create!(
40
- source: source,
41
- success: false,
42
- started_at: Time.current,
43
- completed_at: Time.current,
44
- duration_ms: 0,
45
- error_class: error.class.name,
46
- error_message: error.message
47
- )
48
- rescue StandardError
49
- nil
50
- end
51
-
52
- def broadcast_outcome(source, result, error = nil)
53
- SourceMonitor::Realtime.broadcast_source(source)
54
-
55
- message, level = toast_payload(source, result, error)
56
- return if message.blank?
57
-
58
- SourceMonitor::Realtime.broadcast_toast(message:, level:)
59
- end
60
-
61
- def toast_payload(source, result, error)
62
- if error
63
- return [
64
- "Health check failed for #{source.name}: #{error.message}",
65
- :error
66
- ]
67
- end
68
-
69
- if result&.success?
70
- [
71
- "Health check succeeded for #{source.name}.",
72
- :success
73
- ]
74
- else
75
- failure_reason = result&.error&.message
76
- http_status = result&.log&.http_status
77
- message = "Health check failed for #{source.name}"
78
- message += " (HTTP #{http_status})" if http_status.present?
79
- message += ": #{failure_reason}" if failure_reason.present?
80
- [
81
- "#{message}.",
82
- :error
83
- ]
84
- end
13
+ SourceMonitor::Health::SourceHealthCheckOrchestrator.new(source).call
85
14
  end
86
15
  end
87
16
  end