source_monitor 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/commands/rails-audit.md +77 -0
- data/CHANGELOG.md +50 -0
- data/CLAUDE.md +2 -2
- data/Gemfile.lock +7 -20
- data/RAILS_AUDIT.md +424 -0
- data/VERSION +1 -1
- data/app/assets/builds/source_monitor/application.css +4 -24
- data/app/assets/builds/source_monitor/application.js +57 -89
- data/app/assets/builds/source_monitor/application.js.map +4 -4
- data/app/assets/javascripts/source_monitor/application.js +3 -6
- data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +6 -86
- data/app/assets/javascripts/source_monitor/controllers/filter_submit_controller.js +13 -0
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
- data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +3 -13
- data/app/components/source_monitor/application_component.rb +10 -0
- data/app/components/source_monitor/filter_dropdown_component.rb +62 -0
- data/app/components/source_monitor/icon_component.rb +140 -0
- data/app/components/source_monitor/status_badge_component.html.erb +8 -0
- data/app/components/source_monitor/status_badge_component.rb +96 -0
- data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +4 -0
- data/app/controllers/concerns/source_monitor/set_source.rb +13 -0
- data/app/controllers/source_monitor/application_controller.rb +17 -0
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +6 -10
- data/app/controllers/source_monitor/dashboard_controller.rb +5 -1
- data/app/controllers/source_monitor/import_history_dismissals_controller.rb +1 -1
- data/app/controllers/source_monitor/import_sessions_controller.rb +30 -9
- data/app/controllers/source_monitor/item_scrapes_controller.rb +70 -0
- data/app/controllers/source_monitor/items_controller.rb +2 -69
- data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +1 -4
- data/app/controllers/source_monitor/source_favicon_fetches_controller.rb +2 -12
- data/app/controllers/source_monitor/source_fetches_controller.rb +1 -6
- data/app/controllers/source_monitor/source_health_checks_controller.rb +9 -16
- data/app/controllers/source_monitor/source_health_resets_controller.rb +1 -6
- data/app/controllers/source_monitor/source_retries_controller.rb +1 -6
- data/app/controllers/source_monitor/source_scrape_tests_controller.rb +2 -4
- data/app/controllers/source_monitor/source_turbo_responses.rb +1 -3
- data/app/controllers/source_monitor/sources_controller.rb +15 -20
- data/app/helpers/source_monitor/application_helper.rb +15 -31
- data/app/helpers/source_monitor/health_badge_helper.rb +8 -0
- data/app/jobs/source_monitor/download_content_images_job.rb +1 -59
- data/app/jobs/source_monitor/favicon_fetch_job.rb +1 -58
- data/app/jobs/source_monitor/fetch_feed_job.rb +2 -52
- data/app/jobs/source_monitor/import_opml_job.rb +6 -145
- data/app/jobs/source_monitor/import_session_health_check_job.rb +15 -76
- data/app/jobs/source_monitor/item_cleanup_job.rb +5 -0
- data/app/jobs/source_monitor/log_cleanup_job.rb +13 -2
- data/app/jobs/source_monitor/schedule_fetches_job.rb +8 -0
- data/app/jobs/source_monitor/scrape_item_job.rb +6 -52
- data/app/jobs/source_monitor/source_health_check_job.rb +1 -72
- data/app/models/concerns/source_monitor/loggable.rb +12 -0
- data/app/models/source_monitor/fetch_log.rb +0 -8
- data/app/models/source_monitor/health_check_log.rb +0 -8
- data/app/models/source_monitor/import_history.rb +14 -0
- data/app/models/source_monitor/import_session.rb +2 -0
- data/app/models/source_monitor/item.rb +15 -0
- data/app/models/source_monitor/item_content.rb +4 -3
- data/app/models/source_monitor/scrape_log.rb +4 -6
- data/app/models/source_monitor/source.rb +28 -19
- data/app/presenters/source_monitor/base_presenter.rb +19 -0
- data/app/presenters/source_monitor/source_details_presenter.rb +61 -0
- data/app/presenters/source_monitor/sources_filter_presenter.rb +61 -0
- data/app/views/source_monitor/dashboard/_recent_activity.html.erb +3 -3
- data/app/views/source_monitor/dashboard/_stat_card.html.erb +2 -1
- data/app/views/source_monitor/dashboard/_stats.html.erb +5 -7
- data/app/views/source_monitor/items/_details.html.erb +11 -14
- data/app/views/source_monitor/items/index.html.erb +10 -35
- data/app/views/source_monitor/logs/index.html.erb +20 -41
- data/app/views/source_monitor/shared/_form_errors.html.erb +14 -0
- data/app/views/source_monitor/source_scrape_tests/_result.html.erb +1 -29
- data/app/views/source_monitor/source_scrape_tests/_result_content.html.erb +33 -0
- data/app/views/source_monitor/source_scrape_tests/show.html.erb +1 -29
- data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +2 -2
- data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +7 -5
- data/app/views/source_monitor/sources/_details.html.erb +24 -52
- data/app/views/source_monitor/sources/_health_status_badge.html.erb +4 -6
- data/app/views/source_monitor/sources/_row.html.erb +7 -18
- data/app/views/source_monitor/sources/edit.html.erb +1 -10
- data/app/views/source_monitor/sources/index.html.erb +26 -46
- data/app/views/source_monitor/sources/new.html.erb +1 -10
- data/config/routes.rb +1 -1
- data/db/migrate/20260313120000_add_composite_indexes_to_log_tables.rb +14 -0
- data/db/migrate/20260314120000_align_health_status_default.rb +11 -0
- data/lib/source_monitor/analytics/sources_index_metrics.rb +15 -0
- data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +10 -4
- data/lib/source_monitor/dashboard/turbo_broadcaster.rb +21 -5
- data/lib/source_monitor/favicons/fetcher.rb +86 -0
- data/lib/source_monitor/fetching/cloudflare_bypass.rb +14 -5
- data/lib/source_monitor/fetching/completion/event_publisher.rb +12 -0
- data/lib/source_monitor/fetching/completion/follow_up_handler.rb +15 -2
- data/lib/source_monitor/fetching/completion/retention_handler.rb +11 -3
- data/lib/source_monitor/fetching/feed_fetcher.rb +2 -21
- data/lib/source_monitor/fetching/fetch_runner.rb +12 -3
- data/lib/source_monitor/fetching/retry_orchestrator.rb +102 -0
- data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +9 -0
- data/lib/source_monitor/health/source_health_check_orchestrator.rb +95 -0
- data/lib/source_monitor/health.rb +1 -0
- data/lib/source_monitor/images/downloader.rb +6 -7
- data/lib/source_monitor/images/processor.rb +98 -0
- data/lib/source_monitor/import_sessions/health_check_updater.rb +95 -0
- data/lib/source_monitor/import_sessions/opml_importer.rb +163 -0
- data/lib/source_monitor/items/item_creator.rb +0 -21
- data/lib/source_monitor/logs/query.rb +20 -0
- data/lib/source_monitor/queries/scrape_candidates_query.rb +30 -0
- data/lib/source_monitor/queries.rb +7 -0
- data/lib/source_monitor/scheduler.rb +5 -0
- data/lib/source_monitor/scraping/bulk_result_presenter.rb +11 -8
- data/lib/source_monitor/scraping/runner.rb +52 -0
- data/lib/source_monitor/scraping/scheduler.rb +5 -0
- data/lib/source_monitor/scraping/state.rb +4 -2
- data/lib/source_monitor/security/parameter_sanitizer.rb +7 -0
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +7 -0
- data/source_monitor.gemspec +1 -0
- metadata +47 -1
|
@@ -9,66 +9,8 @@ module SourceMonitor
|
|
|
9
9
|
def perform(item_id)
|
|
10
10
|
item = SourceMonitor::Item.find_by(id: item_id)
|
|
11
11
|
return unless item
|
|
12
|
-
return unless SourceMonitor.config.images.download_enabled?
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
return if html.blank?
|
|
16
|
-
|
|
17
|
-
# Build or find item_content for attachment storage
|
|
18
|
-
item_content = item.item_content || item.build_item_content
|
|
19
|
-
|
|
20
|
-
# Skip if images already attached (idempotency)
|
|
21
|
-
return if item_content.persisted? && item_content.images.attached?
|
|
22
|
-
|
|
23
|
-
base_url = item.url
|
|
24
|
-
rewriter = SourceMonitor::Images::ContentRewriter.new(html, base_url: base_url)
|
|
25
|
-
image_urls = rewriter.image_urls
|
|
26
|
-
return if image_urls.empty?
|
|
27
|
-
|
|
28
|
-
# Save item_content first so we can attach blobs to it
|
|
29
|
-
item_content.save! unless item_content.persisted?
|
|
30
|
-
|
|
31
|
-
# Download images and build URL mapping
|
|
32
|
-
url_mapping = download_images(item_content, image_urls)
|
|
33
|
-
return if url_mapping.empty?
|
|
34
|
-
|
|
35
|
-
# Rewrite HTML with Active Storage URLs
|
|
36
|
-
rewritten_html = rewriter.rewrite do |original_url|
|
|
37
|
-
url_mapping[original_url]
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# Update the item content with rewritten HTML
|
|
41
|
-
item.update!(content: rewritten_html)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
private
|
|
45
|
-
|
|
46
|
-
def download_images(item_content, image_urls)
|
|
47
|
-
url_mapping = {}
|
|
48
|
-
settings = SourceMonitor.config.images
|
|
49
|
-
|
|
50
|
-
image_urls.each do |image_url|
|
|
51
|
-
result = SourceMonitor::Images::Downloader.new(image_url, settings: settings).call
|
|
52
|
-
next unless result
|
|
53
|
-
|
|
54
|
-
blob = ActiveStorage::Blob.create_and_upload!(
|
|
55
|
-
io: result.io,
|
|
56
|
-
filename: result.filename,
|
|
57
|
-
content_type: result.content_type
|
|
58
|
-
)
|
|
59
|
-
item_content.images.attach(blob)
|
|
60
|
-
|
|
61
|
-
# Generate a serving URL for the blob
|
|
62
|
-
url_mapping[image_url] = Rails.application.routes.url_helpers.rails_blob_path(blob, only_path: true)
|
|
63
|
-
rescue ActiveRecord::Deadlocked
|
|
64
|
-
raise # let job framework retry on database deadlock
|
|
65
|
-
rescue StandardError
|
|
66
|
-
# Individual image failure should not block others.
|
|
67
|
-
# Original URL will be preserved (graceful fallback).
|
|
68
|
-
next
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
url_mapping
|
|
13
|
+
SourceMonitor::Images::Processor.new(item).call
|
|
72
14
|
end
|
|
73
15
|
end
|
|
74
16
|
end
|
|
@@ -7,67 +7,10 @@ module SourceMonitor
|
|
|
7
7
|
discard_on ActiveJob::DeserializationError
|
|
8
8
|
|
|
9
9
|
def perform(source_id)
|
|
10
|
-
return unless defined?(ActiveStorage)
|
|
11
|
-
|
|
12
10
|
source = SourceMonitor::Source.find_by(id: source_id)
|
|
13
11
|
return unless source
|
|
14
|
-
return unless SourceMonitor.config.favicons.enabled?
|
|
15
|
-
return if source.website_url.blank?
|
|
16
|
-
return if source.favicon.attached?
|
|
17
|
-
return if within_cooldown?(source)
|
|
18
|
-
|
|
19
|
-
result = SourceMonitor::Favicons::Discoverer.new(source.website_url).call
|
|
20
|
-
|
|
21
|
-
if result
|
|
22
|
-
attach_favicon(source, result)
|
|
23
|
-
else
|
|
24
|
-
record_failed_attempt(source)
|
|
25
|
-
end
|
|
26
|
-
rescue ActiveRecord::Deadlocked
|
|
27
|
-
raise # let job framework retry on database deadlock
|
|
28
|
-
rescue StandardError => error
|
|
29
|
-
record_failed_attempt(source) if source
|
|
30
|
-
log_error(source, error)
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
private
|
|
34
|
-
|
|
35
|
-
def within_cooldown?(source)
|
|
36
|
-
last_attempt = source.metadata&.dig("favicon_last_attempted_at")
|
|
37
|
-
return false if last_attempt.blank?
|
|
38
|
-
|
|
39
|
-
cooldown_days = SourceMonitor.config.favicons.retry_cooldown_days
|
|
40
|
-
Time.parse(last_attempt) > cooldown_days.days.ago
|
|
41
|
-
rescue ArgumentError, TypeError
|
|
42
|
-
false
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def attach_favicon(source, result)
|
|
46
|
-
blob = ActiveStorage::Blob.create_and_upload!(
|
|
47
|
-
io: result.io,
|
|
48
|
-
filename: result.filename,
|
|
49
|
-
content_type: result.content_type
|
|
50
|
-
)
|
|
51
|
-
source.favicon.attach(blob)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def record_failed_attempt(source)
|
|
55
|
-
metadata = (source.metadata || {}).merge(
|
|
56
|
-
"favicon_last_attempted_at" => Time.current.iso8601
|
|
57
|
-
)
|
|
58
|
-
source.update_column(:metadata, metadata)
|
|
59
|
-
rescue StandardError
|
|
60
|
-
nil
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def log_error(source, error)
|
|
64
|
-
return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
65
12
|
|
|
66
|
-
|
|
67
|
-
"[SourceMonitor::FaviconFetchJob] Failed for source #{source&.id}: #{error.class} - #{error.message}"
|
|
68
|
-
)
|
|
69
|
-
rescue StandardError
|
|
70
|
-
nil
|
|
13
|
+
SourceMonitor::Favicons::Fetcher.new(source).call
|
|
71
14
|
end
|
|
72
15
|
end
|
|
73
16
|
end
|
|
@@ -72,63 +72,13 @@ module SourceMonitor
|
|
|
72
72
|
decision = SourceMonitor::Fetching::RetryPolicy.new(source:, error:, now: Time.current).decision
|
|
73
73
|
return raise error unless decision
|
|
74
74
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
elsif decision.open_circuit?
|
|
78
|
-
open_circuit!(source, decision)
|
|
79
|
-
raise error
|
|
80
|
-
else
|
|
81
|
-
reset_retry_state!(source)
|
|
82
|
-
raise error
|
|
83
|
-
end
|
|
75
|
+
result = SourceMonitor::Fetching::RetryOrchestrator.call(source:, error:, decision:)
|
|
76
|
+
raise error unless result.retry_enqueued?
|
|
84
77
|
rescue StandardError => policy_error
|
|
85
78
|
log_retry_failure(source, error, policy_error)
|
|
86
79
|
raise error
|
|
87
80
|
end
|
|
88
81
|
|
|
89
|
-
def enqueue_retry!(source, decision)
|
|
90
|
-
retry_at = Time.current + (decision.wait || 0)
|
|
91
|
-
|
|
92
|
-
source.with_lock do
|
|
93
|
-
source.reload
|
|
94
|
-
source.update!(
|
|
95
|
-
fetch_retry_attempt: decision.next_attempt,
|
|
96
|
-
fetch_circuit_opened_at: nil,
|
|
97
|
-
fetch_circuit_until: nil,
|
|
98
|
-
next_fetch_at: retry_at,
|
|
99
|
-
backoff_until: retry_at,
|
|
100
|
-
fetch_status: "queued"
|
|
101
|
-
)
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
retry_job wait: decision.wait || 0
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def open_circuit!(source, decision)
|
|
108
|
-
source.with_lock do
|
|
109
|
-
source.reload
|
|
110
|
-
source.update!(
|
|
111
|
-
fetch_retry_attempt: 0,
|
|
112
|
-
fetch_circuit_opened_at: Time.current,
|
|
113
|
-
fetch_circuit_until: decision.circuit_until,
|
|
114
|
-
next_fetch_at: decision.circuit_until,
|
|
115
|
-
backoff_until: decision.circuit_until,
|
|
116
|
-
fetch_status: "failed"
|
|
117
|
-
)
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
def reset_retry_state!(source)
|
|
122
|
-
source.with_lock do
|
|
123
|
-
source.reload
|
|
124
|
-
source.update!(
|
|
125
|
-
fetch_retry_attempt: 0,
|
|
126
|
-
fetch_circuit_opened_at: nil,
|
|
127
|
-
fetch_circuit_until: nil
|
|
128
|
-
)
|
|
129
|
-
end
|
|
130
|
-
end
|
|
131
|
-
|
|
132
82
|
def transient_error?(error)
|
|
133
83
|
error.is_a?(SourceMonitor::Fetching::FetchError)
|
|
134
84
|
end
|
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "set"
|
|
4
|
-
require "source_monitor/import_sessions/entry_normalizer"
|
|
5
|
-
require "source_monitor/realtime/broadcaster"
|
|
6
|
-
require "source_monitor/sources/params"
|
|
7
|
-
|
|
8
3
|
module SourceMonitor
|
|
9
4
|
class ImportOpmlJob < ApplicationJob
|
|
10
5
|
source_monitor_queue :maintenance
|
|
@@ -12,148 +7,14 @@ module SourceMonitor
|
|
|
12
7
|
discard_on ActiveJob::DeserializationError
|
|
13
8
|
|
|
14
9
|
def perform(import_session_id, import_history_id)
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
import_session = SourceMonitor::ImportSession.find_by(id: import_session_id)
|
|
11
|
+
import_history = SourceMonitor::ImportHistory.find_by(id: import_history_id)
|
|
17
12
|
return unless import_session && import_history
|
|
18
13
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
selected_entries.each do |entry|
|
|
24
|
-
process_entry(entry, processed)
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
import_history.update!(
|
|
28
|
-
imported_sources: imported_sources,
|
|
29
|
-
failed_sources: failed_sources,
|
|
30
|
-
skipped_duplicates: skipped_duplicates,
|
|
31
|
-
bulk_settings: import_session.bulk_settings.presence || {},
|
|
32
|
-
completed_at: Time.current
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
broadcast_completion(import_history)
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
private
|
|
39
|
-
|
|
40
|
-
attr_reader :import_session, :import_history
|
|
41
|
-
|
|
42
|
-
def selected_entries
|
|
43
|
-
ids = Array(import_session.selected_source_ids).map(&:to_s)
|
|
44
|
-
|
|
45
|
-
Array(import_session.parsed_sources)
|
|
46
|
-
.map { |entry| SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry) }
|
|
47
|
-
.select { |entry| ids.include?(entry[:id]) }
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def process_entry(entry, processed)
|
|
51
|
-
feed_url = entry[:feed_url].to_s
|
|
52
|
-
|
|
53
|
-
if feed_url.blank?
|
|
54
|
-
failed_sources << failure_payload(feed_url, "MissingFeedURL", "Feed URL is missing")
|
|
55
|
-
return
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
normalized_url = feed_url.downcase
|
|
59
|
-
|
|
60
|
-
if processed.include?(normalized_url)
|
|
61
|
-
skipped_duplicates << skipped_payload(feed_url, "duplicate in import selection")
|
|
62
|
-
return
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
if duplicate_source?(normalized_url)
|
|
66
|
-
skipped_duplicates << skipped_payload(feed_url, "already exists")
|
|
67
|
-
processed << normalized_url
|
|
68
|
-
return
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
source = SourceMonitor::Source.new(build_attributes(entry))
|
|
72
|
-
|
|
73
|
-
if source.save
|
|
74
|
-
imported_sources << { id: source.id, feed_url: source.feed_url, name: source.name }
|
|
75
|
-
SourceMonitor::FaviconFetchJob.perform_later(source.id) if should_fetch_favicon?(source)
|
|
76
|
-
processed << normalized_url
|
|
77
|
-
else
|
|
78
|
-
failed_sources << failure_payload(feed_url, "ValidationFailed", source.errors.full_messages.to_sentence)
|
|
79
|
-
end
|
|
80
|
-
rescue ActiveRecord::RecordNotUnique
|
|
81
|
-
skipped_duplicates << skipped_payload(feed_url, "already exists")
|
|
82
|
-
processed << normalized_url
|
|
83
|
-
rescue StandardError => error
|
|
84
|
-
failed_sources << failure_payload(feed_url, error.class.name, error.message)
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
def duplicate_source?(normalized_feed_url)
|
|
88
|
-
SourceMonitor::Source.where("LOWER(feed_url) = ?", normalized_feed_url).exists?
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
def build_attributes(entry)
|
|
92
|
-
defaults = SourceMonitor::Sources::Params.default_attributes.deep_dup
|
|
93
|
-
settings = SourceMonitor::Security::ParameterSanitizer.sanitize(import_session.bulk_settings.presence || {})
|
|
94
|
-
settings = settings.deep_symbolize_keys
|
|
95
|
-
|
|
96
|
-
defaults.merge(settings).merge(identity_attributes(entry))
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
def identity_attributes(entry)
|
|
100
|
-
{
|
|
101
|
-
name: entry[:title].presence || entry[:feed_url],
|
|
102
|
-
feed_url: entry[:feed_url],
|
|
103
|
-
website_url: entry[:website_url]
|
|
104
|
-
}
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def imported_sources
|
|
108
|
-
@imported_sources ||= []
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def failed_sources
|
|
112
|
-
@failed_sources ||= []
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def skipped_duplicates
|
|
116
|
-
@skipped_duplicates ||= []
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
def failure_payload(feed_url, error_class, message)
|
|
120
|
-
{
|
|
121
|
-
feed_url: feed_url,
|
|
122
|
-
error_class: error_class,
|
|
123
|
-
error_message: message
|
|
124
|
-
}
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
def skipped_payload(feed_url, reason)
|
|
128
|
-
{
|
|
129
|
-
feed_url: feed_url,
|
|
130
|
-
reason: reason
|
|
131
|
-
}
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
def should_fetch_favicon?(source)
|
|
135
|
-
defined?(ActiveStorage) &&
|
|
136
|
-
SourceMonitor.config.favicons.enabled? &&
|
|
137
|
-
source.website_url.present?
|
|
138
|
-
rescue StandardError
|
|
139
|
-
false
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
def broadcast_completion(history)
|
|
143
|
-
return unless defined?(Turbo::StreamsChannel)
|
|
144
|
-
|
|
145
|
-
histories = SourceMonitor::ImportHistory.recent_for(history.user_id).limit(5)
|
|
146
|
-
|
|
147
|
-
Turbo::StreamsChannel.broadcast_replace_to(
|
|
148
|
-
SourceMonitor::Realtime::Broadcaster::SOURCE_INDEX_STREAM,
|
|
149
|
-
target: "source_monitor_import_history_panel",
|
|
150
|
-
html: SourceMonitor::SourcesController.render(
|
|
151
|
-
partial: "source_monitor/sources/import_history_panel",
|
|
152
|
-
locals: { import_histories: histories }
|
|
153
|
-
)
|
|
154
|
-
)
|
|
155
|
-
rescue StandardError => error
|
|
156
|
-
Rails.logger.error("[SourceMonitor::ImportOpmlJob] broadcast failed: #{error.class}: #{error.message}") if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
14
|
+
SourceMonitor::ImportSessions::OPMLImporter.new(
|
|
15
|
+
import_session: import_session,
|
|
16
|
+
import_history: import_history
|
|
17
|
+
).call
|
|
157
18
|
end
|
|
158
19
|
end
|
|
159
20
|
end
|
|
@@ -4,90 +4,29 @@ module SourceMonitor
|
|
|
4
4
|
class ImportSessionHealthCheckJob < ApplicationJob
|
|
5
5
|
source_monitor_queue :maintenance
|
|
6
6
|
|
|
7
|
-
require "source_monitor/health/import_source_health_check"
|
|
8
|
-
require "source_monitor/import_sessions/entry_normalizer"
|
|
9
|
-
require "source_monitor/import_sessions/health_check_broadcaster"
|
|
10
|
-
|
|
11
7
|
discard_on ActiveJob::DeserializationError
|
|
12
8
|
|
|
9
|
+
rescue_from ActiveRecord::Deadlocked do |error|
|
|
10
|
+
Rails.logger&.warn("[SourceMonitor::ImportSessionHealthCheckJob] Deadlock: #{error.message}")
|
|
11
|
+
retry_job(wait: 2.seconds + rand(3).seconds)
|
|
12
|
+
end
|
|
13
|
+
|
|
13
14
|
def perform(import_session_id, entry_id)
|
|
14
15
|
import_session = SourceMonitor::ImportSession.find_by(id: import_session_id)
|
|
15
16
|
return unless import_session
|
|
16
|
-
return unless active_for?(import_session)
|
|
17
|
-
|
|
18
|
-
result = perform_health_check(import_session, entry_id)
|
|
19
|
-
return unless result
|
|
20
|
-
|
|
21
|
-
updated_entry = nil
|
|
22
|
-
|
|
23
|
-
import_session.with_lock do
|
|
24
|
-
import_session.reload
|
|
25
|
-
return unless active_for?(import_session)
|
|
26
|
-
|
|
27
|
-
entries = Array(import_session.parsed_sources).map(&:to_h)
|
|
28
|
-
index = entries.index { |candidate| entry_id_for(candidate) == entry_id.to_s }
|
|
29
|
-
return unless index
|
|
30
17
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
18
|
+
SourceMonitor::ImportSessions::HealthCheckUpdater.new(
|
|
19
|
+
import_session: import_session,
|
|
20
|
+
entry_id: entry_id
|
|
21
|
+
).call
|
|
22
|
+
rescue ActiveRecord::Deadlocked
|
|
23
|
+
raise # re-raise so rescue_from handler catches it
|
|
24
|
+
rescue StandardError => error
|
|
25
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
26
|
+
Rails.logger.error(
|
|
27
|
+
"[SourceMonitor::ImportSessionHealthCheckJob] #{error.class}: #{error.message}"
|
|
34
28
|
)
|
|
35
|
-
|
|
36
|
-
selected_ids = Array(import_session.selected_source_ids).map(&:to_s)
|
|
37
|
-
selected_ids -= [ entry_id.to_s ] if result.status == "unhealthy"
|
|
38
|
-
|
|
39
|
-
attrs = {
|
|
40
|
-
parsed_sources: entries,
|
|
41
|
-
selected_source_ids: selected_ids,
|
|
42
|
-
health_check_completed_at: completion_time(entries, import_session.health_check_targets)
|
|
43
|
-
}.compact
|
|
44
|
-
|
|
45
|
-
import_session.update!(attrs)
|
|
46
|
-
normalized_entry = SourceMonitor::ImportSessions::EntryNormalizer.normalize(entries[index])
|
|
47
|
-
updated_entry = normalized_entry.merge(selected: selected_ids.include?(entry_id.to_s))
|
|
48
29
|
end
|
|
49
|
-
|
|
50
|
-
broadcaster = SourceMonitor::ImportSessions::HealthCheckBroadcaster.new(import_session)
|
|
51
|
-
broadcaster.broadcast_row(updated_entry) if updated_entry
|
|
52
|
-
broadcaster.broadcast_progress
|
|
53
|
-
rescue StandardError => error
|
|
54
|
-
Rails.logger.error(
|
|
55
|
-
"[SourceMonitor::ImportSessionHealthCheckJob] #{error.class}: #{error.message}"
|
|
56
|
-
) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
private
|
|
60
|
-
|
|
61
|
-
def active_for?(import_session)
|
|
62
|
-
import_session.current_step == "health_check" && import_session.health_checks_active?
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
def perform_health_check(import_session, entry_id)
|
|
66
|
-
entry = find_entry(import_session, entry_id)
|
|
67
|
-
return unless entry
|
|
68
|
-
|
|
69
|
-
SourceMonitor::Health::ImportSourceHealthCheck.new(feed_url: entry_feed_url(entry)).call
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
def find_entry(import_session, entry_id)
|
|
73
|
-
Array(import_session.parsed_sources).find { |entry| entry_id_for(entry) == entry_id.to_s }
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
def entry_id_for(entry)
|
|
77
|
-
entry.to_h["id"].presence || entry.to_h[:id].presence || entry.to_h["feed_url"].to_s
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
def entry_feed_url(entry)
|
|
81
|
-
entry.to_h["feed_url"] || entry.to_h[:feed_url]
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
def completion_time(entries, targets)
|
|
85
|
-
normalized = Array(entries).map { |entry| SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry) }
|
|
86
|
-
filtered = normalized.select { |entry| targets.include?(entry[:id]) }
|
|
87
|
-
return nil if filtered.empty?
|
|
88
|
-
|
|
89
|
-
completed = filtered.count { |entry| %w[healthy unhealthy].include?(entry[:health_status].to_s) }
|
|
90
|
-
completed >= filtered.size ? Time.current : nil
|
|
91
30
|
end
|
|
92
31
|
end
|
|
93
32
|
end
|
|
@@ -6,6 +6,11 @@ module SourceMonitor
|
|
|
6
6
|
|
|
7
7
|
source_monitor_queue :maintenance
|
|
8
8
|
|
|
9
|
+
rescue_from ActiveRecord::Deadlocked do |error|
|
|
10
|
+
Rails.logger&.warn("[SourceMonitor::ItemCleanupJob] Deadlock: #{error.message}")
|
|
11
|
+
retry_job(wait: 2.seconds + rand(3).seconds)
|
|
12
|
+
end
|
|
13
|
+
|
|
9
14
|
def perform(options = nil)
|
|
10
15
|
options = SourceMonitor::Jobs::CleanupOptions.normalize(options)
|
|
11
16
|
|
|
@@ -7,6 +7,11 @@ module SourceMonitor
|
|
|
7
7
|
|
|
8
8
|
source_monitor_queue :maintenance
|
|
9
9
|
|
|
10
|
+
rescue_from ActiveRecord::Deadlocked do |error|
|
|
11
|
+
Rails.logger&.warn("[SourceMonitor::LogCleanupJob] Deadlock: #{error.message}")
|
|
12
|
+
retry_job(wait: 2.seconds + rand(3).seconds)
|
|
13
|
+
end
|
|
14
|
+
|
|
10
15
|
def perform(options = nil)
|
|
11
16
|
options = SourceMonitor::Jobs::CleanupOptions.normalize(options)
|
|
12
17
|
|
|
@@ -36,12 +41,18 @@ module SourceMonitor
|
|
|
36
41
|
|
|
37
42
|
def prune_fetch_logs(cutoff)
|
|
38
43
|
SourceMonitor::FetchLog.where(SourceMonitor::FetchLog.arel_table[:started_at].lt(cutoff))
|
|
39
|
-
.in_batches(of: 500)
|
|
44
|
+
.in_batches(of: 500) do |batch|
|
|
45
|
+
SourceMonitor::LogEntry.where(loggable_type: "SourceMonitor::FetchLog", loggable_id: batch.select(:id)).delete_all
|
|
46
|
+
batch.delete_all
|
|
47
|
+
end
|
|
40
48
|
end
|
|
41
49
|
|
|
42
50
|
def prune_scrape_logs(cutoff)
|
|
43
51
|
SourceMonitor::ScrapeLog.where(SourceMonitor::ScrapeLog.arel_table[:started_at].lt(cutoff))
|
|
44
|
-
.in_batches(of: 500)
|
|
52
|
+
.in_batches(of: 500) do |batch|
|
|
53
|
+
SourceMonitor::LogEntry.where(loggable_type: "SourceMonitor::ScrapeLog", loggable_id: batch.select(:id)).delete_all
|
|
54
|
+
batch.delete_all
|
|
55
|
+
end
|
|
45
56
|
end
|
|
46
57
|
end
|
|
47
58
|
end
|
|
@@ -4,9 +4,17 @@ module SourceMonitor
|
|
|
4
4
|
class ScheduleFetchesJob < ApplicationJob
|
|
5
5
|
source_monitor_queue :fetch
|
|
6
6
|
|
|
7
|
+
rescue_from ActiveRecord::Deadlocked do |error|
|
|
8
|
+
Rails.logger&.warn("[SourceMonitor::ScheduleFetchesJob] Deadlock: #{error.message}")
|
|
9
|
+
retry_job(wait: 2.seconds + rand(3).seconds)
|
|
10
|
+
end
|
|
11
|
+
|
|
7
12
|
def perform(options = nil)
|
|
8
13
|
limit = extract_limit(options)
|
|
9
14
|
SourceMonitor::Scheduler.run(limit:)
|
|
15
|
+
rescue StandardError => error
|
|
16
|
+
Rails.logger&.error("[SourceMonitor::ScheduleFetchesJob] #{error.class}: #{error.message}")
|
|
17
|
+
raise
|
|
10
18
|
end
|
|
11
19
|
|
|
12
20
|
private
|
|
@@ -6,62 +6,16 @@ module SourceMonitor
|
|
|
6
6
|
|
|
7
7
|
discard_on ActiveJob::DeserializationError
|
|
8
8
|
|
|
9
|
+
rescue_from ActiveRecord::Deadlocked do |error|
|
|
10
|
+
Rails.logger&.warn("[SourceMonitor::ScrapeItemJob] Deadlock: #{error.message}")
|
|
11
|
+
retry_job(wait: 2.seconds + rand(3).seconds)
|
|
12
|
+
end
|
|
13
|
+
|
|
9
14
|
def perform(item_id)
|
|
10
|
-
log("job:start", item_id: item_id)
|
|
11
15
|
item = SourceMonitor::Item.includes(:source).find_by(id: item_id)
|
|
12
16
|
return unless item
|
|
13
17
|
|
|
14
|
-
|
|
15
|
-
unless source&.scraping_enabled?
|
|
16
|
-
log("job:skipped_scraping_disabled", item: item)
|
|
17
|
-
SourceMonitor::Scraping::State.clear_inflight!(item)
|
|
18
|
-
return
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
remaining = time_until_scrape_allowed(source)
|
|
22
|
-
if remaining&.positive?
|
|
23
|
-
SourceMonitor::Scraping::State.clear_inflight!(item)
|
|
24
|
-
self.class.set(wait: remaining.seconds).perform_later(item_id)
|
|
25
|
-
log("job:deferred", item: item, wait_seconds: remaining)
|
|
26
|
-
return
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
SourceMonitor::Scraping::State.mark_processing!(item)
|
|
30
|
-
SourceMonitor::Scraping::ItemScraper.new(item:, source:).call
|
|
31
|
-
log("job:completed", item: item, status: item.scrape_status)
|
|
32
|
-
rescue StandardError => error
|
|
33
|
-
log("job:error", item: item, error: error.message)
|
|
34
|
-
SourceMonitor::Scraping::State.mark_failed!(item)
|
|
35
|
-
raise
|
|
36
|
-
ensure
|
|
37
|
-
SourceMonitor::Scraping::State.clear_inflight!(item) if item
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
private
|
|
41
|
-
|
|
42
|
-
def time_until_scrape_allowed(source)
|
|
43
|
-
interval = source.min_scrape_interval || SourceMonitor.config.scraping.min_scrape_interval
|
|
44
|
-
return nil if interval.nil? || interval <= 0
|
|
45
|
-
|
|
46
|
-
last_scrape_at = source.scrape_logs.maximum(:started_at)
|
|
47
|
-
return nil unless last_scrape_at
|
|
48
|
-
|
|
49
|
-
elapsed = Time.current - last_scrape_at
|
|
50
|
-
remaining = interval - elapsed
|
|
51
|
-
remaining.positive? ? remaining.ceil : nil
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def log(stage, item: nil, item_id: nil, **extra)
|
|
55
|
-
return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
56
|
-
|
|
57
|
-
payload = {
|
|
58
|
-
stage: "SourceMonitor::ScrapeItemJob##{stage}",
|
|
59
|
-
item_id: item&.id || item_id,
|
|
60
|
-
source_id: item&.source_id
|
|
61
|
-
}.merge(extra.compact)
|
|
62
|
-
Rails.logger.info("[SourceMonitor::ManualScrape] #{payload.to_json}")
|
|
63
|
-
rescue StandardError
|
|
64
|
-
nil
|
|
18
|
+
SourceMonitor::Scraping::Runner.new(item).call
|
|
65
19
|
end
|
|
66
20
|
end
|
|
67
21
|
end
|
|
@@ -10,78 +10,7 @@ module SourceMonitor
|
|
|
10
10
|
source = SourceMonitor::Source.find_by(id: source_id)
|
|
11
11
|
return unless source
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
broadcast_outcome(source, result)
|
|
15
|
-
trigger_fetch_if_degraded(source, result)
|
|
16
|
-
result
|
|
17
|
-
rescue StandardError => error
|
|
18
|
-
Rails.logger&.error(
|
|
19
|
-
"[SourceMonitor::SourceHealthCheckJob] error for source #{source_id}: #{error.class}: #{error.message}"
|
|
20
|
-
) if defined?(Rails) && Rails.respond_to?(:logger)
|
|
21
|
-
|
|
22
|
-
record_unexpected_failure(source, error) if source
|
|
23
|
-
broadcast_outcome(source, nil, error) if source
|
|
24
|
-
nil
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
DEGRADED_STATUSES = %w[declining failing].freeze
|
|
28
|
-
|
|
29
|
-
private
|
|
30
|
-
|
|
31
|
-
def trigger_fetch_if_degraded(source, result)
|
|
32
|
-
return unless result&.success?
|
|
33
|
-
return unless DEGRADED_STATUSES.include?(source.health_status.to_s)
|
|
34
|
-
|
|
35
|
-
SourceMonitor::FetchFeedJob.perform_later(source.id, force: true)
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
def record_unexpected_failure(source, error)
|
|
39
|
-
SourceMonitor::HealthCheckLog.create!(
|
|
40
|
-
source: source,
|
|
41
|
-
success: false,
|
|
42
|
-
started_at: Time.current,
|
|
43
|
-
completed_at: Time.current,
|
|
44
|
-
duration_ms: 0,
|
|
45
|
-
error_class: error.class.name,
|
|
46
|
-
error_message: error.message
|
|
47
|
-
)
|
|
48
|
-
rescue StandardError
|
|
49
|
-
nil
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def broadcast_outcome(source, result, error = nil)
|
|
53
|
-
SourceMonitor::Realtime.broadcast_source(source)
|
|
54
|
-
|
|
55
|
-
message, level = toast_payload(source, result, error)
|
|
56
|
-
return if message.blank?
|
|
57
|
-
|
|
58
|
-
SourceMonitor::Realtime.broadcast_toast(message:, level:)
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def toast_payload(source, result, error)
|
|
62
|
-
if error
|
|
63
|
-
return [
|
|
64
|
-
"Health check failed for #{source.name}: #{error.message}",
|
|
65
|
-
:error
|
|
66
|
-
]
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
if result&.success?
|
|
70
|
-
[
|
|
71
|
-
"Health check succeeded for #{source.name}.",
|
|
72
|
-
:success
|
|
73
|
-
]
|
|
74
|
-
else
|
|
75
|
-
failure_reason = result&.error&.message
|
|
76
|
-
http_status = result&.log&.http_status
|
|
77
|
-
message = "Health check failed for #{source.name}"
|
|
78
|
-
message += " (HTTP #{http_status})" if http_status.present?
|
|
79
|
-
message += ": #{failure_reason}" if failure_reason.present?
|
|
80
|
-
[
|
|
81
|
-
"#{message}.",
|
|
82
|
-
:error
|
|
83
|
-
]
|
|
84
|
-
end
|
|
13
|
+
SourceMonitor::Health::SourceHealthCheckOrchestrator.new(source).call
|
|
85
14
|
end
|
|
86
15
|
end
|
|
87
16
|
end
|