source_monitor 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115):
  1. checksums.yaml +4 -4
  2. data/.claude/commands/rails-audit.md +77 -0
  3. data/CHANGELOG.md +50 -0
  4. data/CLAUDE.md +2 -2
  5. data/Gemfile.lock +7 -20
  6. data/RAILS_AUDIT.md +424 -0
  7. data/VERSION +1 -1
  8. data/app/assets/builds/source_monitor/application.css +4 -24
  9. data/app/assets/builds/source_monitor/application.js +57 -89
  10. data/app/assets/builds/source_monitor/application.js.map +4 -4
  11. data/app/assets/javascripts/source_monitor/application.js +3 -6
  12. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +6 -86
  13. data/app/assets/javascripts/source_monitor/controllers/filter_submit_controller.js +13 -0
  14. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  15. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +3 -13
  16. data/app/components/source_monitor/application_component.rb +10 -0
  17. data/app/components/source_monitor/filter_dropdown_component.rb +62 -0
  18. data/app/components/source_monitor/icon_component.rb +140 -0
  19. data/app/components/source_monitor/status_badge_component.html.erb +8 -0
  20. data/app/components/source_monitor/status_badge_component.rb +96 -0
  21. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +4 -0
  22. data/app/controllers/concerns/source_monitor/set_source.rb +13 -0
  23. data/app/controllers/source_monitor/application_controller.rb +17 -0
  24. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +6 -10
  25. data/app/controllers/source_monitor/dashboard_controller.rb +5 -1
  26. data/app/controllers/source_monitor/import_history_dismissals_controller.rb +1 -1
  27. data/app/controllers/source_monitor/import_sessions_controller.rb +30 -9
  28. data/app/controllers/source_monitor/item_scrapes_controller.rb +70 -0
  29. data/app/controllers/source_monitor/items_controller.rb +2 -69
  30. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +1 -4
  31. data/app/controllers/source_monitor/source_favicon_fetches_controller.rb +2 -12
  32. data/app/controllers/source_monitor/source_fetches_controller.rb +1 -6
  33. data/app/controllers/source_monitor/source_health_checks_controller.rb +9 -16
  34. data/app/controllers/source_monitor/source_health_resets_controller.rb +1 -6
  35. data/app/controllers/source_monitor/source_retries_controller.rb +1 -6
  36. data/app/controllers/source_monitor/source_scrape_tests_controller.rb +2 -4
  37. data/app/controllers/source_monitor/source_turbo_responses.rb +1 -3
  38. data/app/controllers/source_monitor/sources_controller.rb +15 -20
  39. data/app/helpers/source_monitor/application_helper.rb +15 -31
  40. data/app/helpers/source_monitor/health_badge_helper.rb +8 -0
  41. data/app/jobs/source_monitor/download_content_images_job.rb +1 -59
  42. data/app/jobs/source_monitor/favicon_fetch_job.rb +1 -58
  43. data/app/jobs/source_monitor/fetch_feed_job.rb +2 -52
  44. data/app/jobs/source_monitor/import_opml_job.rb +6 -145
  45. data/app/jobs/source_monitor/import_session_health_check_job.rb +15 -76
  46. data/app/jobs/source_monitor/item_cleanup_job.rb +5 -0
  47. data/app/jobs/source_monitor/log_cleanup_job.rb +13 -2
  48. data/app/jobs/source_monitor/schedule_fetches_job.rb +8 -0
  49. data/app/jobs/source_monitor/scrape_item_job.rb +6 -52
  50. data/app/jobs/source_monitor/source_health_check_job.rb +1 -72
  51. data/app/models/concerns/source_monitor/loggable.rb +12 -0
  52. data/app/models/source_monitor/fetch_log.rb +0 -8
  53. data/app/models/source_monitor/health_check_log.rb +0 -8
  54. data/app/models/source_monitor/import_history.rb +14 -0
  55. data/app/models/source_monitor/import_session.rb +2 -0
  56. data/app/models/source_monitor/item.rb +15 -0
  57. data/app/models/source_monitor/item_content.rb +4 -3
  58. data/app/models/source_monitor/scrape_log.rb +4 -6
  59. data/app/models/source_monitor/source.rb +28 -19
  60. data/app/presenters/source_monitor/base_presenter.rb +19 -0
  61. data/app/presenters/source_monitor/source_details_presenter.rb +61 -0
  62. data/app/presenters/source_monitor/sources_filter_presenter.rb +61 -0
  63. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +3 -3
  64. data/app/views/source_monitor/dashboard/_stat_card.html.erb +2 -1
  65. data/app/views/source_monitor/dashboard/_stats.html.erb +5 -7
  66. data/app/views/source_monitor/items/_details.html.erb +11 -14
  67. data/app/views/source_monitor/items/index.html.erb +10 -35
  68. data/app/views/source_monitor/logs/index.html.erb +20 -41
  69. data/app/views/source_monitor/shared/_form_errors.html.erb +14 -0
  70. data/app/views/source_monitor/source_scrape_tests/_result.html.erb +1 -29
  71. data/app/views/source_monitor/source_scrape_tests/_result_content.html.erb +33 -0
  72. data/app/views/source_monitor/source_scrape_tests/show.html.erb +1 -29
  73. data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +2 -2
  74. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +7 -5
  75. data/app/views/source_monitor/sources/_details.html.erb +24 -52
  76. data/app/views/source_monitor/sources/_health_status_badge.html.erb +4 -6
  77. data/app/views/source_monitor/sources/_row.html.erb +7 -18
  78. data/app/views/source_monitor/sources/edit.html.erb +1 -10
  79. data/app/views/source_monitor/sources/index.html.erb +26 -46
  80. data/app/views/source_monitor/sources/new.html.erb +1 -10
  81. data/config/routes.rb +1 -1
  82. data/db/migrate/20260313120000_add_composite_indexes_to_log_tables.rb +14 -0
  83. data/db/migrate/20260314120000_align_health_status_default.rb +11 -0
  84. data/lib/source_monitor/analytics/sources_index_metrics.rb +15 -0
  85. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +10 -4
  86. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +21 -5
  87. data/lib/source_monitor/favicons/fetcher.rb +86 -0
  88. data/lib/source_monitor/fetching/cloudflare_bypass.rb +14 -5
  89. data/lib/source_monitor/fetching/completion/event_publisher.rb +12 -0
  90. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +15 -2
  91. data/lib/source_monitor/fetching/completion/retention_handler.rb +11 -3
  92. data/lib/source_monitor/fetching/feed_fetcher.rb +2 -21
  93. data/lib/source_monitor/fetching/fetch_runner.rb +12 -3
  94. data/lib/source_monitor/fetching/retry_orchestrator.rb +102 -0
  95. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +9 -0
  96. data/lib/source_monitor/health/source_health_check_orchestrator.rb +95 -0
  97. data/lib/source_monitor/health.rb +1 -0
  98. data/lib/source_monitor/images/downloader.rb +6 -7
  99. data/lib/source_monitor/images/processor.rb +98 -0
  100. data/lib/source_monitor/import_sessions/health_check_updater.rb +95 -0
  101. data/lib/source_monitor/import_sessions/opml_importer.rb +163 -0
  102. data/lib/source_monitor/items/item_creator.rb +0 -21
  103. data/lib/source_monitor/logs/query.rb +20 -0
  104. data/lib/source_monitor/queries/scrape_candidates_query.rb +30 -0
  105. data/lib/source_monitor/queries.rb +7 -0
  106. data/lib/source_monitor/scheduler.rb +5 -0
  107. data/lib/source_monitor/scraping/bulk_result_presenter.rb +11 -8
  108. data/lib/source_monitor/scraping/runner.rb +52 -0
  109. data/lib/source_monitor/scraping/scheduler.rb +5 -0
  110. data/lib/source_monitor/scraping/state.rb +4 -2
  111. data/lib/source_monitor/security/parameter_sanitizer.rb +7 -0
  112. data/lib/source_monitor/version.rb +1 -1
  113. data/lib/source_monitor.rb +7 -0
  114. data/source_monitor.gemspec +1 -0
  115. metadata +47 -1
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Health
    # Coordinates one health check for a source: runs the probe through
    # SourceHealthCheck, pushes the refreshed source and a toast through
    # SourceMonitor::Realtime, and enqueues a forced feed fetch when the probe
    # succeeded while the source is still marked declining or failing.
    # Any StandardError raised along the way is logged, recorded as a failed
    # HealthCheckLog, and reported to the UI as an error toast.
    # Extracted from SourceHealthCheckJob.
    class SourceHealthCheckOrchestrator
      # Health statuses for which a successful probe triggers a follow-up fetch.
      DEGRADED_STATUSES = %w[declining failing].freeze

      # @param source the source record to check; this class reads its
      #   id, name and health_status.
      def initialize(source)
        @source = source
      end

      # Runs the check and its follow-up actions.
      #
      # NOTE(review): the rescue below also covers failures of the broadcast
      # and of the follow-up enqueue, and the broadcast inside the rescue is
      # itself unguarded -- confirm that is the intended job behaviour.
      def call
        outcome = SourceMonitor::Health::SourceHealthCheck.new(source: source).call
        broadcast_outcome(outcome)
        trigger_fetch_if_degraded(outcome)
      rescue StandardError => error
        log_error(error)
        record_unexpected_failure(error)
        broadcast_outcome(nil, error)
      end

      private

      attr_reader :source

      # Enqueues a forced fetch only for a successful probe on a degraded source.
      def trigger_fetch_if_degraded(result)
        if result&.success? && DEGRADED_STATUSES.include?(source.health_status.to_s)
          SourceMonitor::FetchFeedJob.perform_later(source.id, force: true)
        end
      end

      # Persists a zero-duration failed log entry describing +error+.
      # Best effort: a failure to write the log is swallowed and yields nil.
      def record_unexpected_failure(error)
        attributes = {
          source: source,
          success: false,
          started_at: Time.current,
          completed_at: Time.current,
          duration_ms: 0,
          error_class: error.class.name,
          error_message: error.message
        }
        SourceMonitor::HealthCheckLog.create!(attributes)
      rescue StandardError
        nil
      end

      # Broadcasts the source itself, then a toast when there is text to show.
      def broadcast_outcome(result, error = nil)
        SourceMonitor::Realtime.broadcast_source(source)

        text, severity = toast_payload(result, error)
        SourceMonitor::Realtime.broadcast_toast(message: text, level: severity) unless text.blank?
      end

      # Builds the [message, level] pair for the toast.
      # An explicit +error+ wins; otherwise the result decides between the
      # success text and a failure text enriched with HTTP status and reason.
      def toast_payload(result, error)
        return [ "Health check failed for #{source.name}: #{error.message}", :error ] if error
        return [ "Health check succeeded for #{source.name}.", :success ] if result&.success?

        reason = result&.error&.message
        status = result&.log&.http_status
        status_note = status.present? ? " (HTTP #{status})" : ""
        reason_note = reason.present? ? ": #{reason}" : ""

        [ "Health check failed for #{source.name}#{status_note}#{reason_note}.", :error ]
      end

      # Writes the error to the Rails log when Rails and a logger are available.
      def log_error(error)
        logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
        return if logger.nil?

        logger.error(
          "[SourceMonitor::Health::SourceHealthCheckOrchestrator] error for source #{source.id}: #{error.class}: #{error.message}"
        )
      end
    end
  end
end
@@ -3,6 +3,7 @@
3
3
  require "source_monitor/health/source_health_monitor"
4
4
  require "source_monitor/health/source_health_reset"
5
5
  require "source_monitor/health/source_health_check"
6
+ require "source_monitor/health/source_health_check_orchestrator"
6
7
  require "source_monitor/health/import_source_health_check"
7
8
 
8
9
  module SourceMonitor
@@ -43,13 +43,12 @@ module SourceMonitor
43
43
  private
44
44
 
45
45
  def fetch_image
46
- connection = Faraday.new do |f|
47
- f.options.timeout = settings.download_timeout
48
- f.options.open_timeout = [ settings.download_timeout / 2, 5 ].min
49
- f.headers["User-Agent"] = SourceMonitor.config.http.user_agent || "SourceMonitor/#{SourceMonitor::VERSION}"
50
- f.headers["Accept"] = "image/*"
51
- f.adapter Faraday.default_adapter
52
- end
46
+ connection = SourceMonitor::HTTP.client(
47
+ timeout: settings.download_timeout,
48
+ open_timeout: [ settings.download_timeout / 2, 5 ].min,
49
+ headers: { "Accept" => "image/*" },
50
+ retry_requests: false
51
+ )
53
52
 
54
53
  response = connection.get(url)
55
54
  return response if response.status == 200
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+
5
module SourceMonitor
  module Images
    # Orchestrates downloading images from an item's HTML content, attaching
    # them via ActiveStorage, and rewriting the HTML to use local blob URLs.
    # Extracted from DownloadContentImagesJob for testability and reuse.
    class Processor
      # Errors that abort the whole run so the job framework can retry it.
      TRANSIENT_ERRORS = [
        Timeout::Error, Errno::ETIMEDOUT,
        Faraday::TimeoutError, Faraday::ConnectionFailed,
        Net::OpenTimeout, Net::ReadTimeout
      ].freeze

      # @param item the item whose content, url and item_content are processed.
      def initialize(item)
        @item = item
      end

      # Downloads the images referenced by the item's HTML and rewrites the
      # HTML to point at the stored blobs.
      #
      # Returns nil without doing anything when image download is disabled,
      # the content is blank, images are already attached, or the HTML
      # references no images.
      #
      # @raise re-raises database deadlocks, TRANSIENT_ERRORS and any error
      #   from rewriting or saving, after discarding partial attachments.
      def call
        return unless SourceMonitor.config.images.download_enabled?

        html = item.content
        return if html.blank?

        item_content = item.item_content || item.build_item_content

        # Skip if images already attached (idempotency)
        return if item_content.persisted? && item_content.images.attached?

        rewriter = SourceMonitor::Images::ContentRewriter.new(html, base_url: item.url)
        image_urls = rewriter.image_urls
        return if image_urls.empty?

        # Save item_content first so we can attach blobs to it
        item_content.save! unless item_content.persisted?

        localize_images(item_content, rewriter, image_urls)
      end

      private

      attr_reader :item

      # Downloads the images, then rewrites and saves the item HTML.
      #
      # If anything escapes, the HTML was not rewritten. Attachments made
      # during this run are purged before re-raising, because a retry would
      # otherwise hit the "images already attached" guard in #call and never
      # rewrite the content.
      def localize_images(item_content, rewriter, image_urls)
        url_mapping = download_images(item_content, image_urls)
        return if url_mapping.empty?

        rewritten_html = rewriter.rewrite { |original_url| url_mapping[original_url] }
        item.update!(content: rewritten_html)
      rescue StandardError
        discard_partial_attachments(item_content)
        raise
      end

      # Downloads each URL, stores it as a blob attached to +item_content+,
      # and returns a hash of original URL => blob path.
      def download_images(item_content, image_urls)
        url_mapping = {}
        settings = SourceMonitor.config.images

        image_urls.each do |image_url|
          result = SourceMonitor::Images::Downloader.new(image_url, settings: settings).call
          next unless result

          blob = ActiveStorage::Blob.create_and_upload!(
            io: result.io,
            filename: result.filename,
            content_type: result.content_type
          )
          item_content.images.attach(blob)

          url_mapping[image_url] = Rails.application.routes.url_helpers.rails_blob_path(blob, only_path: true)
        rescue ActiveRecord::Deadlocked
          raise # let job framework retry on database deadlock
        rescue *TRANSIENT_ERRORS
          raise # re-raise transient errors to abort job for framework retry
        rescue StandardError => error
          # Individual image failure should not block others.
          # Original URL will be preserved (graceful fallback).
          log_image_error(image_url, error)
          next
        end

        url_mapping
      end

      # Removes every image attached to +item_content+. #call only reaches the
      # download step when nothing was attached beforehand, so these are all
      # from the current run. Best effort: a failing purge is logged and must
      # not mask the error that is about to be re-raised.
      def discard_partial_attachments(item_content)
        item_content.images.purge if item_content.images.attached?
      rescue StandardError => error
        log_warning("Could not discard partial attachments: #{error.class} - #{error.message}")
      end

      # Logs a skipped image.
      def log_image_error(image_url, error)
        log_warning("Skipping image #{image_url}: #{error.class} - #{error.message}")
      end

      # Writes a prefixed warning to the Rails log when one is available.
      # Logging failures are swallowed.
      def log_warning(message)
        return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

        Rails.logger.warn("[SourceMonitor::Images::Processor] #{message}")
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "source_monitor/health/import_source_health_check"
4
+ require "source_monitor/import_sessions/entry_normalizer"
5
+ require "source_monitor/import_sessions/health_check_broadcaster"
6
+
7
module SourceMonitor
  module ImportSessions
    # Performs a health check for a single OPML import entry: probes the feed
    # URL, acquires a row lock to merge the result into the import session,
    # and broadcasts UI updates. Extracted from ImportSessionHealthCheckJob.
    class HealthCheckUpdater
      # @param import_session the session whose parsed_sources holds the entry
      # @param entry_id identifier of the entry to check (compared as a String)
      def initialize(import_session:, entry_id:)
        @import_session = import_session
        @entry_id = entry_id
      end

      # Probes the entry's feed and stores the outcome on the import session.
      #
      # Returns early, without broadcasting, when the session is not in an
      # active health-check step or the entry cannot be found. The probe runs
      # before the row lock is taken; the session is reloaded and re-checked
      # inside the lock before anything is written.
      def call
        return unless active_for?(import_session)

        result = perform_health_check
        return unless result

        updated_entry = nil

        import_session.with_lock do
          import_session.reload
          # These returns leave the method before any write has happened.
          return unless active_for?(import_session)

          entries = Array(import_session.parsed_sources).map(&:to_h)
          index = entries.index { |candidate| entry_id_for(candidate) == entry_id.to_s }
          return unless index

          entries[index] = entries[index].merge(
            "health_status" => result.status,
            "health_error" => result.error_message
          )

          # An unhealthy entry is dropped from the selection.
          selected_ids = Array(import_session.selected_source_ids).map(&:to_s)
          selected_ids -= [ entry_id.to_s ] if result.status == "unhealthy"

          # compact drops health_check_completed_at while checks are pending.
          attrs = {
            parsed_sources: entries,
            selected_source_ids: selected_ids,
            health_check_completed_at: completion_time(entries, import_session.health_check_targets)
          }.compact

          import_session.update!(attrs)
          normalized_entry = SourceMonitor::ImportSessions::EntryNormalizer.normalize(entries[index])
          updated_entry = normalized_entry.merge(selected: selected_ids.include?(entry_id.to_s))
        end

        broadcaster = SourceMonitor::ImportSessions::HealthCheckBroadcaster.new(import_session)
        broadcaster.broadcast_row(updated_entry) if updated_entry
        broadcaster.broadcast_progress
      end

      private

      attr_reader :import_session, :entry_id

      # True while the session sits on the health-check step with checks active.
      def active_for?(session)
        session.current_step == "health_check" && session.health_checks_active?
      end

      # Runs the probe for this entry's feed URL; nil when the entry is missing.
      def perform_health_check
        entry = find_entry
        return unless entry

        SourceMonitor::Health::ImportSourceHealthCheck.new(feed_url: entry_feed_url(entry)).call
      end

      def find_entry
        Array(import_session.parsed_sources).find { |entry| entry_id_for(entry) == entry_id.to_s }
      end

      # Identifier of an entry as a String: its id, or its feed URL when no id
      # is present. String and symbol keys are both accepted, matching
      # #entry_feed_url. The value is always stringified because callers
      # compare it with entry_id.to_s.
      def entry_id_for(entry)
        attributes = entry.to_h
        identifier = attributes["id"].presence || attributes[:id].presence ||
          attributes["feed_url"] || attributes[:feed_url]
        identifier.to_s
      end

      def entry_feed_url(entry)
        entry.to_h["feed_url"] || entry.to_h[:feed_url]
      end

      # Returns the current time once every targeted entry has a healthy or
      # unhealthy status, nil otherwise. Also nil when no entry is targeted.
      def completion_time(entries, targets)
        wanted = Array(targets)
        normalized = Array(entries).map { |entry| SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry) }
        filtered = normalized.select { |entry| wanted.include?(entry[:id]) }
        return nil if filtered.empty?

        completed = filtered.count { |entry| %w[healthy unhealthy].include?(entry[:health_status].to_s) }
        completed >= filtered.size ? Time.current : nil
      end
    end
  end
end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "source_monitor/import_sessions/entry_normalizer"
5
+ require "source_monitor/realtime/broadcaster"
6
+ require "source_monitor/sources/params"
7
+
8
module SourceMonitor
  module ImportSessions
    # Orchestrates OPML import: selects entries, deduplicates, creates sources,
    # records results, and broadcasts completion. Extracted from ImportOpmlJob
    # so import logic can be invoked synchronously (console, tests).
    class OPMLImporter
      LOG_PREFIX = "[SourceMonitor::ImportSessions::OPMLImporter]"

      # @param import_session session holding parsed_sources,
      #   selected_source_ids and bulk_settings
      # @param import_history record that receives the import results
      def initialize(import_session:, import_history:)
        @import_session = import_session
        @import_history = import_history
      end

      # Imports every selected entry, stores the imported / failed / skipped
      # lists on the import history, and broadcasts the refreshed history panel.
      def call
        import_history.update_columns(started_at: Time.current) unless import_history.started_at

        processed = Set.new

        selected_entries.each do |entry|
          process_entry(entry, processed)
        end

        import_history.update!(
          imported_sources: imported_sources,
          failed_sources: failed_sources,
          skipped_duplicates: skipped_duplicates,
          bulk_settings: import_session.bulk_settings.presence || {},
          completed_at: Time.current
        )

        broadcast_completion(import_history)
      end

      private

      attr_reader :import_session, :import_history

      # Normalized entries whose id is among the session's selected ids.
      def selected_entries
        ids = Array(import_session.selected_source_ids).map(&:to_s).to_set

        Array(import_session.parsed_sources)
          .map { |entry| SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry) }
          .select { |entry| ids.include?(entry[:id]) }
      end

      # Imports one entry and files it under exactly one of imported, failed
      # or skipped. +processed+ collects the lower-cased feed URLs already
      # handled in this run.
      def process_entry(entry, processed)
        feed_url = entry[:feed_url].to_s

        if feed_url.blank?
          failed_sources << failure_payload(feed_url, "MissingFeedURL", "Feed URL is missing")
          return
        end

        normalized_url = feed_url.downcase

        if processed.include?(normalized_url)
          skipped_duplicates << skipped_payload(feed_url, "duplicate in import selection")
          return
        end

        if duplicate_source?(normalized_url)
          skipped_duplicates << skipped_payload(feed_url, "already exists")
          processed << normalized_url
          return
        end

        source = SourceMonitor::Source.new(build_attributes(entry))

        if source.save
          imported_sources << { id: source.id, feed_url: source.feed_url, name: source.name }
          # Mark the URL as handled before the favicon enqueue, which is
          # isolated so its failure cannot also list this saved source as failed.
          processed << normalized_url
          enqueue_favicon_fetch(source)
        else
          failed_sources << failure_payload(feed_url, "ValidationFailed", source.errors.full_messages.to_sentence)
        end
      rescue ActiveRecord::RecordNotUnique
        # Lost a race with a concurrent insert of the same feed URL.
        skipped_duplicates << skipped_payload(feed_url, "already exists")
        processed << normalized_url
      rescue StandardError => error
        failed_sources << failure_payload(feed_url, error.class.name, error.message)
      end

      # Enqueues the favicon job when favicons apply to +source+.
      # Best effort: an enqueue failure is logged and does not affect the import.
      def enqueue_favicon_fetch(source)
        return unless should_fetch_favicon?(source)

        SourceMonitor::FaviconFetchJob.perform_later(source.id)
      rescue StandardError => error
        log_error("favicon enqueue failed for source #{source.id}: #{error.class}: #{error.message}")
      end

      # Case-insensitive existence check against stored feed URLs.
      def duplicate_source?(normalized_feed_url)
        SourceMonitor::Source.where("LOWER(feed_url) = ?", normalized_feed_url).exists?
      end

      # Default attributes, overlaid with the sanitized bulk settings, overlaid
      # with the entry's own name and URLs.
      def build_attributes(entry)
        defaults = SourceMonitor::Sources::Params.default_attributes.deep_dup
        settings = SourceMonitor::Security::ParameterSanitizer.sanitize(import_session.bulk_settings.presence || {})
        settings = settings.deep_symbolize_keys

        defaults.merge(settings).merge(identity_attributes(entry))
      end

      def identity_attributes(entry)
        {
          name: entry[:title].presence || entry[:feed_url],
          feed_url: entry[:feed_url],
          website_url: entry[:website_url]
        }
      end

      def imported_sources
        @imported_sources ||= []
      end

      def failed_sources
        @failed_sources ||= []
      end

      def skipped_duplicates
        @skipped_duplicates ||= []
      end

      def failure_payload(feed_url, error_class, message)
        {
          feed_url: feed_url,
          error_class: error_class,
          error_message: message
        }
      end

      def skipped_payload(feed_url, reason)
        {
          feed_url: feed_url,
          reason: reason
        }
      end

      # True when ActiveStorage is loaded, favicons are enabled and the source
      # has a website URL. Any error while checking counts as "no".
      def should_fetch_favicon?(source)
        defined?(ActiveStorage) &&
          SourceMonitor.config.favicons.enabled? &&
          source.website_url.present?
      rescue StandardError
        false
      end

      # Replaces the import history panel on the sources index stream with the
      # five most recent histories of the importing user. Failures are logged
      # and never raised.
      def broadcast_completion(history)
        return unless defined?(Turbo::StreamsChannel)

        histories = SourceMonitor::ImportHistory.recent_for(history.user_id).limit(5)

        Turbo::StreamsChannel.broadcast_replace_to(
          SourceMonitor::Realtime::Broadcaster::SOURCE_INDEX_STREAM,
          target: "source_monitor_import_history_panel",
          html: SourceMonitor::SourcesController.render(
            partial: "source_monitor/sources/import_history_panel",
            locals: { import_histories: histories }
          )
        )
      rescue StandardError => error
        log_error("broadcast failed: #{error.class}: #{error.message}")
      end

      # Writes a prefixed error line to the Rails log when one is available.
      def log_error(message)
        return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

        Rails.logger.error("#{LOG_PREFIX} #{message}")
      end
    end
  end
end
@@ -120,7 +120,6 @@ module SourceMonitor
120
120
  new_item = SourceMonitor::Item.new(source_id: source.id)
121
121
  apply_attributes(new_item, attributes)
122
122
  new_item.save!
123
- new_item.ensure_feed_content_record
124
123
  Result.new(item: new_item, status: :created)
125
124
  rescue ActiveRecord::RecordNotUnique
126
125
  handle_concurrent_duplicate(attributes, raw_guid_present:)
@@ -176,26 +175,6 @@ module SourceMonitor
176
175
  def content_extractor
177
176
  @content_extractor ||= ContentExtractor.new(source: source)
178
177
  end
179
-
180
- # Forwarding methods for backward compatibility with tests
181
- def process_feed_content(raw_content, title:) = content_extractor.process_feed_content(raw_content, title: title)
182
- def should_process_feed_content?(raw_content) = content_extractor.should_process_feed_content?(raw_content)
183
- def feed_content_parser_class = content_extractor.feed_content_parser_class
184
- def wrap_content_for_readability(content, title:) = content_extractor.wrap_content_for_readability(content, title: title)
185
- def default_feed_readability_options = content_extractor.default_feed_readability_options
186
- def build_feed_content_metadata(result:, raw_content:, processed_content:)
187
- content_extractor.build_feed_content_metadata(result: result, raw_content: raw_content, processed_content: processed_content)
188
- end
189
- def html_fragment?(value) = content_extractor.html_fragment?(value)
190
- def deep_copy(value) = content_extractor.deep_copy(value)
191
- def string_or_nil(value) = entry_parser.string_or_nil(value)
192
- def sanitize_string_array(values) = entry_parser.sanitize_string_array(values)
193
- def split_keywords(value) = entry_parser.split_keywords(value)
194
- def safe_integer(value) = entry_parser.safe_integer(value)
195
- def json_entry? = entry_parser.json_entry?
196
- def atom_entry? = entry_parser.atom_entry?
197
- def normalize_metadata(value) = entry_parser.normalize_metadata(value)
198
- def generate_fingerprint(title, url, content) = entry_parser.generate_fingerprint(title, url, content)
199
178
  end
200
179
  end
201
180
  end
@@ -9,6 +9,7 @@ module SourceMonitor
9
9
  :per_page,
10
10
  :has_next_page,
11
11
  :has_previous_page,
12
+ :total_count,
12
13
  :filter_set,
13
14
  keyword_init: true
14
15
  ) do
@@ -19,6 +20,24 @@ module SourceMonitor
19
20
  def has_previous_page?
20
21
  !!self[:has_previous_page]
21
22
  end
23
+
24
# Number of the page after the current one, or nil on the last page.
def next_page
  page + 1 if has_next_page?
end
29
+
30
# Number of the page before the current one, never lower than 1;
# nil when there is no previous page.
def previous_page
  return nil unless has_previous_page?

  prior = page - 1
  prior < 1 ? 1 : prior
end
35
+
36
# Total number of pages for total_count records at per_page per page.
# Always at least 1. Returns 1 when the count is unknown or non-positive,
# and also when per_page is nil or non-positive, which would otherwise
# raise during the division or the rounding.
def total_pages
  return 1 if total_count.nil? || total_count <= 0
  return 1 if per_page.nil? || per_page <= 0

  [ 1, (total_count.to_f / per_page).ceil ].max
end
22
41
  end
23
42
 
24
43
  def initialize(params:)
@@ -38,6 +57,7 @@ module SourceMonitor
38
57
  per_page: pagination_result.per_page,
39
58
  has_next_page: pagination_result.has_next_page?,
40
59
  has_previous_page: pagination_result.has_previous_page?,
60
+ total_count: pagination_result.total_count,
41
61
  filter_set:
42
62
  )
43
63
  end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Queries
    # Finds active sources with scraping disabled whose items average fewer
    # feed words than a threshold.
    class ScrapeCandidatesQuery
      # @param threshold word-count threshold, coerced with to_i; defaults to
      #   the configured scrape recommendation threshold.
      def initialize(threshold: SourceMonitor.config.scraping.scrape_recommendation_threshold)
        @threshold = threshold.to_i
      end

      # Returns a Source relation; it is empty when the threshold is zero
      # or negative.
      def call
        if @threshold.positive?
          SourceMonitor::Source.active
            .where(scraping_enabled: false)
            .where(id: source_ids_below_threshold)
        else
          SourceMonitor::Source.none
        end
      end

      private

      # Subquery of source ids whose average feed_word_count, over items that
      # have a non-null value, is below the threshold.
      def source_ids_below_threshold
        contents_table = SourceMonitor::ItemContent.table_name

        SourceMonitor::Item
          .joins(:item_content)
          .where.not(contents_table => { feed_word_count: nil })
          .group(:source_id)
          .having("AVG(#{contents_table}.feed_word_count) < ?", @threshold)
          .select(:source_id)
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  # Namespace for query objects; each constant is loaded on first reference.
  module Queries
    autoload(:ScrapeCandidatesQuery, "source_monitor/queries/scrape_candidates_query")
  end
end
@@ -37,6 +37,11 @@ module SourceMonitor
37
37
 
38
38
  source_ids.size
39
39
  end
40
+ rescue StandardError => error
41
+ Rails.logger.warn(
42
+ "[SourceMonitor::Scheduler] Scheduler run failed: #{error.class} - #{error.message}"
43
+ ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
44
+ 0
40
45
  end
41
46
 
42
47
  private
@@ -5,11 +5,10 @@ module SourceMonitor
5
5
  # Presenter for building flash messages from BulkSourceScraper results
6
6
  # Extracts complex message formatting logic from the controller
7
7
  class BulkResultPresenter
8
- attr_reader :result, :pluralizer
8
+ attr_reader :result
9
9
 
10
- def initialize(result:, pluralizer:)
10
+ def initialize(result:)
11
11
  @result = result
12
- @pluralizer = pluralizer
13
12
  end
14
13
 
15
14
  def to_flash_payload
@@ -25,14 +24,18 @@ module SourceMonitor
25
24
 
26
25
  private
27
26
 
27
# Formats "<count> <word>", pluralizing the word unless count equals 1.
def pluralize(count, word)
  noun = count == 1 ? word : word.pluralize
  "#{count} #{noun}"
end
30
+
28
31
  def build_success_payload
29
32
  label = BulkSourceScraper.selection_label(result.selection)
30
- pluralized_enqueued = pluralizer.call(result.enqueued_count, "item")
33
+ pluralized_enqueued = pluralize(result.enqueued_count, "item")
31
34
 
32
35
  message = "Queued scraping for #{pluralized_enqueued} from the #{label}."
33
36
 
34
37
  if result.already_enqueued_count.positive?
35
- pluralized_already = pluralizer.call(result.already_enqueued_count, "item")
38
+ pluralized_already = pluralize(result.already_enqueued_count, "item")
36
39
  message = "#{message} #{pluralized_already.capitalize} already in progress."
37
40
  end
38
41
 
@@ -44,12 +47,12 @@ module SourceMonitor
44
47
  parts = []
45
48
 
46
49
  if result.enqueued_count.positive?
47
- pluralized_enqueued = pluralizer.call(result.enqueued_count, "item")
50
+ pluralized_enqueued = pluralize(result.enqueued_count, "item")
48
51
  parts << "Queued #{pluralized_enqueued} from the #{label}"
49
52
  end
50
53
 
51
54
  if result.already_enqueued_count.positive?
52
- pluralized_already = pluralizer.call(result.already_enqueued_count, "item")
55
+ pluralized_already = pluralize(result.already_enqueued_count, "item")
53
56
  parts << "#{pluralized_already.capitalize} already in progress"
54
57
  end
55
58
 
@@ -62,7 +65,7 @@ module SourceMonitor
62
65
  if other_failures.values.sum.positive?
63
66
  skipped = other_failures.map do |status, count|
64
67
  label_key = status.to_s.tr("_", " ")
65
- "#{pluralizer.call(count, label_key)}"
68
+ "#{pluralize(count, label_key)}"
66
69
  end.join(", ")
67
70
  parts << "Skipped #{skipped}"
68
71
  end