source_monitor 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rubocop.yml +12 -0
  4. data/.ruby-version +1 -0
  5. data/AGENTS.md +132 -0
  6. data/CHANGELOG.md +66 -0
  7. data/CONTRIBUTING.md +31 -0
  8. data/Gemfile +30 -0
  9. data/Gemfile.lock +411 -0
  10. data/MIT-LICENSE +20 -0
  11. data/README.md +108 -0
  12. data/Rakefile +8 -0
  13. data/app/assets/builds/.keep +0 -0
  14. data/app/assets/config/source_monitor_manifest.js +4 -0
  15. data/app/assets/images/source_monitor/.keep +0 -0
  16. data/app/assets/javascripts/source_monitor/application.js +20 -0
  17. data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
  18. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
  19. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  20. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
  21. data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
  22. data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
  23. data/app/assets/svgs/source_monitor/.keep +0 -0
  24. data/app/controllers/concerns/.keep +0 -0
  25. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
  26. data/app/controllers/source_monitor/application_controller.rb +62 -0
  27. data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
  28. data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
  29. data/app/controllers/source_monitor/health_controller.rb +10 -0
  30. data/app/controllers/source_monitor/items_controller.rb +116 -0
  31. data/app/controllers/source_monitor/logs_controller.rb +15 -0
  32. data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
  33. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
  34. data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
  35. data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
  36. data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
  37. data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
  38. data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
  39. data/app/controllers/source_monitor/sources_controller.rb +179 -0
  40. data/app/helpers/source_monitor/application_helper.rb +327 -0
  41. data/app/jobs/source_monitor/application_job.rb +13 -0
  42. data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
  43. data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
  44. data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
  45. data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
  46. data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
  47. data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
  48. data/app/mailers/source_monitor/application_mailer.rb +17 -0
  49. data/app/models/concerns/.keep +0 -0
  50. data/app/models/concerns/source_monitor/loggable.rb +18 -0
  51. data/app/models/source_monitor/application_record.rb +5 -0
  52. data/app/models/source_monitor/fetch_log.rb +31 -0
  53. data/app/models/source_monitor/health_check_log.rb +28 -0
  54. data/app/models/source_monitor/item.rb +102 -0
  55. data/app/models/source_monitor/item_content.rb +11 -0
  56. data/app/models/source_monitor/log_entry.rb +56 -0
  57. data/app/models/source_monitor/scrape_log.rb +31 -0
  58. data/app/models/source_monitor/source.rb +115 -0
  59. data/app/views/layouts/source_monitor/application.html.erb +54 -0
  60. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
  61. data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
  62. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
  63. data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
  64. data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
  65. data/app/views/source_monitor/dashboard/index.html.erb +48 -0
  66. data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
  67. data/app/views/source_monitor/items/_details.html.erb +234 -0
  68. data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
  69. data/app/views/source_monitor/items/index.html.erb +147 -0
  70. data/app/views/source_monitor/items/show.html.erb +3 -0
  71. data/app/views/source_monitor/logs/index.html.erb +208 -0
  72. data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
  73. data/app/views/source_monitor/shared/_toast.html.erb +34 -0
  74. data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
  75. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
  76. data/app/views/source_monitor/sources/_details.html.erb +302 -0
  77. data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
  78. data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
  79. data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
  80. data/app/views/source_monitor/sources/_form.html.erb +143 -0
  81. data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
  82. data/app/views/source_monitor/sources/_row.html.erb +102 -0
  83. data/app/views/source_monitor/sources/edit.html.erb +28 -0
  84. data/app/views/source_monitor/sources/index.html.erb +153 -0
  85. data/app/views/source_monitor/sources/new.html.erb +22 -0
  86. data/app/views/source_monitor/sources/show.html.erb +3 -0
  87. data/config/coverage_baseline.json +2010 -0
  88. data/config/initializers/feedjira.rb +19 -0
  89. data/config/routes.rb +18 -0
  90. data/config/tailwind.config.js +17 -0
  91. data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
  92. data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
  93. data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
  94. data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
  95. data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
  96. data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
  97. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
  98. data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
  99. data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
  100. data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
  101. data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
  102. data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
  103. data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
  104. data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
  105. data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
  106. data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
  107. data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
  108. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
  109. data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
  110. data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
  111. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
  112. data/docs/configuration.md +170 -0
  113. data/docs/deployment.md +63 -0
  114. data/docs/gh-cli-workflow.md +44 -0
  115. data/docs/installation.md +144 -0
  116. data/docs/troubleshooting.md +76 -0
  117. data/eslint.config.mjs +27 -0
  118. data/lib/generators/source_monitor/install/install_generator.rb +59 -0
  119. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
  120. data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
  121. data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
  122. data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
  123. data/lib/source_monitor/assets/bundler.rb +49 -0
  124. data/lib/source_monitor/assets.rb +6 -0
  125. data/lib/source_monitor/configuration.rb +654 -0
  126. data/lib/source_monitor/dashboard/queries.rb +356 -0
  127. data/lib/source_monitor/dashboard/quick_action.rb +7 -0
  128. data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
  129. data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
  130. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
  131. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
  132. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
  133. data/lib/source_monitor/engine.rb +107 -0
  134. data/lib/source_monitor/events.rb +110 -0
  135. data/lib/source_monitor/feedjira_extensions.rb +103 -0
  136. data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
  137. data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
  138. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
  139. data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
  140. data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
  141. data/lib/source_monitor/fetching/fetch_error.rb +88 -0
  142. data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
  143. data/lib/source_monitor/fetching/retry_policy.rb +85 -0
  144. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
  145. data/lib/source_monitor/health/source_health_check.rb +100 -0
  146. data/lib/source_monitor/health/source_health_monitor.rb +210 -0
  147. data/lib/source_monitor/health/source_health_reset.rb +68 -0
  148. data/lib/source_monitor/health.rb +46 -0
  149. data/lib/source_monitor/http.rb +85 -0
  150. data/lib/source_monitor/instrumentation.rb +52 -0
  151. data/lib/source_monitor/items/item_creator.rb +601 -0
  152. data/lib/source_monitor/items/retention_pruner.rb +146 -0
  153. data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
  154. data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
  155. data/lib/source_monitor/items/retention_strategies.rb +9 -0
  156. data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
  157. data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
  158. data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
  159. data/lib/source_monitor/jobs/visibility.rb +133 -0
  160. data/lib/source_monitor/logs/entry_sync.rb +69 -0
  161. data/lib/source_monitor/logs/filter_set.rb +163 -0
  162. data/lib/source_monitor/logs/query.rb +81 -0
  163. data/lib/source_monitor/logs/table_presenter.rb +161 -0
  164. data/lib/source_monitor/metrics.rb +77 -0
  165. data/lib/source_monitor/model_extensions.rb +109 -0
  166. data/lib/source_monitor/models/sanitizable.rb +76 -0
  167. data/lib/source_monitor/models/url_normalizable.rb +84 -0
  168. data/lib/source_monitor/pagination/paginator.rb +90 -0
  169. data/lib/source_monitor/realtime/adapter.rb +97 -0
  170. data/lib/source_monitor/realtime/broadcaster.rb +237 -0
  171. data/lib/source_monitor/realtime.rb +17 -0
  172. data/lib/source_monitor/release/changelog.rb +59 -0
  173. data/lib/source_monitor/release/runner.rb +73 -0
  174. data/lib/source_monitor/scheduler.rb +82 -0
  175. data/lib/source_monitor/scrapers/base.rb +105 -0
  176. data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
  177. data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
  178. data/lib/source_monitor/scrapers/readability.rb +156 -0
  179. data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
  180. data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
  181. data/lib/source_monitor/scraping/enqueuer.rb +125 -0
  182. data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
  183. data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
  184. data/lib/source_monitor/scraping/item_scraper.rb +84 -0
  185. data/lib/source_monitor/scraping/scheduler.rb +43 -0
  186. data/lib/source_monitor/scraping/state.rb +79 -0
  187. data/lib/source_monitor/security/authentication.rb +85 -0
  188. data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
  189. data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
  190. data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
  191. data/lib/source_monitor/version.rb +3 -0
  192. data/lib/source_monitor.rb +149 -0
  193. data/lib/tasks/recover_stalled_fetches.rake +16 -0
  194. data/lib/tasks/source_monitor_assets.rake +28 -0
  195. data/lib/tasks/source_monitor_tasks.rake +29 -0
  196. data/lib/tasks/test_smoke.rake +12 -0
  197. data/package-lock.json +3997 -0
  198. data/package.json +29 -0
  199. data/postcss.config.js +6 -0
  200. data/source_monitor.gemspec +46 -0
  201. data/stylelint.config.js +12 -0
  202. metadata +469 -0
@@ -0,0 +1,117 @@
# frozen_string_literal: true

module SourceMonitor
  # Active Job that fetches one source by running
  # SourceMonitor::Fetching::FetchRunner. A FetchError raised by the runner is
  # handed to SourceMonitor::Fetching::RetryPolicy, whose decision drives one of
  # three outcomes: re-enqueue this job, open the source's circuit, or clear the
  # retry state. The original error is re-raised in the latter two cases.
  class FetchFeedJob < ApplicationJob
    # Delay between attempts when the runner raises ConcurrencyError.
    FETCH_CONCURRENCY_RETRY_WAIT = 30.seconds
    # A job may run this far ahead of the source's next_fetch_at and still proceed.
    EARLY_EXECUTION_LEEWAY = 30.seconds

    source_monitor_queue :fetch

    discard_on ActiveJob::DeserializationError
    retry_on SourceMonitor::Fetching::FetchRunner::ConcurrencyError,
      wait: FETCH_CONCURRENCY_RETRY_WAIT,
      attempts: 5

    # Fetches the source unless it no longer exists or is not yet due.
    #
    # @param source_id [Integer] id of the SourceMonitor::Source to fetch
    # @param force [Boolean] when true, skip the schedule/status check
    # @raise [SourceMonitor::Fetching::FetchError] when the fetch fails and no
    #   retry was scheduled
    def perform(source_id, force: false)
      source = SourceMonitor::Source.find_by(id: source_id)
      return unless source

      return unless should_run?(source, force: force)

      SourceMonitor::Fetching::FetchRunner.new(source: source, force: force).run
    rescue SourceMonitor::Fetching::FetchError => error
      handle_transient_error(source, error)
    end

    private

    # Decides whether the fetch should go ahead: always when forced, when the
    # source is already marked queued/fetching, when it has no next_fetch_at,
    # or when next_fetch_at falls within EARLY_EXECUTION_LEEWAY from now.
    def should_run?(source, force:)
      return true if force

      status = source.fetch_status.to_s
      return true if %w[queued fetching].include?(status)

      next_fetch_at = source.next_fetch_at
      return true if next_fetch_at.blank?

      next_fetch_at <= Time.current + EARLY_EXECUTION_LEEWAY
    end

    # Applies the retry policy's decision for a failed fetch.
    #
    # Re-raises +error+ unless a retry was enqueued. Failures raised while
    # applying the policy (lookup, locking, update!, retry_job) are logged and
    # then replaced by the original +error+.
    def handle_transient_error(source, error)
      raise error unless transient_error?(error) && source

      decision = SourceMonitor::Fetching::RetryPolicy.new(source:, error:, now: Time.current).decision
      raise error unless decision

      if decision.retry?
        enqueue_retry!(source, decision)
      elsif decision.open_circuit?
        open_circuit!(source, decision)
        raise error
      else
        reset_retry_state!(source)
        raise error
      end
    rescue StandardError => policy_error
      # The deliberate `raise error` calls above also land in this handler.
      # Pass them through untouched so that only genuine failures while
      # applying the policy are reported as "Failed to schedule retry".
      raise if policy_error.equal?(error)

      log_retry_failure(source, error, policy_error)
      raise error
    end

    # Persists the retry bookkeeping on the source under a row lock, then
    # re-enqueues this job after the policy's wait (0 when none is given).
    def enqueue_retry!(source, decision)
      retry_at = Time.current + (decision.wait || 0)

      source.with_lock do
        source.reload
        source.update!(
          fetch_retry_attempt: decision.next_attempt,
          fetch_circuit_opened_at: nil,
          fetch_circuit_until: nil,
          next_fetch_at: retry_at,
          backoff_until: retry_at,
          fetch_status: "queued"
        )
      end

      retry_job wait: decision.wait || 0
    end

    # Marks the source as failed and pushes its next fetch/backoff out to the
    # policy's circuit_until timestamp.
    def open_circuit!(source, decision)
      source.with_lock do
        source.reload
        source.update!(
          fetch_retry_attempt: 0,
          fetch_circuit_opened_at: Time.current,
          fetch_circuit_until: decision.circuit_until,
          next_fetch_at: decision.circuit_until,
          backoff_until: decision.circuit_until,
          fetch_status: "failed"
        )
      end
    end

    # Clears the retry counter and circuit timestamps on the source.
    def reset_retry_state!(source)
      source.with_lock do
        source.reload
        source.update!(
          fetch_retry_attempt: 0,
          fetch_circuit_opened_at: nil,
          fetch_circuit_until: nil
        )
      end
    end

    def transient_error?(error)
      error.is_a?(SourceMonitor::Fetching::FetchError)
    end

    # Best-effort logging: any failure while logging is swallowed so it can
    # never replace the error being reported.
    def log_retry_failure(source, original_error, policy_error)
      return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

      message = "[SourceMonitor::FetchFeedJob] Failed to schedule retry for source #{source&.id}: " \
        "#{original_error.class}: #{original_error.message} (policy error: #{policy_error.class})"
      Rails.logger.error(message)
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module SourceMonitor
  # Runs the item retention pruner over a set of sources, in batches.
  #
  # Options are normalized by SourceMonitor::Jobs::CleanupOptions. The keys
  # read here are :source_scope, :source_ids / :source_id, :now, :strategy,
  # :soft_delete and :retention_pruner_class.
  class ItemCleanupJob < ApplicationJob
    DEFAULT_BATCH_SIZE = 100

    source_monitor_queue :fetch

    # @param options [Hash, nil] cleanup options (see class comment)
    def perform(options = nil)
      options = SourceMonitor::Jobs::CleanupOptions.normalize(options)

      sources = resolve_scope(options)
      batch_size = SourceMonitor::Jobs::CleanupOptions.batch_size(options, default: DEFAULT_BATCH_SIZE)
      now = SourceMonitor::Jobs::CleanupOptions.resolve_time(options[:now])
      strategy = resolve_strategy(options)
      pruner = options[:retention_pruner_class] || SourceMonitor::Items::RetentionPruner

      sources.find_in_batches(batch_size:) do |group|
        group.each { |source| pruner.call(source:, now:, strategy:) }
      end
    end

    private

    # Starts from the caller-supplied relation (or every source) and narrows
    # it to the requested ids when any were given.
    def resolve_scope(options)
      base = options[:source_scope] || SourceMonitor::Source.all
      ids = SourceMonitor::Jobs::CleanupOptions.extract_ids([ options[:source_ids], options[:source_id] ])
      return base unless ids.any?

      base.where(id: ids)
    end

    # Precedence: explicit :strategy, then the boolean-ish :soft_delete flag,
    # then the configured retention strategy.
    def resolve_strategy(options)
      return options[:strategy] if options.key?(:strategy)
      return SourceMonitor.config.retention.strategy unless options.key?(:soft_delete)

      soft = ActiveModel::Type::Boolean.new.cast(options[:soft_delete])
      soft ? :soft_delete : :destroy
    end
  end
end
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module SourceMonitor
  # Deletes fetch and scrape logs whose started_at is older than a retention
  # cutoff, together with the LogEntry rows associated with them.
  #
  # Options are normalized by SourceMonitor::Jobs::CleanupOptions. The keys
  # read here are :now, :fetch_logs_older_than_days and
  # :scrape_logs_older_than_days.
  class LogCleanupJob < ApplicationJob
    DEFAULT_FETCH_LOG_RETENTION_DAYS = 90
    DEFAULT_SCRAPE_LOG_RETENTION_DAYS = 45
    # Number of log rows removed per DELETE statement.
    PRUNE_BATCH_SIZE = 500

    source_monitor_queue :fetch

    # @param options [Hash, nil] cleanup options (see class comment)
    def perform(options = nil)
      options = SourceMonitor::Jobs::CleanupOptions.normalize(options)

      now = SourceMonitor::Jobs::CleanupOptions.resolve_time(options[:now])
      fetch_cutoff = resolve_cutoff(now:, days: options[:fetch_logs_older_than_days], default: DEFAULT_FETCH_LOG_RETENTION_DAYS)
      scrape_cutoff = resolve_cutoff(now:, days: options[:scrape_logs_older_than_days], default: DEFAULT_SCRAPE_LOG_RETENTION_DAYS)

      prune_fetch_logs(fetch_cutoff) if fetch_cutoff
      prune_scrape_logs(scrape_cutoff) if scrape_cutoff
    end

    private

    # Returns the timestamp before which logs are pruned, or nil when pruning
    # is disabled (unparseable or non-positive day count).
    def resolve_cutoff(now:, days:, default:)
      resolved_days =
        if days.nil?
          default
        else
          SourceMonitor::Jobs::CleanupOptions.integer(days)
        end

      return nil unless resolved_days
      return nil if resolved_days <= 0

      now - resolved_days.days
    end

    def prune_fetch_logs(cutoff)
      prune_logs(SourceMonitor::FetchLog, cutoff)
    end

    def prune_scrape_logs(cutoff)
      prune_logs(SourceMonitor::ScrapeLog, cutoff)
    end

    # Removes expired rows of +model+ in batches.
    #
    # delete_all issues a bare SQL DELETE and skips the
    # `has_one :log_entry, dependent: :destroy` callback declared on the log
    # models, so the associated LogEntry rows are deleted explicitly first.
    # Both deletes share a transaction so a failure cannot leave one half done.
    def prune_logs(model, cutoff)
      expired = model.where(model.arel_table[:started_at].lt(cutoff))

      expired.in_batches(of: PRUNE_BATCH_SIZE) do |batch|
        model.transaction do
          SourceMonitor::LogEntry
            .where(loggable_type: model.polymorphic_name, loggable_id: batch.select(model.primary_key))
            .delete_all
          batch.delete_all
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
# frozen_string_literal: true

module SourceMonitor
  # Triggers SourceMonitor::Scheduler.run with an optional batch limit.
  class ScheduleFetchesJob < ApplicationJob
    source_monitor_queue :fetch

    # @param options [Hash, nil] may carry :limit (symbol or string key);
    #   anything that is not a Hash is ignored
    def perform(options = nil)
      SourceMonitor::Scheduler.run(limit: extract_limit(options))
    end

    private

    # Reads :limit from the options, falling back to the scheduler's default
    # batch size when it is absent or the options are not a Hash.
    def extract_limit(options)
      settings = options.is_a?(Hash) ? options : {}
      settings = settings.symbolize_keys if settings.respond_to?(:symbolize_keys)

      settings[:limit] || SourceMonitor::Scheduler::DEFAULT_BATCH_SIZE
    end
  end
end
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module SourceMonitor
  # Active Job that scrapes a single item through
  # SourceMonitor::Scraping::ItemScraper and keeps the item's scraping state
  # (SourceMonitor::Scraping::State) in step with the outcome.
  class ScrapeItemJob < ApplicationJob
    source_monitor_queue :scrape

    discard_on ActiveJob::DeserializationError

    # Does nothing when the item no longer exists, and skips scraping when the
    # item's source is missing or has scraping disabled.
    #
    # @param item_id [Integer] id of the SourceMonitor::Item to scrape
    # @raise [StandardError] any error raised while scraping, after the item
    #   has been marked failed
    def perform(item_id)
      log("job:start", item_id: item_id)
      item = SourceMonitor::Item.includes(:source).find_by(id: item_id)
      return unless item

      source = item.source
      unless source&.scraping_enabled?
        log("job:skipped_scraping_disabled", item: item)
        SourceMonitor::Scraping::State.clear_inflight!(item)
        return
      end

      SourceMonitor::Scraping::State.mark_processing!(item)
      SourceMonitor::Scraping::ItemScraper.new(item:, source:).call
      log("job:completed", item: item, status: item.scrape_status)
    rescue StandardError => error
      # `item` is still nil when the failure happened before or during the
      # lookup. Pass item_id so the log keeps the id, and only touch the
      # scraping state when there is an item to mark (mirrors the ensure guard).
      log("job:error", item: item, item_id: item_id, error: error.message)
      SourceMonitor::Scraping::State.mark_failed!(item) if item
      raise
    ensure
      SourceMonitor::Scraping::State.clear_inflight!(item) if item
    end

    private

    # Writes one JSON log line per stage. Logging is best-effort: a failure
    # here is swallowed so it cannot affect the job.
    def log(stage, item: nil, item_id: nil, **extra)
      return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

      payload = {
        stage: "SourceMonitor::ScrapeItemJob##{stage}",
        item_id: item&.id || item_id,
        source_id: item&.source_id
      }.merge(extra.compact)
      Rails.logger.info("[SourceMonitor::ManualScrape] #{payload.to_json}")
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,77 @@
# frozen_string_literal: true

module SourceMonitor
  # Runs SourceMonitor::Health::SourceHealthCheck for one source and
  # broadcasts the outcome (source update plus a toast) over
  # SourceMonitor::Realtime. Errors raised by the check are logged, recorded
  # as a failed HealthCheckLog and broadcast; the job then returns nil.
  class SourceHealthCheckJob < ApplicationJob
    source_monitor_queue :fetch

    discard_on ActiveJob::DeserializationError

    # @param source_id [Integer] id of the SourceMonitor::Source to check
    # @return [Object, nil] the health check result, or nil when the source is
    #   missing or an error was rescued
    def perform(source_id)
      source = SourceMonitor::Source.find_by(id: source_id)
      return unless source

      outcome = SourceMonitor::Health::SourceHealthCheck.new(source: source).call
      broadcast_outcome(source, outcome)
      outcome
    rescue StandardError => error
      if defined?(Rails) && Rails.respond_to?(:logger)
        Rails.logger&.error(
          "[SourceMonitor::SourceHealthCheckJob] error for source #{source_id}: #{error.class}: #{error.message}"
        )
      end

      if source
        record_unexpected_failure(source, error)
        broadcast_outcome(source, nil, error)
      end

      nil
    end

    private

    # Stores a zero-duration failed HealthCheckLog for the rescued error.
    # Best-effort: a failure to write the log is swallowed.
    def record_unexpected_failure(source, error)
      SourceMonitor::HealthCheckLog.create!(
        source: source,
        success: false,
        started_at: Time.current,
        completed_at: Time.current,
        duration_ms: 0,
        error_class: error.class.name,
        error_message: error.message
      )
    rescue StandardError
      nil
    end

    # Broadcasts the source, then a toast when there is a message to show.
    def broadcast_outcome(source, result, error = nil)
      SourceMonitor::Realtime.broadcast_source(source)

      message, level = toast_payload(source, result, error)
      SourceMonitor::Realtime.broadcast_toast(message:, level:) unless message.blank?
    end

    # Builds the [message, level] pair for the toast.
    def toast_payload(source, result, error)
      return [ "Health check failed for #{source.name}: #{error.message}", :error ] if error
      return [ "Health check succeeded for #{source.name}.", :success ] if result&.success?

      reason = result&.error&.message
      status = result&.log&.http_status

      text = +"Health check failed for #{source.name}"
      text << " (HTTP #{status})" if status.present?
      text << ": #{reason}" if reason.present?

      [ "#{text}.", :error ]
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module SourceMonitor
  # Base mailer for the engine. The real mailer is only defined when
  # Action Mailer has been loaded by the host application.
  if defined?(::ActionMailer::Base)
    class ApplicationMailer < ::ActionMailer::Base
      layout "mailer"
      default from: "from@example.com"
    end
  else
    # :nocov:
    # API-only host apps skip Action Mailer; this empty placeholder lets the
    # constant autoload without pulling in the framework.
    class ApplicationMailer; end
    # :nocov:
  end
end
File without changes
@@ -0,0 +1,18 @@
# frozen_string_literal: true

module SourceMonitor
  # Shared behaviour for log models: a metadata attribute defaulting to an
  # empty hash, timing validations, and common query scopes.
  module Loggable
    extend ActiveSupport::Concern

    included do
      attribute :metadata, default: -> { {} }

      # Query helpers shared by every log model.
      scope :recent, -> { order(started_at: :desc) }
      scope :successful, -> { where(success: true) }
      scope :failed, -> { where(success: false) }

      # started_at is mandatory; duration_ms may be nil but never negative.
      validates :started_at, presence: true
      validates :duration_ms,
        numericality: { greater_than_or_equal_to: 0 },
        allow_nil: true
    end
  end
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

module SourceMonitor
  # Abstract base class for the engine's Active Record models.
  class ApplicationRecord < ActiveRecord::Base
    self.abstract_class = true
  end
end
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module SourceMonitor
  # Log row for one fetch of a source, including per-fetch item counters.
  # Every save is passed to SourceMonitor::Logs::EntrySync.
  class FetchLog < ApplicationRecord
    include SourceMonitor::Loggable

    belongs_to :source, class_name: "SourceMonitor::Source", inverse_of: :fetch_logs
    has_one :log_entry,
      as: :loggable,
      class_name: "SourceMonitor::LogEntry",
      inverse_of: :loggable,
      dependent: :destroy

    # Counters default to zero; response headers default to an empty hash.
    attribute :items_created, :integer, default: 0
    attribute :items_updated, :integer, default: 0
    attribute :items_failed, :integer, default: 0
    attribute :http_response_headers, default: -> { {} }

    validates :source, presence: true
    validates :items_created, :items_updated, :items_failed,
      numericality: { greater_than_or_equal_to: 0 }

    scope :for_job, ->(job_id) { where(job_id:) }

    SourceMonitor::ModelExtensions.register(self, :fetch_log)

    after_save :sync_log_entry

    private

    # after_save hook: hands this record to the log entry synchronizer.
    def sync_log_entry
      SourceMonitor::Logs::EntrySync.call(self)
    end
  end
end
@@ -0,0 +1,28 @@
# frozen_string_literal: true

module SourceMonitor
  # Log row for one health check of a source.
  # Every save is passed to SourceMonitor::Logs::EntrySync.
  class HealthCheckLog < ApplicationRecord
    include SourceMonitor::Loggable

    belongs_to :source, class_name: "SourceMonitor::Source", inverse_of: :health_check_logs
    has_one :log_entry, as: :loggable, class_name: "SourceMonitor::LogEntry", inverse_of: :loggable, dependent: :destroy

    # Response headers default to an empty hash.
    attribute :http_response_headers, default: -> { {} }

    validates :source, presence: true

    SourceMonitor::ModelExtensions.register(self, :health_check_log)

    after_save :sync_log_entry

    private

    # after_save hook: hands this record to the log entry synchronizer.
    def sync_log_entry
      SourceMonitor::Logs::EntrySync.call(self)
    end
  end
end
@@ -0,0 +1,102 @@
# frozen_string_literal: true

require "source_monitor/models/url_normalizable"

module SourceMonitor
  # One entry belonging to a Source. Scraped HTML/content is kept in a
  # separate ItemContent row, and items can be soft-deleted via deleted_at.
  class Item < ApplicationRecord
    include SourceMonitor::Models::UrlNormalizable

    belongs_to :source, class_name: "SourceMonitor::Source", inverse_of: :items, counter_cache: true
    has_one :item_content, class_name: "SourceMonitor::ItemContent", inverse_of: :item, dependent: :destroy, autosave: true
    has_many :scrape_logs, class_name: "SourceMonitor::ScrapeLog", inverse_of: :item, dependent: :destroy
    has_many :log_entries, class_name: "SourceMonitor::LogEntry", inverse_of: :item, dependent: :destroy

    # Soft-delete visibility is opt-in through explicit scopes; there is
    # deliberately no default_scope.
    scope :active, -> { where(deleted_at: nil) }
    scope :with_deleted, -> { unscope(where: :deleted_at) }
    scope :only_deleted, -> { where.not(deleted_at: nil) }

    normalizes_urls :url, :canonical_url, :comments_url
    validates_url_format :url, :canonical_url, :comments_url

    validates :source, presence: true
    validates :guid, presence: true, uniqueness: { scope: :source_id, case_sensitive: false }
    validates :content_fingerprint, uniqueness: { scope: :source_id }, allow_blank: true
    validates :url, presence: true

    # Every scope below builds on `active`, so soft-deleted items are excluded.
    scope :recent, -> { active.order(Arel.sql("published_at DESC NULLS LAST, created_at DESC")) }
    scope :published, -> { active.where.not(published_at: nil) }
    scope :pending_scrape, -> { active.where(scraped_at: nil) }
    scope :failed_scrape, -> { active.where(scrape_status: "failed") }

    delegate :scraped_html, :scraped_content, to: :item_content, allow_nil: true

    SourceMonitor::ModelExtensions.register(self, :item)

    # Attributes exposed to Ransack searches.
    def self.ransackable_attributes(_auth_object = nil)
      %w[title summary url published_at created_at scrape_status]
    end

    # Associations exposed to Ransack searches.
    def self.ransackable_associations(_auth_object = nil)
      %w[source]
    end

    # Writers that route to the (lazily built) ItemContent row.
    def scraped_html=(value)
      assign_content_attribute(:scraped_html, value)
    end

    def scraped_content=(value)
      assign_content_attribute(:scraped_content, value)
    end

    def deleted?
      deleted_at.present?
    end

    # Marks the item deleted without running validations or callbacks
    # (update_columns) and decrements the source's items_count in the same
    # transaction. Does nothing when the item is already deleted.
    #
    # @param timestamp [Time, nil] deletion time; nil falls back to Time.current
    def soft_delete!(timestamp: Time.current)
      return if deleted?

      self.class.transaction do
        deleted_time = timestamp.respond_to?(:in_time_zone) ? timestamp.in_time_zone : timestamp
        deleted_time ||= Time.current

        update_columns(deleted_at: deleted_time, updated_at: deleted_time)

        SourceMonitor::Source.decrement_counter(:items_count, source_id) if source_id
      end
    end

    private

    # Builds the content row on demand, forwards the assignment to it, then
    # drops the row again once both scraped fields are blank.
    def assign_content_attribute(attribute, value)
      ensure_item_content_presence(value)
      item_content&.public_send(:"#{attribute}=", value)
      cleanup_item_content_if_blank
    end

    # Only a non-nil value justifies creating a content row.
    def ensure_item_content_presence(value)
      build_item_content if item_content.blank? && !value.nil?
    end

    # A persisted, now-empty content row is flagged for destruction on the
    # next save; an unsaved one is simply discarded from the association.
    def cleanup_item_content_if_blank
      content = item_content
      return unless content
      return if content.scraped_html.present? || content.scraped_content.present?

      return content.mark_for_destruction if content.persisted?

      association(:item_content).reset
    end
  end
end
@@ -0,0 +1,11 @@
# frozen_string_literal: true

module SourceMonitor
  # Content row attached to an Item. Saving or destroying it touches the
  # parent item (touch: true).
  class ItemContent < ApplicationRecord
    belongs_to :item,
      class_name: "SourceMonitor::Item",
      inverse_of: :item_content,
      touch: true

    validates :item, presence: true

    SourceMonitor::ModelExtensions.register(self, :item_content)
  end
end
@@ -0,0 +1,56 @@
# frozen_string_literal: true

module SourceMonitor
  # Row that points at a FetchLog, ScrapeLog or HealthCheckLog through the
  # `loggable` delegated type, so the different log kinds can be listed and
  # searched through one table.
  class LogEntry < ApplicationRecord
    self.table_name = "sourcemon_log_entries"

    delegated_type :loggable, types: %w[SourceMonitor::FetchLog SourceMonitor::ScrapeLog SourceMonitor::HealthCheckLog]

    belongs_to :source, class_name: "SourceMonitor::Source", inverse_of: :log_entries
    belongs_to :item, class_name: "SourceMonitor::Item", inverse_of: :log_entries, optional: true

    validates :started_at, presence: true
    validates :source, presence: true

    scope :recent, -> { order(started_at: :desc) }

    SourceMonitor::ModelExtensions.register(self, :log_entry)

    class << self
      # Attributes exposed to Ransack searches.
      def ransackable_attributes(_auth_object = nil)
        %w[
          success
          started_at
          http_status
          scraper_adapter
          error_message
          error_class
          loggable_type
        ]
      end

      # Associations exposed to Ransack searches.
      def ransackable_associations(_auth_object = nil)
        %w[source item loggable]
      end
    end

    # The predicates compare against polymorphic_name because that is the
    # value Active Record writes into a polymorphic *_type column. sti_name
    # belongs to the STI `type` column and drops the namespace when
    # store_full_sti_class is disabled, which would make every predicate false.
    def fetch?
      loggable_type == FetchLog.polymorphic_name
    end

    def scrape?
      loggable_type == ScrapeLog.polymorphic_name
    end

    def health_check?
      loggable_type == HealthCheckLog.polymorphic_name
    end

    # @return [Symbol] :fetch or :scrape; anything else is reported as
    #   :health_check
    def log_type
      return :fetch if fetch?
      return :scrape if scrape?

      :health_check
    end
  end
end
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module SourceMonitor
  # Log row for one scrape of an item. The log's source must be the item's
  # own source. Every save is passed to SourceMonitor::Logs::EntrySync.
  class ScrapeLog < ApplicationRecord
    include SourceMonitor::Loggable

    belongs_to :item, class_name: "SourceMonitor::Item", inverse_of: :scrape_logs
    belongs_to :source, class_name: "SourceMonitor::Source", inverse_of: :scrape_logs
    has_one :log_entry,
      as: :loggable,
      class_name: "SourceMonitor::LogEntry",
      inverse_of: :loggable,
      dependent: :destroy

    validates :item, :source, presence: true
    validates :content_length, numericality: { greater_than_or_equal_to: 0 }, allow_nil: true
    validate :source_matches_item

    SourceMonitor::ModelExtensions.register(self, :scrape_log)

    after_save :sync_log_entry

    private

    # Adds an error when the log's source differs from its item's source.
    # Skipped while either association is missing; the presence validation
    # reports that case.
    def source_matches_item
      return unless item && source
      return if item.source_id == source_id

      errors.add(:source, "must match item source")
    end

    # after_save hook: hands this record to the log entry synchronizer.
    def sync_log_entry
      SourceMonitor::Logs::EntrySync.call(self)
    end
  end
end