source_monitor 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rubocop.yml +12 -0
  4. data/.ruby-version +1 -0
  5. data/AGENTS.md +132 -0
  6. data/CHANGELOG.md +66 -0
  7. data/CONTRIBUTING.md +31 -0
  8. data/Gemfile +30 -0
  9. data/Gemfile.lock +411 -0
  10. data/MIT-LICENSE +20 -0
  11. data/README.md +108 -0
  12. data/Rakefile +8 -0
  13. data/app/assets/builds/.keep +0 -0
  14. data/app/assets/config/source_monitor_manifest.js +4 -0
  15. data/app/assets/images/source_monitor/.keep +0 -0
  16. data/app/assets/javascripts/source_monitor/application.js +20 -0
  17. data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
  18. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
  19. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  20. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
  21. data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
  22. data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
  23. data/app/assets/svgs/source_monitor/.keep +0 -0
  24. data/app/controllers/concerns/.keep +0 -0
  25. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
  26. data/app/controllers/source_monitor/application_controller.rb +62 -0
  27. data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
  28. data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
  29. data/app/controllers/source_monitor/health_controller.rb +10 -0
  30. data/app/controllers/source_monitor/items_controller.rb +116 -0
  31. data/app/controllers/source_monitor/logs_controller.rb +15 -0
  32. data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
  33. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
  34. data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
  35. data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
  36. data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
  37. data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
  38. data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
  39. data/app/controllers/source_monitor/sources_controller.rb +179 -0
  40. data/app/helpers/source_monitor/application_helper.rb +327 -0
  41. data/app/jobs/source_monitor/application_job.rb +13 -0
  42. data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
  43. data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
  44. data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
  45. data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
  46. data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
  47. data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
  48. data/app/mailers/source_monitor/application_mailer.rb +17 -0
  49. data/app/models/concerns/.keep +0 -0
  50. data/app/models/concerns/source_monitor/loggable.rb +18 -0
  51. data/app/models/source_monitor/application_record.rb +5 -0
  52. data/app/models/source_monitor/fetch_log.rb +31 -0
  53. data/app/models/source_monitor/health_check_log.rb +28 -0
  54. data/app/models/source_monitor/item.rb +102 -0
  55. data/app/models/source_monitor/item_content.rb +11 -0
  56. data/app/models/source_monitor/log_entry.rb +56 -0
  57. data/app/models/source_monitor/scrape_log.rb +31 -0
  58. data/app/models/source_monitor/source.rb +115 -0
  59. data/app/views/layouts/source_monitor/application.html.erb +54 -0
  60. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
  61. data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
  62. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
  63. data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
  64. data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
  65. data/app/views/source_monitor/dashboard/index.html.erb +48 -0
  66. data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
  67. data/app/views/source_monitor/items/_details.html.erb +234 -0
  68. data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
  69. data/app/views/source_monitor/items/index.html.erb +147 -0
  70. data/app/views/source_monitor/items/show.html.erb +3 -0
  71. data/app/views/source_monitor/logs/index.html.erb +208 -0
  72. data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
  73. data/app/views/source_monitor/shared/_toast.html.erb +34 -0
  74. data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
  75. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
  76. data/app/views/source_monitor/sources/_details.html.erb +302 -0
  77. data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
  78. data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
  79. data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
  80. data/app/views/source_monitor/sources/_form.html.erb +143 -0
  81. data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
  82. data/app/views/source_monitor/sources/_row.html.erb +102 -0
  83. data/app/views/source_monitor/sources/edit.html.erb +28 -0
  84. data/app/views/source_monitor/sources/index.html.erb +153 -0
  85. data/app/views/source_monitor/sources/new.html.erb +22 -0
  86. data/app/views/source_monitor/sources/show.html.erb +3 -0
  87. data/config/coverage_baseline.json +2010 -0
  88. data/config/initializers/feedjira.rb +19 -0
  89. data/config/routes.rb +18 -0
  90. data/config/tailwind.config.js +17 -0
  91. data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
  92. data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
  93. data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
  94. data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
  95. data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
  96. data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
  97. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
  98. data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
  99. data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
  100. data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
  101. data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
  102. data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
  103. data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
  104. data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
  105. data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
  106. data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
  107. data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
  108. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
  109. data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
  110. data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
  111. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
  112. data/docs/configuration.md +170 -0
  113. data/docs/deployment.md +63 -0
  114. data/docs/gh-cli-workflow.md +44 -0
  115. data/docs/installation.md +144 -0
  116. data/docs/troubleshooting.md +76 -0
  117. data/eslint.config.mjs +27 -0
  118. data/lib/generators/source_monitor/install/install_generator.rb +59 -0
  119. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
  120. data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
  121. data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
  122. data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
  123. data/lib/source_monitor/assets/bundler.rb +49 -0
  124. data/lib/source_monitor/assets.rb +6 -0
  125. data/lib/source_monitor/configuration.rb +654 -0
  126. data/lib/source_monitor/dashboard/queries.rb +356 -0
  127. data/lib/source_monitor/dashboard/quick_action.rb +7 -0
  128. data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
  129. data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
  130. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
  131. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
  132. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
  133. data/lib/source_monitor/engine.rb +107 -0
  134. data/lib/source_monitor/events.rb +110 -0
  135. data/lib/source_monitor/feedjira_extensions.rb +103 -0
  136. data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
  137. data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
  138. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
  139. data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
  140. data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
  141. data/lib/source_monitor/fetching/fetch_error.rb +88 -0
  142. data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
  143. data/lib/source_monitor/fetching/retry_policy.rb +85 -0
  144. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
  145. data/lib/source_monitor/health/source_health_check.rb +100 -0
  146. data/lib/source_monitor/health/source_health_monitor.rb +210 -0
  147. data/lib/source_monitor/health/source_health_reset.rb +68 -0
  148. data/lib/source_monitor/health.rb +46 -0
  149. data/lib/source_monitor/http.rb +85 -0
  150. data/lib/source_monitor/instrumentation.rb +52 -0
  151. data/lib/source_monitor/items/item_creator.rb +601 -0
  152. data/lib/source_monitor/items/retention_pruner.rb +146 -0
  153. data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
  154. data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
  155. data/lib/source_monitor/items/retention_strategies.rb +9 -0
  156. data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
  157. data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
  158. data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
  159. data/lib/source_monitor/jobs/visibility.rb +133 -0
  160. data/lib/source_monitor/logs/entry_sync.rb +69 -0
  161. data/lib/source_monitor/logs/filter_set.rb +163 -0
  162. data/lib/source_monitor/logs/query.rb +81 -0
  163. data/lib/source_monitor/logs/table_presenter.rb +161 -0
  164. data/lib/source_monitor/metrics.rb +77 -0
  165. data/lib/source_monitor/model_extensions.rb +109 -0
  166. data/lib/source_monitor/models/sanitizable.rb +76 -0
  167. data/lib/source_monitor/models/url_normalizable.rb +84 -0
  168. data/lib/source_monitor/pagination/paginator.rb +90 -0
  169. data/lib/source_monitor/realtime/adapter.rb +97 -0
  170. data/lib/source_monitor/realtime/broadcaster.rb +237 -0
  171. data/lib/source_monitor/realtime.rb +17 -0
  172. data/lib/source_monitor/release/changelog.rb +59 -0
  173. data/lib/source_monitor/release/runner.rb +73 -0
  174. data/lib/source_monitor/scheduler.rb +82 -0
  175. data/lib/source_monitor/scrapers/base.rb +105 -0
  176. data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
  177. data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
  178. data/lib/source_monitor/scrapers/readability.rb +156 -0
  179. data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
  180. data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
  181. data/lib/source_monitor/scraping/enqueuer.rb +125 -0
  182. data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
  183. data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
  184. data/lib/source_monitor/scraping/item_scraper.rb +84 -0
  185. data/lib/source_monitor/scraping/scheduler.rb +43 -0
  186. data/lib/source_monitor/scraping/state.rb +79 -0
  187. data/lib/source_monitor/security/authentication.rb +85 -0
  188. data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
  189. data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
  190. data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
  191. data/lib/source_monitor/version.rb +3 -0
  192. data/lib/source_monitor.rb +149 -0
  193. data/lib/tasks/recover_stalled_fetches.rake +16 -0
  194. data/lib/tasks/source_monitor_assets.rake +28 -0
  195. data/lib/tasks/source_monitor_tasks.rake +29 -0
  196. data/lib/tasks/test_smoke.rake +12 -0
  197. data/package-lock.json +3997 -0
  198. data/package.json +29 -0
  199. data/postcss.config.js +6 -0
  200. data/source_monitor.gemspec +46 -0
  201. data/stylelint.config.js +12 -0
  202. metadata +469 -0
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Dashboard
    # Groups sources into time buckets according to how many minutes remain
    # until their +next_fetch_at+, measured from +reference_time+.
    #
    # Sources whose +next_fetch_at+ is nil are appended to the bucket whose
    # definition sets +include_unscheduled+.
    class UpcomingFetchSchedule
      # One bucket of the schedule together with the sources assigned to it.
      Group = Struct.new(
        :key,
        :label,
        :min_minutes,
        :max_minutes,
        :window_start,
        :window_end,
        :include_unscheduled,
        :sources,
        keyword_init: true
      ) do
        # True when no sources were assigned to this bucket.
        def empty?
          sources.blank?
        end
      end

      # Bucket boundaries in minutes. +min_minutes+ is inclusive, +max_minutes+
      # is exclusive, and a nil +max_minutes+ leaves the bucket open-ended.
      INTERVAL_DEFINITIONS = [
        { key: "0-30", label: "Within 30 minutes", min_minutes: 0, max_minutes: 30 },
        { key: "30-60", label: "30-60 minutes", min_minutes: 30, max_minutes: 60 },
        { key: "60-120", label: "60-120 minutes", min_minutes: 60, max_minutes: 120 },
        { key: "120-240", label: "120-240 minutes", min_minutes: 120, max_minutes: 240 },
        { key: "240+", label: "240 minutes +", min_minutes: 240, max_minutes: nil, include_unscheduled: true }
      ].freeze

      attr_reader :scope, :reference_time

      # scope          - relation-like object answering +where+, +where.not+ and +order+.
      # reference_time - the instant that "minutes until next fetch" is measured from.
      def initialize(scope: SourceMonitor::Source.active, reference_time: Time.current)
        @scope = scope
        @reference_time = reference_time
      end

      # Returns one Group per entry of INTERVAL_DEFINITIONS, in definition
      # order. The result is memoized.
      def groups
        @groups ||= build_groups
      end

      private

      def build_groups
        definitions = build_definitions

        scheduled_sources.each do |source|
          definition = definition_for(source.next_fetch_at)
          definitions[definition[:key]][:sources] << source if definition
        end

        # The catch-all bucket does not depend on the source being placed, so
        # resolve it once instead of once per unscheduled source.
        unscheduled_bucket = definitions.values.find { |value| value[:include_unscheduled] }
        unscheduled_sources.each { |source| unscheduled_bucket[:sources] << source } if unscheduled_bucket

        definitions.values.map { |definition| build_group(definition) }
      end

      # Turns a working definition hash (with its collected sources) into a Group.
      def build_group(definition)
        Group.new(
          key: definition[:key],
          label: definition[:label],
          min_minutes: definition[:min_minutes],
          max_minutes: definition[:max_minutes],
          window_start: window_start_for(definition[:min_minutes]),
          window_end: window_end_for(definition[:max_minutes]),
          include_unscheduled: definition[:include_unscheduled],
          sources: sort_sources(definition[:sources])
        )
      end

      # Per-call copies of the definitions, keyed by bucket key, each with its
      # own empty +sources+ array so the constant is never mutated.
      def build_definitions
        INTERVAL_DEFINITIONS.each_with_object({}) do |definition, memo|
          memo[definition[:key]] = definition.merge(sources: [])
        end
      end

      def scheduled_sources
        scope.where.not(next_fetch_at: nil).order(:next_fetch_at)
      end

      def unscheduled_sources
        scope.where(next_fetch_at: nil).order(:name)
      end

      # Finds the definition whose [min, max) range contains the number of
      # minutes left until +next_fetch_at+.
      def definition_for(next_fetch_at)
        minutes = minutes_until(next_fetch_at)

        INTERVAL_DEFINITIONS.find do |definition|
          upper = definition[:max_minutes]
          minutes >= definition[:min_minutes] && (upper.nil? || minutes < upper)
        end
      end

      # Minutes from +reference_time+ to +timestamp+. Overdue timestamps are
      # clamped to 0 and a blank timestamp counts as infinitely far away.
      def minutes_until(timestamp)
        return Float::INFINITY if timestamp.blank?

        minutes = (timestamp - reference_time) / 60.0
        return 0 if minutes.negative?

        minutes
      end

      def window_start_for(min_minutes)
        return nil if min_minutes.nil? || min_minutes.infinite?

        reference_time + min_minutes.minutes
      end

      def window_end_for(max_minutes)
        return nil if max_minutes.nil? || max_minutes.infinite?

        reference_time + max_minutes.minutes
      end

      # Orders by next fetch time, then by name. Sources without a fetch time
      # sort as if they were due 100 years after +reference_time+.
      def sort_sources(sources)
        future_cap = reference_time + 100.years

        sources.sort_by do |source|
          [ source.next_fetch_at || future_cap, source.name.to_s ]
        end
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
module SourceMonitor
  # Rails engine that wires SourceMonitor into the host application: asset
  # paths and precompile entries, metrics subscribers, health/realtime/dashboard
  # setup, and background-job integration.
  class Engine < ::Rails::Engine
    isolate_namespace SourceMonitor
    require "source_monitor/assets/bundler"
    require "source_monitor/jobs/fetch_failure_subscriber"

    # Table name prefix taken from the engine's model configuration.
    def self.table_name_prefix
      SourceMonitor.config.models.table_name_prefix
    end

    # Paths (relative to their asset root) of every regular, non-dotfile entry
    # found under app/assets/images/source_monitor and
    # app/assets/svgs/source_monitor.
    def self.asset_precompile_entries
      engine_root = SourceMonitor::Engine.root
      asset_roots = [
        engine_root.join("app/assets/images"),
        engine_root.join("app/assets/svgs")
      ]

      asset_roots.flat_map do |base_path|
        Dir[base_path.join("source_monitor/**/*").to_s].filter_map do |absolute_path|
          next unless File.file?(absolute_path)
          next if File.basename(absolute_path).start_with?(".")

          Pathname.new(absolute_path).relative_path_from(base_path).to_s
        end
      end
    end

    initializer "source_monitor.assets" do |app|
      next unless app.config.respond_to?(:assets)

      engine_root = SourceMonitor::Engine.root

      %w[app/assets/builds app/assets/images app/assets/svgs].each do |relative_dir|
        app.config.assets.paths << engine_root.join(relative_dir).to_s
      end
    end

    initializer "source_monitor.assets.sprockets" do |app|
      next unless app.config.respond_to?(:assets)

      manifest_entry = "source_monitor_manifest.js"
      precompile = app.config.assets.precompile

      precompile << manifest_entry unless precompile.include?(manifest_entry)
      precompile.concat(SourceMonitor::Engine.asset_precompile_entries)
      precompile.uniq!
    end

    initializer "source_monitor.metrics" do
      SourceMonitor::Metrics.setup_subscribers!
    end

    initializer "source_monitor.dashboard_streams" do
      config.to_prepare do
        SourceMonitor::Health.setup!
        SourceMonitor::Realtime.setup!
        SourceMonitor::Dashboard::TurboBroadcaster.setup!
      end
    end

    initializer "source_monitor.jobs" do |app|
      SourceMonitor::Jobs::Visibility.setup!
      SourceMonitor::Jobs::FetchFailureSubscriber.setup!

      # Everything below only applies when Solid Queue is loaded.
      next unless defined?(::SolidQueue)

      # Switch to Solid Queue only when no adapter name is reported or the
      # adapter is "async".
      current_adapter = ActiveJob::Base.queue_adapter_name.to_s
      ActiveJob::Base.queue_adapter = :solid_queue if [ "", "async" ].include?(current_adapter)

      if defined?(::SolidQueue::RecurringTask)
        configured_job = SourceMonitor.config.recurring_command_job_class
        if configured_job.present?
          SolidQueue::RecurringTask.default_job_class =
            configured_job.is_a?(String) ? configured_job.constantize : configured_job
        end
      end

      if defined?(MissionControl::Jobs)
        adapters = MissionControl::Jobs.adapters
        if adapters.respond_to?(:add)
          adapters.add(:solid_queue)
          adapters.delete(:async)
        elsif adapters.respond_to?(:<<)
          adapters << :solid_queue unless adapters.include?(:solid_queue)
          adapters.delete(:async) if adapters.respond_to?(:delete)
        end

        # Prepend the extension only when it exists and the adapter does not
        # already have it among its ancestors.
        if defined?(ActiveJob::QueueAdapters::SolidQueueExt)
          extension = ActiveJob::QueueAdapters::SolidQueueExt
          adapter_class = ActiveJob::QueueAdapters::SolidQueueAdapter
          adapter_class.prepend(extension) unless adapter_class < extension
        end

        MissionControl::Jobs.applications.each do |application|
          next if application.servers.any? { |server| server.queue_adapter_name == :solid_queue }

          application.add_servers(solid_queue: ActiveJob::QueueAdapters.lookup(:solid_queue).new)
        end
      end

      # Run the visibility setup a second time once the app has finished
      # initializing.
      app.config.after_initialize { SourceMonitor::Jobs::Visibility.setup! }
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/core_ext/time"
4
+
5
module SourceMonitor
  # Builds event/context objects and hands them to the callbacks and item
  # processors registered on SourceMonitor.config.events. A failing handler is
  # logged and never prevents the remaining handlers from running.
  module Events
    ItemCreatedEvent = Struct.new(:item, :source, :entry, :result, :status, :occurred_at, keyword_init: true) do
      # True when the status, compared as a string, is "created".
      def created?
        status.to_s == "created"
      end
    end

    ItemScrapedEvent = Struct.new(:item, :source, :result, :log, :status, :occurred_at, keyword_init: true) do
      # True for every status except "failed" (a nil status therefore counts
      # as success).
      def success?
        status.to_s != "failed"
      end
    end

    FetchCompletedEvent = Struct.new(:source, :result, :status, :occurred_at, keyword_init: true)

    ItemProcessorContext = Struct.new(:item, :source, :entry, :result, :status, :occurred_at, keyword_init: true)

    module_function

    # Dispatches :after_item_created with an ItemCreatedEvent.
    def after_item_created(item:, source:, entry:, result:)
      event = ItemCreatedEvent.new(
        item: item,
        source: source,
        entry: entry,
        result: result,
        status: result&.status,
        occurred_at: Time.current
      )

      dispatch(:after_item_created, event)
    end

    # Dispatches :after_item_scraped; the item, source and log are read from
    # +result+ (which may be nil).
    def after_item_scraped(result)
      item = result&.item
      event = ItemScrapedEvent.new(
        item: item,
        source: item&.source,
        result: result,
        log: result&.log,
        status: result&.status,
        occurred_at: Time.current
      )

      dispatch(:after_item_scraped, event)
    end

    # Dispatches :after_fetch_completed with a FetchCompletedEvent.
    def after_fetch_completed(source:, result:)
      event = FetchCompletedEvent.new(
        source: source,
        result: result,
        status: result&.status,
        occurred_at: Time.current
      )

      dispatch(:after_fetch_completed, event)
    end

    # Runs every configured item processor with one shared
    # ItemProcessorContext. Errors are logged per processor.
    def run_item_processors(source:, entry:, result:)
      context = ItemProcessorContext.new(
        item: result&.item,
        source: source,
        entry: entry,
        result: result,
        status: result&.status,
        occurred_at: Time.current
      )

      SourceMonitor.config.events.item_processors.each do |processor|
        invoke(processor, context)
      rescue StandardError => error
        log_handler_error(:item_processor, processor, error)
      end
    end

    # Invokes each callback registered for +event_name+. Errors are logged per
    # callback.
    def dispatch(event_name, event)
      SourceMonitor.config.events.callbacks_for(event_name).each do |callback|
        invoke(callback, event)
      rescue StandardError => error
        log_handler_error(event_name, callback, error)
      end
    end

    # Calls +callable+ without arguments when it reports an arity of zero,
    # otherwise passes +event+.
    def invoke(callable, event)
      if callable.respond_to?(:arity) && callable.arity.zero?
        callable.call
      else
        callable.call(event)
      end
    end

    # Reports a handler failure through Rails.logger when one is available and
    # through Kernel#warn otherwise. This method never raises StandardError.
    def log_handler_error(kind, handler, error)
      # A handler's #inspect may itself raise; fall back to its class so the
      # original failure is still reported.
      handler_label =
        begin
          handler.inspect
        rescue StandardError
          "#<#{handler.class}>"
        end
      message = "[SourceMonitor] #{kind} handler #{handler_label} failed: #{error.class}: #{error.message}"

      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.error(message)
      else
        warn(message)
      end
    rescue StandardError
      # +message+ is nil when building it was what failed.
      warn(message || "[SourceMonitor] #{kind} handler failed: #{error.class}")
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "feedjira"
4
+ require "sax-machine"
5
+
6
module SourceMonitor
  # Additional SAX mappings registered on Feedjira's RSS and Atom entry
  # parsers, plus the small node classes those mappings parse into.
  module FeedjiraExtensions
    # Attributes read from a <media:thumbnail> element.
    class MediaThumbnail
      include SAXMachine

      attribute :url
      attribute :width
      attribute :height
    end

    # Attributes read from a <media:content> element.
    class MediaContent
      include SAXMachine

      attribute :url
      attribute :type
      attribute :medium
      attribute :height
      attribute :width
      attribute :fileSize, as: :file_size
      attribute :duration
      attribute :expression
    end

    # Attributes read from an <enclosure> element.
    class Enclosure
      include SAXMachine

      attribute :url
      attribute :type
      attribute :length
    end

    # Child elements read from an Atom <author> element.
    class AtomAuthor
      include SAXMachine

      element :name
      element :email
      element :uri
    end

    # Attributes read from an Atom <link> element.
    class AtomLink
      include SAXMachine

      attribute :href
      attribute :rel
      attribute :type
      attribute :length
    end

    # Prepended to the RSS entry parser: remembers every non-nil value passed
    # to +author=+ before delegating to the original writer.
    module RSSAuthorCapture
      def author=(value)
        if value
          @source_monitor_rss_authors ||= []
          @source_monitor_rss_authors << value
        end
        super
      end

      # All captured author values, or an empty array when none were seen.
      def rss_authors
        Array(@source_monitor_rss_authors)
      end
    end

    module_function

    # Registers the extensions once; later calls return immediately.
    def apply!
      return if @applied

      extend_rss_entry
      extend_atom_entry

      @applied = true
    end

    def extend_rss_entry
      rss_entry = Feedjira::Parser::RSSEntry

      rss_entry.element :"media:keywords", as: :media_keywords_raw
      rss_entry.element :"itunes:keywords", as: :itunes_keywords_raw
      rss_entry.element :"slash:comments", as: :slash_comments_raw
      rss_entry.elements :"media:thumbnail", as: :media_thumbnail_nodes, class: MediaThumbnail
      rss_entry.elements :"media:content", as: :media_content_nodes, class: MediaContent
      rss_entry.elements :enclosure, as: :enclosure_nodes, class: Enclosure

      rss_entry.prepend(RSSAuthorCapture)
    end

    def extend_atom_entry
      atom_entry = Feedjira::Parser::AtomEntry

      atom_entry.elements :author, as: :author_nodes, class: AtomAuthor
      atom_entry.elements :link, as: :link_nodes, class: AtomLink
    end
  end
end

SourceMonitor::FeedjiraExtensions.apply!
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Fetching
    # Wraps Postgres advisory lock usage to provide a small, testable collaborator
    # for coordinating fetch execution across processes.
    class AdvisoryLock
      # Raised by #with_lock when the lock could not be obtained.
      NotAcquiredError = Class.new(StandardError)

      # namespace, key   - converted with +to_i+ and used as the two lock arguments.
      # connection_pool  - object answering +with_connection+ and yielding a
      #                    connection that answers +exec_query+.
      def initialize(namespace:, key:, connection_pool: ActiveRecord::Base.connection_pool)
        @namespace = namespace
        @key = key
        @connection_pool = connection_pool
      end

      # Tries to take the lock without waiting, yields while holding it, and
      # always attempts to release it afterwards on the same connection.
      #
      # Returns the block's value.
      # Raises NotAcquiredError when the lock is already held elsewhere.
      def with_lock
        connection_pool.with_connection do |connection|
          raise NotAcquiredError, "advisory lock #{namespace}/#{key} busy" unless try_lock(connection)

          begin
            yield
          ensure
            release(connection)
          end
        end
      end

      private

      attr_reader :namespace, :key, :connection_pool

      def try_lock(connection)
        truthy?(lock_query(connection, "pg_try_advisory_lock").rows.dig(0, 0))
      end

      # Best-effort unlock: a failure is reported but never raised, so it
      # cannot mask the outcome of the block passed to #with_lock.
      def release(connection)
        lock_query(connection, "pg_advisory_unlock")
      rescue StandardError => error
        log_release_failure(error)
        nil
      end

      # Runs "SELECT <function>(namespace, key)" with both arguments coerced
      # to integers.
      def lock_query(connection, function)
        connection.exec_query("SELECT #{function}(#{namespace.to_i}, #{key.to_i})")
      end

      # A failed unlock may leave the lock held on that database session, so
      # make it visible when a Rails logger is available.
      def log_release_failure(error)
        return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

        Rails.logger.error(
          "[SourceMonitor] Failed to release advisory lock #{namespace}/#{key}: #{error.class}: #{error.message}"
        )
      rescue StandardError
        nil
      end

      # The query result may surface as a boolean or as the string "t".
      def truthy?(value)
        value == true || value.to_s == "t"
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Fetching
    module Completion
      # Forwards the outcome of a finished fetch to an event dispatcher
      # (SourceMonitor::Events unless another one is injected).
      class EventPublisher
        def initialize(dispatcher: SourceMonitor::Events)
          @dispatcher = dispatcher
        end

        # Passes +source+ and +result+ to the dispatcher's
        # +after_fetch_completed+ and returns whatever it returns.
        def call(source:, result:)
          dispatcher.after_fetch_completed(source:, result:)
        end

        private

        attr_reader :dispatcher
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Fetching
    module Completion
      # After a fetch, queues scrape jobs for the items that fetch created,
      # provided the source has scraping and auto-scrape switched on.
      class FollowUpHandler
        def initialize(enqueuer_class: SourceMonitor::Scraping::Enqueuer, job_class: SourceMonitor::ScrapeItemJob)
          @enqueuer_class = enqueuer_class
          @job_class = job_class
        end

        # Enqueues one scrape (reason :auto) per created item that is present
        # and has no +scraped_at+ yet. Does nothing, and returns nil, when
        # #should_enqueue? says no.
        def call(source:, result:)
          return unless should_enqueue?(source:, result:)

          created_items = Array(result.item_processing&.created_items)
          created_items.each do |item|
            already_handled = !item.present? || !item.scraped_at.nil?
            next if already_handled

            enqueuer_class.enqueue(item:, source:, job_class:, reason: :auto)
          end
        end

        private

        attr_reader :enqueuer_class, :job_class

        # Requires a result with status :fetched, a source with both scraping
        # and auto-scrape enabled, and a positive created-item count.
        def should_enqueue?(source:, result:)
          fetched = result && result.status == :fetched
          return false unless fetched
          return false unless source.scraping_enabled? && source.auto_scrape?

          created_count = result.item_processing&.created.to_i
          created_count.positive?
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Fetching
    module Completion
      # Applies item retention after a fetch completes.
      class RetentionHandler
        def initialize(pruner: SourceMonitor::Items::RetentionPruner)
          @pruner = pruner
        end

        # Prunes +source+ with the configured retention strategy.
        #
        # +result+ is accepted for signature parity with callers but is unused.
        # Returns the pruner's result, or nil when pruning raised; the failure
        # is logged instead of propagated.
        def call(source:, result:) # rubocop:disable Lint/UnusedMethodArgument
          pruner.call(
            source: source,
            strategy: SourceMonitor.config.retention.strategy
          )
        rescue StandardError => error
          log_failure(source, error)
          nil
        end

        private

        attr_reader :pruner

        # Logs through Rails.logger when one is configured. A missing logger
        # must not turn this best-effort handler into a crash.
        def log_failure(source, error)
          logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

          logger&.error(
            "[SourceMonitor] Retention pruning failed for source #{source.id}: #{error.class} - #{error.message}"
          )
        end
      end
    end
  end
end