source_monitor 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rubocop.yml +12 -0
  4. data/.ruby-version +1 -0
  5. data/AGENTS.md +132 -0
  6. data/CHANGELOG.md +66 -0
  7. data/CONTRIBUTING.md +31 -0
  8. data/Gemfile +30 -0
  9. data/Gemfile.lock +411 -0
  10. data/MIT-LICENSE +20 -0
  11. data/README.md +108 -0
  12. data/Rakefile +8 -0
  13. data/app/assets/builds/.keep +0 -0
  14. data/app/assets/config/source_monitor_manifest.js +4 -0
  15. data/app/assets/images/source_monitor/.keep +0 -0
  16. data/app/assets/javascripts/source_monitor/application.js +20 -0
  17. data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
  18. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
  19. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  20. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
  21. data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
  22. data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
  23. data/app/assets/svgs/source_monitor/.keep +0 -0
  24. data/app/controllers/concerns/.keep +0 -0
  25. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
  26. data/app/controllers/source_monitor/application_controller.rb +62 -0
  27. data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
  28. data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
  29. data/app/controllers/source_monitor/health_controller.rb +10 -0
  30. data/app/controllers/source_monitor/items_controller.rb +116 -0
  31. data/app/controllers/source_monitor/logs_controller.rb +15 -0
  32. data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
  33. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
  34. data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
  35. data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
  36. data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
  37. data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
  38. data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
  39. data/app/controllers/source_monitor/sources_controller.rb +179 -0
  40. data/app/helpers/source_monitor/application_helper.rb +327 -0
  41. data/app/jobs/source_monitor/application_job.rb +13 -0
  42. data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
  43. data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
  44. data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
  45. data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
  46. data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
  47. data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
  48. data/app/mailers/source_monitor/application_mailer.rb +17 -0
  49. data/app/models/concerns/.keep +0 -0
  50. data/app/models/concerns/source_monitor/loggable.rb +18 -0
  51. data/app/models/source_monitor/application_record.rb +5 -0
  52. data/app/models/source_monitor/fetch_log.rb +31 -0
  53. data/app/models/source_monitor/health_check_log.rb +28 -0
  54. data/app/models/source_monitor/item.rb +102 -0
  55. data/app/models/source_monitor/item_content.rb +11 -0
  56. data/app/models/source_monitor/log_entry.rb +56 -0
  57. data/app/models/source_monitor/scrape_log.rb +31 -0
  58. data/app/models/source_monitor/source.rb +115 -0
  59. data/app/views/layouts/source_monitor/application.html.erb +54 -0
  60. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
  61. data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
  62. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
  63. data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
  64. data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
  65. data/app/views/source_monitor/dashboard/index.html.erb +48 -0
  66. data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
  67. data/app/views/source_monitor/items/_details.html.erb +234 -0
  68. data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
  69. data/app/views/source_monitor/items/index.html.erb +147 -0
  70. data/app/views/source_monitor/items/show.html.erb +3 -0
  71. data/app/views/source_monitor/logs/index.html.erb +208 -0
  72. data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
  73. data/app/views/source_monitor/shared/_toast.html.erb +34 -0
  74. data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
  75. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
  76. data/app/views/source_monitor/sources/_details.html.erb +302 -0
  77. data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
  78. data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
  79. data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
  80. data/app/views/source_monitor/sources/_form.html.erb +143 -0
  81. data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
  82. data/app/views/source_monitor/sources/_row.html.erb +102 -0
  83. data/app/views/source_monitor/sources/edit.html.erb +28 -0
  84. data/app/views/source_monitor/sources/index.html.erb +153 -0
  85. data/app/views/source_monitor/sources/new.html.erb +22 -0
  86. data/app/views/source_monitor/sources/show.html.erb +3 -0
  87. data/config/coverage_baseline.json +2010 -0
  88. data/config/initializers/feedjira.rb +19 -0
  89. data/config/routes.rb +18 -0
  90. data/config/tailwind.config.js +17 -0
  91. data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
  92. data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
  93. data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
  94. data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
  95. data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
  96. data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
  97. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
  98. data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
  99. data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
  100. data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
  101. data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
  102. data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
  103. data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
  104. data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
  105. data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
  106. data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
  107. data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
  108. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
  109. data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
  110. data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
  111. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
  112. data/docs/configuration.md +170 -0
  113. data/docs/deployment.md +63 -0
  114. data/docs/gh-cli-workflow.md +44 -0
  115. data/docs/installation.md +144 -0
  116. data/docs/troubleshooting.md +76 -0
  117. data/eslint.config.mjs +27 -0
  118. data/lib/generators/source_monitor/install/install_generator.rb +59 -0
  119. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
  120. data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
  121. data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
  122. data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
  123. data/lib/source_monitor/assets/bundler.rb +49 -0
  124. data/lib/source_monitor/assets.rb +6 -0
  125. data/lib/source_monitor/configuration.rb +654 -0
  126. data/lib/source_monitor/dashboard/queries.rb +356 -0
  127. data/lib/source_monitor/dashboard/quick_action.rb +7 -0
  128. data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
  129. data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
  130. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
  131. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
  132. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
  133. data/lib/source_monitor/engine.rb +107 -0
  134. data/lib/source_monitor/events.rb +110 -0
  135. data/lib/source_monitor/feedjira_extensions.rb +103 -0
  136. data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
  137. data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
  138. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
  139. data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
  140. data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
  141. data/lib/source_monitor/fetching/fetch_error.rb +88 -0
  142. data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
  143. data/lib/source_monitor/fetching/retry_policy.rb +85 -0
  144. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
  145. data/lib/source_monitor/health/source_health_check.rb +100 -0
  146. data/lib/source_monitor/health/source_health_monitor.rb +210 -0
  147. data/lib/source_monitor/health/source_health_reset.rb +68 -0
  148. data/lib/source_monitor/health.rb +46 -0
  149. data/lib/source_monitor/http.rb +85 -0
  150. data/lib/source_monitor/instrumentation.rb +52 -0
  151. data/lib/source_monitor/items/item_creator.rb +601 -0
  152. data/lib/source_monitor/items/retention_pruner.rb +146 -0
  153. data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
  154. data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
  155. data/lib/source_monitor/items/retention_strategies.rb +9 -0
  156. data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
  157. data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
  158. data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
  159. data/lib/source_monitor/jobs/visibility.rb +133 -0
  160. data/lib/source_monitor/logs/entry_sync.rb +69 -0
  161. data/lib/source_monitor/logs/filter_set.rb +163 -0
  162. data/lib/source_monitor/logs/query.rb +81 -0
  163. data/lib/source_monitor/logs/table_presenter.rb +161 -0
  164. data/lib/source_monitor/metrics.rb +77 -0
  165. data/lib/source_monitor/model_extensions.rb +109 -0
  166. data/lib/source_monitor/models/sanitizable.rb +76 -0
  167. data/lib/source_monitor/models/url_normalizable.rb +84 -0
  168. data/lib/source_monitor/pagination/paginator.rb +90 -0
  169. data/lib/source_monitor/realtime/adapter.rb +97 -0
  170. data/lib/source_monitor/realtime/broadcaster.rb +237 -0
  171. data/lib/source_monitor/realtime.rb +17 -0
  172. data/lib/source_monitor/release/changelog.rb +59 -0
  173. data/lib/source_monitor/release/runner.rb +73 -0
  174. data/lib/source_monitor/scheduler.rb +82 -0
  175. data/lib/source_monitor/scrapers/base.rb +105 -0
  176. data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
  177. data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
  178. data/lib/source_monitor/scrapers/readability.rb +156 -0
  179. data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
  180. data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
  181. data/lib/source_monitor/scraping/enqueuer.rb +125 -0
  182. data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
  183. data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
  184. data/lib/source_monitor/scraping/item_scraper.rb +84 -0
  185. data/lib/source_monitor/scraping/scheduler.rb +43 -0
  186. data/lib/source_monitor/scraping/state.rb +79 -0
  187. data/lib/source_monitor/security/authentication.rb +85 -0
  188. data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
  189. data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
  190. data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
  191. data/lib/source_monitor/version.rb +3 -0
  192. data/lib/source_monitor.rb +149 -0
  193. data/lib/tasks/recover_stalled_fetches.rake +16 -0
  194. data/lib/tasks/source_monitor_assets.rake +28 -0
  195. data/lib/tasks/source_monitor_tasks.rake +29 -0
  196. data/lib/tasks/test_smoke.rake +12 -0
  197. data/package-lock.json +3997 -0
  198. data/package.json +29 -0
  199. data/postcss.config.js +6 -0
  200. data/source_monitor.gemspec +46 -0
  201. data/stylelint.config.js +12 -0
  202. metadata +469 -0
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  module Health
    # Service object that puts a source back into a clean "healthy" state:
    # it clears pause, failure, backoff and circuit-breaker columns, marks the
    # fetch status as idle and schedules the next fetch.
    class SourceHealthReset
      # Builds the service and runs it in one step.
      #
      # @param source [Object, nil] record responding to +with_lock+, +update!+
      #   and +fetch_interval_minutes+
      # @param now [Time] reference time used for +updated_at+ and for
      #   computing +next_fetch_at+
      # @return [Object, nil] nil when +source+ is falsy, otherwise the value
      #   returned by +source.with_lock+
      def self.call(source:, now: Time.current)
        new(source: source, now: now).call
      end

      def initialize(source:, now: Time.current)
        @source = source
        @now = now
      end

      # Applies the reset attributes to the source while holding its lock.
      def call
        return unless source

        source.with_lock { source.update!(reset_attributes) }
      end

      private

      attr_reader :source, :now

      # Full attribute hash passed to +update!+; key order mirrors the
      # health fields first, then fetch fields, then scheduling timestamps.
      def reset_attributes
        cleared_health_state
          .merge(cleared_fetch_state)
          .merge(next_fetch_at: computed_next_fetch_at, updated_at: now)
      end

      # Health, pause, failure and backoff columns in their pristine values.
      def cleared_health_state
        {
          health_status: "healthy",
          auto_paused_at: nil,
          auto_paused_until: nil,
          rolling_success_rate: nil,
          failure_count: 0,
          last_error: nil,
          last_error_at: nil,
          backoff_until: nil
        }
      end

      # Fetch status, retry counter and circuit-breaker columns reset.
      def cleared_fetch_state
        {
          fetch_status: "idle",
          fetch_retry_attempt: 0,
          fetch_circuit_opened_at: nil,
          fetch_circuit_until: nil
        }
      end

      # @return [Time, nil] +now+ plus the effective interval, or nil when no
      #   usable interval could be determined
      def computed_next_fetch_at
        interval = effective_fetch_interval_minutes
        interval ? now + interval.minutes : nil
      end

      # Uses the source's own interval when one is present (normalized to a
      # positive integer or nil); otherwise reads the configured minimum.
      def effective_fetch_interval_minutes
        configured = source.fetch_interval_minutes

        if configured.present?
          normalize_interval(configured)
        else
          SourceMonitor.config.fetching.min_interval_minutes
        end
      end

      # @return [Integer, nil] +value+ coerced with +to_i+ when that is
      #   strictly positive, nil otherwise
      def normalize_interval(value)
        return if value.nil?

        whole = value.to_i
        whole if whole.positive?
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "source_monitor/health/source_health_monitor"
4
+ require "source_monitor/health/source_health_reset"
5
+ require "source_monitor/health/source_health_check"
6
+
7
module SourceMonitor
  # Hooks source health monitoring into the fetch lifecycle by registering an
  # +after_fetch_completed+ callback on the engine's event configuration.
  module Health
    module_function

    # Registers the health callback (at most once) with the event system.
    def setup!
      register_fetch_callback
    end

    # Memoized lambda invoked with a fetch-completed event.
    #
    # It runs SourceHealthMonitor for the event's source. Any StandardError
    # raised while doing so is logged instead of propagating to the caller.
    #
    # @return [Proc] the same lambda instance on every call, which lets
    #   +register_fetch_callback+ detect an existing registration
    def fetch_callback
      @fetch_callback ||= lambda do |event|
        source = event&.source
        next unless source

        SourceHealthMonitor.new(source: source).call
      rescue StandardError => error
        log_error(source, error)
      end
    end

    # Adds +fetch_callback+ to the +after_fetch_completed+ callbacks unless it
    # is already present.
    def register_fetch_callback
      callbacks = SourceMonitor.config.events.callbacks_for(:after_fetch_completed)
      return if callbacks.include?(fetch_callback)

      SourceMonitor.config.events.after_fetch_completed(fetch_callback)
    end
    private_class_method :register_fetch_callback

    # Reports a health-monitor failure via Rails.logger when available and via
    # Kernel#warn otherwise (or when the logger itself raises).
    #
    # The message is built by helpers that cannot raise, so the fallback
    # +warn+ in the rescue branch always has real text to emit.
    #
    # @param source [Object, nil] the source being monitored, if known
    # @param error [Exception] the failure to report
    def log_error(source, error)
      message = failure_message(source, error)

      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.error(message)
      else
        warn(message)
      end
    rescue StandardError
      warn(message)
    end
    private_class_method :log_error

    # Formats the log line; degrades to a shorter line if reading the error
    # message itself raises.
    def failure_message(source, error)
      "[SourceMonitor] Source health monitor failed for #{source_identifier(source)}: #{error.class}: #{error.message}"
    rescue StandardError
      "[SourceMonitor] Source health monitor failed for #{source_identifier(source)}: #{error.class}"
    end
    private_class_method :failure_message

    # Returns the source's id, or "unknown" when there is no source, no id,
    # or reading the id raises.
    def source_identifier(source)
      source&.id || "unknown"
    rescue StandardError
      "unknown"
    end
    private_class_method :source_identifier
  end
end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "faraday/retry"
5
+ require "faraday/follow_redirects"
6
+ require "faraday/gzip"
7
+ require "active_support/core_ext/object/blank"
8
+
9
module SourceMonitor
  # Factory for Faraday connections configured from SourceMonitor.config.http,
  # with per-call overrides for proxy, headers and timeouts.
  module HTTP
    DEFAULT_TIMEOUT = 15
    DEFAULT_OPEN_TIMEOUT = 5
    DEFAULT_MAX_REDIRECTS = 5
    DEFAULT_USER_AGENT = "SourceMonitor/#{SourceMonitor::VERSION}"
    RETRY_STATUSES = [ 429, 500, 502, 503, 504 ].freeze

    class << self
      # Builds a new Faraday connection.
      #
      # @param proxy [Object, false, nil] explicit proxy; +false+ disables the
      #   proxy, +nil+ defers to the configured proxy setting
      # @param headers [Hash] extra headers merged over the default headers
      # @param timeout [Numeric, nil] overrides the configured/default timeout
      # @param open_timeout [Numeric, nil] overrides the configured/default
      #   open timeout
      # @param retry_requests [Boolean] whether the retry middleware is added
      # @return [Faraday::Connection]
      def client(proxy: nil, headers: {}, timeout: nil, open_timeout: nil, retry_requests: true)
        http_settings = SourceMonitor.config.http

        chosen_proxy = resolve_proxy(proxy, http_settings)
        request_options = {
          timeout: timeout || http_settings.timeout || DEFAULT_TIMEOUT,
          open_timeout: open_timeout || http_settings.open_timeout || DEFAULT_OPEN_TIMEOUT,
          settings: http_settings,
          enable_retry: retry_requests
        }

        Faraday.new(nil, proxy: chosen_proxy) do |connection|
          configure_request(connection, headers, **request_options)
        end
      end

      private

      # Installs middleware (in registration order: retry, gzip, redirects,
      # raise_error), applies timeouts and headers, then selects the adapter.
      def configure_request(connection, headers, timeout:, open_timeout:, settings:, enable_retry:)
        connection.request(:retry, **retry_options(settings)) if enable_retry
        connection.request(:gzip)

        redirect_limit = settings.max_redirects || DEFAULT_MAX_REDIRECTS
        connection.response(:follow_redirects, limit: redirect_limit)
        connection.response(:raise_error)

        connection.options.timeout = timeout
        connection.options.open_timeout = open_timeout

        merged_headers = default_headers(settings).merge(headers)
        merged_headers.each_pair { |name, value| connection.headers[name] = value }

        connection.adapter(Faraday.default_adapter)
      end

      # Retry middleware options, each falling back to a built-in default
      # when the corresponding setting is nil/false.
      def retry_options(settings)
        {
          max: settings.retry_max || 4,
          interval: settings.retry_interval || 0.5,
          interval_randomness: settings.retry_interval_randomness || 0.5,
          backoff_factor: settings.retry_backoff_factor || 2,
          retry_statuses: settings.retry_statuses || RETRY_STATUSES
        }
      end

      # Baseline request headers with any configured headers merged on top.
      def default_headers(settings)
        agent = resolve_callable(settings.user_agent).presence || DEFAULT_USER_AGENT

        {
          "User-Agent" => agent,
          "Accept" => "application/rss+xml, application/atom+xml, application/json;q=0.9, text/xml;q=0.8",
          "Accept-Encoding" => "gzip,deflate"
        }.merge(settings.headers || {})
      end

      # nil -> configured proxy (callables are invoked); false -> no proxy;
      # anything else is used as given.
      def resolve_proxy(proxy, settings)
        if proxy.nil?
          resolve_callable(settings.proxy)
        elsif proxy == false
          nil
        else
          proxy
        end
      end

      # Invokes +value+ when it responds to +call+; otherwise returns it as is.
      def resolve_callable(value)
        return value unless value.respond_to?(:call)

        value.call
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/notifications"
4
+
5
module SourceMonitor
  # Thin wrappers around ActiveSupport::Notifications for the engine's
  # fetch and item events.
  module Instrumentation
    FETCH_START_EVENT = "source_monitor.fetch.start".freeze
    FETCH_FINISH_EVENT = "source_monitor.fetch.finish".freeze
    ITEM_DUPLICATE_EVENT = "source_monitor.items.duplicate".freeze
    ITEM_RETENTION_EVENT = "source_monitor.items.retention".freeze

    module_function

    # Emits a start event, runs the optional block, then emits a finish event
    # whose payload additionally carries +:duration_ms+ (monotonic clock,
    # rounded to two decimals). The caller's payload hash is not mutated.
    #
    # @param payload [Hash] data attached to both events
    # @return [Object, nil] the block's value, or nil when no block is given
    def fetch(payload = {})
      event_payload = payload.dup
      instrument(FETCH_START_EVENT, event_payload)

      clock_start = monotonic_time
      outcome = block_given? ? yield : nil
      elapsed_ms = ((monotonic_time - clock_start) * 1000.0).round(2)

      instrument(FETCH_FINISH_EVENT, event_payload.merge(duration_ms: elapsed_ms))
      outcome
    end

    # Emits the fetch start event with the given payload.
    def fetch_start(payload = {}) = instrument(FETCH_START_EVENT, payload)

    # Emits the fetch finish event with the given payload.
    def fetch_finish(payload = {}) = instrument(FETCH_FINISH_EVENT, payload)

    # Emits the duplicate-item event with the given payload.
    def item_duplicate(payload = {}) = instrument(ITEM_DUPLICATE_EVENT, payload)

    # Emits the item-retention event with the given payload.
    def item_retention(payload = {}) = instrument(ITEM_RETENTION_EVENT, payload)

    # Forwards to ActiveSupport::Notifications.instrument, running the
    # caller's block (if any) inside the instrumented section.
    def instrument(event_name, payload = {})
      ActiveSupport::Notifications.instrument(event_name, payload) do
        next unless block_given?

        yield
      end
    end

    # Current monotonic clock reading, used for measuring durations.
    def monotonic_time
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end