source_monitor 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rubocop.yml +12 -0
  4. data/.ruby-version +1 -0
  5. data/AGENTS.md +132 -0
  6. data/CHANGELOG.md +66 -0
  7. data/CONTRIBUTING.md +31 -0
  8. data/Gemfile +30 -0
  9. data/Gemfile.lock +411 -0
  10. data/MIT-LICENSE +20 -0
  11. data/README.md +108 -0
  12. data/Rakefile +8 -0
  13. data/app/assets/builds/.keep +0 -0
  14. data/app/assets/config/source_monitor_manifest.js +4 -0
  15. data/app/assets/images/source_monitor/.keep +0 -0
  16. data/app/assets/javascripts/source_monitor/application.js +20 -0
  17. data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
  18. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
  19. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  20. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
  21. data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
  22. data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
  23. data/app/assets/svgs/source_monitor/.keep +0 -0
  24. data/app/controllers/concerns/.keep +0 -0
  25. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
  26. data/app/controllers/source_monitor/application_controller.rb +62 -0
  27. data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
  28. data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
  29. data/app/controllers/source_monitor/health_controller.rb +10 -0
  30. data/app/controllers/source_monitor/items_controller.rb +116 -0
  31. data/app/controllers/source_monitor/logs_controller.rb +15 -0
  32. data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
  33. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
  34. data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
  35. data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
  36. data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
  37. data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
  38. data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
  39. data/app/controllers/source_monitor/sources_controller.rb +179 -0
  40. data/app/helpers/source_monitor/application_helper.rb +327 -0
  41. data/app/jobs/source_monitor/application_job.rb +13 -0
  42. data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
  43. data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
  44. data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
  45. data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
  46. data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
  47. data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
  48. data/app/mailers/source_monitor/application_mailer.rb +17 -0
  49. data/app/models/concerns/.keep +0 -0
  50. data/app/models/concerns/source_monitor/loggable.rb +18 -0
  51. data/app/models/source_monitor/application_record.rb +5 -0
  52. data/app/models/source_monitor/fetch_log.rb +31 -0
  53. data/app/models/source_monitor/health_check_log.rb +28 -0
  54. data/app/models/source_monitor/item.rb +102 -0
  55. data/app/models/source_monitor/item_content.rb +11 -0
  56. data/app/models/source_monitor/log_entry.rb +56 -0
  57. data/app/models/source_monitor/scrape_log.rb +31 -0
  58. data/app/models/source_monitor/source.rb +115 -0
  59. data/app/views/layouts/source_monitor/application.html.erb +54 -0
  60. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
  61. data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
  62. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
  63. data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
  64. data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
  65. data/app/views/source_monitor/dashboard/index.html.erb +48 -0
  66. data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
  67. data/app/views/source_monitor/items/_details.html.erb +234 -0
  68. data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
  69. data/app/views/source_monitor/items/index.html.erb +147 -0
  70. data/app/views/source_monitor/items/show.html.erb +3 -0
  71. data/app/views/source_monitor/logs/index.html.erb +208 -0
  72. data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
  73. data/app/views/source_monitor/shared/_toast.html.erb +34 -0
  74. data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
  75. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
  76. data/app/views/source_monitor/sources/_details.html.erb +302 -0
  77. data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
  78. data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
  79. data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
  80. data/app/views/source_monitor/sources/_form.html.erb +143 -0
  81. data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
  82. data/app/views/source_monitor/sources/_row.html.erb +102 -0
  83. data/app/views/source_monitor/sources/edit.html.erb +28 -0
  84. data/app/views/source_monitor/sources/index.html.erb +153 -0
  85. data/app/views/source_monitor/sources/new.html.erb +22 -0
  86. data/app/views/source_monitor/sources/show.html.erb +3 -0
  87. data/config/coverage_baseline.json +2010 -0
  88. data/config/initializers/feedjira.rb +19 -0
  89. data/config/routes.rb +18 -0
  90. data/config/tailwind.config.js +17 -0
  91. data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
  92. data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
  93. data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
  94. data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
  95. data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
  96. data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
  97. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
  98. data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
  99. data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
  100. data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
  101. data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
  102. data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
  103. data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
  104. data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
  105. data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
  106. data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
  107. data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
  108. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
  109. data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
  110. data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
  111. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
  112. data/docs/configuration.md +170 -0
  113. data/docs/deployment.md +63 -0
  114. data/docs/gh-cli-workflow.md +44 -0
  115. data/docs/installation.md +144 -0
  116. data/docs/troubleshooting.md +76 -0
  117. data/eslint.config.mjs +27 -0
  118. data/lib/generators/source_monitor/install/install_generator.rb +59 -0
  119. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
  120. data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
  121. data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
  122. data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
  123. data/lib/source_monitor/assets/bundler.rb +49 -0
  124. data/lib/source_monitor/assets.rb +6 -0
  125. data/lib/source_monitor/configuration.rb +654 -0
  126. data/lib/source_monitor/dashboard/queries.rb +356 -0
  127. data/lib/source_monitor/dashboard/quick_action.rb +7 -0
  128. data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
  129. data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
  130. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
  131. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
  132. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
  133. data/lib/source_monitor/engine.rb +107 -0
  134. data/lib/source_monitor/events.rb +110 -0
  135. data/lib/source_monitor/feedjira_extensions.rb +103 -0
  136. data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
  137. data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
  138. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
  139. data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
  140. data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
  141. data/lib/source_monitor/fetching/fetch_error.rb +88 -0
  142. data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
  143. data/lib/source_monitor/fetching/retry_policy.rb +85 -0
  144. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
  145. data/lib/source_monitor/health/source_health_check.rb +100 -0
  146. data/lib/source_monitor/health/source_health_monitor.rb +210 -0
  147. data/lib/source_monitor/health/source_health_reset.rb +68 -0
  148. data/lib/source_monitor/health.rb +46 -0
  149. data/lib/source_monitor/http.rb +85 -0
  150. data/lib/source_monitor/instrumentation.rb +52 -0
  151. data/lib/source_monitor/items/item_creator.rb +601 -0
  152. data/lib/source_monitor/items/retention_pruner.rb +146 -0
  153. data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
  154. data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
  155. data/lib/source_monitor/items/retention_strategies.rb +9 -0
  156. data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
  157. data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
  158. data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
  159. data/lib/source_monitor/jobs/visibility.rb +133 -0
  160. data/lib/source_monitor/logs/entry_sync.rb +69 -0
  161. data/lib/source_monitor/logs/filter_set.rb +163 -0
  162. data/lib/source_monitor/logs/query.rb +81 -0
  163. data/lib/source_monitor/logs/table_presenter.rb +161 -0
  164. data/lib/source_monitor/metrics.rb +77 -0
  165. data/lib/source_monitor/model_extensions.rb +109 -0
  166. data/lib/source_monitor/models/sanitizable.rb +76 -0
  167. data/lib/source_monitor/models/url_normalizable.rb +84 -0
  168. data/lib/source_monitor/pagination/paginator.rb +90 -0
  169. data/lib/source_monitor/realtime/adapter.rb +97 -0
  170. data/lib/source_monitor/realtime/broadcaster.rb +237 -0
  171. data/lib/source_monitor/realtime.rb +17 -0
  172. data/lib/source_monitor/release/changelog.rb +59 -0
  173. data/lib/source_monitor/release/runner.rb +73 -0
  174. data/lib/source_monitor/scheduler.rb +82 -0
  175. data/lib/source_monitor/scrapers/base.rb +105 -0
  176. data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
  177. data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
  178. data/lib/source_monitor/scrapers/readability.rb +156 -0
  179. data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
  180. data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
  181. data/lib/source_monitor/scraping/enqueuer.rb +125 -0
  182. data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
  183. data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
  184. data/lib/source_monitor/scraping/item_scraper.rb +84 -0
  185. data/lib/source_monitor/scraping/scheduler.rb +43 -0
  186. data/lib/source_monitor/scraping/state.rb +79 -0
  187. data/lib/source_monitor/security/authentication.rb +85 -0
  188. data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
  189. data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
  190. data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
  191. data/lib/source_monitor/version.rb +3 -0
  192. data/lib/source_monitor.rb +149 -0
  193. data/lib/tasks/recover_stalled_fetches.rake +16 -0
  194. data/lib/tasks/source_monitor_assets.rake +28 -0
  195. data/lib/tasks/source_monitor_tasks.rake +29 -0
  196. data/lib/tasks/test_smoke.rake +12 -0
  197. data/package-lock.json +3997 -0
  198. data/package.json +29 -0
  199. data/postcss.config.js +6 -0
  200. data/source_monitor.gemspec +46 -0
  201. data/stylelint.config.js +12 -0
  202. metadata +469 -0
@@ -0,0 +1,654 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/core_ext/string/inflections"
4
+
5
+ module SourceMonitor
6
+ class Configuration
7
+ attr_accessor :queue_namespace,
8
+ :fetch_queue_name,
9
+ :scrape_queue_name,
10
+ :fetch_queue_concurrency,
11
+ :scrape_queue_concurrency,
12
+ :recurring_command_job_class,
13
+ :job_metrics_enabled,
14
+ :mission_control_enabled,
15
+ :mission_control_dashboard_path
16
+
17
+ attr_reader :http, :scrapers, :retention, :events, :models, :realtime, :fetching, :health, :authentication, :scraping
18
+
19
+ DEFAULT_QUEUE_NAMESPACE = "source_monitor"
20
+
21
# Builds a configuration populated with the engine defaults.
#
# Queue names are derived from DEFAULT_QUEUE_NAMESPACE, both queues start
# with a concurrency of 2, job metrics are enabled, Mission Control is
# disabled, and every settings area receives a freshly built object.
def initialize
  namespace = DEFAULT_QUEUE_NAMESPACE

  # Queue naming and sizing.
  @queue_namespace = namespace
  @fetch_queue_name = "#{namespace}_fetch"
  @scrape_queue_name = "#{namespace}_scrape"
  @fetch_queue_concurrency = 2
  @scrape_queue_concurrency = 2

  # Job integrations.
  @recurring_command_job_class = nil
  @job_metrics_enabled = true
  @mission_control_enabled = false
  @mission_control_dashboard_path = nil

  # Per-area settings objects (exposed through the attr_reader list).
  @http = HTTPSettings.new
  @scrapers = ScraperRegistry.new
  @retention = RetentionSettings.new
  @events = Events.new
  @models = Models.new
  @realtime = RealtimeSettings.new
  @fetching = FetchingSettings.new
  @health = HealthSettings.new
  @authentication = AuthenticationSettings.new
  @scraping = ScrapingSettings.new
end
42
+
43
# Returns the queue name configured for +role+ (:fetch or :scrape; anything
# responding to #to_sym is accepted).
#
# When ActiveJob::Base.queue_name_prefix is set to a non-empty value, the
# prefix and the configured name are joined with
# ActiveJob::Base.queue_name_delimiter; otherwise the configured name is
# returned unchanged.
#
# Raises ArgumentError for any other role.
def queue_name_for(role)
  role_key = role.to_sym

  base_name =
    if role_key == :fetch
      fetch_queue_name
    elsif role_key == :scrape
      scrape_queue_name
    else
      raise ArgumentError, "unknown queue role #{role.inspect}"
    end

  prefix = ActiveJob::Base.queue_name_prefix
  delimiter = ActiveJob::Base.queue_name_delimiter

  # No prefix configured (nil, false or empty): use the name as-is.
  return base_name if !prefix || prefix.empty?

  [ prefix, base_name ].join(delimiter)
end
63
+
64
# Returns the configured concurrency for +role+ (:fetch or :scrape; anything
# responding to #to_sym is accepted).
#
# Raises ArgumentError for any other role.
def concurrency_for(role)
  role_key = role.to_sym

  return fetch_queue_concurrency if role_key == :fetch
  return scrape_queue_concurrency if role_key == :scrape

  raise ArgumentError, "unknown queue role #{role.inspect}"
end
74
+
75
# Holds the authentication/authorization hooks plus the names of the
# current-user and signed-in helper methods.
class AuthenticationSettings
  # Pairs a handler with the strategy used to invoke it.
  #
  # type     - :symbol (a method name sent to the controller) or :callable.
  # callable - the Symbol, or an object responding to #call.
  Handler = Struct.new(:type, :callable) do
    # Invokes the handler for +controller+ and returns its result.
    # Returns nil when no callable is stored or the type is unknown.
    def call(controller)
      return unless callable

      case type
      when :symbol
        controller.public_send(callable)
      when :callable
        # build_handler accepts anything responding to #call, so do not
        # assume the object also implements #arity (procs and methods do,
        # plain callable objects do not).
        arity = callable.respond_to?(:arity) ? callable.arity : callable.method(:call).arity

        if !arity.zero?
          callable.call(controller)
        elsif callable.respond_to?(:to_proc)
          # Zero-arity blocks run in the controller's context.
          controller.instance_exec(&callable)
        else
          callable.call
        end
      end
    end
  end

  attr_reader :authenticate_handler, :authorize_handler
  attr_accessor :current_user_method, :user_signed_in_method

  def initialize
    reset!
  end

  # Registers the authentication hook. Accepts a method name (Symbol or
  # String), an object responding to #call, or a block.
  # Raises ArgumentError for any other handler.
  def authenticate_with(handler = nil, &block)
    @authenticate_handler = build_handler(handler, &block)
  end

  # Registers the authorization hook; same handler forms as
  # #authenticate_with.
  def authorize_with(handler = nil, &block)
    @authorize_handler = build_handler(handler, &block)
  end

  # Clears both hooks and both helper method names.
  def reset!
    @authenticate_handler = nil
    @authorize_handler = nil
    @current_user_method = nil
    @user_signed_in_method = nil
  end

  private

  # Wraps +handler+ (or +block+ when no handler is given) in a Handler.
  # Returns nil when neither is supplied.
  def build_handler(handler = nil, &block)
    handler ||= block
    return nil unless handler

    if handler.is_a?(Symbol) || handler.is_a?(String)
      Handler.new(:symbol, handler.to_sym)
    elsif handler.respond_to?(:call)
      Handler.new(:callable, handler)
    else
      raise ArgumentError, "Invalid authentication handler #{handler.inspect}"
    end
  end
end
131
+
132
# Limits applied to scraping: how many scrapes may be in flight per source
# and how many items a bulk batch may contain. A nil limit means no value
# is configured.
class ScrapingSettings
  DEFAULT_MAX_IN_FLIGHT = 25
  DEFAULT_MAX_BULK_BATCH_SIZE = 100

  # Readers only: the writers below normalize their input, so declaring
  # attr_accessor would just define setters that are immediately redefined.
  attr_reader :max_in_flight_per_source, :max_bulk_batch_size

  def initialize
    reset!
  end

  # Restores both limits to their defaults.
  def reset!
    @max_in_flight_per_source = DEFAULT_MAX_IN_FLIGHT
    @max_bulk_batch_size = DEFAULT_MAX_BULK_BATCH_SIZE
  end

  # Stores the limit as a positive Integer, or nil (see #normalize_numeric).
  def max_in_flight_per_source=(value)
    @max_in_flight_per_source = normalize_numeric(value)
  end

  # Stores the limit as a positive Integer, or nil (see #normalize_numeric).
  def max_bulk_batch_size=(value)
    @max_bulk_batch_size = normalize_numeric(value)
  end

  private

  # Coerces +value+ with #to_i.
  #
  # Returns nil for nil, "", and anything that coerces to zero or a
  # negative number; otherwise the positive Integer.
  # Raises ArgumentError when the value cannot be coerced at all (for
  # example +true+), instead of failing later with a NoMethodError.
  def normalize_numeric(value)
    return nil if value.nil?
    return nil if value == ""

    unless value.respond_to?(:to_i)
      raise ArgumentError, "Invalid scraping limit #{value.inspect}"
    end

    integer = value.to_i
    integer.positive? ? integer : nil
  end
end
165
+
166
# Selects the realtime adapter and builds the matching Action Cable style
# configuration hash.
class RealtimeSettings
  VALID_ADAPTERS = %i[solid_cable redis async].freeze

  attr_reader :adapter, :solid_cable
  attr_accessor :redis_url

  def initialize
    reset!
  end

  # Sets the adapter. Accepts any value whose #to_sym is one of
  # VALID_ADAPTERS.
  #
  # Raises ArgumentError otherwise, including for values that cannot be
  # converted to a Symbol at all.
  def adapter=(value)
    normalized = value.respond_to?(:to_sym) ? value.to_sym : value
    unless VALID_ADAPTERS.include?(normalized)
      raise ArgumentError, "Unsupported realtime adapter #{normalized.inspect}"
    end

    @adapter = normalized
  end

  # Restores fresh Solid Cable options, clears the Redis URL and selects
  # the :solid_cable adapter.
  def reset!
    @solid_cable = SolidCableOptions.new
    @redis_url = nil
    self.adapter = :solid_cable
  end

  # Merges +options+ into the existing Solid Cable options rather than
  # replacing the options object.
  def solid_cable=(options)
    solid_cable.assign(options)
  end

  # Returns the configuration hash for the selected adapter. For :redis the
  # :url key is included only when a non-blank redis_url is set.
  def action_cable_config
    case adapter
    when :solid_cable
      solid_cable.to_h.merge(adapter: "solid_cable")
    when :redis
      config = { adapter: "redis" }
      config[:url] = redis_url if redis_url_configured?
      config
    when :async
      { adapter: "async" }
    else
      {}
    end
  end

  private

  # True when redis_url is set to something other than whitespace. Written
  # with plain Ruby so this class does not rely on an ActiveSupport
  # core extension that this file never requires.
  def redis_url_configured?
    return false unless redis_url

    !redis_url.to_s.match?(/\A[[:space:]]*\z/)
  end

  public

  # Option bag for the Solid Cable adapter.
  class SolidCableOptions
    attr_accessor :polling_interval,
      :message_retention,
      :autotrim,
      :silence_polling,
      :use_skip_locked,
      :trim_batch_size,
      :connects_to

    def initialize
      reset!
    end

    # Assigns every key of +options+ that has a matching writer; unknown
    # keys are ignored. Does nothing when +options+ is not enumerable.
    def assign(options)
      return unless options.respond_to?(:each)

      options.each do |key, value|
        setter = "#{key}="
        public_send(setter, value) if respond_to?(setter)
      end
    end

    # Restores the default option values.
    def reset!
      @polling_interval = "0.1.seconds"
      @message_retention = "1.day"
      @autotrim = true
      @silence_polling = true
      @use_skip_locked = true
      @trim_batch_size = nil
      @connects_to = nil
    end

    # Returns the options as a Hash with nil entries removed.
    def to_h
      {
        polling_interval: polling_interval,
        message_retention: message_retention,
        autotrim: autotrim,
        silence_polling: silence_polling,
        use_skip_locked: use_skip_locked,
        trim_batch_size: trim_batch_size,
        connects_to: connects_to
      }.compact
    end
  end
end
255
+
256
# HTTP client settings: timeouts, redirect limit, user agent, proxy, extra
# headers and retry tuning.
class HTTPSettings
  attr_accessor :timeout,
    :open_timeout,
    :max_redirects,
    :user_agent,
    :proxy,
    :headers,
    :retry_max,
    :retry_interval,
    :retry_interval_randomness,
    :retry_backoff_factor,
    :retry_statuses

  def initialize
    reset!
  end

  # Restores every setting to its default. The user agent is derived from
  # SourceMonitor::VERSION and headers start as a new empty Hash.
  def reset!
    # Connection behaviour.
    @timeout, @open_timeout, @max_redirects = 15, 5, 5
    @user_agent = "SourceMonitor/#{SourceMonitor::VERSION}"
    @proxy = nil
    @headers = {}

    # Retry tuning.
    @retry_max = 4
    @retry_interval, @retry_interval_randomness = 0.5, 0.5
    @retry_backoff_factor = 2
    @retry_statuses = nil
  end
end
293
+
294
# Tunables for the fetch interval: lower/upper bounds in minutes, the
# factors used to grow or shrink it, and the jitter fraction.
class FetchingSettings
  attr_accessor :min_interval_minutes,
    :max_interval_minutes,
    :increase_factor,
    :decrease_factor,
    :failure_increase_factor,
    :jitter_percent

  def initialize
    reset!
  end

  # Restores every tunable to its default value.
  def reset!
    # Bounds: five minutes up to one day, expressed in minutes.
    @min_interval_minutes, @max_interval_minutes = 5, 60 * 24

    # Interval multipliers.
    @increase_factor, @decrease_factor = 1.25, 0.75
    @failure_increase_factor = 1.5

    @jitter_percent = 0.1
  end
end
315
+
316
# Thresholds for source health: the window size, the healthy/warning
# thresholds, and the auto-pause/auto-resume thresholds with their cooldown.
class HealthSettings
  attr_accessor :window_size,
    :healthy_threshold,
    :warning_threshold,
    :auto_pause_threshold,
    :auto_resume_threshold,
    :auto_pause_cooldown_minutes

  def initialize
    reset!
  end

  # Restores every threshold to its default value.
  def reset!
    @window_size = 20

    # Status thresholds.
    @healthy_threshold, @warning_threshold = 0.8, 0.5

    # Automatic pause / resume.
    @auto_pause_threshold, @auto_resume_threshold = 0.2, 0.6
    @auto_pause_cooldown_minutes = 60
  end
end
337
+
338
# Registry of scraper adapter classes keyed by a normalized (lower-case)
# name. Enumerating yields [name, adapter_class] pairs.
class ScraperRegistry
  include Enumerable

  def initialize
    @adapters = {}
  end

  # Registers +adapter+ under +name+, replacing any previous entry.
  #
  # name    - letters, digits and underscores only; stored lower-cased.
  # adapter - a Class, or the name of a constant that resolves to one.
  #
  # Returns the resolved adapter class.
  # Raises ArgumentError for an invalid name or adapter.
  def register(name, adapter)
    key = normalize_name(name)
    @adapters[key] = normalize_adapter(adapter)
  end

  # Removes the adapter registered under +name+ and returns it (nil when
  # nothing was registered).
  def unregister(name)
    @adapters.delete(normalize_name(name))
  end

  # Returns the adapter class registered under +name+, or nil.
  def adapter_for(name)
    @adapters[normalize_name(name)]
  end

  def each(&block)
    @adapters.each(&block)
  end

  private

  # Validates +name+ and returns it lower-cased.
  def normalize_name(name)
    value = name.to_s
    raise ArgumentError, "Invalid scraper adapter name #{name.inspect}" unless value.match?(/\A[a-z0-9_]+\z/i)

    value.downcase
  end

  # Resolves +adapter+ to a class and checks it against the scraper base
  # class when that class is loaded.
  def normalize_adapter(adapter)
    constant = resolve_adapter(adapter)

    # A constant name may resolve to a module or a plain value; reject it
    # here even when the base class is not loaded yet.
    unless constant.is_a?(Class)
      raise ArgumentError, "Invalid scraper adapter #{adapter.inspect}"
    end

    if defined?(SourceMonitor::Scrapers::Base) && !(constant <= SourceMonitor::Scrapers::Base)
      raise ArgumentError, "Scraper adapters must inherit from SourceMonitor::Scrapers::Base"
    end

    constant
  end

  # Returns +adapter+ itself when it is a Class, otherwise looks up the
  # constant named by adapter.to_s.
  def resolve_adapter(adapter)
    return adapter if adapter.is_a?(Class)

    constant_name = adapter.to_s
    # Every object responds to #to_s, so nil and blank values must be
    # rejected explicitly rather than passed on to the constant lookup.
    if constant_name.strip.empty?
      raise ArgumentError, "Invalid scraper adapter #{adapter.inspect}"
    end

    begin
      constant_name.constantize
    rescue NameError
      raise ArgumentError, "Unknown scraper adapter constant #{constant_name.inspect}"
    end
  end
end
397
+
398
# Item retention settings: optional age and count limits plus the strategy
# used when removing items (defaults to :destroy).
class RetentionSettings
  attr_accessor :items_retention_days, :max_items
  attr_reader :strategy

  def initialize
    @items_retention_days = nil
    @max_items = nil
    @strategy = :destroy
  end

  # Sets the strategy. nil selects :destroy; any other value must convert
  # to one of the permitted strategy symbols.
  #
  # Raises ArgumentError for an unknown or unconvertible value, leaving the
  # current strategy untouched.
  def strategy=(value)
    @strategy = normalize_strategy(value)
  end

  private

  # Returns the strategy Symbol for +value+ or raises ArgumentError.
  def normalize_strategy(value)
    return :destroy if value.nil?

    unless value.respond_to?(:to_sym)
      raise ArgumentError, "Invalid retention strategy #{value.inspect}"
    end

    candidate = value.to_sym
    return candidate if permitted_strategies.include?(candidate)

    raise ArgumentError, "Invalid retention strategy #{value.inspect}"
  end

  # Uses the pruner's VALID_STRATEGIES when that constant is defined,
  # otherwise a built-in list.
  def permitted_strategies
    if defined?(SourceMonitor::Items::RetentionPruner::VALID_STRATEGIES)
      SourceMonitor::Items::RetentionPruner::VALID_STRATEGIES
    else
      %i[destroy soft_delete]
    end
  end
end
437
+
438
# Stores event callbacks and item processors registered by the host
# application.
class Events
  CALLBACK_KEYS = %i[after_item_created after_item_scraped after_fetch_completed].freeze

  def initialize
    # Plain Hash: lists are created on registration only, so reading an
    # unknown event name never adds a key.
    @callbacks = {}
    @item_processors = []
  end

  # Defines one registration method per event (after_item_created, ...).
  # Each accepts a callable or a block, stores it, and returns it.
  # Raises ArgumentError when the handler does not respond to #call.
  CALLBACK_KEYS.each do |key|
    define_method(key) do |handler = nil, &block|
      register_callback(key, handler, &block)
    end
  end

  # Registers an item processor (callable or block) and returns it.
  # Raises ArgumentError when it does not respond to #call.
  def register_item_processor(processor = nil, &block)
    callable = processor || block
    validate_callable!(callable, :item_processor)
    @item_processors << callable
    callable
  end

  # Returns a copy of the callbacks registered for +name+ (empty Array when
  # none). Mutating the result does not affect the registry.
  def callbacks_for(name)
    @callbacks.fetch(name.to_sym) { [] }.dup
  end

  # Returns a copy of the registered item processors.
  def item_processors
    @item_processors.dup
  end

  # Removes every callback and item processor.
  def reset!
    @callbacks.clear
    @item_processors.clear
  end

  private

  def register_callback(key, handler = nil, &block)
    callable = handler || block
    validate_callable!(callable, key)
    key = key.to_sym
    unless CALLBACK_KEYS.include?(key)
      raise ArgumentError, "Unknown event #{key.inspect}"
    end

    (@callbacks[key] ||= []) << callable
    callable
  end

  def validate_callable!(callable, name)
    unless callable.respond_to?(:call)
      raise ArgumentError, "#{name} handler must respond to #call"
    end
  end
end
492
+
493
# Holds the table name prefix and one ModelDefinition per model, reachable
# through a reader named after the model or through #for.
class Models
  MODEL_KEYS = {
    source: :source,
    item: :item,
    fetch_log: :fetch_log,
    scrape_log: :scrape_log,
    health_check_log: :health_check_log,
    item_content: :item_content,
    log_entry: :log_entry
  }.freeze

  attr_accessor :table_name_prefix

  def initialize
    @table_name_prefix = "sourcemon_"
    @definitions = MODEL_KEYS.each_with_object({}) do |(name, _key), definitions|
      definitions[name] = ModelDefinition.new
    end
  end

  # One reader per entry in MODEL_KEYS, returning that model's definition.
  MODEL_KEYS.each_pair do |method_name, key|
    define_method(method_name) do
      @definitions[key]
    end
  end

  # Returns the definition for +name+ (anything responding to #to_sym).
  # Raises ArgumentError for an unknown model.
  def for(name)
    @definitions.fetch(name.to_sym) do
      raise ArgumentError, "Unknown model #{name.inspect}"
    end
  end
end
523
+
524
# Collects the concerns and validations registered for one model.
class ModelDefinition
  attr_reader :validations

  def initialize
    @concern_definitions = []
    @validations = []
  end

  # Registers a concern given as a Module, a constant name (resolved
  # lazily), or a block that becomes the body of an anonymous module.
  # A concern with the same signature is stored only once.
  #
  # Returns the anonymous module for a block, otherwise +concern+ as given.
  # Raises ArgumentError when neither a usable concern nor a block is given.
  def include_concern(concern = nil, &block)
    definition = ConcernDefinition.new(concern, block)
    unless @concern_definitions.any? { |existing| existing.signature == definition.signature }
      @concern_definitions << definition
    end

    definition.return_value
  end

  # Yields [signature, resolved_module] for each registered concern, or
  # returns an Enumerator when no block is given. Constant names are
  # resolved here; an unknown constant raises ArgumentError.
  def each_concern
    return enum_for(:each_concern) unless block_given?

    @concern_definitions.each do |definition|
      yield definition.signature, definition.resolve
    end
  end

  # Registers a validation given as a block, a callable, or a method name
  # (Symbol or String, stored as a Symbol). A block takes precedence over
  # +handler+.
  #
  # Returns the stored ValidationDefinition.
  # Raises ArgumentError for any other handler.
  def validate(handler = nil, **options, &block)
    callable =
      if block
        block
      elsif handler.respond_to?(:call) && !handler.is_a?(Symbol) && !handler.is_a?(String)
        handler
      elsif handler.is_a?(Symbol) || handler.is_a?(String)
        handler.to_sym
      else
        raise ArgumentError, "Invalid validation handler #{handler.inspect}"
      end

    validation = ValidationDefinition.new(callable, options)
    @validations << validation
    validation
  end

  # One registered concern. The constant stays publicly reachable, exactly
  # as before: a bare `private` does not apply to nested classes.
  class ConcernDefinition
    attr_reader :signature, :return_value

    def initialize(concern, block)
      @resolver = build_resolver(concern, block)
      @signature = build_signature(concern, block)
      @return_value = block ? resolve : concern
    end

    # Returns the concern module, resolving (and memoizing) it on first use.
    def resolve
      @resolved ||= @resolver.call
    end

    private

    def build_resolver(concern, block)
      if block
        mod = Module.new(&block)
        -> { mod }
      elsif concern.is_a?(Module)
        -> { concern }
      elsif constant_reference?(concern)
        constant_name = concern.to_s
        lambda do
          constant_name.constantize
        rescue NameError => error
          raise ArgumentError, error.message
        end
      else
        raise ArgumentError, "Invalid concern #{concern.inspect}"
      end
    end

    # Every object responds to #to_s, so a missing (nil) or blank concern
    # must be rejected explicitly instead of being treated as a constant
    # name.
    def constant_reference?(concern)
      !concern.nil? && !concern.to_s.strip.empty?
    end

    def build_signature(concern, block)
      if block
        [ :anonymous_module, block.object_id ]
      elsif concern.is_a?(Module)
        [ :module, concern.object_id ]
      else
        [ :constant, concern.to_s ]
      end
    end
  end
end
626
+
627
# A registered validation: the handler (method name or callable) and the
# options it was registered with.
class ValidationDefinition
  attr_reader :handler, :options

  def initialize(handler, options)
    @handler = handler
    @options = options
  end

  # Returns [handler_key, options]. Method-name handlers are keyed by their
  # Symbol; any other handler is keyed by its object_id.
  def signature
    handler_key = symbol? ? [ :symbol, handler.to_sym ] : [ :callable, handler.object_id ]

    [ handler_key, options ]
  end

  # True when the handler is a method name (Symbol or String).
  def symbol?
    [ Symbol, String ].any? { |type| handler.is_a?(type) }
  end
end
653
+ end
654
+ end