source_monitor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rubocop.yml +12 -0
- data/.ruby-version +1 -0
- data/AGENTS.md +132 -0
- data/CHANGELOG.md +66 -0
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +411 -0
- data/MIT-LICENSE +20 -0
- data/README.md +108 -0
- data/Rakefile +8 -0
- data/app/assets/builds/.keep +0 -0
- data/app/assets/config/source_monitor_manifest.js +4 -0
- data/app/assets/images/source_monitor/.keep +0 -0
- data/app/assets/javascripts/source_monitor/application.js +20 -0
- data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
- data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
- data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
- data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
- data/app/assets/svgs/source_monitor/.keep +0 -0
- data/app/controllers/concerns/.keep +0 -0
- data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
- data/app/controllers/source_monitor/application_controller.rb +62 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
- data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/health_controller.rb +10 -0
- data/app/controllers/source_monitor/items_controller.rb +116 -0
- data/app/controllers/source_monitor/logs_controller.rb +15 -0
- data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
- data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
- data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
- data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
- data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
- data/app/controllers/source_monitor/sources_controller.rb +179 -0
- data/app/helpers/source_monitor/application_helper.rb +327 -0
- data/app/jobs/source_monitor/application_job.rb +13 -0
- data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
- data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
- data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
- data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
- data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
- data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
- data/app/mailers/source_monitor/application_mailer.rb +17 -0
- data/app/models/concerns/.keep +0 -0
- data/app/models/concerns/source_monitor/loggable.rb +18 -0
- data/app/models/source_monitor/application_record.rb +5 -0
- data/app/models/source_monitor/fetch_log.rb +31 -0
- data/app/models/source_monitor/health_check_log.rb +28 -0
- data/app/models/source_monitor/item.rb +102 -0
- data/app/models/source_monitor/item_content.rb +11 -0
- data/app/models/source_monitor/log_entry.rb +56 -0
- data/app/models/source_monitor/scrape_log.rb +31 -0
- data/app/models/source_monitor/source.rb +115 -0
- data/app/views/layouts/source_monitor/application.html.erb +54 -0
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
- data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
- data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
- data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
- data/app/views/source_monitor/dashboard/index.html.erb +48 -0
- data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
- data/app/views/source_monitor/items/_details.html.erb +234 -0
- data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/items/index.html.erb +147 -0
- data/app/views/source_monitor/items/show.html.erb +3 -0
- data/app/views/source_monitor/logs/index.html.erb +208 -0
- data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
- data/app/views/source_monitor/shared/_toast.html.erb +34 -0
- data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
- data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
- data/app/views/source_monitor/sources/_details.html.erb +302 -0
- data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
- data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
- data/app/views/source_monitor/sources/_form.html.erb +143 -0
- data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
- data/app/views/source_monitor/sources/_row.html.erb +102 -0
- data/app/views/source_monitor/sources/edit.html.erb +28 -0
- data/app/views/source_monitor/sources/index.html.erb +153 -0
- data/app/views/source_monitor/sources/new.html.erb +22 -0
- data/app/views/source_monitor/sources/show.html.erb +3 -0
- data/config/coverage_baseline.json +2010 -0
- data/config/initializers/feedjira.rb +19 -0
- data/config/routes.rb +18 -0
- data/config/tailwind.config.js +17 -0
- data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
- data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
- data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
- data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
- data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
- data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
- data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
- data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
- data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
- data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
- data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
- data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
- data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
- data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
- data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
- data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
- data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
- data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
- data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
- data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
- data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
- data/docs/configuration.md +170 -0
- data/docs/deployment.md +63 -0
- data/docs/gh-cli-workflow.md +44 -0
- data/docs/installation.md +144 -0
- data/docs/troubleshooting.md +76 -0
- data/eslint.config.mjs +27 -0
- data/lib/generators/source_monitor/install/install_generator.rb +59 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
- data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
- data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
- data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
- data/lib/source_monitor/assets/bundler.rb +49 -0
- data/lib/source_monitor/assets.rb +6 -0
- data/lib/source_monitor/configuration.rb +654 -0
- data/lib/source_monitor/dashboard/queries.rb +356 -0
- data/lib/source_monitor/dashboard/quick_action.rb +7 -0
- data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
- data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
- data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
- data/lib/source_monitor/engine.rb +107 -0
- data/lib/source_monitor/events.rb +110 -0
- data/lib/source_monitor/feedjira_extensions.rb +103 -0
- data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
- data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
- data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
- data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
- data/lib/source_monitor/fetching/fetch_error.rb +88 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
- data/lib/source_monitor/fetching/retry_policy.rb +85 -0
- data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
- data/lib/source_monitor/health/source_health_check.rb +100 -0
- data/lib/source_monitor/health/source_health_monitor.rb +210 -0
- data/lib/source_monitor/health/source_health_reset.rb +68 -0
- data/lib/source_monitor/health.rb +46 -0
- data/lib/source_monitor/http.rb +85 -0
- data/lib/source_monitor/instrumentation.rb +52 -0
- data/lib/source_monitor/items/item_creator.rb +601 -0
- data/lib/source_monitor/items/retention_pruner.rb +146 -0
- data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
- data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
- data/lib/source_monitor/items/retention_strategies.rb +9 -0
- data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
- data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
- data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
- data/lib/source_monitor/jobs/visibility.rb +133 -0
- data/lib/source_monitor/logs/entry_sync.rb +69 -0
- data/lib/source_monitor/logs/filter_set.rb +163 -0
- data/lib/source_monitor/logs/query.rb +81 -0
- data/lib/source_monitor/logs/table_presenter.rb +161 -0
- data/lib/source_monitor/metrics.rb +77 -0
- data/lib/source_monitor/model_extensions.rb +109 -0
- data/lib/source_monitor/models/sanitizable.rb +76 -0
- data/lib/source_monitor/models/url_normalizable.rb +84 -0
- data/lib/source_monitor/pagination/paginator.rb +90 -0
- data/lib/source_monitor/realtime/adapter.rb +97 -0
- data/lib/source_monitor/realtime/broadcaster.rb +237 -0
- data/lib/source_monitor/realtime.rb +17 -0
- data/lib/source_monitor/release/changelog.rb +59 -0
- data/lib/source_monitor/release/runner.rb +73 -0
- data/lib/source_monitor/scheduler.rb +82 -0
- data/lib/source_monitor/scrapers/base.rb +105 -0
- data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
- data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
- data/lib/source_monitor/scrapers/readability.rb +156 -0
- data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
- data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
- data/lib/source_monitor/scraping/enqueuer.rb +125 -0
- data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
- data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
- data/lib/source_monitor/scraping/item_scraper.rb +84 -0
- data/lib/source_monitor/scraping/scheduler.rb +43 -0
- data/lib/source_monitor/scraping/state.rb +79 -0
- data/lib/source_monitor/security/authentication.rb +85 -0
- data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
- data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
- data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
- data/lib/source_monitor/version.rb +3 -0
- data/lib/source_monitor.rb +149 -0
- data/lib/tasks/recover_stalled_fetches.rake +16 -0
- data/lib/tasks/source_monitor_assets.rake +28 -0
- data/lib/tasks/source_monitor_tasks.rake +29 -0
- data/lib/tasks/test_smoke.rake +12 -0
- data/package-lock.json +3997 -0
- data/package.json +29 -0
- data/postcss.config.js +6 -0
- data/source_monitor.gemspec +46 -0
- data/stylelint.config.js +12 -0
- metadata +469 -0
|
@@ -0,0 +1,654 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/core_ext/string/inflections"
|
|
4
|
+
|
|
5
|
+
module SourceMonitor
|
|
6
|
+
class Configuration
|
|
7
|
+
attr_accessor :queue_namespace,
|
|
8
|
+
:fetch_queue_name,
|
|
9
|
+
:scrape_queue_name,
|
|
10
|
+
:fetch_queue_concurrency,
|
|
11
|
+
:scrape_queue_concurrency,
|
|
12
|
+
:recurring_command_job_class,
|
|
13
|
+
:job_metrics_enabled,
|
|
14
|
+
:mission_control_enabled,
|
|
15
|
+
:mission_control_dashboard_path
|
|
16
|
+
|
|
17
|
+
attr_reader :http, :scrapers, :retention, :events, :models, :realtime, :fetching, :health, :authentication, :scraping
|
|
18
|
+
|
|
19
|
+
DEFAULT_QUEUE_NAMESPACE = "source_monitor"
|
|
20
|
+
|
|
21
|
+
# Builds a configuration populated with the engine defaults: queue naming and
# concurrency, dashboard toggles, and one fresh settings object per subsystem.
def initialize
  namespace = DEFAULT_QUEUE_NAMESPACE

  # Queue naming and worker sizing.
  self.queue_namespace = namespace
  self.fetch_queue_name = "#{namespace}_fetch"
  self.scrape_queue_name = "#{namespace}_scrape"
  self.fetch_queue_concurrency = 2
  self.scrape_queue_concurrency = 2

  # Optional integrations; metrics are on by default, Mission Control is off.
  self.recurring_command_job_class = nil
  self.job_metrics_enabled = true
  self.mission_control_enabled = false
  self.mission_control_dashboard_path = nil

  # Subsystem settings are exposed through readers only, so they are
  # assigned straight to their instance variables.
  @http = HTTPSettings.new
  @scrapers = ScraperRegistry.new
  @retention = RetentionSettings.new
  @events = Events.new
  @models = Models.new
  @realtime = RealtimeSettings.new
  @fetching = FetchingSettings.new
  @health = HealthSettings.new
  @authentication = AuthenticationSettings.new
  @scraping = ScrapingSettings.new
end
|
|
42
|
+
|
|
43
|
+
# Resolves the queue name used for a worker role.
#
# The configured name (+fetch_queue_name+ or +scrape_queue_name+) is returned
# as is unless +ActiveJob::Base.queue_name_prefix+ is set to a non-empty
# value, in which case prefix and name are joined with
# +ActiveJob::Base.queue_name_delimiter+.
#
# @param role [Symbol, String] +:fetch+ or +:scrape+
# @return [String] the queue name, prefixed when a prefix is configured
# @raise [ArgumentError] for any other role, including +nil+ and values that
#   cannot be converted to a Symbol
def queue_name_for(role)
  # Roles that cannot be symbolized fall through to the ArgumentError branch
  # instead of blowing up with NoMethodError on #to_sym.
  explicit_name =
    case role.respond_to?(:to_sym) ? role.to_sym : role
    when :fetch then fetch_queue_name
    when :scrape then scrape_queue_name
    else raise ArgumentError, "unknown queue role #{role.inspect}"
    end

  prefix = ActiveJob::Base.queue_name_prefix
  return explicit_name if prefix.nil? || prefix.empty?

  [ prefix, explicit_name ].join(ActiveJob::Base.queue_name_delimiter)
end
|
|
63
|
+
|
|
64
|
+
# Returns the configured worker concurrency for a queue role.
#
# @param role [Symbol, String] +:fetch+ or +:scrape+
# @return [Object] the value of +fetch_queue_concurrency+ or
#   +scrape_queue_concurrency+
# @raise [ArgumentError] for any other role, including +nil+ and values that
#   cannot be converted to a Symbol
def concurrency_for(role)
  # Roles that cannot be symbolized fall through to the ArgumentError branch
  # instead of blowing up with NoMethodError on #to_sym.
  case role.respond_to?(:to_sym) ? role.to_sym : role
  when :fetch then fetch_queue_concurrency
  when :scrape then scrape_queue_concurrency
  else raise ArgumentError, "unknown queue role #{role.inspect}"
  end
end
|
|
74
|
+
|
|
75
|
+
# Stores the authentication and authorization hooks plus the names of the
# current-user / signed-in helper methods.
class AuthenticationSettings
  # A configured hook. +type+ is +:symbol+ (a controller method name) or
  # +:callable+ (a Proc, Method, or any object responding to #call).
  Handler = Struct.new(:type, :callable) do
    # Runs the hook against +controller+.
    #
    # * +:symbol+ hooks are dispatched with +public_send+ on the controller.
    # * Zero-arity callables run without arguments; Procs (and anything else
    #   convertible with #to_proc) are evaluated with the controller as +self+.
    # * Every other callable receives the controller as its only argument.
    #
    # @param controller [Object] the object the hook is applied to
    # @return [Object, nil] the hook's result, or nil when nothing is configured
    def call(controller)
      return unless callable

      case type
      when :symbol
        controller.public_send(callable)
      when :callable
        invoke_callable(controller)
      end
    end

    private

    # Plain callable objects have no #arity of their own, so the arity of
    # their #call method is inspected instead.
    def callable_arity
      callable.respond_to?(:arity) ? callable.arity : callable.method(:call).arity
    end

    def invoke_callable(controller)
      return callable.call(controller) unless callable_arity.zero?

      if callable.respond_to?(:to_proc)
        controller.instance_exec(&callable)
      else
        # Not convertible to a block: just invoke it without arguments.
        callable.call
      end
    end
  end

  attr_reader :authenticate_handler, :authorize_handler
  attr_accessor :current_user_method, :user_signed_in_method

  def initialize
    reset!
  end

  # Registers the authentication hook.
  #
  # @param handler [Symbol, String, #call, nil] method name or callable; the
  #   block is used when no handler is given
  # @return [Handler, nil] the stored handler (nil clears the hook)
  # @raise [ArgumentError] when the handler is neither a name nor callable
  def authenticate_with(handler = nil, &block)
    @authenticate_handler = build_handler(handler, &block)
  end

  # Registers the authorization hook; accepts the same arguments as
  # #authenticate_with.
  def authorize_with(handler = nil, &block)
    @authorize_handler = build_handler(handler, &block)
  end

  # Clears both hooks and both helper-method names.
  def reset!
    @authenticate_handler = nil
    @authorize_handler = nil
    @current_user_method = nil
    @user_signed_in_method = nil
  end

  private

  def build_handler(handler = nil, &block)
    handler ||= block
    return nil unless handler

    if handler.is_a?(Symbol) || handler.is_a?(String)
      Handler.new(:symbol, handler.to_sym)
    elsif handler.respond_to?(:call)
      Handler.new(:callable, handler)
    else
      raise ArgumentError, "Invalid authentication handler #{handler.inspect}"
    end
  end
end
|
|
131
|
+
|
|
132
|
+
# Limits applied to scraping: in-flight scrapes per source and bulk batch size.
# A limit of nil means no positive limit is configured.
class ScrapingSettings
  DEFAULT_MAX_IN_FLIGHT = 25
  DEFAULT_MAX_BULK_BATCH_SIZE = 100

  # Readers only: the writers below normalize their input, so generating
  # writers with attr_accessor would just be redefined (and warn).
  attr_reader :max_in_flight_per_source, :max_bulk_batch_size

  def initialize
    reset!
  end

  # Restores both limits to their defaults.
  def reset!
    @max_in_flight_per_source = DEFAULT_MAX_IN_FLIGHT
    @max_bulk_batch_size = DEFAULT_MAX_BULK_BATCH_SIZE
  end

  # @param value [Integer, String, nil] stored as a positive Integer, or nil
  #   when blank, non-positive, or not numeric
  def max_in_flight_per_source=(value)
    @max_in_flight_per_source = normalize_numeric(value)
  end

  # @param value [Integer, String, nil] stored as a positive Integer, or nil
  #   when blank, non-positive, or not numeric
  def max_bulk_batch_size=(value)
    @max_bulk_batch_size = normalize_numeric(value)
  end

  private

  # Coerces +value+ with #to_i when available and keeps it only if positive.
  # Values that cannot be compared (e.g. a Symbol or true) normalize to nil
  # rather than raising NoMethodError.
  def normalize_numeric(value)
    return nil if value.nil? || value == ""

    candidate = value.respond_to?(:to_i) ? value.to_i : value
    return nil unless candidate.respond_to?(:positive?)

    candidate.positive? ? candidate : nil
  end
end
|
|
165
|
+
|
|
166
|
+
# Selects the realtime adapter and builds the matching cable configuration hash.
class RealtimeSettings
  VALID_ADAPTERS = %i[solid_cable redis async].freeze

  attr_reader :adapter, :solid_cable
  attr_accessor :redis_url

  def initialize
    reset!
  end

  # @param value [Symbol, String] one of VALID_ADAPTERS
  # @raise [ArgumentError] for anything else, including nil and values that
  #   cannot be converted to a Symbol
  def adapter=(value)
    value = value.to_sym if value.respond_to?(:to_sym)
    unless VALID_ADAPTERS.include?(value)
      raise ArgumentError, "Unsupported realtime adapter #{value.inspect}"
    end

    @adapter = value
  end

  # Restores the defaults: fresh Solid Cable options, no Redis URL, and the
  # :solid_cable adapter.
  def reset!
    @solid_cable = SolidCableOptions.new
    @redis_url = nil
    self.adapter = :solid_cable
  end

  # Merges +options+ into the existing Solid Cable options (see
  # SolidCableOptions#assign); the options object itself is not replaced.
  def solid_cable=(options)
    solid_cable.assign(options)
  end

  # @return [Hash] adapter configuration keyed by Symbol; the :url key is only
  #   present for the redis adapter when a non-blank URL is set
  def action_cable_config
    case adapter
    when :solid_cable
      solid_cable.to_h.merge(adapter: "solid_cable")
    when :redis
      config = { adapter: "redis" }
      config[:url] = redis_url if redis_url_configured?
      config
    when :async
      { adapter: "async" }
    else
      {}
    end
  end

  private

  # Plain-Ruby blank check so this class does not depend on
  # ActiveSupport's Object#present?, which this file never requires.
  def redis_url_configured?
    url = redis_url
    return false if url.nil? || url == false
    return !url.match?(/\A[[:space:]]*\z/) if url.is_a?(String)

    url.respond_to?(:empty?) ? !url.empty? : true
  end

  # Option bag for the Solid Cable adapter.
  class SolidCableOptions
    attr_accessor :polling_interval,
      :message_retention,
      :autotrim,
      :silence_polling,
      :use_skip_locked,
      :trim_batch_size,
      :connects_to

    def initialize
      reset!
    end

    # Copies every key of +options+ that has a matching public writer and
    # silently ignores the rest. Non-enumerable input is a no-op.
    #
    # @param options [Hash, #each] pairs of option name and value
    def assign(options)
      return unless options.respond_to?(:each)

      options.each do |key, value|
        setter = "#{key}="
        public_send(setter, value) if respond_to?(setter)
      end
    end

    # Restores the default option values.
    def reset!
      @polling_interval = "0.1.seconds"
      @message_retention = "1.day"
      @autotrim = true
      @silence_polling = true
      @use_skip_locked = true
      @trim_batch_size = nil
      @connects_to = nil
    end

    # @return [Hash] the options keyed by Symbol, with nil values removed
    def to_h
      {
        polling_interval: polling_interval,
        message_retention: message_retention,
        autotrim: autotrim,
        silence_polling: silence_polling,
        use_skip_locked: use_skip_locked,
        trim_batch_size: trim_batch_size,
        connects_to: connects_to
      }.compact
    end
  end
end
|
|
255
|
+
|
|
256
|
+
# HTTP client options: timeouts, redirects, identification, proxy, extra
# headers, and retry tuning.
class HTTPSettings
  attr_accessor :timeout,
    :open_timeout,
    :max_redirects,
    :user_agent,
    :proxy,
    :headers,
    :retry_max,
    :retry_interval,
    :retry_interval_randomness,
    :retry_backoff_factor,
    :retry_statuses

  def initialize
    reset!
  end

  # Restores every option to its default value.
  def reset!
    restore_connection_defaults
    restore_retry_defaults
  end

  private

  # Timeouts, redirect limit, user agent, proxy and a fresh headers hash.
  def restore_connection_defaults
    self.timeout = 15
    self.open_timeout = 5
    self.max_redirects = 5
    self.user_agent = default_user_agent
    self.proxy = nil
    self.headers = {}
  end

  # Retry count, interval, randomness, backoff and status list.
  def restore_retry_defaults
    self.retry_max = 4
    self.retry_interval = 0.5
    self.retry_interval_randomness = 0.5
    self.retry_backoff_factor = 2
    self.retry_statuses = nil
  end

  # User agent string built from the gem version constant.
  def default_user_agent
    "SourceMonitor/#{SourceMonitor::VERSION}"
  end
end
|
|
293
|
+
|
|
294
|
+
# Tunables for the fetch interval: bounds in minutes, the growth/shrink
# factors, and the jitter fraction.
class FetchingSettings
  attr_accessor :min_interval_minutes,
    :max_interval_minutes,
    :increase_factor,
    :decrease_factor,
    :failure_increase_factor,
    :jitter_percent

  def initialize
    reset!
  end

  # Restores every tunable to its default value.
  def reset!
    # Interval bounds, in minutes (the upper bound is one day).
    self.min_interval_minutes = 5
    self.max_interval_minutes = 24 * 60

    # Multipliers and jitter.
    self.increase_factor = 1.25
    self.decrease_factor = 0.75
    self.failure_increase_factor = 1.5
    self.jitter_percent = 0.1
  end
end
|
|
315
|
+
|
|
316
|
+
# Thresholds used for source health: sample window size, status thresholds,
# and the auto-pause / auto-resume settings.
class HealthSettings
  attr_accessor :window_size,
    :healthy_threshold,
    :warning_threshold,
    :auto_pause_threshold,
    :auto_resume_threshold,
    :auto_pause_cooldown_minutes

  def initialize
    reset!
  end

  # Restores every threshold to its default value.
  def reset!
    self.window_size = 20

    # Status thresholds.
    self.healthy_threshold = 0.8
    self.warning_threshold = 0.5

    # Automatic pausing and resuming.
    self.auto_pause_threshold = 0.2
    self.auto_resume_threshold = 0.6
    self.auto_pause_cooldown_minutes = 60
  end
end
|
|
337
|
+
|
|
338
|
+
# Registry mapping lower-cased adapter names to scraper adapter classes.
# Enumerating yields +[name, adapter_class]+ pairs.
class ScraperRegistry
  include Enumerable

  def initialize
    @adapters = {}
  end

  # Registers +adapter+ under +name+, replacing any previous registration.
  #
  # @param name [String, Symbol] letters, digits and underscores only;
  #   stored lower-cased
  # @param adapter [Class, String, Symbol] the adapter class or its constant name
  # @return [Class] the resolved adapter class
  # @raise [ArgumentError] for an invalid name, an adapter that is neither a
  #   Class nor a constant name, an unknown constant, or (when
  #   SourceMonitor::Scrapers::Base is loaded) a class not inheriting from it
  def register(name, adapter)
    @adapters[normalize_name(name)] = normalize_adapter(adapter)
  end

  # @return [Class, nil] the removed adapter, or nil when none was registered
  def unregister(name)
    @adapters.delete(normalize_name(name))
  end

  # @return [Class, nil] the adapter registered under +name+, if any
  def adapter_for(name)
    @adapters[normalize_name(name)]
  end

  def each(&block)
    @adapters.each(&block)
  end

  private

  def normalize_name(name)
    value = name.to_s
    raise ArgumentError, "Invalid scraper adapter name #{name.inspect}" unless value.match?(/\A[a-z0-9_]+\z/i)

    value.downcase
  end

  def normalize_adapter(adapter)
    constant = resolve_adapter(adapter)

    # The inheritance check is skipped while the base class is not loaded.
    if defined?(SourceMonitor::Scrapers::Base) && !(constant <= SourceMonitor::Scrapers::Base)
      raise ArgumentError, "Scraper adapters must inherit from SourceMonitor::Scrapers::Base"
    end

    constant
  end

  # Classes pass through; Strings and Symbols are looked up as constant
  # names. Everything else (nil, numbers, ...) is rejected up front, because
  # every object responds to #to_s and would otherwise be reported as an
  # unknown constant.
  def resolve_adapter(adapter)
    return adapter if adapter.is_a?(Class)

    unless adapter.is_a?(String) || adapter.is_a?(Symbol)
      raise ArgumentError, "Invalid scraper adapter #{adapter.inspect}"
    end

    constant_name = adapter.to_s
    begin
      constant_name.constantize
    rescue NameError
      raise ArgumentError, "Unknown scraper adapter constant #{constant_name.inspect}"
    end
  end
end
|
|
397
|
+
|
|
398
|
+
# Item retention limits and the strategy applied to items past them.
class RetentionSettings
  attr_accessor :items_retention_days, :max_items
  attr_reader :strategy

  def initialize
    reset!
  end

  # Restores the defaults: no age limit, no item cap, :destroy strategy.
  # Provided for parity with the other settings classes.
  def reset!
    @items_retention_days = nil
    @max_items = nil
    @strategy = :destroy
  end

  # @param value [Symbol, String, nil] a valid strategy name; nil selects :destroy
  # @raise [ArgumentError] when the value is not a known strategy
  def strategy=(value)
    @strategy = normalize_strategy(value)
  end

  private

  # Always returns a valid strategy Symbol or raises; it never returns nil.
  def normalize_strategy(value)
    return :destroy if value.nil?
    raise ArgumentError, "Invalid retention strategy #{value.inspect}" unless value.respond_to?(:to_sym)

    candidate = value.to_sym
    # Prefer the pruner's own list when it is loaded; otherwise fall back to
    # the strategies known here.
    valid =
      if defined?(SourceMonitor::Items::RetentionPruner::VALID_STRATEGIES)
        SourceMonitor::Items::RetentionPruner::VALID_STRATEGIES
      else
        %i[destroy soft_delete]
      end

    raise ArgumentError, "Invalid retention strategy #{value.inspect}" unless valid.include?(candidate)

    candidate
  end
end
|
|
437
|
+
|
|
438
|
+
# Registry of event callbacks and item processors.
class Events
  CALLBACK_KEYS = %i[after_item_created after_item_scraped after_fetch_completed].freeze

  def initialize
    @callbacks = Hash.new { |hash, key| hash[key] = [] }
    @item_processors = []
  end

  # Defines one registration method per event, e.g.
  #   events.after_item_created { |event| ... }
  #   events.after_item_created(handler)
  # Each returns the registered callable and raises ArgumentError when the
  # handler does not respond to #call.
  CALLBACK_KEYS.each do |key|
    define_method(key) do |handler = nil, &block|
      register_callback(key, handler, &block)
    end
  end

  # @param processor [#call, nil] the processor; the block is used when omitted
  # @return [#call] the registered processor
  # @raise [ArgumentError] when neither responds to #call
  def register_item_processor(processor = nil, &block)
    callable = processor || block
    validate_callable!(callable, :item_processor)
    @item_processors << callable
    callable
  end

  # @param name [Symbol, String] event name
  # @return [Array] a copy of the callbacks registered for +name+ (empty when
  #   none). Uses #fetch so that looking up a name never adds a key to the
  #   underlying default-block Hash.
  def callbacks_for(name)
    @callbacks.fetch(name.to_sym) { [] }.dup
  end

  # @return [Array] a copy of the registered item processors
  def item_processors
    @item_processors.dup
  end

  # Removes every callback and item processor.
  def reset!
    @callbacks.clear
    @item_processors.clear
  end

  private

  def register_callback(key, handler = nil, &block)
    callable = handler || block
    validate_callable!(callable, key)
    key = key.to_sym
    unless CALLBACK_KEYS.include?(key)
      raise ArgumentError, "Unknown event #{key.inspect}"
    end

    @callbacks[key] << callable
    callable
  end

  def validate_callable!(callable, name)
    unless callable.respond_to?(:call)
      raise ArgumentError, "#{name} handler must respond to #call"
    end
  end
end
|
|
492
|
+
|
|
493
|
+
# Holds the table name prefix and one ModelDefinition per model key.
class Models
  MODEL_KEYS = {
    source: :source,
    item: :item,
    fetch_log: :fetch_log,
    scrape_log: :scrape_log,
    health_check_log: :health_check_log,
    item_content: :item_content,
    log_entry: :log_entry
  }.freeze

  attr_accessor :table_name_prefix

  def initialize
    @table_name_prefix = "sourcemon_"
    @definitions = MODEL_KEYS.keys.to_h { |model_key| [ model_key, ModelDefinition.new ] }
  end

  # One reader per model (#source, #item, ...) returning its definition.
  MODEL_KEYS.each_pair do |reader_name, definition_key|
    define_method(reader_name) { @definitions[definition_key] }
  end

  # Looks a definition up by name.
  #
  # @param name [Symbol, String] one of the MODEL_KEYS
  # @return [ModelDefinition]
  # @raise [ArgumentError] when the name is not a known model
  def for(name)
    @definitions.fetch(name.to_sym) do
      raise ArgumentError, "Unknown model #{name.inspect}"
    end
  end
end
|
|
523
|
+
|
|
524
|
+
# Collects the concerns and validations configured for one model.
class ModelDefinition
  attr_reader :validations

  def initialize
    @concern_definitions = []
    @validations = []
  end

  # Registers a concern, skipping it when one with the same signature is
  # already registered.
  #
  # @param concern [Module, String, Symbol, nil] a module, or a constant name
  #   that is resolved lazily; ignored when a block is given
  # @yield body of an anonymous module built with +Module.new+
  # @return [Module, String, Symbol] the anonymous module for blocks,
  #   otherwise the +concern+ argument as passed
  # @raise [ArgumentError] when no block is given and +concern+ is not a
  #   Module or a non-blank constant name
  def include_concern(concern = nil, &block)
    definition = ConcernDefinition.new(concern, block)
    already_known = @concern_definitions.any? { |existing| existing.signature == definition.signature }
    @concern_definitions << definition unless already_known

    definition.return_value
  end

  # Yields +signature, resolved_module+ for every registered concern; returns
  # an Enumerator when called without a block. Concerns given by name are
  # constantized at this point and raise ArgumentError when unknown.
  def each_concern
    return enum_for(:each_concern) unless block_given?

    @concern_definitions.each do |definition|
      yield definition.signature, definition.resolve
    end
  end

  # Records a validation.
  #
  # @param handler [Symbol, String, #call, nil] method name or callable;
  #   ignored when a block is given
  # @param options [Hash] stored on the ValidationDefinition as given
  # @return [ValidationDefinition] the recorded validation
  # @raise [ArgumentError] when there is no block and the handler is neither
  #   a name nor callable
  def validate(handler = nil, **options, &block)
    callable =
      if block
        block
      elsif handler.respond_to?(:call) && !handler.is_a?(Symbol) && !handler.is_a?(String)
        handler
      elsif handler.is_a?(Symbol) || handler.is_a?(String)
        handler.to_sym
      else
        raise ArgumentError, "Invalid validation handler #{handler.inspect}"
      end

    validation = ValidationDefinition.new(callable, options)
    @validations << validation
    validation
  end

  # One registered concern: how to resolve it, its de-duplication signature,
  # and the value handed back to the caller of #include_concern.
  class ConcernDefinition
    attr_reader :signature, :return_value

    def initialize(concern, block)
      @resolver = build_resolver(concern, block)
      @signature = build_signature(concern, block)
      # Blocks hand back the module built from them; everything else hands
      # back the argument unchanged.
      @return_value = block ? resolve : concern
    end

    # @return [Module] the concern module, memoized after the first call
    def resolve
      @resolved ||= @resolver.call
    end

    private

    def build_resolver(concern, block)
      if block
        mod = Module.new(&block)
        -> { mod }
      elsif concern.is_a?(Module)
        -> { concern }
      elsif constant_name?(concern)
        constant_name = concern.to_s
        # Looked up lazily so a concern can be named before it is loaded.
        lambda do
          constant_name.constantize
        rescue NameError => error
          raise ArgumentError, error.message
        end
      else
        raise ArgumentError, "Invalid concern #{concern.inspect}"
      end
    end

    # Only non-blank Strings and Symbols count as constant names. Checking
    # respond_to?(:to_s) would accept every object, nil included.
    def constant_name?(concern)
      (concern.is_a?(String) || concern.is_a?(Symbol)) && !concern.to_s.strip.empty?
    end

    def build_signature(concern, block)
      if block
        [ :anonymous_module, block.object_id ]
      elsif concern.is_a?(Module)
        [ :module, concern.object_id ]
      else
        [ :constant, concern.to_s ]
      end
    end
  end
end
|
|
626
|
+
|
|
627
|
+
# A recorded validation: its handler (method name or callable) and options.
class ValidationDefinition
  attr_reader :handler, :options

  def initialize(handler, options)
    @handler = handler
    @options = options
  end

  # @return [Array] +[handler_key, options]+, where the handler key is
  #   +[:symbol, name]+ for named handlers and +[:callable, object_id]+ otherwise
  def signature
    [ handler_key, options ]
  end

  # @return [Boolean] true when the handler is a method name (Symbol or String)
  def symbol?
    [ Symbol, String ].any? { |name_type| handler.is_a?(name_type) }
  end

  private

  # Named handlers are keyed by their Symbol form; callables by identity.
  def handler_key
    return [ :symbol, handler.to_sym ] if symbol?

    [ :callable, handler.object_id ]
  end
end
|
|
653
|
+
end
|
|
654
|
+
end
|