source_monitor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rubocop.yml +12 -0
- data/.ruby-version +1 -0
- data/AGENTS.md +132 -0
- data/CHANGELOG.md +66 -0
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +411 -0
- data/MIT-LICENSE +20 -0
- data/README.md +108 -0
- data/Rakefile +8 -0
- data/app/assets/builds/.keep +0 -0
- data/app/assets/config/source_monitor_manifest.js +4 -0
- data/app/assets/images/source_monitor/.keep +0 -0
- data/app/assets/javascripts/source_monitor/application.js +20 -0
- data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
- data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
- data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
- data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
- data/app/assets/svgs/source_monitor/.keep +0 -0
- data/app/controllers/concerns/.keep +0 -0
- data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
- data/app/controllers/source_monitor/application_controller.rb +62 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
- data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/health_controller.rb +10 -0
- data/app/controllers/source_monitor/items_controller.rb +116 -0
- data/app/controllers/source_monitor/logs_controller.rb +15 -0
- data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
- data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
- data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
- data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
- data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
- data/app/controllers/source_monitor/sources_controller.rb +179 -0
- data/app/helpers/source_monitor/application_helper.rb +327 -0
- data/app/jobs/source_monitor/application_job.rb +13 -0
- data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
- data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
- data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
- data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
- data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
- data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
- data/app/mailers/source_monitor/application_mailer.rb +17 -0
- data/app/models/concerns/.keep +0 -0
- data/app/models/concerns/source_monitor/loggable.rb +18 -0
- data/app/models/source_monitor/application_record.rb +5 -0
- data/app/models/source_monitor/fetch_log.rb +31 -0
- data/app/models/source_monitor/health_check_log.rb +28 -0
- data/app/models/source_monitor/item.rb +102 -0
- data/app/models/source_monitor/item_content.rb +11 -0
- data/app/models/source_monitor/log_entry.rb +56 -0
- data/app/models/source_monitor/scrape_log.rb +31 -0
- data/app/models/source_monitor/source.rb +115 -0
- data/app/views/layouts/source_monitor/application.html.erb +54 -0
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
- data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
- data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
- data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
- data/app/views/source_monitor/dashboard/index.html.erb +48 -0
- data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
- data/app/views/source_monitor/items/_details.html.erb +234 -0
- data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/items/index.html.erb +147 -0
- data/app/views/source_monitor/items/show.html.erb +3 -0
- data/app/views/source_monitor/logs/index.html.erb +208 -0
- data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
- data/app/views/source_monitor/shared/_toast.html.erb +34 -0
- data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
- data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
- data/app/views/source_monitor/sources/_details.html.erb +302 -0
- data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
- data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
- data/app/views/source_monitor/sources/_form.html.erb +143 -0
- data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
- data/app/views/source_monitor/sources/_row.html.erb +102 -0
- data/app/views/source_monitor/sources/edit.html.erb +28 -0
- data/app/views/source_monitor/sources/index.html.erb +153 -0
- data/app/views/source_monitor/sources/new.html.erb +22 -0
- data/app/views/source_monitor/sources/show.html.erb +3 -0
- data/config/coverage_baseline.json +2010 -0
- data/config/initializers/feedjira.rb +19 -0
- data/config/routes.rb +18 -0
- data/config/tailwind.config.js +17 -0
- data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
- data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
- data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
- data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
- data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
- data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
- data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
- data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
- data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
- data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
- data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
- data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
- data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
- data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
- data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
- data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
- data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
- data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
- data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
- data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
- data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
- data/docs/configuration.md +170 -0
- data/docs/deployment.md +63 -0
- data/docs/gh-cli-workflow.md +44 -0
- data/docs/installation.md +144 -0
- data/docs/troubleshooting.md +76 -0
- data/eslint.config.mjs +27 -0
- data/lib/generators/source_monitor/install/install_generator.rb +59 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
- data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
- data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
- data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
- data/lib/source_monitor/assets/bundler.rb +49 -0
- data/lib/source_monitor/assets.rb +6 -0
- data/lib/source_monitor/configuration.rb +654 -0
- data/lib/source_monitor/dashboard/queries.rb +356 -0
- data/lib/source_monitor/dashboard/quick_action.rb +7 -0
- data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
- data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
- data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
- data/lib/source_monitor/engine.rb +107 -0
- data/lib/source_monitor/events.rb +110 -0
- data/lib/source_monitor/feedjira_extensions.rb +103 -0
- data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
- data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
- data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
- data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
- data/lib/source_monitor/fetching/fetch_error.rb +88 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
- data/lib/source_monitor/fetching/retry_policy.rb +85 -0
- data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
- data/lib/source_monitor/health/source_health_check.rb +100 -0
- data/lib/source_monitor/health/source_health_monitor.rb +210 -0
- data/lib/source_monitor/health/source_health_reset.rb +68 -0
- data/lib/source_monitor/health.rb +46 -0
- data/lib/source_monitor/http.rb +85 -0
- data/lib/source_monitor/instrumentation.rb +52 -0
- data/lib/source_monitor/items/item_creator.rb +601 -0
- data/lib/source_monitor/items/retention_pruner.rb +146 -0
- data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
- data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
- data/lib/source_monitor/items/retention_strategies.rb +9 -0
- data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
- data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
- data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
- data/lib/source_monitor/jobs/visibility.rb +133 -0
- data/lib/source_monitor/logs/entry_sync.rb +69 -0
- data/lib/source_monitor/logs/filter_set.rb +163 -0
- data/lib/source_monitor/logs/query.rb +81 -0
- data/lib/source_monitor/logs/table_presenter.rb +161 -0
- data/lib/source_monitor/metrics.rb +77 -0
- data/lib/source_monitor/model_extensions.rb +109 -0
- data/lib/source_monitor/models/sanitizable.rb +76 -0
- data/lib/source_monitor/models/url_normalizable.rb +84 -0
- data/lib/source_monitor/pagination/paginator.rb +90 -0
- data/lib/source_monitor/realtime/adapter.rb +97 -0
- data/lib/source_monitor/realtime/broadcaster.rb +237 -0
- data/lib/source_monitor/realtime.rb +17 -0
- data/lib/source_monitor/release/changelog.rb +59 -0
- data/lib/source_monitor/release/runner.rb +73 -0
- data/lib/source_monitor/scheduler.rb +82 -0
- data/lib/source_monitor/scrapers/base.rb +105 -0
- data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
- data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
- data/lib/source_monitor/scrapers/readability.rb +156 -0
- data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
- data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
- data/lib/source_monitor/scraping/enqueuer.rb +125 -0
- data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
- data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
- data/lib/source_monitor/scraping/item_scraper.rb +84 -0
- data/lib/source_monitor/scraping/scheduler.rb +43 -0
- data/lib/source_monitor/scraping/state.rb +79 -0
- data/lib/source_monitor/security/authentication.rb +85 -0
- data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
- data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
- data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
- data/lib/source_monitor/version.rb +3 -0
- data/lib/source_monitor.rb +149 -0
- data/lib/tasks/recover_stalled_fetches.rake +16 -0
- data/lib/tasks/source_monitor_assets.rake +28 -0
- data/lib/tasks/source_monitor_tasks.rake +29 -0
- data/lib/tasks/test_smoke.rake +12 -0
- data/package-lock.json +3997 -0
- data/package.json +29 -0
- data/postcss.config.js +6 -0
- data/source_monitor.gemspec +46 -0
- data/stylelint.config.js +12 -0
- metadata +469 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "source_monitor/scraping/item_scraper/adapter_resolver"
|
|
4
|
+
require "source_monitor/scraping/item_scraper/persistence"
|
|
5
|
+
|
|
6
|
+
module SourceMonitor
  module Scraping
    # Orchestrates execution of the configured scraper adapter for an item.
    # The adapter output is handed to the Persistence collaborator, which
    # builds the Result; every Result (success or failure) is published through
    # SourceMonitor::Events.after_item_scraped before it is returned. The
    # service is intentionally small so manual and automated flows can share it.
    class ItemScraper
      UnknownAdapterError = Class.new(StandardError)

      # Outcome of one scrape attempt. Any status other than "failed"
      # (including a nil status) is treated as a success.
      Result = Struct.new(:status, :item, :log, :message, :error, keyword_init: true) do
        def success?
          status.to_s != "failed"
        end

        def failed?
          !success?
        end
      end

      attr_reader :item, :source, :adapter_name, :settings, :http, :adapter_resolver, :persistence

      # item         - the record to scrape.
      # source       - overrides item.source when given.
      # adapter_name - overrides the source's scraper_adapter; always stored as a String.
      # settings     - passed to the adapter untouched.
      # http         - HTTP client object passed to the adapter.
      def initialize(item:, source: nil, adapter_name: nil, settings: nil, http: SourceMonitor::HTTP)
        @item = item
        @source = source || item&.source
        @adapter_name = (adapter_name || @source&.scraper_adapter).to_s
        @settings = settings
        @http = http
        @adapter_resolver = SourceMonitor::Scraping::ItemScraper::AdapterResolver.new(name: @adapter_name, source: @source)
        @persistence = SourceMonitor::Scraping::ItemScraper::Persistence.new(item: @item, source: @source, adapter_name: @adapter_name)
      end

      # Runs the adapter and returns the Result built by the persistence layer.
      # Errors raised along the way (including the missing-source ArgumentError
      # below) are not propagated: they are converted into a failure Result via
      # persistence.persist_failure.
      def call
        started_at = Time.current
        # Only identifiers are logged: #log already adds item_id and source_id,
        # so the full item/source objects are deliberately kept out of the payload.
        log("scraper:start", started_at:)
        raise ArgumentError, "Item does not belong to a source" unless source

        adapter = adapter_resolver.resolve!
        adapter_result = adapter.call(item:, source:, settings:, http:)

        finalize_result(persistence.persist_success(adapter_result:, started_at:))
      rescue UnknownAdapterError => error
        # The adapter name could not be resolved; surface the resolver's
        # message verbatim as the failure message.
        log("scraper:unknown_adapter", error: error.message)
        finalize_result(persistence.persist_failure(error:, started_at:, message_override: error.message))
      rescue StandardError => error
        log("scraper:exception", error: error.message)
        finalize_result(persistence.persist_failure(error:, started_at:))
      end

      private

      # Logs the final state, publishes the after_item_scraped event and
      # returns the result unchanged.
      def finalize_result(result)
        log(
          "scraper:finalize",
          status: result&.status,
          scrape_status: result&.item&.scrape_status,
          log_id: result&.log&.id
        )
        SourceMonitor::Events.after_item_scraped(result)
        result
      end

      # Best-effort structured logging: does nothing without a Rails logger and
      # never lets a logging failure interrupt the scrape.
      def log(stage, **extra)
        return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

        payload = {
          stage: "SourceMonitor::Scraping::ItemScraper##{stage}",
          item_id: item&.id,
          source_id: source&.id,
          adapter: adapter_name
        }.merge(extra.compact)
        Rails.logger.info("[SourceMonitor::ManualScrape] #{payload.to_json}")
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module Scraping
    # Identifies items that still need scraping and enqueues jobs for sources
    # configured for automatic scraping. This mirrors the feed fetch scheduler
    # so recurring tasks can keep the scrape queue warm.
    class Scheduler
      DEFAULT_BATCH_SIZE = 100

      # Convenience entry point; returns the number of items enqueued.
      def self.run(limit: DEFAULT_BATCH_SIZE)
        new(limit:).run
      end

      # limit - maximum number of due items examined in one run.
      def initialize(limit:)
        @limit = limit
      end

      # Offers up to `limit` due items (oldest first) to the Enqueuer and
      # returns how many of them it reported as enqueued (0 when none are due).
      def run
        items = due_items.limit(limit).includes(:source).to_a

        items.count do |item|
          SourceMonitor::Scraping::Enqueuer.enqueue(item: item, source: item.source, reason: :auto).enqueued?
        end
      end

      private

      attr_reader :limit

      # Items never scraped (no scraped_at, blank scrape_status) whose source is
      # active with both scraping and auto-scrape switched on.
      def due_items
        SourceMonitor::Item
          .joins(:source)
          .merge(SourceMonitor::Source.active.where(scraping_enabled: true, auto_scrape: true))
          .where(scraped_at: nil)
          .where(scrape_status: [ nil, "" ])
          # Hash form lets Active Record qualify the column with the model's
          # real table name instead of a hard-coded "sourcemon_items" literal,
          # so the query keeps working if the table prefix differs.
          .order(created_at: :asc)
      end
    end
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module Scraping
    # One home for every scrape_status transition, so jobs, schedulers and UI
    # helpers write item state the same way and broadcast from a single spot.
    module State
      extend self

      IN_FLIGHT_STATUSES = %w[pending processing].freeze

      # Sets scrape_status to "pending"; does not broadcast unless asked to.
      def mark_pending!(item, broadcast: false, lock: true)
        update_status(item, "pending", broadcast: broadcast, lock: lock)
      end

      # Sets scrape_status to "processing".
      def mark_processing!(item, broadcast: true, lock: true)
        update_status(item, "processing", broadcast: broadcast, lock: lock)
      end

      # Sets scrape_status to "failed" and stamps scraped_at with failed_at
      # (falling back to the current time when failed_at is nil).
      def mark_failed!(item, broadcast: true, lock: true, failed_at: Time.current)
        stamp = failed_at || Time.current
        update_status(item, "failed", broadcast: broadcast, lock: lock, extra: { scraped_at: stamp })
      end

      # Resets scrape_status to nil, but only when the item is currently
      # pending or processing. The broadcast happens whether or not the status
      # was actually changed.
      def clear_inflight!(item, broadcast: true, lock: true)
        with_item(item, lock: lock) do |current|
          next unless in_flight?(current.scrape_status)

          cleared = { scrape_status: nil }
          current.update_columns(cleared)
          current.assign_attributes(cleared)
        end

        broadcast_item(item) if broadcast
      end

      # True for "pending" / "processing" (given as String or Symbol).
      def in_flight?(status)
        IN_FLIGHT_STATUSES.include?(status.to_s)
      end

      private

      # Writes the new status (plus any non-nil extra columns) with
      # update_columns, mirrors the values onto the in-memory record, then
      # optionally broadcasts.
      def update_status(item, status, broadcast:, lock:, extra: {})
        with_item(item, lock: lock) do |current|
          changes = { scrape_status: status }.merge(extra.compact)
          current.update_columns(changes)
          current.assign_attributes(changes)
        end

        broadcast_item(item) if broadcast
      end

      # Yields the item, inside item.with_lock (after a reload) when lock is
      # truthy. A nil item, or a record that can no longer be found, yields
      # nothing and returns nil.
      def with_item(item, lock:)
        return unless item
        return yield(item) unless lock

        item.with_lock do
          item.reload
          yield(item)
        end
      rescue ActiveRecord::RecordNotFound
        nil
      end

      # Broadcast failures are swallowed so they never break a state change.
      def broadcast_item(item)
        SourceMonitor::Realtime.broadcast_item(item)
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module Security
    # Bridges the engine to the host application's authentication setup as
    # described by SourceMonitor.config.authentication: optional
    # authenticate/authorize handlers plus the names of the controller methods
    # that expose the current user and the signed-in state.
    module Authentication
      # Calls the configured authenticate handler with the controller.
      # Returns nil when no handler is configured.
      def self.authenticate!(controller)
        call_handler(SourceMonitor.config.authentication.authenticate_handler, controller)
      end

      # Calls the configured authorize handler with the controller.
      # Returns nil when no handler is configured.
      def self.authorize!(controller)
        call_handler(SourceMonitor.config.authentication.authorize_handler, controller)
      end

      # Returns whatever the controller's current-user method returns, or nil
      # when no such method can be found on the controller.
      def self.current_user(controller)
        method_name = preferred_current_user_method(controller)
        safe_public_send(controller, method_name)
      end

      # Uses the controller's signed-in predicate when one is available;
      # otherwise falls back to "is there a current user?".
      def self.user_signed_in?(controller)
        method_name = preferred_user_signed_in_method(controller)

        if method_name
          safe_public_send(controller, method_name)
        else
          !!current_user(controller)
        end
      end

      def self.authentication_configured?
        config = SourceMonitor.config.authentication
        config.authenticate_handler.present? || config.authorize_handler.present?
      end

      def self.authorize_configured?
        SourceMonitor.config.authentication.authorize_handler.present?
      end

      def self.authenticate_configured?
        SourceMonitor.config.authentication.authenticate_handler.present?
      end

      def self.call_handler(handler, controller)
        return unless handler

        handler.call(controller)
      end

      # Invokes method_name on the controller if the controller has it, and
      # returns nil otherwise. Method discovery below deliberately includes
      # private methods (respond_to?(name, true)), so the call must be able to
      # reach them too: public methods go through public_send, while private or
      # protected helpers are invoked with send instead of raising NoMethodError.
      # The method name comes from engine configuration or a fixed fallback,
      # never from request input.
      def self.safe_public_send(controller, method_name)
        return unless method_name
        return controller.public_send(method_name) if controller.respond_to?(method_name)
        return unless controller.respond_to?(method_name, true)

        controller.send(method_name)
      end

      def self.preferred_current_user_method(controller)
        resolve_method_name(SourceMonitor.config.authentication.current_user_method, :current_user, controller)
      end

      def self.preferred_user_signed_in_method(controller)
        resolve_method_name(SourceMonitor.config.authentication.user_signed_in_method, :user_signed_in?, controller)
      end

      # Picks the configured method name (symbolised when possible). Without a
      # configured name, returns the fallback if the controller defines it
      # (public or private), else nil.
      def self.resolve_method_name(configured, fallback, controller)
        configured = configured.to_sym if configured.respond_to?(:to_sym)
        return configured if configured

        fallback if controller.respond_to?(fallback, true)
      end

      private_class_method :call_handler,
                           :safe_public_send,
                           :preferred_current_user_method,
                           :preferred_user_signed_in_method,
                           :resolve_method_name
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "action_view"
|
|
4
|
+
|
|
5
|
+
module SourceMonitor
  module Security
    # Recursively strips markup from parameter values using Action View's full
    # sanitizer. Hash keys and non-String leaves are passed through untouched.
    module ParameterSanitizer
      module_function

      # Returns a sanitized copy of value:
      # - ActionController::Parameters are converted with to_unsafe_h first
      # - Hashes become a new plain Hash with the same keys and sanitized values
      # - Arrays are mapped element by element
      # - Strings are sanitized and stripped
      # - anything else is returned as is
      def sanitize(value)
        return sanitize(value.to_unsafe_h) if parameters?(value)

        case value
        when Hash
          value.each_with_object({}) do |(key, val), memo|
            memo[key] = sanitize(val)
          end
        when Array
          value.map { |element| sanitize(element) }
        when String
          sanitize_string(value)
        else
          value
        end
      end

      # True when value is an ActionController::Parameters instance. The
      # constant is only touched when Action Controller is loaded, so the
      # sanitizer also works where just Action View is available (this file
      # requires only "action_view").
      def parameters?(value)
        return false unless defined?(ActionController::Parameters)

        value.is_a?(ActionController::Parameters)
      end
      private_class_method :parameters?

      # Blank strings are returned unchanged (not stripped); everything else is
      # run through the full sanitizer and then stripped of surrounding whitespace.
      def sanitize_string(value)
        stripped = value.to_s
        return stripped if stripped.blank?

        sanitized = full_sanitizer.sanitize(stripped)
        sanitized.strip
      end
      private_class_method :sanitize_string

      # Memoized ActionView::Base.full_sanitizer.
      def full_sanitizer
        @full_sanitizer ||= ActionView::Base.full_sanitizer
      end
      private_class_method :full_sanitizer
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module Sources
    # Builds Turbo Stream responses for source actions by queueing operations
    # on the given responder, keeping that sequencing out of the controller.
    class TurboStreamPresenter
      include ActionView::RecordIdentifier

      attr_reader :source, :responder

      def initialize(source:, responder:)
        @source = source
        @responder = responder
      end

      # Queues everything needed after a source is deleted, in this order:
      # remove the source's row, remove any existing empty-state element,
      # refresh the heatmap, append the empty-state row when the query has no
      # results left, and finally a "replace" redirect when redirect_location
      # is present. Returns the presenter itself.
      def render_deletion(metrics:, query:, search_params:, redirect_location: nil)
        responder.remove_row(source)
        responder.remove("source_monitor_sources_empty_state")

        refresh_heatmap(metrics, search_params)
        append_empty_state unless query.result.exists?

        if redirect_location.present?
          responder.redirect(redirect_location, action: "replace")
        end

        self
      end

      private

      # Replaces the heatmap with a fresh render of the distribution partial.
      def refresh_heatmap(metrics, search_params)
        heatmap_locals = {
          fetch_interval_distribution: metrics.fetch_interval_distribution,
          selected_bucket: metrics.selected_fetch_interval_bucket,
          search_params: search_params
        }

        responder.replace(
          "source_monitor_sources_heatmap",
          partial: "source_monitor/sources/fetch_interval_heatmap",
          locals: heatmap_locals
        )
      end

      # Adds the "no sources" placeholder row to the table body.
      def append_empty_state
        responder.append(
          "source_monitor_sources_table_body",
          partial: "source_monitor/sources/empty_state_row"
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  module TurboStreams
    # Collects Turbo Stream operations through a chainable API and renders them
    # later against a view context.
    class StreamResponder
      Operation = Struct.new(:action, :target, :partial, :locals, keyword_init: true)

      include ActionView::RecordIdentifier

      attr_reader :operations

      def initialize
        @operations = []
      end

      # Each queueing method below returns the responder so calls can be chained.

      def replace(target, partial:, locals: {})
        push_operation(:replace, target, partial, locals)
      end

      def append(target, partial:, locals: {})
        push_operation(:append, target, partial, locals)
      end

      # Replaces the element identified by dom_id(record, :details).
      def replace_details(record, partial:, locals: {})
        replace(dom_id(record, :details), partial: partial, locals: locals)
      end

      # Replaces the element identified by dom_id(record, :row).
      def replace_row(record, partial:, locals: {})
        replace(dom_id(record, :row), partial: partial, locals: locals)
      end

      def remove(target)
        push_operation(:remove, target, nil, nil)
      end

      # Removes the element identified by dom_id(record, :row).
      def remove_row(record)
        remove(dom_id(record, :row))
      end

      # Appends a toast partial to the notifications container. A nil level or
      # delay_ms falls back to :info / 5000.
      def toast(message:, level: :info, title: nil, delay_ms: 5000)
        toast_locals = {
          message: message,
          level: level || :info,
          title: title,
          delay_ms: delay_ms || 5000
        }

        append("source_monitor_notifications", partial: "source_monitor/shared/toast", locals: toast_locals)
      end

      # Queues a custom "redirect" stream action carrying the URL and the
      # visit action to use.
      def redirect(url, action: "advance")
        push_operation(:redirect, "source_monitor_redirects", nil, { url: url, turbo_action: action })
      end

      # Renders every queued operation, in order, and returns the results as an Array.
      def render(view_context)
        operations.map { |operation| render_operation(view_context, operation) }
      end

      private

      # Appends one Operation to the queue and returns the responder.
      def push_operation(action, target, partial, locals)
        operations << Operation.new(action: action, target: target, partial: partial, locals: locals)
        self
      end

      # Dispatches a single operation: redirects are built by hand, everything
      # else goes through the view context's turbo_stream builder (with partial
      # and locals only when a partial was given).
      def render_operation(view_context, operation)
        return render_redirect(view_context, operation) if operation.action == :redirect

        unless operation.partial
          return view_context.turbo_stream.public_send(operation.action, operation.target)
        end

        view_context.turbo_stream.public_send(
          operation.action,
          operation.target,
          partial: operation.partial,
          locals: operation.locals || {}
        )
      end

      # Custom redirect action - manually build the turbo-stream tag with the
      # URL and visit-action attributes.
      def render_redirect(view_context, operation)
        destination = operation.locals[:url]
        visit_action = operation.locals[:turbo_action]

        view_context.tag.send(
          :"turbo-stream",
          action: "redirect",
          target: operation.target,
          url: destination,
          "visit-action": visit_action
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
begin
|
|
2
|
+
require "solid_queue"
|
|
3
|
+
rescue LoadError
|
|
4
|
+
# Solid Queue is optional if the host app supplies a different Active Job backend.
|
|
5
|
+
end
|
|
6
|
+
|
|
7
|
+
begin
|
|
8
|
+
require "solid_cable"
|
|
9
|
+
rescue LoadError
|
|
10
|
+
# Solid Cable is optional if the host app uses Redis or another Action Cable adapter.
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
begin
|
|
14
|
+
require "turbo-rails"
|
|
15
|
+
rescue LoadError
|
|
16
|
+
# Turbo is optional but recommended for real-time updates.
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
begin
|
|
20
|
+
require "ransack"
|
|
21
|
+
rescue LoadError
|
|
22
|
+
# Ransack powers search forms when available.
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
require "source_monitor/version"
|
|
26
|
+
require "active_support/core_ext/module/redefine_method"
|
|
27
|
+
|
|
28
|
+
# Define SourceMonitor.table_name_prefix as a delegator to the engine. The
# block only runs when the method is called, so SourceMonitor::Engine (required
# further down in this file) does not have to be loaded yet.
SourceMonitor.singleton_class.redefine_method(:table_name_prefix) do
  SourceMonitor::Engine.table_name_prefix
end

# Install the same delegator again when the :active_record load hook fires.
# NOTE(review): presumably this guards against the method being replaced while
# Active Record loads — confirm the reason before removing either definition.
ActiveSupport.on_load(:active_record) do
  SourceMonitor.singleton_class.redefine_method(:table_name_prefix) do
    SourceMonitor::Engine.table_name_prefix
  end
end
|
|
37
|
+
|
|
38
|
+
require "source_monitor/engine"
|
|
39
|
+
require "source_monitor/configuration"
|
|
40
|
+
require "source_monitor/model_extensions"
|
|
41
|
+
require "source_monitor/events"
|
|
42
|
+
require "source_monitor/instrumentation"
|
|
43
|
+
require "source_monitor/metrics"
|
|
44
|
+
require "source_monitor/http"
|
|
45
|
+
require "source_monitor/feedjira_extensions"
|
|
46
|
+
require "source_monitor/dashboard/quick_action"
|
|
47
|
+
require "source_monitor/dashboard/recent_activity"
|
|
48
|
+
require "source_monitor/dashboard/recent_activity_presenter"
|
|
49
|
+
require "source_monitor/dashboard/quick_actions_presenter"
|
|
50
|
+
require "source_monitor/dashboard/queries"
|
|
51
|
+
require "source_monitor/dashboard/turbo_broadcaster"
|
|
52
|
+
require "source_monitor/logs/entry_sync"
|
|
53
|
+
require "source_monitor/logs/filter_set"
|
|
54
|
+
require "source_monitor/logs/query"
|
|
55
|
+
require "source_monitor/logs/table_presenter"
|
|
56
|
+
require "source_monitor/realtime"
|
|
57
|
+
require "source_monitor/analytics/source_fetch_interval_distribution"
|
|
58
|
+
require "source_monitor/analytics/source_activity_rates"
|
|
59
|
+
require "source_monitor/analytics/sources_index_metrics"
|
|
60
|
+
require "source_monitor/jobs/cleanup_options"
|
|
61
|
+
require "source_monitor/jobs/visibility"
|
|
62
|
+
require "source_monitor/jobs/solid_queue_metrics"
|
|
63
|
+
require "source_monitor/security/parameter_sanitizer"
|
|
64
|
+
require "source_monitor/security/authentication"
|
|
65
|
+
require "source_monitor/pagination/paginator"
|
|
66
|
+
require "source_monitor/turbo_streams/stream_responder"
|
|
67
|
+
require "source_monitor/scrapers/base"
|
|
68
|
+
require "source_monitor/scrapers/fetchers/http_fetcher"
|
|
69
|
+
require "source_monitor/scrapers/parsers/readability_parser"
|
|
70
|
+
require "source_monitor/scrapers/readability"
|
|
71
|
+
require "source_monitor/scraping/enqueuer"
|
|
72
|
+
require "source_monitor/scraping/bulk_source_scraper"
|
|
73
|
+
require "source_monitor/scraping/state"
|
|
74
|
+
require "source_monitor/scraping/scheduler"
|
|
75
|
+
require "source_monitor/scraping/item_scraper"
|
|
76
|
+
require "source_monitor/fetching/fetch_error"
|
|
77
|
+
require "source_monitor/fetching/feed_fetcher"
|
|
78
|
+
require "source_monitor/items/retention_pruner"
|
|
79
|
+
require "source_monitor/fetching/fetch_runner"
|
|
80
|
+
require "source_monitor/scheduler"
|
|
81
|
+
require "source_monitor/items/item_creator"
|
|
82
|
+
require "source_monitor/health"
|
|
83
|
+
require "source_monitor/assets"
|
|
84
|
+
|
|
85
|
+
module SourceMonitor
  class << self
    # Yields the shared configuration object to the caller's block and then
    # asks ModelExtensions to reload.
    def configure
      yield config
      SourceMonitor::ModelExtensions.reload!
    end

    # Returns the memoized Configuration, building it on first access.
    def config
      @config ||= Configuration.new
    end

    # Shortcut for +config.events+.
    def events
      config.events
    end

    # Swaps in a brand-new Configuration and re-runs the reload!/setup! hooks
    # on ModelExtensions, Health, Realtime and Dashboard::TurboBroadcaster,
    # in that order.
    def reset_configuration!
      @config = Configuration.new
      SourceMonitor::ModelExtensions.reload!
      SourceMonitor::Health.setup!
      SourceMonitor::Realtime.setup!
      SourceMonitor::Dashboard::TurboBroadcaster.setup!
    end

    # Delegates to +config.queue_name_for+ for the given role.
    def queue_name(role)
      config.queue_name_for(role)
    end

    # Delegates to +config.concurrency_for+ for the given role.
    def queue_concurrency(role)
      config.concurrency_for(role)
    end

    # Delegates to the engine's +table_name_prefix+.
    def table_name_prefix
      SourceMonitor::Engine.table_name_prefix
    end

    # Returns the configured +mission_control_enabled+ value as-is.
    def mission_control_enabled?
      config.mission_control_enabled
    end

    # Resolves the configured dashboard path (calling it when it is callable)
    # and returns it only when it is present and passes
    # +valid_dashboard_path?+. Returns nil otherwise, including when any
    # StandardError is raised along the way.
    def mission_control_dashboard_path
      candidate = resolve_callable(config.mission_control_dashboard_path)
      return nil if candidate.blank?
      return candidate if valid_dashboard_path?(candidate)

      nil
    rescue StandardError
      nil
    end

    private

    # Invokes +value+ when it responds to +call+; otherwise hands it back.
    def resolve_callable(value)
      return value unless value.respond_to?(:call)

      value.call
    end

    # True for anything starting with http:// or https://, or for a path the
    # host application's router recognizes as a GET route; false when the
    # router raises RoutingError.
    def valid_dashboard_path?(value)
      unless value.to_s.match?(%r{\Ahttps?://})
        begin
          Rails.application.routes.recognize_path(value, method: :get)
        rescue ActionController::RoutingError
          return false
        end
      end

      true
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
namespace :source_monitor do
  namespace :maintenance do
    desc "Recover sources stuck in the fetching state when Solid Queue workers crash"
    # Runs StalledFetchReconciler and prints how many source ids and jobs its
    # result reports, one summary line each.
    task recover_stalled_fetches: :environment do
      # Both summary lines share one pluraliser so a count of exactly 1 reads
      # "1 stalled source" / "1 orphaned job" rather than "1 stalled sources".
      counted = ->(count, noun) { "#{count} #{noun}#{'s' unless count == 1}" }

      result = SourceMonitor::Fetching::StalledFetchReconciler.call

      puts "Recovered #{counted.call(result.recovered_source_ids.size, 'stalled source')}."
      puts "Removed #{counted.call(result.jobs_removed.size, 'orphaned job')}."
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require "source_monitor/assets/bundler"
|
|
2
|
+
|
|
3
|
+
# Asset tasks for the engine: each one depends on :environment and forwards to
# the matching bang method on SourceMonitor::Assets::Bundler.
namespace :source_monitor do
  namespace :assets do
    desc "Build SourceMonitor CSS and JS bundles"
    task(build: [ :environment ]) { SourceMonitor::Assets::Bundler.build! }

    desc "Verify required SourceMonitor asset bundles exist"
    task(verify: [ :environment ]) { SourceMonitor::Assets::Bundler.verify! }
  end
end
|
|
16
|
+
|
|
17
|
+
# Body-less aliases under the app: namespace; each one only lists the
# corresponding source_monitor:assets task as its prerequisite.
namespace :app do
  namespace :source_monitor do
    namespace :assets do
      %w[build verify].each do |action|
        task action => "source_monitor:assets:#{action}"
      end
    end
  end
end
|
|
25
|
+
|
|
26
|
+
# When a "test" task is already defined, add the asset verification task to
# its prerequisites; otherwise do nothing.
existing_test_task = Rake::Task["test"] if defined?(Rake::Task) && Rake::Task.task_defined?("test")
existing_test_task&.enhance([ "source_monitor:assets:verify" ])
|