source_monitor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rubocop.yml +12 -0
- data/.ruby-version +1 -0
- data/AGENTS.md +132 -0
- data/CHANGELOG.md +66 -0
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +411 -0
- data/MIT-LICENSE +20 -0
- data/README.md +108 -0
- data/Rakefile +8 -0
- data/app/assets/builds/.keep +0 -0
- data/app/assets/config/source_monitor_manifest.js +4 -0
- data/app/assets/images/source_monitor/.keep +0 -0
- data/app/assets/javascripts/source_monitor/application.js +20 -0
- data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
- data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
- data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
- data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
- data/app/assets/svgs/source_monitor/.keep +0 -0
- data/app/controllers/concerns/.keep +0 -0
- data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
- data/app/controllers/source_monitor/application_controller.rb +62 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
- data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/health_controller.rb +10 -0
- data/app/controllers/source_monitor/items_controller.rb +116 -0
- data/app/controllers/source_monitor/logs_controller.rb +15 -0
- data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
- data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
- data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
- data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
- data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
- data/app/controllers/source_monitor/sources_controller.rb +179 -0
- data/app/helpers/source_monitor/application_helper.rb +327 -0
- data/app/jobs/source_monitor/application_job.rb +13 -0
- data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
- data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
- data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
- data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
- data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
- data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
- data/app/mailers/source_monitor/application_mailer.rb +17 -0
- data/app/models/concerns/.keep +0 -0
- data/app/models/concerns/source_monitor/loggable.rb +18 -0
- data/app/models/source_monitor/application_record.rb +5 -0
- data/app/models/source_monitor/fetch_log.rb +31 -0
- data/app/models/source_monitor/health_check_log.rb +28 -0
- data/app/models/source_monitor/item.rb +102 -0
- data/app/models/source_monitor/item_content.rb +11 -0
- data/app/models/source_monitor/log_entry.rb +56 -0
- data/app/models/source_monitor/scrape_log.rb +31 -0
- data/app/models/source_monitor/source.rb +115 -0
- data/app/views/layouts/source_monitor/application.html.erb +54 -0
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
- data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
- data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
- data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
- data/app/views/source_monitor/dashboard/index.html.erb +48 -0
- data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
- data/app/views/source_monitor/items/_details.html.erb +234 -0
- data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/items/index.html.erb +147 -0
- data/app/views/source_monitor/items/show.html.erb +3 -0
- data/app/views/source_monitor/logs/index.html.erb +208 -0
- data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
- data/app/views/source_monitor/shared/_toast.html.erb +34 -0
- data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
- data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
- data/app/views/source_monitor/sources/_details.html.erb +302 -0
- data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
- data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
- data/app/views/source_monitor/sources/_form.html.erb +143 -0
- data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
- data/app/views/source_monitor/sources/_row.html.erb +102 -0
- data/app/views/source_monitor/sources/edit.html.erb +28 -0
- data/app/views/source_monitor/sources/index.html.erb +153 -0
- data/app/views/source_monitor/sources/new.html.erb +22 -0
- data/app/views/source_monitor/sources/show.html.erb +3 -0
- data/config/coverage_baseline.json +2010 -0
- data/config/initializers/feedjira.rb +19 -0
- data/config/routes.rb +18 -0
- data/config/tailwind.config.js +17 -0
- data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
- data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
- data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
- data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
- data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
- data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
- data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
- data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
- data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
- data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
- data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
- data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
- data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
- data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
- data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
- data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
- data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
- data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
- data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
- data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
- data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
- data/docs/configuration.md +170 -0
- data/docs/deployment.md +63 -0
- data/docs/gh-cli-workflow.md +44 -0
- data/docs/installation.md +144 -0
- data/docs/troubleshooting.md +76 -0
- data/eslint.config.mjs +27 -0
- data/lib/generators/source_monitor/install/install_generator.rb +59 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
- data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
- data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
- data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
- data/lib/source_monitor/assets/bundler.rb +49 -0
- data/lib/source_monitor/assets.rb +6 -0
- data/lib/source_monitor/configuration.rb +654 -0
- data/lib/source_monitor/dashboard/queries.rb +356 -0
- data/lib/source_monitor/dashboard/quick_action.rb +7 -0
- data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
- data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
- data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
- data/lib/source_monitor/engine.rb +107 -0
- data/lib/source_monitor/events.rb +110 -0
- data/lib/source_monitor/feedjira_extensions.rb +103 -0
- data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
- data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
- data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
- data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
- data/lib/source_monitor/fetching/fetch_error.rb +88 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
- data/lib/source_monitor/fetching/retry_policy.rb +85 -0
- data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
- data/lib/source_monitor/health/source_health_check.rb +100 -0
- data/lib/source_monitor/health/source_health_monitor.rb +210 -0
- data/lib/source_monitor/health/source_health_reset.rb +68 -0
- data/lib/source_monitor/health.rb +46 -0
- data/lib/source_monitor/http.rb +85 -0
- data/lib/source_monitor/instrumentation.rb +52 -0
- data/lib/source_monitor/items/item_creator.rb +601 -0
- data/lib/source_monitor/items/retention_pruner.rb +146 -0
- data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
- data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
- data/lib/source_monitor/items/retention_strategies.rb +9 -0
- data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
- data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
- data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
- data/lib/source_monitor/jobs/visibility.rb +133 -0
- data/lib/source_monitor/logs/entry_sync.rb +69 -0
- data/lib/source_monitor/logs/filter_set.rb +163 -0
- data/lib/source_monitor/logs/query.rb +81 -0
- data/lib/source_monitor/logs/table_presenter.rb +161 -0
- data/lib/source_monitor/metrics.rb +77 -0
- data/lib/source_monitor/model_extensions.rb +109 -0
- data/lib/source_monitor/models/sanitizable.rb +76 -0
- data/lib/source_monitor/models/url_normalizable.rb +84 -0
- data/lib/source_monitor/pagination/paginator.rb +90 -0
- data/lib/source_monitor/realtime/adapter.rb +97 -0
- data/lib/source_monitor/realtime/broadcaster.rb +237 -0
- data/lib/source_monitor/realtime.rb +17 -0
- data/lib/source_monitor/release/changelog.rb +59 -0
- data/lib/source_monitor/release/runner.rb +73 -0
- data/lib/source_monitor/scheduler.rb +82 -0
- data/lib/source_monitor/scrapers/base.rb +105 -0
- data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
- data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
- data/lib/source_monitor/scrapers/readability.rb +156 -0
- data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
- data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
- data/lib/source_monitor/scraping/enqueuer.rb +125 -0
- data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
- data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
- data/lib/source_monitor/scraping/item_scraper.rb +84 -0
- data/lib/source_monitor/scraping/scheduler.rb +43 -0
- data/lib/source_monitor/scraping/state.rb +79 -0
- data/lib/source_monitor/security/authentication.rb +85 -0
- data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
- data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
- data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
- data/lib/source_monitor/version.rb +3 -0
- data/lib/source_monitor.rb +149 -0
- data/lib/tasks/recover_stalled_fetches.rake +16 -0
- data/lib/tasks/source_monitor_assets.rake +28 -0
- data/lib/tasks/source_monitor_tasks.rake +29 -0
- data/lib/tasks/test_smoke.rake +12 -0
- data/package-lock.json +3997 -0
- data/package.json +29 -0
- data/postcss.config.js +6 -0
- data/source_monitor.gemspec +46 -0
- data/stylelint.config.js +12 -0
- metadata +469 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Dashboard
|
|
5
|
+
# Buckets the sources in +scope+ by how many minutes remain until their
# +next_fetch_at+, measured from +reference_time+, using the windows listed
# in INTERVAL_DEFINITIONS. Sources without a +next_fetch_at+ are placed in
# the definition flagged +include_unscheduled+.
class UpcomingFetchSchedule
  # A single bucket: window bounds (in minutes and as absolute times) plus
  # the sorted sources that fall inside it.
  Group = Struct.new(
    :key, :label,
    :min_minutes, :max_minutes,
    :window_start, :window_end,
    :include_unscheduled,
    :sources,
    keyword_init: true
  ) do
    def empty?
      sources.blank?
    end
  end

  # Windows are half-open: min_minutes <= minutes < max_minutes. A nil
  # max_minutes means the window has no upper bound.
  INTERVAL_DEFINITIONS = [
    { key: "0-30", label: "Within 30 minutes", min_minutes: 0, max_minutes: 30 },
    { key: "30-60", label: "30-60 minutes", min_minutes: 30, max_minutes: 60 },
    { key: "60-120", label: "60-120 minutes", min_minutes: 60, max_minutes: 120 },
    { key: "120-240", label: "120-240 minutes", min_minutes: 120, max_minutes: 240 },
    { key: "240+", label: "240 minutes +", min_minutes: 240, max_minutes: nil, include_unscheduled: true }
  ].freeze

  attr_reader :scope, :reference_time

  def initialize(scope: SourceMonitor::Source.active, reference_time: Time.current)
    @scope = scope
    @reference_time = reference_time
  end

  # Memoized list of Group structs, one per interval definition, in
  # definition order.
  def groups
    @groups ||= build_groups
  end

  private

  def build_groups
    buckets = build_definitions

    scheduled_sources.each do |source|
      match = definition_for(source.next_fetch_at)
      next unless match

      buckets[match[:key]][:sources] << source
    end

    # The catch-all bucket does not change while iterating, so look it up once.
    catch_all = buckets.values.find { |bucket| bucket[:include_unscheduled] }
    unscheduled_sources.each do |source|
      catch_all[:sources] << source if catch_all
    end

    buckets.values.map do |bucket|
      lower, upper = bucket.values_at(:min_minutes, :max_minutes)

      Group.new(
        key: bucket[:key],
        label: bucket[:label],
        min_minutes: lower,
        max_minutes: upper,
        window_start: window_start_for(lower),
        window_end: window_end_for(upper),
        include_unscheduled: bucket[:include_unscheduled],
        sources: sort_sources(bucket[:sources])
      )
    end
  end

  # Copies every interval definition and gives each copy its own empty
  # :sources array, keyed by the definition's :key.
  def build_definitions
    INTERVAL_DEFINITIONS.to_h do |definition|
      [ definition[:key], definition.merge(sources: []) ]
    end
  end

  def scheduled_sources
    scope.where.not(next_fetch_at: nil).order(:next_fetch_at)
  end

  def unscheduled_sources
    scope.where(next_fetch_at: nil).order(:name)
  end

  # First interval definition whose window contains the time remaining
  # until +next_fetch_at+.
  def definition_for(next_fetch_at)
    remaining = minutes_until(next_fetch_at)

    INTERVAL_DEFINITIONS.find do |definition|
      lower, upper = definition.values_at(:min_minutes, :max_minutes)
      next false unless remaining >= lower

      upper.nil? || remaining < upper
    end
  end

  # Minutes from reference_time to +timestamp+; overdue timestamps count as
  # 0 and a blank timestamp counts as infinitely far away.
  def minutes_until(timestamp)
    return Float::INFINITY if timestamp.blank?

    delta = (timestamp - reference_time) / 60.0
    delta.negative? ? 0 : delta
  end

  def window_start_for(min_minutes)
    window_boundary(min_minutes)
  end

  def window_end_for(max_minutes)
    window_boundary(max_minutes)
  end

  # Absolute time for a minute offset, or nil for an open/unbounded edge.
  def window_boundary(offset_minutes)
    return nil if offset_minutes.nil? || offset_minutes.infinite?

    reference_time + offset_minutes.minutes
  end

  # Orders by next_fetch_at, then name; sources without a next_fetch_at sort
  # after everything else via a far-future placeholder.
  def sort_sources(sources)
    placeholder = reference_time + 100.years

    sources.sort_by { |source| [ source.next_fetch_at || placeholder, source.name.to_s ] }
  end
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
module SourceMonitor
|
|
2
|
+
# Rails engine for SourceMonitor. Isolates the namespace and registers the
# initializers that wire up assets, metrics, realtime/dashboard streams and
# background-job integration.
class Engine < ::Rails::Engine
  isolate_namespace SourceMonitor
  require "source_monitor/assets/bundler"
  require "source_monitor/jobs/fetch_failure_subscriber"

  # Table name prefix, read from the models section of the configuration.
  def self.table_name_prefix
    SourceMonitor.config.models.table_name_prefix
  end

  # Paths (relative to their asset root) of every regular, non-dotfile found
  # under source_monitor/ in the engine's images and svgs directories.
  def self.asset_precompile_entries
    engine_root = SourceMonitor::Engine.root

    %w[images svgs].flat_map do |folder|
      base_path = engine_root.join("app/assets/#{folder}")
      candidates = Dir[base_path.join("source_monitor/**/*").to_s]

      visible_files = candidates.select do |absolute_path|
        File.file?(absolute_path) && !File.basename(absolute_path).start_with?(".")
      end

      visible_files.map do |absolute_path|
        Pathname.new(absolute_path).relative_path_from(base_path).to_s
      end
    end
  end

  # Adds the engine's asset directories to the host app's asset paths when
  # the app exposes an assets configuration.
  initializer "source_monitor.assets" do |app|
    next unless app.config.respond_to?(:assets)

    engine_root = SourceMonitor::Engine.root

    %w[builds images svgs].each do |folder|
      app.config.assets.paths << engine_root.join("app/assets/#{folder}").to_s
    end
  end

  # Registers the manifest and the individual image/svg files for precompile.
  initializer "source_monitor.assets.sprockets" do |app|
    next unless app.config.respond_to?(:assets)

    assets = app.config.assets
    manifest_entry = "source_monitor_manifest.js"

    assets.precompile << manifest_entry unless assets.precompile.include?(manifest_entry)
    assets.precompile.concat(SourceMonitor::Engine.asset_precompile_entries)
    assets.precompile.uniq!
  end

  initializer "source_monitor.metrics" do
    SourceMonitor::Metrics.setup_subscribers!
  end

  # Runs inside to_prepare, so the setup hooks are invoked again whenever
  # Rails re-runs its prepare callbacks.
  initializer "source_monitor.dashboard_streams" do
    config.to_prepare do
      SourceMonitor::Health.setup!
      SourceMonitor::Realtime.setup!
      SourceMonitor::Dashboard::TurboBroadcaster.setup!
    end
  end

  initializer "source_monitor.jobs" do |app|
    SourceMonitor::Jobs::Visibility.setup!
    SourceMonitor::Jobs::FetchFailureSubscriber.setup!

    # Everything below only applies when Solid Queue is loaded.
    next unless defined?(::SolidQueue)

    # Switch to Solid Queue only when no adapter, or the async adapter, is set.
    current_adapter = ActiveJob::Base.queue_adapter_name.to_s
    ActiveJob::Base.queue_adapter = :solid_queue if ["", "async"].include?(current_adapter)

    if defined?(::SolidQueue::RecurringTask)
      configured_job = SourceMonitor.config.recurring_command_job_class

      if configured_job.present?
        # The configured value may be a class or the name of one.
        SolidQueue::RecurringTask.default_job_class =
          configured_job.is_a?(String) ? configured_job.constantize : configured_job
      end
    end

    if defined?(MissionControl::Jobs)
      registry = MissionControl::Jobs.adapters

      # The adapters collection is handled both as a set-like (#add) and as
      # an array-like (#<<) object.
      if registry.respond_to?(:add)
        registry.add(:solid_queue)
        registry.delete(:async)
      elsif registry.respond_to?(:<<)
        registry << :solid_queue unless registry.include?(:solid_queue)
        registry.delete(:async) if registry.respond_to?(:delete)
      end

      # Prepend the Mission Control extension only if it is not already an
      # ancestor of the Solid Queue adapter.
      if defined?(ActiveJob::QueueAdapters::SolidQueueExt) &&
          !(ActiveJob::QueueAdapters::SolidQueueAdapter < ActiveJob::QueueAdapters::SolidQueueExt)
        ActiveJob::QueueAdapters::SolidQueueAdapter.prepend ActiveJob::QueueAdapters::SolidQueueExt
      end

      # Give every registered application a Solid Queue server unless it
      # already has one.
      MissionControl::Jobs.applications.each do |application|
        already_registered = application.servers.any? do |server|
          server.queue_adapter_name == :solid_queue
        end
        next if already_registered

        application.add_servers(solid_queue: ActiveJob::QueueAdapters.lookup(:solid_queue).new)
      end
    end

    app.config.after_initialize do
      SourceMonitor::Jobs::Visibility.setup!
    end
  end
end
|
|
107
|
+
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/core_ext/time"
|
|
4
|
+
|
|
5
|
+
module SourceMonitor
|
|
6
|
+
# Builds event/context structs and hands them to the callbacks and item
# processors registered on SourceMonitor.config.events. A handler that
# raises StandardError is logged and does not stop the remaining handlers.
module Events
  ItemCreatedEvent = Struct.new(:item, :source, :entry, :result, :status, :occurred_at, keyword_init: true) do
    def created?
      status.to_s == "created"
    end
  end

  ItemScrapedEvent = Struct.new(:item, :source, :result, :log, :status, :occurred_at, keyword_init: true) do
    # Any status other than "failed" (including a missing one) is a success.
    def success?
      status.to_s != "failed"
    end
  end

  FetchCompletedEvent = Struct.new(:source, :result, :status, :occurred_at, keyword_init: true)

  ItemProcessorContext = Struct.new(:item, :source, :entry, :result, :status, :occurred_at, keyword_init: true)

  module_function

  # Dispatches :after_item_created with an ItemCreatedEvent whose status is
  # taken from +result+ (nil when +result+ is nil).
  def after_item_created(item:, source:, entry:, result:)
    event = ItemCreatedEvent.new(
      item: item,
      source: source,
      entry: entry,
      result: result,
      status: result&.status,
      occurred_at: Time.current
    )

    dispatch(:after_item_created, event)
  end

  # Dispatches :after_item_scraped; item, source, log and status are all
  # derived from +result+ and are nil when it is nil.
  def after_item_scraped(result)
    item = result&.item
    source = item&.source
    event = ItemScrapedEvent.new(
      item: item,
      source: source,
      result: result,
      log: result&.log,
      status: result&.status,
      occurred_at: Time.current
    )

    dispatch(:after_item_scraped, event)
  end

  # Dispatches :after_fetch_completed with a FetchCompletedEvent.
  def after_fetch_completed(source:, result:)
    event = FetchCompletedEvent.new(
      source: source,
      result: result,
      status: result&.status,
      occurred_at: Time.current
    )

    dispatch(:after_fetch_completed, event)
  end

  # Runs every configured item processor with an ItemProcessorContext.
  def run_item_processors(source:, entry:, result:)
    item = result&.item
    context = ItemProcessorContext.new(
      item: item,
      source: source,
      entry: entry,
      result: result,
      status: result&.status,
      occurred_at: Time.current
    )

    SourceMonitor.config.events.item_processors.each do |processor|
      invoke(processor, context)
    rescue StandardError => error
      log_handler_error(:item_processor, processor, error)
    end
  end

  # Invokes each callback registered for +event_name+, isolating failures.
  def dispatch(event_name, event)
    SourceMonitor.config.events.callbacks_for(event_name).each do |callback|
      invoke(callback, event)
    rescue StandardError => error
      log_handler_error(event_name, callback, error)
    end
  end

  # Calls +callable+ without arguments when it reports an arity of zero,
  # otherwise passes +event+.
  def invoke(callable, event)
    if callable.respond_to?(:arity) && callable.arity.zero?
      callable.call
    else
      callable.call(event)
    end
  end

  # Reports a failed handler through Rails.logger when one is available,
  # otherwise through Kernel#warn. Never raises StandardError itself.
  def log_handler_error(kind, handler, error)
    # A handler's own #inspect may raise; fall back to its class so the
    # failure is still reported instead of being lost.
    handler_label =
      begin
        handler.inspect
      rescue StandardError
        "#<#{handler.class}>"
      end

    message = "[SourceMonitor] #{kind} handler #{handler_label} failed: #{error.class}: #{error.message}"

    if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
      Rails.logger.error(message)
    else
      warn(message)
    end
  rescue StandardError
    # message is nil when building it raised; still emit something useful.
    warn(message || "[SourceMonitor] #{kind} handler failed: #{error.class}")
  end
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "feedjira"
|
|
4
|
+
require "sax-machine"
|
|
5
|
+
|
|
6
|
+
module SourceMonitor
|
|
7
|
+
# Declares extra SAXMachine mappings on Feedjira's RSS and Atom entry parsers
# and the small node classes those mappings parse into.
module FeedjiraExtensions
  # Attributes read from a media:thumbnail element.
  class MediaThumbnail
    include SAXMachine

    %i[url width height].each { |name| attribute name }
  end

  # Attributes read from a media:content element.
  class MediaContent
    include SAXMachine

    %i[url type medium height width].each { |name| attribute name }
    attribute :fileSize, as: :file_size
    %i[duration expression].each { |name| attribute name }
  end

  # Attributes read from an RSS enclosure element.
  class Enclosure
    include SAXMachine

    %i[url type length].each { |name| attribute name }
  end

  # Child elements read from an Atom author element.
  class AtomAuthor
    include SAXMachine

    %i[name email uri].each { |name| element name }
  end

  # Attributes read from an Atom link element.
  class AtomLink
    include SAXMachine

    %i[href rel type length].each { |name| attribute name }
  end

  # Prepended to the RSS entry parser: records every non-nil value passed to
  # author= before delegating to the original writer.
  module RSSAuthorCapture
    def author=(value)
      (@source_monitor_rss_authors ||= []) << value if value
      super
    end

    # All captured author values, or an empty array when none were set.
    def rss_authors
      Array(@source_monitor_rss_authors)
    end
  end

  module_function

  # Installs the parser extensions once; later calls are no-ops.
  def apply!
    return if @applied

    extend_rss_entry
    extend_atom_entry

    @applied = true
  end

  def extend_rss_entry
    rss_entry = Feedjira::Parser::RSSEntry

    rss_entry.element :"media:keywords", as: :media_keywords_raw
    rss_entry.element :"itunes:keywords", as: :itunes_keywords_raw
    rss_entry.element :"slash:comments", as: :slash_comments_raw
    rss_entry.elements :"media:thumbnail", as: :media_thumbnail_nodes, class: MediaThumbnail
    rss_entry.elements :"media:content", as: :media_content_nodes, class: MediaContent
    rss_entry.elements :enclosure, as: :enclosure_nodes, class: Enclosure

    rss_entry.prepend(RSSAuthorCapture)
  end

  def extend_atom_entry
    atom_entry = Feedjira::Parser::AtomEntry

    atom_entry.elements :author, as: :author_nodes, class: AtomAuthor
    atom_entry.elements :link, as: :link_nodes, class: AtomLink
  end
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
SourceMonitor::FeedjiraExtensions.apply!
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Fetching
|
|
5
|
+
# Wraps Postgres advisory lock usage to provide a small, testable collaborator
# for coordinating fetch execution across processes.
#
# The lock is taken with pg_try_advisory_lock(namespace, key) on a connection
# checked out from +connection_pool+ and released on that same connection
# once the block finishes.
class AdvisoryLock
  # Raised by #with_lock when the lock could not be acquired.
  NotAcquiredError = Class.new(StandardError)

  def initialize(namespace:, key:, connection_pool: ActiveRecord::Base.connection_pool)
    @namespace = namespace
    @key = key
    @connection_pool = connection_pool
  end

  # Yields while holding the lock and returns whatever the pool returns for
  # the block. Raises NotAcquiredError, without yielding, when the lock is
  # unavailable. The lock is released even when the block raises.
  def with_lock
    connection_pool.with_connection do |connection|
      locked = try_lock(connection)
      raise NotAcquiredError, "advisory lock #{namespace}/#{key} busy" unless locked

      begin
        yield
      ensure
        release(connection)
      end
    end
  end

  private

  attr_reader :namespace, :key, :connection_pool

  # Both identifiers are coerced with to_i before being interpolated, so only
  # integers ever reach the SQL text.
  def try_lock(connection)
    result = connection.exec_query(
      "SELECT pg_try_advisory_lock(#{namespace.to_i}, #{key.to_i})"
    )

    truthy?(result.rows.dig(0, 0))
  end

  # Best-effort unlock: a failure must not mask the outcome of the guarded
  # block, but it is logged because an unreleased lock would otherwise go
  # unnoticed.
  def release(connection)
    connection.exec_query(
      "SELECT pg_advisory_unlock(#{namespace.to_i}, #{key.to_i})"
    )
  rescue StandardError => error
    log_release_failure(error)
    nil
  end

  # Reports an unlock failure via Rails.logger when present, else Kernel#warn.
  def log_release_failure(error)
    message = "[SourceMonitor] Failed to release advisory lock #{namespace}/#{key}: " \
              "#{error.class}: #{error.message}"

    if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
      Rails.logger.warn(message)
    else
      warn(message)
    end
  rescue StandardError
    nil
  end

  # The query result may come back as a boolean or as the string "t".
  def truthy?(value)
    value == true || value.to_s == "t"
  end
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Fetching
|
|
5
|
+
module Completion
|
|
6
|
+
# Forwards a completed fetch to the event dispatcher's after_fetch_completed
# hook. The dispatcher is injectable and defaults to SourceMonitor::Events.
class EventPublisher
  def initialize(dispatcher: SourceMonitor::Events)
    @dispatcher = dispatcher
  end

  # Returns whatever the dispatcher returns.
  def call(source:, result:)
    payload = { source: source, result: result }

    dispatcher.after_fetch_completed(**payload)
  end

  private

  attr_reader :dispatcher
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Fetching
|
|
5
|
+
module Completion
|
|
6
|
+
# After a fetch, enqueues a scrape for each newly created item that has not
# been scraped yet, provided the source has scraping and auto-scrape enabled.
class FollowUpHandler
  def initialize(enqueuer_class: SourceMonitor::Scraping::Enqueuer, job_class: SourceMonitor::ScrapeItemJob)
    @enqueuer_class = enqueuer_class
    @job_class = job_class
  end

  # Does nothing (returns nil) unless the fetch qualifies for follow-up work.
  def call(source:, result:)
    return unless should_enqueue?(source: source, result: result)

    Array(result.item_processing&.created_items).each do |item|
      next unless scrapable?(item)

      enqueuer_class.enqueue(item: item, source: source, job_class: job_class, reason: :auto)
    end
  end

  private

  attr_reader :enqueuer_class, :job_class

  # An item qualifies when it is present and has no scraped_at timestamp.
  def scrapable?(item)
    item.present? && item.scraped_at.nil?
  end

  # True only for a :fetched result on an auto-scraping source that reports
  # at least one created item.
  def should_enqueue?(source:, result:)
    return false unless result && result.status == :fetched
    return false unless source.scraping_enabled? && source.auto_scrape?

    result.item_processing&.created.to_i.positive?
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Fetching
|
|
5
|
+
module Completion
|
|
6
|
+
# Runs the retention pruner for a source once a fetch has finished, using
# the strategy from SourceMonitor.config.retention. A StandardError raised
# while pruning is logged through Rails.logger and turned into nil.
class RetentionHandler
  def initialize(pruner: SourceMonitor::Items::RetentionPruner)
    @pruner = pruner
  end

  # +result+ is accepted for signature parity but not used here.
  def call(source:, result:) # rubocop:disable Lint/UnusedMethodArgument
    strategy = SourceMonitor.config.retention.strategy

    pruner.call(source: source, strategy: strategy)
  rescue StandardError => error
    failure = "[SourceMonitor] Retention pruning failed for source #{source.id}: #{error.class} - #{error.message}"
    Rails.logger.error(failure)
    nil
  end

  private

  attr_reader :pruner
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|