source_monitor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rubocop.yml +12 -0
- data/.ruby-version +1 -0
- data/AGENTS.md +132 -0
- data/CHANGELOG.md +66 -0
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +30 -0
- data/Gemfile.lock +411 -0
- data/MIT-LICENSE +20 -0
- data/README.md +108 -0
- data/Rakefile +8 -0
- data/app/assets/builds/.keep +0 -0
- data/app/assets/config/source_monitor_manifest.js +4 -0
- data/app/assets/images/source_monitor/.keep +0 -0
- data/app/assets/javascripts/source_monitor/application.js +20 -0
- data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
- data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
- data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
- data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
- data/app/assets/svgs/source_monitor/.keep +0 -0
- data/app/controllers/concerns/.keep +0 -0
- data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
- data/app/controllers/source_monitor/application_controller.rb +62 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
- data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/health_controller.rb +10 -0
- data/app/controllers/source_monitor/items_controller.rb +116 -0
- data/app/controllers/source_monitor/logs_controller.rb +15 -0
- data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
- data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
- data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
- data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
- data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
- data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
- data/app/controllers/source_monitor/sources_controller.rb +179 -0
- data/app/helpers/source_monitor/application_helper.rb +327 -0
- data/app/jobs/source_monitor/application_job.rb +13 -0
- data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
- data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
- data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
- data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
- data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
- data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
- data/app/mailers/source_monitor/application_mailer.rb +17 -0
- data/app/models/concerns/.keep +0 -0
- data/app/models/concerns/source_monitor/loggable.rb +18 -0
- data/app/models/source_monitor/application_record.rb +5 -0
- data/app/models/source_monitor/fetch_log.rb +31 -0
- data/app/models/source_monitor/health_check_log.rb +28 -0
- data/app/models/source_monitor/item.rb +102 -0
- data/app/models/source_monitor/item_content.rb +11 -0
- data/app/models/source_monitor/log_entry.rb +56 -0
- data/app/models/source_monitor/scrape_log.rb +31 -0
- data/app/models/source_monitor/source.rb +115 -0
- data/app/views/layouts/source_monitor/application.html.erb +54 -0
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
- data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
- data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
- data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
- data/app/views/source_monitor/dashboard/index.html.erb +48 -0
- data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
- data/app/views/source_monitor/items/_details.html.erb +234 -0
- data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/items/index.html.erb +147 -0
- data/app/views/source_monitor/items/show.html.erb +3 -0
- data/app/views/source_monitor/logs/index.html.erb +208 -0
- data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
- data/app/views/source_monitor/shared/_toast.html.erb +34 -0
- data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
- data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
- data/app/views/source_monitor/sources/_details.html.erb +302 -0
- data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
- data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
- data/app/views/source_monitor/sources/_form.html.erb +143 -0
- data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
- data/app/views/source_monitor/sources/_row.html.erb +102 -0
- data/app/views/source_monitor/sources/edit.html.erb +28 -0
- data/app/views/source_monitor/sources/index.html.erb +153 -0
- data/app/views/source_monitor/sources/new.html.erb +22 -0
- data/app/views/source_monitor/sources/show.html.erb +3 -0
- data/config/coverage_baseline.json +2010 -0
- data/config/initializers/feedjira.rb +19 -0
- data/config/routes.rb +18 -0
- data/config/tailwind.config.js +17 -0
- data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
- data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
- data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
- data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
- data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
- data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
- data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
- data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
- data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
- data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
- data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
- data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
- data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
- data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
- data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
- data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
- data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
- data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
- data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
- data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
- data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
- data/docs/configuration.md +170 -0
- data/docs/deployment.md +63 -0
- data/docs/gh-cli-workflow.md +44 -0
- data/docs/installation.md +144 -0
- data/docs/troubleshooting.md +76 -0
- data/eslint.config.mjs +27 -0
- data/lib/generators/source_monitor/install/install_generator.rb +59 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
- data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
- data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
- data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
- data/lib/source_monitor/assets/bundler.rb +49 -0
- data/lib/source_monitor/assets.rb +6 -0
- data/lib/source_monitor/configuration.rb +654 -0
- data/lib/source_monitor/dashboard/queries.rb +356 -0
- data/lib/source_monitor/dashboard/quick_action.rb +7 -0
- data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
- data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
- data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
- data/lib/source_monitor/engine.rb +107 -0
- data/lib/source_monitor/events.rb +110 -0
- data/lib/source_monitor/feedjira_extensions.rb +103 -0
- data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
- data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
- data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
- data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
- data/lib/source_monitor/fetching/fetch_error.rb +88 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
- data/lib/source_monitor/fetching/retry_policy.rb +85 -0
- data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
- data/lib/source_monitor/health/source_health_check.rb +100 -0
- data/lib/source_monitor/health/source_health_monitor.rb +210 -0
- data/lib/source_monitor/health/source_health_reset.rb +68 -0
- data/lib/source_monitor/health.rb +46 -0
- data/lib/source_monitor/http.rb +85 -0
- data/lib/source_monitor/instrumentation.rb +52 -0
- data/lib/source_monitor/items/item_creator.rb +601 -0
- data/lib/source_monitor/items/retention_pruner.rb +146 -0
- data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
- data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
- data/lib/source_monitor/items/retention_strategies.rb +9 -0
- data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
- data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
- data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
- data/lib/source_monitor/jobs/visibility.rb +133 -0
- data/lib/source_monitor/logs/entry_sync.rb +69 -0
- data/lib/source_monitor/logs/filter_set.rb +163 -0
- data/lib/source_monitor/logs/query.rb +81 -0
- data/lib/source_monitor/logs/table_presenter.rb +161 -0
- data/lib/source_monitor/metrics.rb +77 -0
- data/lib/source_monitor/model_extensions.rb +109 -0
- data/lib/source_monitor/models/sanitizable.rb +76 -0
- data/lib/source_monitor/models/url_normalizable.rb +84 -0
- data/lib/source_monitor/pagination/paginator.rb +90 -0
- data/lib/source_monitor/realtime/adapter.rb +97 -0
- data/lib/source_monitor/realtime/broadcaster.rb +237 -0
- data/lib/source_monitor/realtime.rb +17 -0
- data/lib/source_monitor/release/changelog.rb +59 -0
- data/lib/source_monitor/release/runner.rb +73 -0
- data/lib/source_monitor/scheduler.rb +82 -0
- data/lib/source_monitor/scrapers/base.rb +105 -0
- data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
- data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
- data/lib/source_monitor/scrapers/readability.rb +156 -0
- data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
- data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
- data/lib/source_monitor/scraping/enqueuer.rb +125 -0
- data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
- data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
- data/lib/source_monitor/scraping/item_scraper.rb +84 -0
- data/lib/source_monitor/scraping/scheduler.rb +43 -0
- data/lib/source_monitor/scraping/state.rb +79 -0
- data/lib/source_monitor/security/authentication.rb +85 -0
- data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
- data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
- data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
- data/lib/source_monitor/version.rb +3 -0
- data/lib/source_monitor.rb +149 -0
- data/lib/tasks/recover_stalled_fetches.rake +16 -0
- data/lib/tasks/source_monitor_assets.rake +28 -0
- data/lib/tasks/source_monitor_tasks.rake +29 -0
- data/lib/tasks/test_smoke.rake +12 -0
- data/package-lock.json +3997 -0
- data/package.json +29 -0
- data/postcss.config.js +6 -0
- data/source_monitor.gemspec +46 -0
- data/stylelint.config.js +12 -0
- metadata +469 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "feedjira"
|
|
4
|
+
|
|
5
|
+
# Assign the list of Feedjira parser classes (in the order given below) and
# turn on Feedjira's whitespace stripping option.
Feedjira.configure do |config|
  parser_names = %i[
    JSONFeed
    Atom
    AtomFeedBurner
    AtomYoutube
    AtomGoogleAlerts
    GoogleDocsAtom
    ITunesRSS
    RSSFeedBurner
    RSS
  ]

  # Resolve each name strictly inside Feedjira::Parser (no lookup in Object).
  config.parsers = parser_names.map { |name| Feedjira::Parser.const_get(name, false) }
  config.strip_whitespace = true
end
|
data/config/routes.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Route table for the SourceMonitor engine.
SourceMonitor::Engine.routes.draw do
  # Standalone pages; the engine root renders the dashboard.
  get "/health", to: "health#show"
  get "/dashboard", to: "dashboard#index", as: :dashboard
  root to: "dashboard#index"

  # Log views.
  resources :logs, only: :index
  resources :fetch_logs, only: :show
  resources :scrape_logs, only: :show

  resources :items, only: %i[index show] do
    member do
      post :scrape
    end
  end

  resources :sources do
    # Create-only singular resources nested under a source, each routed to
    # its own controller.
    with_options only: :create do |source_action|
      source_action.resource :fetch, controller: "source_fetches"
      source_action.resource :retry, controller: "source_retries"
      source_action.resource :bulk_scrape, controller: "source_bulk_scrapes"
      source_action.resource :health_check, controller: "source_health_checks"
      source_action.resource :health_reset, controller: "source_health_resets"
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Tailwind CSS configuration for the engine's stylesheet build.
export default {
  // Glob patterns handed to Tailwind's `content` option.
  content: [
    "./app/views/**/*.{erb,html,html.erb}",
    "./app/helpers/**/*.rb",
    "./app/assets/stylesheets/source_monitor/**/*.css",
    "./app/assets/javascripts/**/*.{js,ts,jsx,tsx}",
    "./lib/**/*.rb",
    "./test/dummy/app/views/**/*.{erb,html,html.erb}"
  ],
  // Selector passed as Tailwind's `important` option.
  important: ".fm-admin",
  theme: { extend: {} },
  plugins: []
};
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the sourcemon_sources table together with its indexes.
# Adjacent columns that share a type and options are declared in one call;
# the resulting column order is unchanged.
class CreateSourceMonitorSources < ActiveRecord::Migration[8.0]
  def change
    create_table :sourcemon_sources do |t|
      t.string :name, :feed_url, null: false
      t.string :website_url
      t.boolean :active, null: false, default: true
      t.string :feed_format
      t.integer :fetch_interval_hours, null: false, default: 6
      t.datetime :next_fetch_at, :last_fetched_at
      t.integer :last_fetch_duration_ms, :last_http_status
      t.text :last_error
      t.datetime :last_error_at
      t.string :etag
      t.datetime :last_modified
      t.integer :failure_count, null: false, default: 0
      t.datetime :backoff_until
      t.integer :items_count, null: false, default: 0
      t.boolean :scraping_enabled, :auto_scrape, null: false, default: false
      t.jsonb :scrape_settings, null: false, default: {}
      t.string :scraper_adapter, null: false, default: "readability"
      t.boolean :requires_javascript, null: false, default: false
      t.jsonb :custom_headers, null: false, default: {}
      t.integer :items_retention_days, :max_items
      t.jsonb :metadata, null: false, default: {}

      t.timestamps

      # feed_url is unique across sources.
      t.index :feed_url, unique: true
      t.index :active
      t.index :next_fetch_at
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the sourcemon_items table, which references sourcemon_sources,
# together with its lookup and uniqueness indexes.
class CreateSourceMonitorItems < ActiveRecord::Migration[8.0]
  def change
    create_table :sourcemon_items do |t|
      t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources }
      t.string :guid, :content_fingerprint, :title, :url, :canonical_url, :author
      t.jsonb :authors, null: false, default: []
      t.text :summary, :content, :scraped_html, :scraped_content
      t.datetime :scraped_at
      t.string :scrape_status
      t.datetime :published_at, :updated_at_source
      t.jsonb :categories, :tags, :keywords, :enclosures, null: false, default: []
      t.string :media_thumbnail_url
      t.jsonb :media_content, null: false, default: []
      t.string :language, :copyright, :comments_url
      t.integer :comments_count, null: false, default: 0
      t.jsonb :metadata, null: false, default: {}
      t.timestamps

      # Single-column lookup indexes.
      t.index :guid
      t.index :content_fingerprint
      t.index :url
      t.index :scrape_status
      t.index :published_at

      # Within one source, guid and content_fingerprint are each unique.
      t.index %i[source_id guid], unique: true
      t.index %i[source_id content_fingerprint], unique: true
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the sourcemon_fetch_logs table (one row per fetch of a source)
# and its indexes.
class CreateSourceMonitorFetchLogs < ActiveRecord::Migration[8.0]
  def change
    create_table :sourcemon_fetch_logs do |t|
      t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources }
      t.boolean :success, null: false, default: false
      t.integer :items_created, :items_updated, :items_failed, null: false, default: 0
      t.datetime :started_at, null: false
      t.datetime :completed_at
      t.integer :duration_ms, :http_status
      t.jsonb :http_response_headers, null: false, default: {}
      t.string :error_class
      t.text :error_message, :error_backtrace
      t.integer :feed_size_bytes, :items_in_feed
      t.string :job_id
      t.jsonb :metadata, null: false, default: {}

      t.timestamps

      t.index :success
      t.index :started_at
      t.index :job_id
      t.index :created_at
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the sourcemon_scrape_logs table, which references both an item
# and its source, plus its indexes.
class CreateSourceMonitorScrapeLogs < ActiveRecord::Migration[8.0]
  def change
    create_table :sourcemon_scrape_logs do |t|
      t.references :item, null: false, foreign_key: { to_table: :sourcemon_items }
      t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources }
      t.boolean :success, null: false, default: false
      t.datetime :started_at, null: false
      t.datetime :completed_at
      t.integer :duration_ms, :http_status
      t.string :scraper_adapter
      t.integer :content_length
      t.string :error_class
      t.text :error_message
      t.jsonb :metadata, null: false, default: {}

      t.timestamps

      t.index :success
      t.index :created_at
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Switches sourcemon_sources.fetch_interval_hours to fetch_interval_minutes:
# renames the column, updates its default, and rescales the stored values.
class ChangeFetchIntervalToMinutes < ActiveRecord::Migration[8.0]
  def up
    rename_column :sourcemon_sources, :fetch_interval_hours, :fetch_interval_minutes
    change_column_default :sourcemon_sources, :fetch_interval_minutes, 360
    convert_stored_hours_to_minutes
  end

  def down
    # Values are converted back before the column regains its old name.
    convert_stored_minutes_to_hours
    change_column_default :sourcemon_sources, :fetch_interval_minutes, 6
    rename_column :sourcemon_sources, :fetch_interval_minutes, :fetch_interval_hours
  end

  private

  # Multiplies every stored interval by 60.
  def convert_stored_hours_to_minutes
    execute <<~SQL
      UPDATE sourcemon_sources
      SET fetch_interval_minutes = fetch_interval_minutes * 60
    SQL
  end

  # Divides every stored interval by 60, rounding, with a floor of 1.
  def convert_stored_minutes_to_hours
    execute <<~SQL
      UPDATE sourcemon_sources
      SET fetch_interval_minutes = GREATEST(1, ROUND(fetch_interval_minutes::numeric / 60.0))
    SQL
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Moves the scraped_html / scraped_content columns out of sourcemon_items
# into a dedicated sourcemon_item_contents table (at most one row per item,
# enforced by the unique index on item_id).
class CreateSourceMonitorItemContents < ActiveRecord::Migration[8.0]
  SCRAPED_COLUMNS = %i[scraped_html scraped_content].freeze

  def up
    create_table :sourcemon_item_contents do |t|
      t.references :item, null: false, foreign_key: { to_table: :sourcemon_items }, index: { unique: true }
      t.text :scraped_html, :scraped_content

      t.timestamps null: false
    end

    copy_scraped_columns_into_contents

    SCRAPED_COLUMNS.each { |column| remove_column :sourcemon_items, column, :text }
  end

  def down
    SCRAPED_COLUMNS.each { |column| add_column :sourcemon_items, column, :text }

    copy_scraped_columns_back_to_items

    drop_table :sourcemon_item_contents
  end

  private

  # Inserts one contents row for every item that has scraped data; both
  # timestamps come from the item's updated_at (or the current time if NULL).
  def copy_scraped_columns_into_contents
    execute <<~SQL
      INSERT INTO sourcemon_item_contents (item_id, scraped_html, scraped_content, created_at, updated_at)
      SELECT id, scraped_html, scraped_content, COALESCE(updated_at, CURRENT_TIMESTAMP), COALESCE(updated_at, CURRENT_TIMESTAMP)
      FROM sourcemon_items
      WHERE scraped_html IS NOT NULL OR scraped_content IS NOT NULL
    SQL
  end

  # Writes the contents rows back onto their owning items.
  def copy_scraped_columns_back_to_items
    execute <<~SQL
      UPDATE sourcemon_items items
      SET scraped_html = contents.scraped_html,
          scraped_content = contents.scraped_content
      FROM sourcemon_item_contents contents
      WHERE contents.item_id = items.id
    SQL
  end
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds fetch_status (default "idle", NOT NULL, indexed) and
# last_fetch_started_at to sourcemon_sources.
class AddFetchStatusToSourceMonitorSources < ActiveRecord::Migration[8.0]
  def change
    change_table :sourcemon_sources do |t|
      t.string :fetch_status, default: "idle", null: false
      t.datetime :last_fetch_started_at
      t.index :fetch_status
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the solid_cable_messages table with an index on each of
# channel, channel_hash and created_at.
class CreateSolidCableMessages < ActiveRecord::Migration[7.1]
  INDEXED_COLUMNS = %i[channel channel_hash created_at].freeze

  def change
    create_table :solid_cable_messages do |t|
      t.binary :channel, limit: 1024, null: false
      t.binary :payload, limit: 536_870_912, null: false
      t.datetime :created_at, null: false
      t.integer :channel_hash, limit: 8, null: false

      INDEXED_COLUMNS.each { |column| t.index column }
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds retry and circuit-state columns to sourcemon_sources and indexes
# fetch_retry_attempt and fetch_circuit_until.
class AddFetchRetryStateToSources < ActiveRecord::Migration[7.2]
  def change
    change_table :sourcemon_sources, bulk: true do |t|
      t.integer :fetch_retry_attempt, null: false, default: 0
      t.datetime :fetch_circuit_opened_at, :fetch_circuit_until
    end

    %i[fetch_retry_attempt fetch_circuit_until].each do |column|
      add_index :sourcemon_sources, column
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds health-tracking columns to sourcemon_sources and indexes
# health_status and auto_paused_until.
class AddHealthFieldsToSources < ActiveRecord::Migration[8.0]
  def change
    change_table :sourcemon_sources, bulk: true do |t|
      t.decimal :rolling_success_rate, precision: 5, scale: 4
      t.string :health_status, null: false, default: "healthy"
      t.datetime :health_status_changed_at, :auto_paused_at, :auto_paused_until
      t.decimal :health_auto_pause_threshold, precision: 5, scale: 4
    end

    %i[health_status auto_paused_until].each do |column|
      add_index :sourcemon_sources, column
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds three supporting indexes (sources.created_at, items by
# source/published_at/created_at, scrape_logs.started_at).
#
# `if_not_exists: true` replaces the previous `unless index_exists?(...)`
# guards. Inside `change`, those guards ran against the live database while
# the migration was being reverted; the index was found, `add_index` was
# never recorded, and the rollback removed nothing. With `if_not_exists`
# every `add_index` is always recorded, so a rollback drops the indexes.
#
# NOTE(review): rollback now removes these indexes by name even if they
# existed before this migration ran — confirm that is acceptable for hosts
# that pre-created them.
class OptimizeSourceMonitorDatabasePerformance < ActiveRecord::Migration[8.0]
  def change
    add_index :sourcemon_sources, :created_at,
              name: "index_sourcemon_sources_on_created_at",
              if_not_exists: true

    add_index :sourcemon_items, %i[source_id published_at created_at],
              name: "index_sourcemon_items_on_source_and_published_at",
              if_not_exists: true

    add_index :sourcemon_scrape_logs, :started_at,
              name: "index_sourcemon_scrape_logs_on_started_at",
              if_not_exists: true
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Makes sourcemon_items.guid and sourcemon_items.url NOT NULL, filling in
# any NULLs first so the constraints can be applied.
class AddNotNullConstraintsToItems < ActiveRecord::Migration[8.0]
  REQUIRED_COLUMNS = %i[guid url].freeze

  def up
    fill_missing_guids
    fill_missing_urls

    REQUIRED_COLUMNS.each { |column| change_column_null :sourcemon_items, column, false }
  end

  def down
    # Only the constraints are relaxed; backfilled values are left in place.
    REQUIRED_COLUMNS.each { |column| change_column_null :sourcemon_items, column, true }
  end

  private

  # NULL guids take the row's content_fingerprint, or a generated UUID when
  # that is NULL as well.
  def fill_missing_guids
    execute <<~SQL
      UPDATE sourcemon_items
      SET guid = COALESCE(content_fingerprint, gen_random_uuid()::text)
      WHERE guid IS NULL
    SQL
  end

  # NULL urls take the row's canonical_url, or a fixed placeholder URL.
  def fill_missing_urls
    execute <<~SQL
      UPDATE sourcemon_items
      SET url = COALESCE(canonical_url, 'https://unknown.example.com')
      WHERE url IS NULL
    SQL
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Adds query-specific indexes on sourcemon_items and sourcemon_sources.
#
# `if_not_exists: true` replaces the previous `unless index_exists?(...)`
# guards. Inside `change`, those guards were evaluated against the live
# database during a rollback, found the index, and skipped `add_index`, so
# nothing was recorded and the rollback left every index in place. With
# `if_not_exists` the calls are always recorded and therefore reversible.
class AddPerformanceIndexes < ActiveRecord::Migration[8.0]
  def change
    # Index for activity rate calculations
    # Query: SELECT COUNT(*) FROM items WHERE source_id IN (...) AND created_at >= ? GROUP BY source_id
    add_index :sourcemon_items,
              %i[source_id created_at],
              name: "index_items_on_source_and_created_at_for_rates",
              if_not_exists: true

    # Partial index for due_for_fetch queries
    # Query: SELECT * FROM sources WHERE active = true AND (next_fetch_at IS NULL OR next_fetch_at <= ?)
    add_index :sourcemon_sources,
              %i[active next_fetch_at],
              where: "active = true",
              name: "index_sources_on_active_and_next_fetch",
              if_not_exists: true

    # Partial index for failed source queries
    # Query: SELECT * FROM sources WHERE failure_count > 0
    add_index :sourcemon_sources,
              :failure_count,
              where: "failure_count > 0",
              name: "index_sources_on_failures",
              if_not_exists: true
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Adds a PostgreSQL CHECK constraint that restricts
# sourcemon_sources.fetch_status to a fixed list of values at the database
# level. This complements the application-level validation in the Source model.
class AddFetchStatusCheckConstraint < ActiveRecord::Migration[8.0]
  def up
    execute <<~SQL
      ALTER TABLE sourcemon_sources
      ADD CONSTRAINT check_fetch_status_values
      CHECK (fetch_status IN ('idle', 'queued', 'fetching', 'failed'))
    SQL
  end

  def down
    # IF EXISTS keeps the rollback from failing when the constraint has
    # already been removed.
    execute <<~SQL
      ALTER TABLE sourcemon_sources
      DROP CONSTRAINT IF EXISTS check_fetch_status_values
    SQL
  end
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates sourcemon_log_entries, a polymorphic table holding one row per
# fetch log or scrape log, and backfills it from the two existing log tables
# when migrating up.
class CreateSourceMonitorLogEntries < ActiveRecord::Migration[7.1]
  # Columns copied as-is from both legacy log tables.
  SHARED_COLUMNS = %w[
    source_id success started_at completed_at http_status duration_ms error_class error_message
  ].freeze
  # Columns copied only from fetch logs / only from scrape logs; the other
  # set is left NULL on the new row.
  FETCH_ONLY_COLUMNS = %w[items_created items_updated items_failed].freeze
  SCRAPE_ONLY_COLUMNS = %w[item_id scraper_adapter content_length].freeze

  def change
    create_table :sourcemon_log_entries do |t|
      t.references :loggable, polymorphic: true, null: false, index: { name: "index_sourcemon_log_entries_on_loggable" }
      t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources }
      t.references :item, foreign_key: { to_table: :sourcemon_items }
      t.boolean :success, null: false, default: false
      t.datetime :started_at, null: false
      t.datetime :completed_at
      t.integer :http_status, :duration_ms, :items_created, :items_updated, :items_failed
      t.string :scraper_adapter
      t.integer :content_length
      t.string :error_class
      t.text :error_message

      t.timestamps

      t.index :started_at
      t.index :success
      t.index :scraper_adapter
    end

    reversible do |direction|
      direction.up do
        say_with_time("Backfilling sourcemon_log_entries") { backfill_log_entries }
      end
    end
  end

  private

  # Copies every fetch log, then every scrape log, into the new table.
  def backfill_log_entries
    fetch_logs = table_backed_model("sourcemon_fetch_logs")
    scrape_logs = table_backed_model("sourcemon_scrape_logs")
    log_entries = table_backed_model("sourcemon_log_entries")

    copy_logs(fetch_logs, log_entries, "SourceMonitor::FetchLog", SHARED_COLUMNS + FETCH_ONLY_COLUMNS)
    copy_logs(scrape_logs, log_entries, "SourceMonitor::ScrapeLog", SHARED_COLUMNS + SCRAPE_ONLY_COLUMNS)
  end

  # Anonymous ActiveRecord class bound to a table name, so the migration does
  # not reference the application's model classes.
  def table_backed_model(table)
    Class.new(ActiveRecord::Base) { self.table_name = table }
  end

  # Creates one entry per source row, carrying over the listed columns and
  # pointing the polymorphic reference back at the original log row.
  def copy_logs(from, to, loggable_type, columns)
    from.find_each do |log|
      copied = log.attributes.slice(*columns)
      to.create!(copied.merge("loggable_type" => loggable_type, "loggable_id" => log.id))
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the sourcemon_health_check_logs table, which references
# sourcemon_sources, plus indexes on started_at and success.
class CreateSourceMonitorHealthCheckLogs < ActiveRecord::Migration[7.2]
  def change
    create_table :sourcemon_health_check_logs do |t|
      t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources }
      t.boolean :success, null: false, default: false
      t.datetime :started_at, null: false
      t.datetime :completed_at
      t.integer :duration_ms, :http_status
      t.jsonb :http_response_headers, null: false, default: {}
      t.string :error_class
      t.text :error_message

      t.timestamps

      t.index :started_at
      t.index :success
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Rebuilds the check_fetch_status_values CHECK constraint on
# sourcemon_sources so that fetch_status may also be "invalid"; rolling back
# restores the previous list of values.
class RefreshFetchStatusConstraint < ActiveRecord::Migration[8.0]
  ALLOWED_STATUSES = %w[idle queued fetching failed invalid].freeze
  PREVIOUS_STATUSES = %w[idle queued fetching failed].freeze

  def up
    replace_constraint(ALLOWED_STATUSES)
  end

  # NOTE(review): re-adding the narrower constraint presumably fails if any
  # row still has fetch_status = 'invalid' — confirm before rolling back.
  def down
    replace_constraint(PREVIOUS_STATUSES)
  end

  private

  # Drops the constraint if it is present, then re-creates it for +statuses+.
  #
  # @param statuses [Array<String>] values fetch_status is allowed to take
  def replace_constraint(statuses)
    # Quote through the migration's own connection rather than
    # ActiveRecord::Base.connection, so quoting uses the connection the
    # migration is actually running on.
    quoted_values = statuses.map { |status| connection.quote(status) }.join(", ")

    execute <<~SQL
      ALTER TABLE sourcemon_sources
      DROP CONSTRAINT IF EXISTS check_fetch_status_values
    SQL

    execute <<~SQL
      ALTER TABLE sourcemon_sources
      ADD CONSTRAINT check_fetch_status_values
      CHECK (fetch_status IN (#{quoted_values}))
    SQL
  end
end
|