source_monitor 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/.rubocop.yml +12 -0
  4. data/.ruby-version +1 -0
  5. data/AGENTS.md +132 -0
  6. data/CHANGELOG.md +66 -0
  7. data/CONTRIBUTING.md +31 -0
  8. data/Gemfile +30 -0
  9. data/Gemfile.lock +411 -0
  10. data/MIT-LICENSE +20 -0
  11. data/README.md +108 -0
  12. data/Rakefile +8 -0
  13. data/app/assets/builds/.keep +0 -0
  14. data/app/assets/config/source_monitor_manifest.js +4 -0
  15. data/app/assets/images/source_monitor/.keep +0 -0
  16. data/app/assets/javascripts/source_monitor/application.js +20 -0
  17. data/app/assets/javascripts/source_monitor/controllers/async_submit_controller.js +36 -0
  18. data/app/assets/javascripts/source_monitor/controllers/dropdown_controller.js +109 -0
  19. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +56 -0
  20. data/app/assets/javascripts/source_monitor/controllers/notification_controller.js +53 -0
  21. data/app/assets/javascripts/source_monitor/turbo_actions.js +13 -0
  22. data/app/assets/stylesheets/source_monitor/application.tailwind.css +13 -0
  23. data/app/assets/svgs/source_monitor/.keep +0 -0
  24. data/app/controllers/concerns/.keep +0 -0
  25. data/app/controllers/concerns/source_monitor/sanitizes_search_params.rb +81 -0
  26. data/app/controllers/source_monitor/application_controller.rb +62 -0
  27. data/app/controllers/source_monitor/dashboard_controller.rb +27 -0
  28. data/app/controllers/source_monitor/fetch_logs_controller.rb +9 -0
  29. data/app/controllers/source_monitor/health_controller.rb +10 -0
  30. data/app/controllers/source_monitor/items_controller.rb +116 -0
  31. data/app/controllers/source_monitor/logs_controller.rb +15 -0
  32. data/app/controllers/source_monitor/scrape_logs_controller.rb +9 -0
  33. data/app/controllers/source_monitor/source_bulk_scrapes_controller.rb +35 -0
  34. data/app/controllers/source_monitor/source_fetches_controller.rb +22 -0
  35. data/app/controllers/source_monitor/source_health_checks_controller.rb +34 -0
  36. data/app/controllers/source_monitor/source_health_resets_controller.rb +27 -0
  37. data/app/controllers/source_monitor/source_retries_controller.rb +22 -0
  38. data/app/controllers/source_monitor/source_turbo_responses.rb +115 -0
  39. data/app/controllers/source_monitor/sources_controller.rb +179 -0
  40. data/app/helpers/source_monitor/application_helper.rb +327 -0
  41. data/app/jobs/source_monitor/application_job.rb +13 -0
  42. data/app/jobs/source_monitor/fetch_feed_job.rb +117 -0
  43. data/app/jobs/source_monitor/item_cleanup_job.rb +48 -0
  44. data/app/jobs/source_monitor/log_cleanup_job.rb +47 -0
  45. data/app/jobs/source_monitor/schedule_fetches_job.rb +29 -0
  46. data/app/jobs/source_monitor/scrape_item_job.rb +47 -0
  47. data/app/jobs/source_monitor/source_health_check_job.rb +77 -0
  48. data/app/mailers/source_monitor/application_mailer.rb +17 -0
  49. data/app/models/concerns/.keep +0 -0
  50. data/app/models/concerns/source_monitor/loggable.rb +18 -0
  51. data/app/models/source_monitor/application_record.rb +5 -0
  52. data/app/models/source_monitor/fetch_log.rb +31 -0
  53. data/app/models/source_monitor/health_check_log.rb +28 -0
  54. data/app/models/source_monitor/item.rb +102 -0
  55. data/app/models/source_monitor/item_content.rb +11 -0
  56. data/app/models/source_monitor/log_entry.rb +56 -0
  57. data/app/models/source_monitor/scrape_log.rb +31 -0
  58. data/app/models/source_monitor/source.rb +115 -0
  59. data/app/views/layouts/source_monitor/application.html.erb +54 -0
  60. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +90 -0
  61. data/app/views/source_monitor/dashboard/_job_metrics.html.erb +82 -0
  62. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +39 -0
  63. data/app/views/source_monitor/dashboard/_stat_card.html.erb +6 -0
  64. data/app/views/source_monitor/dashboard/_stats.html.erb +9 -0
  65. data/app/views/source_monitor/dashboard/index.html.erb +48 -0
  66. data/app/views/source_monitor/fetch_logs/show.html.erb +90 -0
  67. data/app/views/source_monitor/items/_details.html.erb +234 -0
  68. data/app/views/source_monitor/items/_details_wrapper.html.erb +3 -0
  69. data/app/views/source_monitor/items/index.html.erb +147 -0
  70. data/app/views/source_monitor/items/show.html.erb +3 -0
  71. data/app/views/source_monitor/logs/index.html.erb +208 -0
  72. data/app/views/source_monitor/scrape_logs/show.html.erb +73 -0
  73. data/app/views/source_monitor/shared/_toast.html.erb +34 -0
  74. data/app/views/source_monitor/sources/_bulk_scrape_form.html.erb +64 -0
  75. data/app/views/source_monitor/sources/_bulk_scrape_modal.html.erb +53 -0
  76. data/app/views/source_monitor/sources/_details.html.erb +302 -0
  77. data/app/views/source_monitor/sources/_details_wrapper.html.erb +3 -0
  78. data/app/views/source_monitor/sources/_empty_state_row.html.erb +5 -0
  79. data/app/views/source_monitor/sources/_fetch_interval_heatmap.html.erb +46 -0
  80. data/app/views/source_monitor/sources/_form.html.erb +143 -0
  81. data/app/views/source_monitor/sources/_health_status_badge.html.erb +46 -0
  82. data/app/views/source_monitor/sources/_row.html.erb +102 -0
  83. data/app/views/source_monitor/sources/edit.html.erb +28 -0
  84. data/app/views/source_monitor/sources/index.html.erb +153 -0
  85. data/app/views/source_monitor/sources/new.html.erb +22 -0
  86. data/app/views/source_monitor/sources/show.html.erb +3 -0
  87. data/config/coverage_baseline.json +2010 -0
  88. data/config/initializers/feedjira.rb +19 -0
  89. data/config/routes.rb +18 -0
  90. data/config/tailwind.config.js +17 -0
  91. data/db/migrate/20241008120000_create_source_monitor_sources.rb +40 -0
  92. data/db/migrate/20241008121000_create_source_monitor_items.rb +44 -0
  93. data/db/migrate/20241008122000_create_source_monitor_fetch_logs.rb +32 -0
  94. data/db/migrate/20241008123000_create_source_monitor_scrape_logs.rb +25 -0
  95. data/db/migrate/20251008183000_change_fetch_interval_to_minutes.rb +23 -0
  96. data/db/migrate/20251009090000_create_source_monitor_item_contents.rb +38 -0
  97. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +5 -0
  98. data/db/migrate/20251010090000_add_adaptive_fetching_toggle_to_sources.rb +7 -0
  99. data/db/migrate/20251010123000_add_deleted_at_to_source_monitor_items.rb +8 -0
  100. data/db/migrate/20251010153000_add_type_to_source_monitor_sources.rb +8 -0
  101. data/db/migrate/20251010154500_add_fetch_status_to_source_monitor_sources.rb +9 -0
  102. data/db/migrate/20251010160000_create_solid_cable_messages.rb +16 -0
  103. data/db/migrate/20251011090000_add_fetch_retry_state_to_sources.rb +14 -0
  104. data/db/migrate/20251012090000_add_health_fields_to_sources.rb +17 -0
  105. data/db/migrate/20251012100000_optimize_source_monitor_database_performance.rb +13 -0
  106. data/db/migrate/20251014064947_add_not_null_constraints_to_items.rb +30 -0
  107. data/db/migrate/20251014171659_add_performance_indexes.rb +29 -0
  108. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +18 -0
  109. data/db/migrate/20251015100000_create_source_monitor_log_entries.rb +89 -0
  110. data/db/migrate/20251022100000_create_source_monitor_health_check_logs.rb +22 -0
  111. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +29 -0
  112. data/docs/configuration.md +170 -0
  113. data/docs/deployment.md +63 -0
  114. data/docs/gh-cli-workflow.md +44 -0
  115. data/docs/installation.md +144 -0
  116. data/docs/troubleshooting.md +76 -0
  117. data/eslint.config.mjs +27 -0
  118. data/lib/generators/source_monitor/install/install_generator.rb +59 -0
  119. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +155 -0
  120. data/lib/source_monitor/analytics/source_activity_rates.rb +53 -0
  121. data/lib/source_monitor/analytics/source_fetch_interval_distribution.rb +57 -0
  122. data/lib/source_monitor/analytics/sources_index_metrics.rb +92 -0
  123. data/lib/source_monitor/assets/bundler.rb +49 -0
  124. data/lib/source_monitor/assets.rb +6 -0
  125. data/lib/source_monitor/configuration.rb +654 -0
  126. data/lib/source_monitor/dashboard/queries.rb +356 -0
  127. data/lib/source_monitor/dashboard/quick_action.rb +7 -0
  128. data/lib/source_monitor/dashboard/quick_actions_presenter.rb +26 -0
  129. data/lib/source_monitor/dashboard/recent_activity.rb +30 -0
  130. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +77 -0
  131. data/lib/source_monitor/dashboard/turbo_broadcaster.rb +87 -0
  132. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +126 -0
  133. data/lib/source_monitor/engine.rb +107 -0
  134. data/lib/source_monitor/events.rb +110 -0
  135. data/lib/source_monitor/feedjira_extensions.rb +103 -0
  136. data/lib/source_monitor/fetching/advisory_lock.rb +54 -0
  137. data/lib/source_monitor/fetching/completion/event_publisher.rb +22 -0
  138. data/lib/source_monitor/fetching/completion/follow_up_handler.rb +37 -0
  139. data/lib/source_monitor/fetching/completion/retention_handler.rb +30 -0
  140. data/lib/source_monitor/fetching/feed_fetcher.rb +627 -0
  141. data/lib/source_monitor/fetching/fetch_error.rb +88 -0
  142. data/lib/source_monitor/fetching/fetch_runner.rb +142 -0
  143. data/lib/source_monitor/fetching/retry_policy.rb +85 -0
  144. data/lib/source_monitor/fetching/stalled_fetch_reconciler.rb +146 -0
  145. data/lib/source_monitor/health/source_health_check.rb +100 -0
  146. data/lib/source_monitor/health/source_health_monitor.rb +210 -0
  147. data/lib/source_monitor/health/source_health_reset.rb +68 -0
  148. data/lib/source_monitor/health.rb +46 -0
  149. data/lib/source_monitor/http.rb +85 -0
  150. data/lib/source_monitor/instrumentation.rb +52 -0
  151. data/lib/source_monitor/items/item_creator.rb +601 -0
  152. data/lib/source_monitor/items/retention_pruner.rb +146 -0
  153. data/lib/source_monitor/items/retention_strategies/destroy.rb +26 -0
  154. data/lib/source_monitor/items/retention_strategies/soft_delete.rb +50 -0
  155. data/lib/source_monitor/items/retention_strategies.rb +9 -0
  156. data/lib/source_monitor/jobs/cleanup_options.rb +85 -0
  157. data/lib/source_monitor/jobs/fetch_failure_subscriber.rb +129 -0
  158. data/lib/source_monitor/jobs/solid_queue_metrics.rb +199 -0
  159. data/lib/source_monitor/jobs/visibility.rb +133 -0
  160. data/lib/source_monitor/logs/entry_sync.rb +69 -0
  161. data/lib/source_monitor/logs/filter_set.rb +163 -0
  162. data/lib/source_monitor/logs/query.rb +81 -0
  163. data/lib/source_monitor/logs/table_presenter.rb +161 -0
  164. data/lib/source_monitor/metrics.rb +77 -0
  165. data/lib/source_monitor/model_extensions.rb +109 -0
  166. data/lib/source_monitor/models/sanitizable.rb +76 -0
  167. data/lib/source_monitor/models/url_normalizable.rb +84 -0
  168. data/lib/source_monitor/pagination/paginator.rb +90 -0
  169. data/lib/source_monitor/realtime/adapter.rb +97 -0
  170. data/lib/source_monitor/realtime/broadcaster.rb +237 -0
  171. data/lib/source_monitor/realtime.rb +17 -0
  172. data/lib/source_monitor/release/changelog.rb +59 -0
  173. data/lib/source_monitor/release/runner.rb +73 -0
  174. data/lib/source_monitor/scheduler.rb +82 -0
  175. data/lib/source_monitor/scrapers/base.rb +105 -0
  176. data/lib/source_monitor/scrapers/fetchers/http_fetcher.rb +97 -0
  177. data/lib/source_monitor/scrapers/parsers/readability_parser.rb +101 -0
  178. data/lib/source_monitor/scrapers/readability.rb +156 -0
  179. data/lib/source_monitor/scraping/bulk_result_presenter.rb +85 -0
  180. data/lib/source_monitor/scraping/bulk_source_scraper.rb +233 -0
  181. data/lib/source_monitor/scraping/enqueuer.rb +125 -0
  182. data/lib/source_monitor/scraping/item_scraper/adapter_resolver.rb +44 -0
  183. data/lib/source_monitor/scraping/item_scraper/persistence.rb +189 -0
  184. data/lib/source_monitor/scraping/item_scraper.rb +84 -0
  185. data/lib/source_monitor/scraping/scheduler.rb +43 -0
  186. data/lib/source_monitor/scraping/state.rb +79 -0
  187. data/lib/source_monitor/security/authentication.rb +85 -0
  188. data/lib/source_monitor/security/parameter_sanitizer.rb +42 -0
  189. data/lib/source_monitor/sources/turbo_stream_presenter.rb +54 -0
  190. data/lib/source_monitor/turbo_streams/stream_responder.rb +95 -0
  191. data/lib/source_monitor/version.rb +3 -0
  192. data/lib/source_monitor.rb +149 -0
  193. data/lib/tasks/recover_stalled_fetches.rake +16 -0
  194. data/lib/tasks/source_monitor_assets.rake +28 -0
  195. data/lib/tasks/source_monitor_tasks.rake +29 -0
  196. data/lib/tasks/test_smoke.rake +12 -0
  197. data/package-lock.json +3997 -0
  198. data/package.json +29 -0
  199. data/postcss.config.js +6 -0
  200. data/source_monitor.gemspec +46 -0
  201. data/stylelint.config.js +12 -0
  202. metadata +469 -0
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "feedjira"
4
+
5
+ Feedjira.configure do |config| # Feedjira settings applied when this initializer is loaded.
6
+ config.parsers = [ # Explicit parser list; NOTE(review): order presumably sets detection priority - confirm against Feedjira docs.
7
+ Feedjira::Parser::JSONFeed,
8
+ Feedjira::Parser::Atom,
9
+ Feedjira::Parser::AtomFeedBurner,
10
+ Feedjira::Parser::AtomYoutube,
11
+ Feedjira::Parser::AtomGoogleAlerts,
12
+ Feedjira::Parser::GoogleDocsAtom,
13
+ Feedjira::Parser::ITunesRSS,
14
+ Feedjira::Parser::RSSFeedBurner,
15
+ Feedjira::Parser::RSS
16
+ ]
17
+
18
+ config.strip_whitespace = true # Turns on Feedjira's strip_whitespace option.
19
+ end
data/config/routes.rb ADDED
@@ -0,0 +1,18 @@
1
+ SourceMonitor::Engine.routes.draw do # Routes are drawn on the engine's own route set.
2
+ get "/health", to: "health#show" # Health endpoint served by the health controller's show action.
3
+ get "/dashboard", to: "dashboard#index", as: :dashboard
4
+ root to: "dashboard#index" # Root and /dashboard both map to dashboard#index.
5
+ resources :logs, only: :index
6
+ resources :fetch_logs, only: :show
7
+ resources :scrape_logs, only: :show
8
+ resources :items, only: %i[index show] do
9
+ post :scrape, on: :member # Member route: POST on a single item.
10
+ end
11
+ resources :sources do # Full REST resource; the nested singular resources below expose create only.
12
+ resource :fetch, only: :create, controller: "source_fetches"
13
+ resource :retry, only: :create, controller: "source_retries"
14
+ resource :bulk_scrape, only: :create, controller: "source_bulk_scrapes"
15
+ resource :health_check, only: :create, controller: "source_health_checks"
16
+ resource :health_reset, only: :create, controller: "source_health_resets"
17
+ end
18
+ end
@@ -0,0 +1,17 @@
1
+ const defaultContent = [ // Globs handed to Tailwind's `content` option below.
2
+ "./app/views/**/*.{erb,html,html.erb}",
3
+ "./app/helpers/**/*.rb",
4
+ "./app/assets/stylesheets/source_monitor/**/*.css",
5
+ "./app/assets/javascripts/**/*.{js,ts,jsx,tsx}",
6
+ "./lib/**/*.rb",
7
+ "./test/dummy/app/views/**/*.{erb,html,html.erb}" // Dummy-app views are included in the scan as well.
8
+ ];
9
+
10
+ export default {
11
+ content: defaultContent,
12
+ important: ".fm-admin", // Selector given as Tailwind's `important` option; NOTE(review): confirm the engine layout wraps pages in `.fm-admin`.
13
+ theme: {
14
+ extend: {}
15
+ },
16
+ plugins: []
17
+ };
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorSources < ActiveRecord::Migration[8.0] # Creates the sourcemon_sources table and its first three indexes.
4
+ def change
5
+ create_table :sourcemon_sources do |t|
6
+ t.string :name, null: false
7
+ t.string :feed_url, null: false # Required; kept unique by the index added after the table.
8
+ t.string :website_url
9
+ t.boolean :active, null: false, default: true
10
+ t.string :feed_format
11
+ t.integer :fetch_interval_hours, null: false, default: 6 # Later renamed to fetch_interval_minutes by ChangeFetchIntervalToMinutes.
12
+ t.datetime :next_fetch_at
13
+ t.datetime :last_fetched_at
14
+ t.integer :last_fetch_duration_ms
15
+ t.integer :last_http_status
16
+ t.text :last_error
17
+ t.datetime :last_error_at
18
+ t.string :etag
19
+ t.datetime :last_modified
20
+ t.integer :failure_count, null: false, default: 0
21
+ t.datetime :backoff_until
22
+ t.integer :items_count, null: false, default: 0 # NOTE(review): presumably a counter cache for items - confirm against the Item model.
23
+ t.boolean :scraping_enabled, null: false, default: false
24
+ t.boolean :auto_scrape, null: false, default: false
25
+ t.jsonb :scrape_settings, null: false, default: {} # jsonb is a PostgreSQL column type; stored non-null with an empty-object default.
26
+ t.string :scraper_adapter, null: false, default: "readability"
27
+ t.boolean :requires_javascript, null: false, default: false
28
+ t.jsonb :custom_headers, null: false, default: {}
29
+ t.integer :items_retention_days
30
+ t.integer :max_items
31
+ t.jsonb :metadata, null: false, default: {}
32
+
33
+ t.timestamps
34
+ end
35
+
36
+ add_index :sourcemon_sources, :feed_url, unique: true # At most one source row per feed URL.
37
+ add_index :sourcemon_sources, :active
38
+ add_index :sourcemon_sources, :next_fetch_at
39
+ end
40
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorItems < ActiveRecord::Migration[8.0] # Creates the sourcemon_items table, which belongs to sourcemon_sources.
4
+ def change
5
+ create_table :sourcemon_items do |t|
6
+ t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources } # source_id with a foreign key to the prefixed sources table.
7
+ t.string :guid
8
+ t.string :content_fingerprint
9
+ t.string :title
10
+ t.string :url
11
+ t.string :canonical_url
12
+ t.string :author
13
+ t.jsonb :authors, null: false, default: [] # jsonb array columns default to an empty array.
14
+ t.text :summary
15
+ t.text :content
16
+ t.text :scraped_html # Moved to sourcemon_item_contents by CreateSourceMonitorItemContents.
17
+ t.text :scraped_content # Moved to sourcemon_item_contents by CreateSourceMonitorItemContents.
18
+ t.datetime :scraped_at
19
+ t.string :scrape_status
20
+ t.datetime :published_at
21
+ t.datetime :updated_at_source # NOTE(review): presumably the feed's own "updated" timestamp, distinct from Rails' updated_at - confirm.
22
+ t.jsonb :categories, null: false, default: []
23
+ t.jsonb :tags, null: false, default: []
24
+ t.jsonb :keywords, null: false, default: []
25
+ t.jsonb :enclosures, null: false, default: []
26
+ t.string :media_thumbnail_url
27
+ t.jsonb :media_content, null: false, default: []
28
+ t.string :language
29
+ t.string :copyright
30
+ t.string :comments_url
31
+ t.integer :comments_count, null: false, default: 0
32
+ t.jsonb :metadata, null: false, default: {}
33
+ t.timestamps
34
+ end
35
+
36
+ add_index :sourcemon_items, :guid
37
+ add_index :sourcemon_items, :content_fingerprint
38
+ add_index :sourcemon_items, :url
39
+ add_index :sourcemon_items, :scrape_status
40
+ add_index :sourcemon_items, :published_at
41
+ add_index :sourcemon_items, [ :source_id, :guid ], unique: true # A guid may appear only once per source.
42
+ add_index :sourcemon_items, [ :source_id, :content_fingerprint ], unique: true # A content fingerprint may appear only once per source.
43
+ end
44
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorFetchLogs < ActiveRecord::Migration[8.0] # Creates sourcemon_fetch_logs: one row per recorded fetch of a source.
4
+ def change
5
+ create_table :sourcemon_fetch_logs do |t|
6
+ t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources } # Every fetch log must point at a source.
7
+ t.boolean :success, null: false, default: false
8
+ t.integer :items_created, null: false, default: 0
9
+ t.integer :items_updated, null: false, default: 0
10
+ t.integer :items_failed, null: false, default: 0
11
+ t.datetime :started_at, null: false # Required; completed_at and duration_ms stay nullable.
12
+ t.datetime :completed_at
13
+ t.integer :duration_ms
14
+ t.integer :http_status
15
+ t.jsonb :http_response_headers, null: false, default: {}
16
+ t.string :error_class
17
+ t.text :error_message
18
+ t.text :error_backtrace
19
+ t.integer :feed_size_bytes
20
+ t.integer :items_in_feed
21
+ t.string :job_id
22
+ t.jsonb :metadata, null: false, default: {}
23
+
24
+ t.timestamps
25
+ end
26
+
27
+ add_index :sourcemon_fetch_logs, :success
28
+ add_index :sourcemon_fetch_logs, :started_at
29
+ add_index :sourcemon_fetch_logs, :job_id
30
+ add_index :sourcemon_fetch_logs, :created_at
31
+ end
32
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorScrapeLogs < ActiveRecord::Migration[8.0] # Creates sourcemon_scrape_logs: one row per recorded scrape of an item.
4
+ def change
5
+ create_table :sourcemon_scrape_logs do |t|
6
+ t.references :item, null: false, foreign_key: { to_table: :sourcemon_items } # Required link to the scraped item.
7
+ t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources } # Source is stored on the log row in addition to the item.
8
+ t.boolean :success, null: false, default: false
9
+ t.datetime :started_at, null: false
10
+ t.datetime :completed_at
11
+ t.integer :duration_ms
12
+ t.integer :http_status
13
+ t.string :scraper_adapter
14
+ t.integer :content_length
15
+ t.string :error_class
16
+ t.text :error_message
17
+ t.jsonb :metadata, null: false, default: {}
18
+
19
+ t.timestamps
20
+ end
21
+
22
+ add_index :sourcemon_scrape_logs, :success
23
+ add_index :sourcemon_scrape_logs, :created_at
24
+ end
25
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ class ChangeFetchIntervalToMinutes < ActiveRecord::Migration[8.0] # Converts the sources' fetch interval from hours to minutes (column name, default and stored values).
4
+ def up # Order: rename the column, set the new default (360), then scale existing values by 60.
5
+ rename_column :sourcemon_sources, :fetch_interval_hours, :fetch_interval_minutes
6
+ change_column_default :sourcemon_sources, :fetch_interval_minutes, 360
7
+
8
+ execute <<~SQL
9
+ UPDATE sourcemon_sources
10
+ SET fetch_interval_minutes = fetch_interval_minutes * 60
11
+ SQL
12
+ end
13
+
14
+ def down # Reverse order: scale values back first, then restore the default and the old column name.
15
+ execute <<~SQL # Divides by 60, rounds, and never goes below 1; the ::numeric cast is PostgreSQL syntax.
16
+ UPDATE sourcemon_sources
17
+ SET fetch_interval_minutes = GREATEST(1, ROUND(fetch_interval_minutes::numeric / 60.0))
18
+ SQL
19
+
20
+ change_column_default :sourcemon_sources, :fetch_interval_minutes, 6
21
+ rename_column :sourcemon_sources, :fetch_interval_minutes, :fetch_interval_hours
22
+ end
23
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorItemContents < ActiveRecord::Migration[8.0] # Moves scraped_html/scraped_content from sourcemon_items into a new sourcemon_item_contents table.
4
+ def up
5
+ create_table :sourcemon_item_contents do |t|
6
+ t.references :item, null: false, foreign_key: { to_table: :sourcemon_items }, index: { unique: true } # Unique index: at most one content row per item.
7
+ t.text :scraped_html
8
+ t.text :scraped_content
9
+
10
+ t.timestamps(null: false)
11
+ end
12
+
13
+ execute <<~SQL # Copy existing scraped data before the old columns are removed; timestamps fall back to the current time.
14
+ INSERT INTO sourcemon_item_contents (item_id, scraped_html, scraped_content, created_at, updated_at)
15
+ SELECT id, scraped_html, scraped_content, COALESCE(updated_at, CURRENT_TIMESTAMP), COALESCE(updated_at, CURRENT_TIMESTAMP)
16
+ FROM sourcemon_items
17
+ WHERE scraped_html IS NOT NULL OR scraped_content IS NOT NULL
18
+ SQL
19
+
20
+ remove_column :sourcemon_items, :scraped_html, :text
21
+ remove_column :sourcemon_items, :scraped_content, :text
22
+ end
23
+
24
+ def down # Re-adds the columns, copies the data back, then drops the contents table.
25
+ add_column :sourcemon_items, :scraped_html, :text
26
+ add_column :sourcemon_items, :scraped_content, :text
27
+
28
+ execute <<~SQL # UPDATE ... FROM is PostgreSQL syntax.
29
+ UPDATE sourcemon_items items
30
+ SET scraped_html = contents.scraped_html,
31
+ scraped_content = contents.scraped_content
32
+ FROM sourcemon_item_contents contents
33
+ WHERE contents.item_id = items.id
34
+ SQL
35
+
36
+ drop_table :sourcemon_item_contents
37
+ end
38
+ end
@@ -0,0 +1,5 @@
1
+ class AddFeedContentReadabilityToSources < ActiveRecord::Migration[8.0] # Adds a non-null boolean flag to sources, off by default.
2
+ def change
3
+ add_column :sourcemon_sources, :feed_content_readability_enabled, :boolean, default: false, null: false
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddAdaptiveFetchingToggleToSources < ActiveRecord::Migration[7.1] # Adds a non-null boolean flag to sources, on by default. NOTE(review): targets Migration[7.1] while most sibling migrations use 8.0.
4
+ def change
5
+ add_column :sourcemon_sources, :adaptive_fetching_enabled, :boolean, null: false, default: true
6
+ end
7
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddDeletedAtToSourceMonitorItems < ActiveRecord::Migration[8.0] # Adds an indexed, nullable deleted_at timestamp to items.
4
+ def change
5
+ add_column :sourcemon_items, :deleted_at, :datetime # NOTE(review): presumably the soft-delete marker - confirm against the retention strategies.
6
+ add_index :sourcemon_items, :deleted_at
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddTypeToSourceMonitorSources < ActiveRecord::Migration[8.0] # Adds an indexed, nullable string column named `type` to sources.
4
+ def change
5
+ add_column :sourcemon_sources, :type, :string # `type` is Active Record's default STI column name; NOTE(review): presumably enables Source subclasses - confirm.
6
+ add_index :sourcemon_sources, :type
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddFetchStatusToSourceMonitorSources < ActiveRecord::Migration[8.0] # Adds fetch_status (indexed) and last_fetch_started_at to sources.
4
+ def change
5
+ add_column :sourcemon_sources, :fetch_status, :string, default: "idle", null: false # Existing and new rows start as "idle".
6
+ add_column :sourcemon_sources, :last_fetch_started_at, :datetime
7
+ add_index :sourcemon_sources, :fetch_status
8
+ end
9
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSolidCableMessages < ActiveRecord::Migration[7.1] # Creates the solid_cable_messages table. NOTE(review): will fail if the host app already has this table in the same database - confirm.
4
+ def change
5
+ create_table :solid_cable_messages do |t|
6
+ t.binary :channel, limit: 1024, null: false
7
+ t.binary :payload, limit: 536_870_912, null: false # 536_870_912 bytes = 512 MiB.
8
+ t.datetime :created_at, null: false # Only created_at is stored; there is no updated_at.
9
+ t.integer :channel_hash, limit: 8, null: false # limit: 8 requests an 8-byte integer.
10
+
11
+ t.index :channel
12
+ t.index :channel_hash
13
+ t.index :created_at
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddFetchRetryStateToSources < ActiveRecord::Migration[7.2] # Adds retry-attempt and circuit timestamp columns to sources, plus two indexes.
4
+ def change
5
+ change_table :sourcemon_sources, bulk: true do |t| # bulk: true asks Rails to combine the column additions into one ALTER TABLE.
6
+ t.integer :fetch_retry_attempt, null: false, default: 0
7
+ t.datetime :fetch_circuit_opened_at
8
+ t.datetime :fetch_circuit_until
9
+ end
10
+
11
+ add_index :sourcemon_sources, :fetch_retry_attempt
12
+ add_index :sourcemon_sources, :fetch_circuit_until
13
+ end
14
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddHealthFieldsToSources < ActiveRecord::Migration[8.0] # Adds health-tracking columns to sources, plus two indexes.
4
+ def change
5
+ change_table :sourcemon_sources, bulk: true do |t| # bulk: true asks Rails to combine the column additions into one ALTER TABLE.
6
+ t.decimal :rolling_success_rate, precision: 5, scale: 4 # Four decimal places; NOTE(review): presumably a 0..1 ratio - confirm.
7
+ t.string :health_status, null: false, default: "healthy"
8
+ t.datetime :health_status_changed_at
9
+ t.datetime :auto_paused_at
10
+ t.datetime :auto_paused_until
11
+ t.decimal :health_auto_pause_threshold, precision: 5, scale: 4 # Nullable, same precision as rolling_success_rate.
12
+ end
13
+
14
+ add_index :sourcemon_sources, :health_status
15
+ add_index :sourcemon_sources, :auto_paused_until
16
+ end
17
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ class OptimizeSourceMonitorDatabasePerformance < ActiveRecord::Migration[8.0]
4
+ def change
5
+ add_index :sourcemon_sources, :created_at, name: "index_sourcemon_sources_on_created_at" unless index_exists?(:sourcemon_sources, :created_at)
6
+
7
+ unless index_exists?(:sourcemon_items, %i[source_id published_at created_at])
8
+ add_index :sourcemon_items, %i[source_id published_at created_at], name: "index_sourcemon_items_on_source_and_published_at"
9
+ end
10
+
11
+ add_index :sourcemon_scrape_logs, :started_at, name: "index_sourcemon_scrape_logs_on_started_at" unless index_exists?(:sourcemon_scrape_logs, :started_at)
12
+ end
13
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ class AddNotNullConstraintsToItems < ActiveRecord::Migration[8.0] # Backfills NULL guid/url values on sourcemon_items, then makes both columns NOT NULL.
4
+ def up
5
+ # First, clean up any existing invalid data
6
+ # For guid: use content_fingerprint or generate a UUID as fallback
7
+ execute <<~SQL # NOTE(review): gen_random_uuid() is built in from PostgreSQL 13; older servers need pgcrypto - confirm target version.
8
+ UPDATE sourcemon_items
9
+ SET guid = COALESCE(content_fingerprint, gen_random_uuid()::text)
10
+ WHERE guid IS NULL
11
+ SQL
12
+
13
+ # For url: use canonical_url or a placeholder as fallback
14
+ execute <<~SQL
15
+ UPDATE sourcemon_items
16
+ SET url = COALESCE(canonical_url, 'https://unknown.example.com')
17
+ WHERE url IS NULL
18
+ SQL
19
+
20
+ # Now add the NOT NULL constraints
21
+ change_column_null :sourcemon_items, :guid, false
22
+ change_column_null :sourcemon_items, :url, false
23
+ end
24
+
25
+ def down # Only relaxes the constraints; values backfilled by `up` are left in place.
26
+ # Allow NULL values again
27
+ change_column_null :sourcemon_items, :guid, true
28
+ change_column_null :sourcemon_items, :url, true
29
+ end
30
+ end
@@ -0,0 +1,29 @@
1
+ class AddPerformanceIndexes < ActiveRecord::Migration[8.0]
2
+ def change
3
+ # Index for activity rate calculations
4
+ # Query: SELECT COUNT(*) FROM items WHERE source_id IN (...) AND created_at >= ? GROUP BY source_id
5
+ unless index_exists?(:sourcemon_items, [ :source_id, :created_at ], name: "index_items_on_source_and_created_at_for_rates")
6
+ add_index :sourcemon_items,
7
+ [ :source_id, :created_at ],
8
+ name: "index_items_on_source_and_created_at_for_rates"
9
+ end
10
+
11
+ # Partial index for due_for_fetch queries
12
+ # Query: SELECT * FROM sources WHERE active = true AND (next_fetch_at IS NULL OR next_fetch_at <= ?)
13
+ unless index_exists?(:sourcemon_sources, [ :active, :next_fetch_at ], name: "index_sources_on_active_and_next_fetch")
14
+ add_index :sourcemon_sources,
15
+ [ :active, :next_fetch_at ],
16
+ where: "active = true",
17
+ name: "index_sources_on_active_and_next_fetch"
18
+ end
19
+
20
+ # Partial index for failed source queries
21
+ # Query: SELECT * FROM sources WHERE failure_count > 0
22
+ unless index_exists?(:sourcemon_sources, :failure_count, name: "index_sources_on_failures")
23
+ add_index :sourcemon_sources,
24
+ :failure_count,
25
+ where: "failure_count > 0",
26
+ name: "index_sources_on_failures"
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,18 @@
1
+ class AddFetchStatusCheckConstraint < ActiveRecord::Migration[8.0] # Adds the check_fetch_status_values CHECK constraint to sourcemon_sources.
2
+ def up
3
+ # Add PostgreSQL CHECK constraint to enforce fetch_status enum values at database level
4
+ # This complements the application-level validation in the Source model
5
+ execute <<-SQL
6
+ ALTER TABLE sourcemon_sources
7
+ ADD CONSTRAINT check_fetch_status_values
8
+ CHECK (fetch_status IN ('idle', 'queued', 'fetching', 'failed'))
9
+ SQL
10
+ end
11
+
12
+ def down # Drops the constraint without IF EXISTS, so it must be present when rolling back.
13
+ execute <<-SQL
14
+ ALTER TABLE sourcemon_sources
15
+ DROP CONSTRAINT check_fetch_status_values
16
+ SQL
17
+ end
18
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorLogEntries < ActiveRecord::Migration[7.1] # Creates sourcemon_log_entries and, when migrating up, backfills it from existing fetch and scrape logs.
4
+ def change
5
+ create_table :sourcemon_log_entries do |t|
6
+ t.references :loggable, polymorphic: true, null: false, index: { name: "index_sourcemon_log_entries_on_loggable" } # Adds loggable_type/loggable_id with an explicit index name.
7
+ t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources }
8
+ t.references :item, foreign_key: { to_table: :sourcemon_items } # Nullable: fetch-log entries are backfilled with item_id nil.
9
+ t.boolean :success, null: false, default: false
10
+ t.datetime :started_at, null: false
11
+ t.datetime :completed_at
12
+ t.integer :http_status
13
+ t.integer :duration_ms
14
+ t.integer :items_created
15
+ t.integer :items_updated
16
+ t.integer :items_failed
17
+ t.string :scraper_adapter
18
+ t.integer :content_length
19
+ t.string :error_class
20
+ t.text :error_message
21
+
22
+ t.timestamps
23
+ end
24
+
25
+ add_index :sourcemon_log_entries, :started_at
26
+ add_index :sourcemon_log_entries, :success
27
+ add_index :sourcemon_log_entries, :scraper_adapter
28
+
29
+ reversible do |direction| # Only an up branch is defined; the backfill does nothing on rollback.
30
+ direction.up do
31
+ say_with_time "Backfilling sourcemon_log_entries" do
32
+ fetch_log_class = Class.new(ActiveRecord::Base) do # Anonymous table-bound models, so the backfill does not depend on the gem's model classes.
33
+ self.table_name = "sourcemon_fetch_logs"
34
+ end
35
+
36
+ scrape_log_class = Class.new(ActiveRecord::Base) do
37
+ self.table_name = "sourcemon_scrape_logs"
38
+ end
39
+
40
+ log_entry_class = Class.new(ActiveRecord::Base) do
41
+ self.table_name = "sourcemon_log_entries"
42
+ end
43
+
44
+ fetch_log_class.find_each do |log| # One create! per existing fetch log; scrape-only fields are set to nil.
45
+ log_entry_class.create!(
46
+ loggable_type: "SourceMonitor::FetchLog",
47
+ loggable_id: log.id,
48
+ source_id: log.source_id,
49
+ item_id: nil,
50
+ success: log.success,
51
+ started_at: log.started_at,
52
+ completed_at: log.completed_at,
53
+ http_status: log.http_status,
54
+ duration_ms: log.duration_ms,
55
+ items_created: log.items_created,
56
+ items_updated: log.items_updated,
57
+ items_failed: log.items_failed,
58
+ scraper_adapter: nil,
59
+ content_length: nil,
60
+ error_class: log.error_class,
61
+ error_message: log.error_message
62
+ )
63
+ end
64
+
65
+ scrape_log_class.find_each do |log| # One create! per existing scrape log; fetch-only counters are set to nil.
66
+ log_entry_class.create!(
67
+ loggable_type: "SourceMonitor::ScrapeLog",
68
+ loggable_id: log.id,
69
+ source_id: log.source_id,
70
+ item_id: log.item_id,
71
+ success: log.success,
72
+ started_at: log.started_at,
73
+ completed_at: log.completed_at,
74
+ http_status: log.http_status,
75
+ duration_ms: log.duration_ms,
76
+ items_created: nil,
77
+ items_updated: nil,
78
+ items_failed: nil,
79
+ scraper_adapter: log.scraper_adapter,
80
+ content_length: log.content_length,
81
+ error_class: log.error_class,
82
+ error_message: log.error_message
83
+ )
84
+ end
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateSourceMonitorHealthCheckLogs < ActiveRecord::Migration[7.2] # Creates sourcemon_health_check_logs: one row per recorded health check of a source.
4
+ def change
5
+ create_table :sourcemon_health_check_logs do |t|
6
+ t.references :source, null: false, foreign_key: { to_table: :sourcemon_sources } # Every health check log must point at a source.
7
+ t.boolean :success, null: false, default: false
8
+ t.datetime :started_at, null: false
9
+ t.datetime :completed_at
10
+ t.integer :duration_ms
11
+ t.integer :http_status
12
+ t.jsonb :http_response_headers, null: false, default: {}
13
+ t.string :error_class
14
+ t.text :error_message
15
+
16
+ t.timestamps
17
+ end
18
+
19
+ add_index :sourcemon_health_check_logs, :started_at
20
+ add_index :sourcemon_health_check_logs, :success
21
+ end
22
+ end
@@ -0,0 +1,29 @@
1
+ class RefreshFetchStatusConstraint < ActiveRecord::Migration[8.0]
2
+ ALLOWED_STATUSES = %w[idle queued fetching failed invalid].freeze
3
+ PREVIOUS_STATUSES = %w[idle queued fetching failed].freeze
4
+
5
+ def up
6
+ replace_constraint(ALLOWED_STATUSES)
7
+ end
8
+
9
+ def down
10
+ replace_constraint(PREVIOUS_STATUSES)
11
+ end
12
+
13
+ private
14
+
15
+ def replace_constraint(statuses)
16
+ quoted_values = statuses.map { |status| ActiveRecord::Base.connection.quote(status) }.join(", ")
17
+
18
+ execute <<~SQL
19
+ ALTER TABLE sourcemon_sources
20
+ DROP CONSTRAINT IF EXISTS check_fetch_status_values
21
+ SQL
22
+
23
+ execute <<~SQL
24
+ ALTER TABLE sourcemon_sources
25
+ ADD CONSTRAINT check_fetch_status_values
26
+ CHECK (fetch_status IN (#{quoted_values}))
27
+ SQL
28
+ end
29
+ end