source_monitor 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +15 -0
  3. data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +3 -3
  4. data/.claude/skills/sm-configure/reference/configuration-reference.md +3 -3
  5. data/.claude/skills/sm-domain-model/SKILL.md +2 -2
  6. data/.claude/skills/sm-domain-model/reference/table-structure.md +3 -1
  7. data/.claude/skills/sm-engine-migration/SKILL.md +1 -1
  8. data/.claude/skills/sm-engine-migration/reference/migration-conventions.md +1 -1
  9. data/.claude/skills/sm-health-rule/SKILL.md +18 -21
  10. data/.claude/skills/sm-health-rule/reference/health-system.md +1 -1
  11. data/.claude/skills/sm-host-setup/reference/initializer-template.md +2 -2
  12. data/.claude/skills/sm-upgrade/reference/version-history.md +17 -12
  13. data/CHANGELOG.md +42 -0
  14. data/CLAUDE.md +2 -2
  15. data/Gemfile +1 -0
  16. data/Gemfile.lock +4 -1
  17. data/README.md +3 -3
  18. data/VERSION +1 -1
  19. data/app/assets/builds/source_monitor/application.css +132 -12
  20. data/app/assets/builds/source_monitor/application.js +25 -1
  21. data/app/assets/builds/source_monitor/application.js.map +2 -2
  22. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +8 -0
  23. data/app/assets/javascripts/source_monitor/controllers/select_all_controller.js +22 -2
  24. data/app/assets/stylesheets/source_monitor/application.tailwind.css +1 -1
  25. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +57 -0
  26. data/app/controllers/source_monitor/dashboard_controller.rb +10 -1
  27. data/app/controllers/source_monitor/import_history_dismissals_controller.rb +20 -0
  28. data/app/controllers/source_monitor/source_retries_controller.rb +10 -2
  29. data/app/controllers/source_monitor/source_scrape_tests_controller.rb +73 -0
  30. data/app/controllers/source_monitor/sources_controller.rb +51 -9
  31. data/app/helpers/source_monitor/application_helper.rb +24 -0
  32. data/app/helpers/source_monitor/health_badge_helper.rb +7 -20
  33. data/app/jobs/source_monitor/fetch_feed_job.rb +32 -3
  34. data/app/jobs/source_monitor/source_health_check_job.rb +1 -1
  35. data/app/models/source_monitor/fetch_log.rb +4 -0
  36. data/app/models/source_monitor/import_history.rb +2 -0
  37. data/app/models/source_monitor/source.rb +47 -2
  38. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +94 -68
  39. data/app/views/source_monitor/dashboard/_scrape_recommendations.html.erb +17 -0
  40. data/app/views/source_monitor/dashboard/_stats.html.erb +19 -0
  41. data/app/views/source_monitor/dashboard/index.html.erb +7 -1
  42. data/app/views/source_monitor/import_sessions/health_check/_row.html.erb +2 -2
  43. data/app/views/source_monitor/shared/_pagination.html.erb +74 -0
  44. data/app/views/source_monitor/source_scrape_tests/_result.html.erb +81 -0
  45. data/app/views/source_monitor/source_scrape_tests/show.html.erb +60 -0
  46. data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +29 -0
  47. data/app/views/source_monitor/sources/_details.html.erb +19 -1
  48. data/app/views/source_monitor/sources/_empty_state_row.html.erb +1 -1
  49. data/app/views/source_monitor/sources/_import_history_panel.html.erb +12 -5
  50. data/app/views/source_monitor/sources/_row.html.erb +34 -6
  51. data/app/views/source_monitor/sources/index.html.erb +184 -132
  52. data/config/brakeman.ignore +11 -1
  53. data/config/routes.rb +5 -0
  54. data/db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb +7 -0
  55. data/db/migrate/20260306233004_add_error_category_to_fetch_logs.rb +8 -0
  56. data/db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb +11 -0
  57. data/db/migrate/20260312120000_simplify_health_status_values.rb +20 -0
  58. data/docs/configuration.md +9 -1
  59. data/docs/troubleshooting.md +9 -0
  60. data/docs/upgrade.md +31 -0
  61. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +2 -3
  62. data/lib/source_monitor/analytics/scrape_recommendations.rb +27 -0
  63. data/lib/source_monitor/configuration/health_settings.rb +0 -2
  64. data/lib/source_monitor/configuration/scraping_settings.rb +8 -1
  65. data/lib/source_monitor/dashboard/queries/stats_query.rb +12 -1
  66. data/lib/source_monitor/dashboard/queries.rb +6 -3
  67. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +6 -5
  68. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +40 -54
  69. data/lib/source_monitor/favicons/discoverer.rb +16 -0
  70. data/lib/source_monitor/favicons/svg_converter.rb +60 -0
  71. data/lib/source_monitor/fetching/cloudflare_bypass.rb +79 -0
  72. data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +82 -2
  73. data/lib/source_monitor/fetching/feed_fetcher.rb +55 -1
  74. data/lib/source_monitor/fetching/fetch_error.rb +27 -0
  75. data/lib/source_monitor/fetching/fetch_runner.rb +4 -0
  76. data/lib/source_monitor/fetching/retry_policy.rb +4 -0
  77. data/lib/source_monitor/health/import_source_health_check.rb +3 -3
  78. data/lib/source_monitor/health/source_health_monitor.rb +9 -14
  79. data/lib/source_monitor/health/source_health_reset.rb +1 -1
  80. data/lib/source_monitor/pagination/paginator.rb +18 -1
  81. data/lib/source_monitor/version.rb +1 -1
  82. data/lib/source_monitor.rb +3 -0
  83. metadata +17 -1
@@ -3,9 +3,13 @@ import { Controller } from "@hotwired/stimulus";
3
3
  export default class extends Controller {
4
4
  static targets = ["panel"];
5
5
  static classes = ["open"];
6
+ static values = { autoOpen: Boolean, removeOnClose: Boolean };
6
7
 
7
8
  connect() {
8
9
  this.handleEscape = this.handleEscape.bind(this);
10
+ if (this.autoOpenValue) {
11
+ this.open();
12
+ }
9
13
  }
10
14
 
11
15
  disconnect() {
@@ -35,6 +39,10 @@ export default class extends Controller {
35
39
  }
36
40
 
37
41
  this.teardown();
42
+
43
+ if (this.removeOnCloseValue) {
44
+ this.element.remove();
45
+ }
38
46
  }
39
47
 
40
48
  backdrop(event) {
@@ -1,18 +1,21 @@
1
1
  import { Controller } from "@hotwired/stimulus";
2
2
 
3
3
  export default class extends Controller {
4
- static targets = ["master", "item"];
4
+ static targets = ["master", "item", "actionBar", "count"];
5
5
 
6
6
  connect() {
7
7
  this.syncMaster();
8
+ this.updateActionBar();
8
9
  }
9
10
 
10
11
  itemTargetConnected() {
11
12
  this.syncMaster();
13
+ this.updateActionBar();
12
14
  }
13
15
 
14
16
  itemTargetDisconnected() {
15
17
  this.syncMaster();
18
+ this.updateActionBar();
16
19
  }
17
20
 
18
21
  toggleAll(event) {
@@ -21,16 +24,33 @@ export default class extends Controller {
21
24
  if (checkbox.disabled) return;
22
25
  checkbox.checked = checked;
23
26
  });
27
+ this.updateActionBar();
24
28
  }
25
29
 
26
30
  toggleItem() {
27
31
  this.syncMaster();
32
+ this.updateActionBar();
28
33
  }
29
34
 
30
35
  syncMaster() {
31
36
  if (!this.hasMasterTarget) return;
32
37
  const selectable = this.itemTargets.filter((checkbox) => !checkbox.disabled);
33
- const allChecked = selectable.length > 0 && selectable.every((checkbox) => checkbox.checked);
38
+ const allChecked =
39
+ selectable.length > 0 &&
40
+ selectable.every((checkbox) => checkbox.checked);
34
41
  this.masterTarget.checked = allChecked;
35
42
  }
43
+
44
+ updateActionBar() {
45
+ if (!this.hasActionBarTarget) return;
46
+ const checkedCount = this.itemTargets.filter((cb) => cb.checked).length;
47
+ if (this.hasCountTarget) {
48
+ this.countTarget.textContent = checkedCount;
49
+ }
50
+ if (checkedCount > 0) {
51
+ this.actionBarTarget.classList.remove("hidden");
52
+ } else {
53
+ this.actionBarTarget.classList.add("hidden");
54
+ }
55
+ }
36
56
  }
@@ -7,7 +7,7 @@
7
7
  @apply min-h-screen bg-slate-50 text-slate-900;
8
8
  }
9
9
 
10
- .fm-admin a {
10
+ .fm-admin a:not([class*="bg-"]) {
11
11
  @apply text-blue-600 hover:text-blue-500;
12
12
  }
13
13
  }
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  # Turns scraping on for a batch of sources selected by id.
  class BulkScrapeEnablementsController < ApplicationController
    # Reads +bulk_scrape_enablement[source_ids]+, discards entries whose
    # integer value is zero, and enables scraping (with auto-scrape and the
    # default adapter) on the matching sources that do not have it enabled.
    # Replies with a toast plus redirect for Turbo Stream requests, or a flash
    # notice plus redirect for HTML requests.
    def create
      selected_ids = Array(params.dig(:bulk_scrape_enablement, :source_ids)).map(&:to_i).reject(&:zero?)
      return handle_empty_selection if selected_ids.empty?

      enabled_count = Source
        .where(id: selected_ids, scraping_enabled: false)
        .update_all(
          scraping_enabled: true,
          auto_scrape: true,
          scraper_adapter: default_adapter,
          updated_at: Time.current
        )
      message = "Scraping enabled for #{enabled_count} #{'source'.pluralize(enabled_count)}."

      respond_to do |format|
        format.turbo_stream do
          stream = SourceMonitor::TurboStreams::StreamResponder.new
          stream.toast(message: message, level: :success)
          stream.redirect(source_monitor.sources_path)
          render turbo_stream: stream.render(view_context)
        end
        format.html { redirect_to source_monitor.sources_path, notice: message }
      end
    end

    private

    # Adapter name written to every source enabled through this endpoint.
    def default_adapter
      "readability"
    end

    # Response used when no usable source id was submitted: a warning toast
    # with a 422 status for Turbo Stream, or a redirect with an alert for HTML.
    def handle_empty_selection
      respond_to do |format|
        format.turbo_stream do
          stream = SourceMonitor::TurboStreams::StreamResponder.new
          stream.toast(message: "No sources selected.", level: :warning)
          render turbo_stream: stream.render(view_context), status: :unprocessable_entity
        end
        format.html { redirect_to source_monitor.sources_path, alert: "No sources selected." }
      end
    end
  end
end
@@ -17,11 +17,20 @@ module SourceMonitor
17
17
  ).to_a
18
18
  @job_adapter = SourceMonitor::Jobs::Visibility.adapter_name
19
19
  @job_metrics = queries.job_metrics
20
- fetch_schedule = queries.upcoming_fetch_schedule
20
+ @schedule_pages = schedule_pages_params
21
+ fetch_schedule = queries.upcoming_fetch_schedule(pages: @schedule_pages)
21
22
  @fetch_schedule_groups = fetch_schedule.groups
22
23
  @fetch_schedule_reference_time = fetch_schedule.reference_time
24
+ @scrape_candidates_count = @stats[:scrape_candidates_count]
25
+ @scrape_recommendation_threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold
23
26
  @mission_control_enabled = SourceMonitor.mission_control_enabled?
24
27
  @mission_control_dashboard_path = SourceMonitor.mission_control_dashboard_path
25
28
  end
29
+
30
+ private
31
+
32
# Returns the +schedule_pages+ request parameter as a hash, for use as the
# paging input of the upcoming fetch schedule query.
#
# The value is converted with +to_unsafe_h+ instead of +permit!+, so the
# request's parameter object is not flagged as permitted. A missing or
# non-hash value (for example +?schedule_pages=2+) yields an empty hash
# instead of raising NoMethodError.
#
# @return [Hash] submitted page selections; empty when absent or malformed
def schedule_pages_params
  raw_pages = params[:schedule_pages]
  return {} unless raw_pages.respond_to?(:to_unsafe_h)

  raw_pages.to_unsafe_h
end
26
35
  end
27
36
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  # Records that an import history entry has been dismissed.
  class ImportHistoryDismissalsController < ApplicationController
    # Stamps +dismissed_at+ with the current time on the import history found
    # by +params[:import_history_id]+. Turbo Stream requests get the
    # "source_monitor_import_history_panel" element removed; HTML requests are
    # redirected to the sources index with a notice.
    def create
      ImportHistory.find(params[:import_history_id]).update!(dismissed_at: Time.current)

      respond_to do |format|
        format.turbo_stream { render turbo_stream: turbo_stream.remove("source_monitor_import_history_panel") }
        format.html { redirect_to source_monitor.sources_path, notice: "Import dismissed" }
      end
    end
  end
end
@@ -7,8 +7,16 @@ module SourceMonitor
7
7
  before_action :set_source
8
8
 
9
9
  def create
10
- SourceMonitor::Fetching::FetchRunner.enqueue(@source.id, force: true)
11
- render_fetch_enqueue_response("Retry has been forced and will run shortly.")
10
+ result = SourceMonitor::Fetching::FetchRunner.enqueue(@source.id, force: true)
11
+
12
+ if result == :already_fetching
13
+ render_fetch_enqueue_response(
14
+ "Fetch already in progress for this source. Please wait for the current fetch to complete.",
15
+ toast_level: :warning
16
+ )
17
+ else
18
+ render_fetch_enqueue_response("Retry has been forced and will run shortly.")
19
+ end
12
20
  rescue StandardError => error
13
21
  handle_fetch_failure(error)
14
22
  end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module SourceMonitor
  # Runs a one-off test scrape for a source so the feed-supplied content of
  # one of its items can be compared with the scraped content.
  class SourceScrapeTestsController < ApplicationController
    before_action :set_source

    # Scrapes the item chosen by #pick_test_item and renders a comparison of
    # feed vs. scraped word counts and content previews. When the source has
    # no suitable item, responds with a warning instead.
    def create
      item = pick_test_item
      unless item
        handle_no_item
        return
      end

      result = SourceMonitor::Scraping::ItemScraper.new(item: item, source: @source).call

      @test_result = {
        # reload is evaluated first, so every value below is read after the scrape ran.
        item: item.reload,
        scrape_result: result,
        feed_word_count: item.item_content&.feed_word_count,
        scraped_word_count: item.item_content&.scraped_word_count,
        feed_content_preview: item.content.to_s.truncate(500),
        scraped_content_preview: item.item_content&.scraped_content.to_s.truncate(500),
        improvement: compute_improvement(item)
      }

      respond_to do |format|
        format.turbo_stream do
          # Swap the test modal for the result partial appended to <body>.
          render turbo_stream: [
            turbo_stream.remove("scrape_test_modal_#{@source.id}"),
            turbo_stream.append_all("body",
              partial: "source_monitor/source_scrape_tests/result",
              locals: { source: @source, test_result: @test_result })
          ]
        end
        format.html { render :show }
      end
    end

    private

    # Loads the source named by +params[:source_id]+.
    def set_source
      @source = Source.find(params[:source_id])
    end

    # Picks the source's item with the latest +published_at+ among items that
    # have an item_content row with a feed word count.
    #
    # The joined table is referenced through +ItemContent.table_name+ rather
    # than a hard-coded name, so the condition follows whatever table name
    # the model is configured with.
    def pick_test_item
      @source.items
        .joins(:item_content)
        .where.not(ItemContent.table_name => { feed_word_count: nil })
        .order(published_at: :desc)
        .first
    end

    # Response used when no item qualifies for a test scrape: a warning toast
    # for Turbo Stream, or a redirect to the source page with an alert.
    def handle_no_item
      respond_to do |format|
        format.turbo_stream do
          responder = SourceMonitor::TurboStreams::StreamResponder.new
          responder.toast(message: "No items with feed content available for test scrape.", level: :warning)
          render turbo_stream: responder.render(view_context)
        end
        format.html do
          redirect_to source_monitor.source_path(@source), alert: "No items available for test scrape."
        end
      end
    end

    # Percentage change of the scraped word count relative to the feed word
    # count, rounded to one decimal. Missing counts are treated as 0, and a
    # zero feed count yields 0 to avoid dividing by zero.
    def compute_improvement(item)
      feed = item.item_content&.feed_word_count.to_i
      scraped = item.item_content&.scraped_word_count.to_i
      return 0 if feed.zero?

      ((scraped - feed).to_f / feed * 100).round(1)
    end
  end
end
@@ -20,29 +20,27 @@ module SourceMonitor
20
20
 
21
21
  def index
22
22
  @search_params = sanitized_search_params
23
- @q = build_search_query
23
+ expand_scrape_recommendation_filter
24
+ @q = build_search_query(params: @search_params)
24
25
 
25
- paginator = SourceMonitor::Pagination::Paginator.new(
26
+ @paginator = SourceMonitor::Pagination::Paginator.new(
26
27
  scope: @q.result,
27
28
  page: params[:page],
28
29
  per_page: params[:per_page] || PER_PAGE
29
30
  ).paginate
30
31
 
31
- @sources = paginator.records
32
- @page = paginator.page
33
- @has_next_page = paginator.has_next_page
34
- @has_previous_page = paginator.has_previous_page
32
+ @sources = @paginator.records
35
33
 
36
34
  @search_term = @search_params[SEARCH_FIELD.to_s].to_s.strip
37
35
  @search_field = SEARCH_FIELD
38
36
 
39
37
  metrics = SourceMonitor::Analytics::SourcesIndexMetrics.new(
40
38
  base_scope: Source.all,
41
- result_scope: paginator.records,
39
+ result_scope: @paginator.records,
42
40
  search_params: @search_params
43
41
  )
44
42
 
45
- @recent_import_histories = SourceMonitor::ImportHistory.recent_for(source_monitor_current_user&.id).limit(5)
43
+ @recent_import_histories = SourceMonitor::ImportHistory.not_dismissed.recent_for(source_monitor_current_user&.id).limit(5)
46
44
 
47
45
  @fetch_interval_distribution = metrics.fetch_interval_distribution
48
46
  @fetch_interval_filter = metrics.fetch_interval_filter
@@ -62,6 +60,8 @@ module SourceMonitor
62
60
  @avg_feed_word_counts = {}
63
61
  @avg_scraped_word_counts = {}
64
62
  end
63
+
64
+ @scrape_candidate_ids = compute_scrape_candidate_ids
65
65
  end
66
66
 
67
67
  def show
@@ -90,8 +90,17 @@ module SourceMonitor
90
90
  end
91
91
 
92
92
  def update
93
+ scraping_was_disabled = !@source.scraping_enabled?
94
+
93
95
  if @source.update(source_params)
94
- redirect_to source_monitor.source_path(@source), notice: "Source updated successfully"
96
+ notice = "Source updated successfully"
97
+
98
+ if scraping_was_disabled && @source.scraping_enabled?
99
+ enqueued = enqueue_unscraped_items(@source)
100
+ notice = "Auto-scraping enabled. #{enqueued} existing #{'item'.pluralize(enqueued)} queued for scraping."
101
+ end
102
+
103
+ redirect_to source_monitor.source_path(@source), notice: notice
95
104
  else
96
105
  render :edit, status: :unprocessable_entity
97
106
  end
@@ -180,6 +189,39 @@ module SourceMonitor
180
189
  end
181
190
  end
182
191
 
192
# Rewrites the synthetic "recommend" value of the scraping filter into the
# concrete search predicates it stands for: active sources with scraping
# disabled whose average feed word count is below the configured
# recommendation threshold. Any other filter value is left untouched.
#
# A nil threshold is sent as "0" rather than "". A blank value would
# presumably be dropped by the search layer, making the filter list every
# active non-scraping source, whereas compute_scrape_candidate_ids treats a
# nil threshold as "no candidates".
def expand_scrape_recommendation_filter
  return unless @search_params["scraping_enabled_eq"] == "recommend"

  threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold

  # Plain assignment overwrites the "recommend" placeholder.
  @search_params["scraping_enabled_eq"] = "false"
  @search_params["active_eq"] = "true"
  @search_params["avg_feed_words_lt"] = (threshold || 0).to_s
end
201
+
202
# Builds the Set of ids for the sources in @sources that qualify as scrape
# candidates: the average feed word count recorded in @avg_feed_word_counts
# is present and below the configured recommendation threshold, and
# scraping is not enabled. Returns an empty Set when the threshold is nil
# or not positive.
def compute_scrape_candidate_ids
  limit = SourceMonitor.config.scraping.scrape_recommendation_threshold
  return Set.new if limit.nil? || limit <= 0

  @sources.each_with_object(Set.new) do |source, ids|
    average = @avg_feed_word_counts[source.id]
    next unless average.present? && average < limit
    next if source.scraping_enabled?

    ids << source.id
  end
end
213
+
214
# Queues the source's unscraped items through BulkSourceScraper and returns
# the number enqueued. Any StandardError is logged as a warning (when a
# Rails logger is available) and reported as 0 so the caller can continue.
def enqueue_unscraped_items(source)
  SourceMonitor::Scraping::BulkSourceScraper
    .new(source: source, selection: :unscraped)
    .call
    .enqueued_count
rescue StandardError => failure
  if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
    Rails.logger.warn("[SourceMonitor] Failed to enqueue unscraped items: #{failure.message}")
  end
  0
end
224
+
183
225
  def enqueue_favicon_fetch(source)
184
226
  return unless defined?(ActiveStorage)
185
227
  return unless SourceMonitor.config.favicons.enabled?
@@ -249,6 +249,30 @@ module SourceMonitor
249
249
  end
250
250
  end
251
251
 
252
# Builds the sequence of entries for a pagination control.
#
# Returns +[1]+ when there is at most one page, and every page number when
# the total is no larger than +(2 * window) + 3+. Otherwise returns the first
# page, the last page, and the pages within +window+ of +current_page+, in
# ascending order, with +:gap+ inserted wherever page numbers are skipped.
#
# @param current_page [Integer] page currently displayed
# @param total_pages [Integer] total number of pages
# @param window [Integer] pages shown on each side of the current page
# @return [Array<Integer, Symbol>] page numbers interleaved with +:gap+
def pagination_page_numbers(current_page:, total_pages:, window: 2)
  return [ 1 ] if total_pages <= 1
  return (1..total_pages).to_a if total_pages <= (2 * window) + 3

  neighbours = ((current_page - window)..(current_page + window)).select do |number|
    number >= 1 && number <= total_pages
  end
  visible = ([ 1, total_pages ] + neighbours).uniq.sort

  previous = 0
  visible.each_with_object([]) do |number, entries|
    entries << :gap if number > previous + 1
    entries << number
    previous = number
  end
end
275
+
252
276
  private
253
277
 
254
278
  def external_link_icon
@@ -5,15 +5,13 @@ module SourceMonitor
5
5
  def source_health_badge(source, override: nil)
6
6
  return override if override.present?
7
7
 
8
- status = source&.health_status.presence || "healthy"
8
+ status = source&.health_status.presence || "working"
9
9
 
10
10
  mapping = {
11
- "healthy" => { label: "Healthy", classes: "bg-green-100 text-green-700", show_spinner: false },
12
- "warning" => { label: "Needs Attention", classes: "bg-amber-100 text-amber-700", show_spinner: false },
13
- "critical" => { label: "Failing", classes: "bg-rose-100 text-rose-700", show_spinner: false },
14
- "declining" => { label: "Declining", classes: "bg-orange-100 text-orange-700", show_spinner: false },
11
+ "working" => { label: "Working", classes: "bg-green-100 text-green-700", show_spinner: false },
12
+ "declining" => { label: "Declining", classes: "bg-yellow-100 text-yellow-700", show_spinner: false },
15
13
  "improving" => { label: "Improving", classes: "bg-sky-100 text-sky-700", show_spinner: false },
16
- "auto_paused" => { label: "Auto-Paused", classes: "bg-amber-100 text-amber-700", show_spinner: false },
14
+ "failing" => { label: "Failing", classes: "bg-rose-100 text-rose-700", show_spinner: false },
17
15
  "unknown" => { label: "Unknown", classes: "bg-slate-100 text-slate-600", show_spinner: false }
18
16
  }
19
17
 
@@ -21,11 +19,11 @@ module SourceMonitor
21
19
  end
22
20
 
23
21
  def source_health_actions(source)
24
- status = source&.health_status.presence || "healthy"
22
+ status = source&.health_status.presence || "working"
25
23
  helpers = SourceMonitor::Engine.routes.url_helpers
26
24
 
27
25
  case status
28
- when "critical", "declining"
26
+ when "failing", "declining"
29
27
  [
30
28
  {
31
29
  key: :full_fetch,
@@ -44,17 +42,6 @@ module SourceMonitor
44
42
  data: { testid: "source-health-action-health_check" }
45
43
  }
46
44
  ]
47
- when "auto_paused"
48
- [
49
- {
50
- key: :reset,
51
- label: "Reset to Active Status",
52
- description: "Clears the pause window, failure counters, and schedules the next fetch using the configured interval.",
53
- path: helpers.source_health_reset_path(source),
54
- method: :post,
55
- data: { testid: "source-health-action-reset" }
56
- }
57
- ]
58
45
  else
59
46
  []
60
47
  end
@@ -63,7 +50,7 @@ module SourceMonitor
63
50
  def interactive_health_status?(source, override: nil)
64
51
  return false if override.present?
65
52
 
66
- %w[critical declining auto_paused].include?(source&.health_status.presence)
53
+ %w[failing declining].include?(source&.health_status.presence)
67
54
  end
68
55
  end
69
56
  end
@@ -7,15 +7,22 @@ module SourceMonitor
7
7
 
8
8
  source_monitor_queue :fetch
9
9
 
10
+ SCHEDULED_CONCURRENCY_MAX_ATTEMPTS = 5
11
+
10
12
  discard_on ActiveJob::DeserializationError
11
- retry_on SourceMonitor::Fetching::FetchRunner::ConcurrencyError,
12
- wait: FETCH_CONCURRENCY_RETRY_WAIT,
13
- attempts: 5
13
+
14
+ rescue_from SourceMonitor::Fetching::FetchRunner::ConcurrencyError do |error|
15
+ handle_concurrency_error(error)
16
+ end
14
17
 
15
18
  def perform(source_id, force: false)
19
+ @source_id = source_id
20
+ @force = force
21
+
16
22
  source = SourceMonitor::Source.find_by(id: source_id)
17
23
  return unless source
18
24
 
25
+ @source = source
19
26
  return unless should_run?(source, force: force)
20
27
 
21
28
  SourceMonitor::Fetching::FetchRunner.new(source: source, force: force).run
@@ -25,6 +32,28 @@ module SourceMonitor
25
32
 
26
33
  private
27
34
 
35
# Handles a FetchRunner concurrency error raised while performing the job.
#
# Scheduled (non-forced) runs are retried after FETCH_CONCURRENCY_RETRY_WAIT
# while the execution count is below SCHEDULED_CONCURRENCY_MAX_ATTEMPTS;
# after that the error is re-raised. Forced runs are not retried: the skip
# is logged and a source still marked "queued" is set back to "idle".
def handle_concurrency_error(error)
  unless @force
    raise error unless executions < SCHEDULED_CONCURRENCY_MAX_ATTEMPTS

    return retry_job(wait: FETCH_CONCURRENCY_RETRY_WAIT)
  end

  log_force_fetch_skipped
  @source.update_columns(fetch_status: "idle") if @source&.fetch_status == "queued"
end
+
49
# Writes an info line noting that a forced fetch was skipped because a
# fetch is already running. Does nothing without a Rails logger, and
# swallows any StandardError raised while logging (returning nil).
def log_force_fetch_skipped
  logger_available = defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

  if logger_available
    Rails.logger.info("[SourceMonitor::FetchFeedJob] Fetch already in progress for source #{@source_id}, skipping force-fetch")
  end
rescue StandardError
  nil
end
56
+
28
57
  def should_run?(source, force:)
29
58
  return true if force
30
59
 
@@ -24,7 +24,7 @@ module SourceMonitor
24
24
  nil
25
25
  end
26
26
 
27
- DEGRADED_STATUSES = %w[declining critical warning].freeze
27
+ DEGRADED_STATUSES = %w[declining failing].freeze
28
28
 
29
29
  private
30
30
 
@@ -12,11 +12,15 @@ module SourceMonitor
12
12
  attribute :items_failed, :integer, default: 0
13
13
  attribute :http_response_headers, default: -> { {} }
14
14
 
15
+ ERROR_CATEGORIES = %w[network parse blocked auth unknown].freeze
16
+
15
17
  validates :source, presence: true
16
18
  validates :items_created, :items_updated, :items_failed,
17
19
  numericality: { greater_than_or_equal_to: 0 }
20
+ validates :error_category, inclusion: { in: ERROR_CATEGORIES }, allow_nil: true
18
21
 
19
22
  scope :for_job, ->(job_id) { where(job_id:) }
23
+ scope :by_category, ->(category) { where(error_category: category) }
20
24
 
21
25
  SourceMonitor::ModelExtensions.register(self, :fetch_log)
22
26
 
@@ -4,6 +4,8 @@ module SourceMonitor
4
4
  class ImportHistory < ApplicationRecord
5
5
  validates :user_id, presence: true
6
6
 
7
+ scope :not_dismissed, -> { where(dismissed_at: nil) }
8
+
7
9
  scope :recent_for, lambda { |user_id|
8
10
  scope = order(created_at: :desc)
9
11
  scope = scope.where(user_id: user_id) if user_id
@@ -34,7 +34,7 @@ module SourceMonitor
34
34
  attribute :custom_headers, default: -> { {} }
35
35
  attribute :metadata, default: -> { {} }
36
36
  attribute :fetch_status, :string, default: "idle"
37
- attribute :health_status, :string, default: "healthy"
37
+ attribute :health_status, :string, default: "working"
38
38
 
39
39
  sanitizes_string_attributes :name, :feed_url, :website_url, :scraper_adapter
40
40
  sanitizes_hash_attributes :scrape_settings, :custom_headers, :metadata
@@ -61,9 +61,28 @@ module SourceMonitor
61
61
  active.where(arel_table[:next_fetch_at].eq(nil).or(arel_table[:next_fetch_at].lteq(reference_time)))
62
62
  end
63
63
 
64
# Scope of active sources with scraping disabled whose items have an
# average feed word count below +threshold+ (items without a feed word
# count are left out of the average). Returns +none+ when the threshold,
# converted with +to_i+, is not positive.
#
# @param threshold [#to_i] word-count limit; defaults to the configured
#   scrape recommendation threshold
def scrape_candidates(threshold: SourceMonitor.config.scraping.scrape_recommendation_threshold)
  word_limit = threshold.to_i
  return none unless word_limit.positive?

  low_word_count_sources =
    "#{table_name}.id IN (
      SELECT i.source_id
      FROM #{Item.table_name} i
      INNER JOIN #{ItemContent.table_name} ic ON ic.item_id = i.id
      WHERE ic.feed_word_count IS NOT NULL
      GROUP BY i.source_id
      HAVING AVG(ic.feed_word_count) < ?
    )"

  active.where(scraping_enabled: false).where(low_word_count_sources, word_limit)
end
81
+
64
82
  def ransackable_attributes(_auth_object = nil)
65
83
  %w[name feed_url website_url created_at fetch_interval_minutes items_count last_fetched_at
66
- active health_status feed_format scraper_adapter]
84
+ active health_status feed_format scraper_adapter scraping_enabled
85
+ new_items_per_day avg_feed_words avg_scraped_words]
67
86
  end
68
87
 
69
88
  def ransackable_associations(_auth_object = nil)
@@ -71,6 +90,32 @@ module SourceMonitor
71
90
  end
72
91
  end
73
92
 
93
+ ransacker :new_items_per_day do
94
+ Arel.sql(
95
+ "(SELECT COUNT(*) / 30.0 FROM #{Item.table_name} i" \
96
+ " WHERE i.source_id = #{table_name}.id" \
97
+ " AND i.created_at >= NOW() - INTERVAL '30 days')"
98
+ )
99
+ end
100
+
101
+ ransacker :avg_feed_words do
102
+ Arel.sql(
103
+ "(SELECT AVG(ic.feed_word_count) FROM #{ItemContent.table_name} ic" \
104
+ " INNER JOIN #{Item.table_name} i ON i.id = ic.item_id" \
105
+ " WHERE i.source_id = #{table_name}.id" \
106
+ " AND ic.feed_word_count IS NOT NULL)"
107
+ )
108
+ end
109
+
110
+ ransacker :avg_scraped_words do
111
+ Arel.sql(
112
+ "(SELECT AVG(ic.scraped_word_count) FROM #{ItemContent.table_name} ic" \
113
+ " INNER JOIN #{Item.table_name} i ON i.id = ic.item_id" \
114
+ " WHERE i.source_id = #{table_name}.id" \
115
+ " AND ic.scraped_word_count IS NOT NULL)"
116
+ )
117
+ end
118
+
74
119
  def fetch_interval_minutes=(value)
75
120
  self[:fetch_interval_minutes] = value.presence && value.to_i
76
121
  end