source_monitor 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +15 -0
  3. data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +3 -3
  4. data/.claude/skills/sm-configure/reference/configuration-reference.md +3 -3
  5. data/.claude/skills/sm-domain-model/SKILL.md +2 -2
  6. data/.claude/skills/sm-domain-model/reference/table-structure.md +3 -1
  7. data/.claude/skills/sm-engine-migration/SKILL.md +1 -1
  8. data/.claude/skills/sm-engine-migration/reference/migration-conventions.md +1 -1
  9. data/.claude/skills/sm-health-rule/SKILL.md +18 -21
  10. data/.claude/skills/sm-health-rule/reference/health-system.md +1 -1
  11. data/.claude/skills/sm-host-setup/reference/initializer-template.md +2 -2
  12. data/.claude/skills/sm-upgrade/reference/version-history.md +17 -12
  13. data/CHANGELOG.md +42 -0
  14. data/CLAUDE.md +2 -2
  15. data/Gemfile +1 -0
  16. data/Gemfile.lock +4 -1
  17. data/README.md +3 -3
  18. data/VERSION +1 -1
  19. data/app/assets/builds/source_monitor/application.css +132 -12
  20. data/app/assets/builds/source_monitor/application.js +25 -1
  21. data/app/assets/builds/source_monitor/application.js.map +2 -2
  22. data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +8 -0
  23. data/app/assets/javascripts/source_monitor/controllers/select_all_controller.js +22 -2
  24. data/app/assets/stylesheets/source_monitor/application.tailwind.css +1 -1
  25. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +57 -0
  26. data/app/controllers/source_monitor/dashboard_controller.rb +10 -1
  27. data/app/controllers/source_monitor/import_history_dismissals_controller.rb +20 -0
  28. data/app/controllers/source_monitor/source_retries_controller.rb +10 -2
  29. data/app/controllers/source_monitor/source_scrape_tests_controller.rb +73 -0
  30. data/app/controllers/source_monitor/sources_controller.rb +51 -9
  31. data/app/helpers/source_monitor/application_helper.rb +24 -0
  32. data/app/helpers/source_monitor/health_badge_helper.rb +7 -20
  33. data/app/jobs/source_monitor/fetch_feed_job.rb +32 -3
  34. data/app/jobs/source_monitor/source_health_check_job.rb +1 -1
  35. data/app/models/source_monitor/fetch_log.rb +4 -0
  36. data/app/models/source_monitor/import_history.rb +2 -0
  37. data/app/models/source_monitor/source.rb +47 -2
  38. data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +94 -68
  39. data/app/views/source_monitor/dashboard/_scrape_recommendations.html.erb +17 -0
  40. data/app/views/source_monitor/dashboard/_stats.html.erb +19 -0
  41. data/app/views/source_monitor/dashboard/index.html.erb +7 -1
  42. data/app/views/source_monitor/import_sessions/health_check/_row.html.erb +2 -2
  43. data/app/views/source_monitor/shared/_pagination.html.erb +74 -0
  44. data/app/views/source_monitor/source_scrape_tests/_result.html.erb +81 -0
  45. data/app/views/source_monitor/source_scrape_tests/show.html.erb +60 -0
  46. data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +29 -0
  47. data/app/views/source_monitor/sources/_details.html.erb +19 -1
  48. data/app/views/source_monitor/sources/_empty_state_row.html.erb +1 -1
  49. data/app/views/source_monitor/sources/_import_history_panel.html.erb +12 -5
  50. data/app/views/source_monitor/sources/_row.html.erb +34 -6
  51. data/app/views/source_monitor/sources/index.html.erb +184 -132
  52. data/config/brakeman.ignore +11 -1
  53. data/config/routes.rb +5 -0
  54. data/db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb +7 -0
  55. data/db/migrate/20260306233004_add_error_category_to_fetch_logs.rb +8 -0
  56. data/db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb +11 -0
  57. data/db/migrate/20260312120000_simplify_health_status_values.rb +20 -0
  58. data/docs/configuration.md +9 -1
  59. data/docs/troubleshooting.md +9 -0
  60. data/docs/upgrade.md +31 -0
  61. data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +2 -3
  62. data/lib/source_monitor/analytics/scrape_recommendations.rb +27 -0
  63. data/lib/source_monitor/configuration/health_settings.rb +0 -2
  64. data/lib/source_monitor/configuration/scraping_settings.rb +8 -1
  65. data/lib/source_monitor/dashboard/queries/stats_query.rb +12 -1
  66. data/lib/source_monitor/dashboard/queries.rb +6 -3
  67. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +6 -5
  68. data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +40 -54
  69. data/lib/source_monitor/favicons/discoverer.rb +16 -0
  70. data/lib/source_monitor/favicons/svg_converter.rb +60 -0
  71. data/lib/source_monitor/fetching/cloudflare_bypass.rb +79 -0
  72. data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +82 -2
  73. data/lib/source_monitor/fetching/feed_fetcher.rb +55 -1
  74. data/lib/source_monitor/fetching/fetch_error.rb +27 -0
  75. data/lib/source_monitor/fetching/fetch_runner.rb +4 -0
  76. data/lib/source_monitor/fetching/retry_policy.rb +4 -0
  77. data/lib/source_monitor/health/import_source_health_check.rb +3 -3
  78. data/lib/source_monitor/health/source_health_monitor.rb +9 -14
  79. data/lib/source_monitor/health/source_health_reset.rb +1 -1
  80. data/lib/source_monitor/pagination/paginator.rb +18 -1
  81. data/lib/source_monitor/version.rb +1 -1
  82. data/lib/source_monitor.rb +3 -0
  83. metadata +17 -1
@@ -78,7 +78,7 @@ module SourceMonitor
78
78
  def perform_fetch(started_at, instrumentation_payload)
79
79
  response = perform_request
80
80
  handle_response(response, started_at, instrumentation_payload)
81
- rescue TimeoutError, ConnectionError, HTTPError, ParsingError => error
81
+ rescue TimeoutError, ConnectionError, HTTPError, ParsingError, BlockedError, AuthenticationError => error
82
82
  raise error
83
83
  rescue Faraday::TimeoutError => error
84
84
  raise TimeoutError.new(error.message, original_error: error)
@@ -209,12 +209,66 @@ module SourceMonitor
209
209
  )
210
210
  end
211
211
 
212
+ SNIFF_LIMIT = 4096
213
+
214
+ CLOUDFLARE_MARKERS = [
215
+ "<title>Just a moment</title>",
216
+ "<title>Attention Required</title>",
217
+ "cf-challenge",
218
+ "cf-browser-verification",
219
+ "__cf_chl_",
220
+ "data-ray="
221
+ ].freeze
222
+
223
+ CAPTCHA_MARKERS = [
224
+ "g-recaptcha",
225
+ "h-captcha"
226
+ ].freeze
227
+
228
+ LOGIN_TITLE_PATTERN = /<title>\s*(log\s*in|sign\s*in)\s*<\/title>/i
229
+
212
230
  def parse_feed(body, response)
231
+ blocked_by = detect_blocked_response(body, response)
232
+
233
+ if blocked_by == "cloudflare" && !@bypass_attempted
234
+ @bypass_attempted = true
235
+ bypass_response = CloudflareBypass.new(response: response, feed_url: source.feed_url).call
236
+ if bypass_response
237
+ body = bypass_response.body
238
+ else
239
+ raise BlockedError.new(blocked_by: blocked_by, response: response)
240
+ end
241
+ elsif blocked_by
242
+ raise BlockedError.new(blocked_by: blocked_by, response: response)
243
+ end
244
+
213
245
  Feedjira.parse(body)
246
+ rescue BlockedError
247
+ raise
214
248
  rescue StandardError => error
215
249
  raise ParsingError.new(error.message, response: response, original_error: error)
216
250
  end
217
251
 
252
+ def detect_blocked_response(body, _response)
253
+ return if body.blank?
254
+
255
+ snippet = body[0, SNIFF_LIMIT]
256
+ snippet_lower = snippet.downcase
257
+
258
+ return "cloudflare" if CLOUDFLARE_MARKERS.any? { |marker| snippet_lower.include?(marker.downcase) }
259
+ return "captcha" if CAPTCHA_MARKERS.any? { |marker| snippet_lower.include?(marker.downcase) }
260
+
261
+ if snippet_lower.match?(LOGIN_TITLE_PATTERN)
262
+ return "login_wall"
263
+ end
264
+
265
+ if snippet_lower.include?("<html") && snippet_lower.include?("<form") && snippet_lower.include?("password")
266
+ return "login_wall"
267
+ end
268
+
269
+ nil
270
+ end
271
+
218
272
  def handle_failure(error, started_at:, instrumentation_payload:)
219
273
  response = error.response
220
274
  body = response&.body
@@ -84,5 +84,32 @@ module SourceMonitor
84
84
  "Unexpected response received"
85
85
  end
86
86
  end
87
+
88
+ class BlockedError < FetchError
89
+ CODE = "blocked"
90
+
91
+ attr_reader :blocked_by
92
+
93
+ def initialize(message = nil, blocked_by: "unknown", **kwargs)
94
+ @blocked_by = blocked_by
95
+ super(message, **kwargs)
96
+ end
97
+
98
+ protected
99
+
100
+ def default_message
101
+ "Feed blocked by #{blocked_by}"
102
+ end
103
+ end
104
+
105
+ class AuthenticationError < FetchError
106
+ CODE = "authentication"
107
+
108
+ protected
109
+
110
+ def default_message
111
+ "Authentication required"
112
+ end
113
+ end
87
114
  end
88
115
  end
@@ -41,6 +41,10 @@ module SourceMonitor
41
41
  source = resolve_source(source_or_id)
42
42
  return unless source
43
43
 
44
+ if force && source.fetch_status == "fetching"
45
+ return :already_fetching
46
+ end
47
+
44
48
  # Don't broadcast here - controller handles immediate UI update
45
49
  source.update_columns(fetch_status: "queued")
46
50
  SourceMonitor::FetchFeedJob.perform_later(source.id, force: force)
@@ -21,6 +21,8 @@ module SourceMonitor
21
21
  http_5xx: { attempts: 2, wait: 10.minutes, circuit_wait: 90.minutes },
22
22
  http_4xx: { attempts: 1, wait: 45.minutes, circuit_wait: 2.hours },
23
23
  parsing: { attempts: 1, wait: 30.minutes, circuit_wait: 2.hours },
24
+ blocked: { attempts: 1, wait: 1.hour, circuit_wait: 4.hours },
25
+ authentication: { attempts: 1, wait: 1.hour, circuit_wait: 4.hours },
24
26
  unexpected: { attempts: 1, wait: 30.minutes, circuit_wait: 2.hours },
25
27
  fallback: { attempts: 2, wait: 10.minutes, circuit_wait: 90.minutes }
26
28
  }.freeze
@@ -76,6 +78,8 @@ module SourceMonitor
76
78
  end
77
79
 
78
80
  return :parsing if error.is_a?(SourceMonitor::Fetching::ParsingError)
81
+ return :blocked if error.is_a?(SourceMonitor::Fetching::BlockedError)
82
+ return :authentication if error.is_a?(SourceMonitor::Fetching::AuthenticationError)
79
83
  return :unexpected if error.is_a?(SourceMonitor::Fetching::UnexpectedResponseError)
80
84
 
81
85
  :fallback
@@ -11,19 +11,19 @@ module SourceMonitor
11
11
  end
12
12
 
13
13
  def call
14
- return Result.new(status: "unhealthy", error_message: "Missing feed URL", http_status: nil) if feed_url.blank?
14
+ return Result.new(status: "failing", error_message: "Missing feed URL", http_status: nil) if feed_url.blank?
15
15
 
16
16
  response = connection.get(feed_url)
17
17
  status_code = response_status(response)
18
18
  healthy = healthy_status?(status_code)
19
19
 
20
20
  Result.new(
21
- status: healthy ? "healthy" : "unhealthy",
21
+ status: healthy ? "working" : "failing",
22
22
  error_message: healthy ? nil : error_for_status(status_code),
23
23
  http_status: status_code
24
24
  )
25
25
  rescue StandardError => error
26
- Result.new(status: "unhealthy", error_message: error.message, http_status: response_status(error))
26
+ Result.new(status: "failing", error_message: error.message, http_status: response_status(error))
27
27
  end
28
28
 
29
29
  private
@@ -32,6 +32,7 @@ module SourceMonitor
32
32
  auto_paused_at = nil
33
33
  attrs[:auto_paused_until] = nil
34
34
  attrs[:auto_paused_at] = nil
35
+ attrs[:consecutive_fetch_failures] = 0
35
36
  attrs[:backoff_until] = nil if source.backoff_until.present?
36
37
  end
37
38
 
@@ -114,19 +115,17 @@ module SourceMonitor
114
115
  end
115
116
  end
116
117
 
117
- def determine_status(rate, auto_paused_until, logs)
118
- if auto_paused_active?(auto_paused_until)
119
- "auto_paused"
118
+ def determine_status(rate, _auto_paused_until, logs)
119
+ if rate >= healthy_threshold
120
+ "working"
121
+ elsif rate < auto_pause_threshold
122
+ "failing"
120
123
  elsif consecutive_failures(logs) >= 3
121
124
  "declining"
122
125
  elsif improving_streak?(logs)
123
126
  "improving"
124
- elsif rate >= healthy_threshold
125
- "healthy"
126
- elsif rate >= warning_threshold
127
- "warning"
128
127
  else
129
- "critical"
128
+ "declining"
130
129
  end
131
130
  end
132
131
 
@@ -143,7 +142,7 @@ module SourceMonitor
143
142
  end
144
143
 
145
144
  def apply_status(attrs, status)
146
- previous = source.health_status.presence || "healthy"
145
+ previous = source.health_status.presence || "working"
147
146
  return if previous == status
148
147
 
149
148
  attrs[:health_status] = status
@@ -161,11 +160,7 @@ module SourceMonitor
161
160
  end
162
161
 
163
162
  def healthy_threshold
164
- [ config.healthy_threshold.to_f, warning_threshold ].max
165
- end
166
-
167
- def warning_threshold
168
- config.warning_threshold.to_f
163
+ config.healthy_threshold.to_f
169
164
  end
170
165
 
171
166
  def auto_paused_active?(value)
@@ -26,7 +26,7 @@ module SourceMonitor
26
26
 
27
27
  def reset_attributes
28
28
  {
29
- health_status: "healthy",
29
+ health_status: "working",
30
30
  auto_paused_at: nil,
31
31
  auto_paused_until: nil,
32
32
  rolling_success_rate: nil,
@@ -8,6 +8,7 @@ module SourceMonitor
8
8
  :per_page,
9
9
  :has_next_page,
10
10
  :has_previous_page,
11
+ :total_count,
11
12
  keyword_init: true
12
13
  ) do
13
14
  def has_next_page?
@@ -29,6 +30,12 @@ module SourceMonitor
29
30
 
30
31
  [ page - 1, 1 ].max
31
32
  end
33
+
34
+ def total_pages
35
+ return 1 if total_count.nil? || total_count <= 0
36
+
37
+ [ 1, (total_count.to_f / per_page).ceil ].max
38
+ end
32
39
  end
33
40
 
34
41
  class Paginator
@@ -41,6 +48,7 @@ module SourceMonitor
41
48
  end
42
49
 
43
50
  def paginate
51
+ total = compute_total_count
44
52
  paginated_records = fetch_records
45
53
  has_next_page = paginated_records.length > per_page
46
54
 
@@ -49,7 +57,8 @@ module SourceMonitor
49
57
  page: page,
50
58
  per_page: per_page,
51
59
  has_next_page: has_next_page,
52
- has_previous_page: page > 1
60
+ has_previous_page: page > 1,
61
+ total_count: total
53
62
  )
54
63
  end
55
64
 
@@ -57,6 +66,14 @@ module SourceMonitor
57
66
 
58
67
  attr_reader :scope, :page, :per_page
59
68
 
69
+ def compute_total_count
70
+ if scope.is_a?(ActiveRecord::Relation)
71
+ scope.count
72
+ else
73
+ Array(scope).size
74
+ end
75
+ end
76
+
60
77
  def fetch_records
61
78
  offset = (page - 1) * per_page
62
79
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SourceMonitor
4
- VERSION = "0.10.2"
4
+ VERSION = "0.11.0"
5
5
  end
@@ -54,6 +54,7 @@ module SourceMonitor
54
54
  autoload :Assets, "source_monitor/assets"
55
55
 
56
56
  module Analytics
57
+ autoload :ScrapeRecommendations, "source_monitor/analytics/scrape_recommendations"
57
58
  autoload :SourceFetchIntervalDistribution, "source_monitor/analytics/source_fetch_interval_distribution"
58
59
  autoload :SourceActivityRates, "source_monitor/analytics/source_activity_rates"
59
60
  autoload :SourcesIndexMetrics, "source_monitor/analytics/sources_index_metrics"
@@ -81,6 +82,7 @@ module SourceMonitor
81
82
  autoload :RetryPolicy, "source_monitor/fetching/retry_policy"
82
83
  autoload :StalledFetchReconciler, "source_monitor/fetching/stalled_fetch_reconciler"
83
84
  autoload :AdvisoryLock, "source_monitor/fetching/advisory_lock"
85
+ autoload :CloudflareBypass, "source_monitor/fetching/cloudflare_bypass"
84
86
  end
85
87
 
86
88
  module ImportSessions
@@ -90,6 +92,7 @@ module SourceMonitor
90
92
 
91
93
  module Favicons
92
94
  autoload :Discoverer, "source_monitor/favicons/discoverer"
95
+ autoload :SvgConverter, "source_monitor/favicons/svg_converter"
93
96
  end
94
97
 
95
98
  module Images
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: source_monitor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.2
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - dchuk
@@ -238,6 +238,7 @@ executables: []
238
238
  extensions: []
239
239
  extra_rdoc_files: []
240
240
  files:
241
+ - ".claude/agent-memory/vbw-vbw-debugger/MEMORY.md"
241
242
  - ".claude/agent-memory/vbw-vbw-dev/MEMORY.md"
242
243
  - ".claude/agent-memory/vbw-vbw-lead/MEMORY.md"
243
244
  - ".claude/agents/rails-concern.md"
@@ -372,9 +373,11 @@ files:
372
373
  - app/controllers/concerns/.keep
373
374
  - app/controllers/concerns/source_monitor/sanitizes_search_params.rb
374
375
  - app/controllers/source_monitor/application_controller.rb
376
+ - app/controllers/source_monitor/bulk_scrape_enablements_controller.rb
375
377
  - app/controllers/source_monitor/dashboard_controller.rb
376
378
  - app/controllers/source_monitor/fetch_logs_controller.rb
377
379
  - app/controllers/source_monitor/health_controller.rb
380
+ - app/controllers/source_monitor/import_history_dismissals_controller.rb
378
381
  - app/controllers/source_monitor/import_sessions/bulk_configuration.rb
379
382
  - app/controllers/source_monitor/import_sessions/entry_annotation.rb
380
383
  - app/controllers/source_monitor/import_sessions/health_check_management.rb
@@ -389,6 +392,7 @@ files:
389
392
  - app/controllers/source_monitor/source_health_checks_controller.rb
390
393
  - app/controllers/source_monitor/source_health_resets_controller.rb
391
394
  - app/controllers/source_monitor/source_retries_controller.rb
395
+ - app/controllers/source_monitor/source_scrape_tests_controller.rb
392
396
  - app/controllers/source_monitor/source_turbo_responses.rb
393
397
  - app/controllers/source_monitor/sources_controller.rb
394
398
  - app/helpers/source_monitor/application_helper.rb
@@ -422,6 +426,7 @@ files:
422
426
  - app/views/source_monitor/dashboard/_fetch_schedule.html.erb
423
427
  - app/views/source_monitor/dashboard/_job_metrics.html.erb
424
428
  - app/views/source_monitor/dashboard/_recent_activity.html.erb
429
+ - app/views/source_monitor/dashboard/_scrape_recommendations.html.erb
425
430
  - app/views/source_monitor/dashboard/_stat_card.html.erb
426
431
  - app/views/source_monitor/dashboard/_stats.html.erb
427
432
  - app/views/source_monitor/dashboard/index.html.erb
@@ -444,7 +449,11 @@ files:
444
449
  - app/views/source_monitor/items/show.html.erb
445
450
  - app/views/source_monitor/logs/index.html.erb
446
451
  - app/views/source_monitor/scrape_logs/show.html.erb
452
+ - app/views/source_monitor/shared/_pagination.html.erb
447
453
  - app/views/source_monitor/shared/_toast.html.erb
454
+ - app/views/source_monitor/source_scrape_tests/_result.html.erb
455
+ - app/views/source_monitor/source_scrape_tests/show.html.erb
456
+ - app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb
448
457
  - app/views/source_monitor/sources/_bulk_scrape_form.html.erb
449
458
  - app/views/source_monitor/sources/_bulk_scrape_modal.html.erb
450
459
  - app/views/source_monitor/sources/_details.html.erb
@@ -492,6 +501,10 @@ files:
492
501
  - db/migrate/20260210204022_add_composite_index_to_log_entries.rb
493
502
  - db/migrate/20260222120000_add_min_scrape_interval_to_sources.rb
494
503
  - db/migrate/20260222194201_add_word_counts_to_item_contents.rb
504
+ - db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb
505
+ - db/migrate/20260306233004_add_error_category_to_fetch_logs.rb
506
+ - db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb
507
+ - db/migrate/20260312120000_simplify_health_status_values.rb
495
508
  - docs/configuration.md
496
509
  - docs/deployment.md
497
510
  - docs/gh-cli-workflow.md
@@ -503,6 +516,7 @@ files:
503
516
  - lib/generators/source_monitor/install/install_generator.rb
504
517
  - lib/generators/source_monitor/install/templates/source_monitor.rb.tt
505
518
  - lib/source_monitor.rb
519
+ - lib/source_monitor/analytics/scrape_recommendations.rb
506
520
  - lib/source_monitor/analytics/source_activity_rates.rb
507
521
  - lib/source_monitor/analytics/source_fetch_interval_distribution.rb
508
522
  - lib/source_monitor/analytics/sources_index_metrics.rb
@@ -536,8 +550,10 @@ files:
536
550
  - lib/source_monitor/engine.rb
537
551
  - lib/source_monitor/events.rb
538
552
  - lib/source_monitor/favicons/discoverer.rb
553
+ - lib/source_monitor/favicons/svg_converter.rb
539
554
  - lib/source_monitor/feedjira_extensions.rb
540
555
  - lib/source_monitor/fetching/advisory_lock.rb
556
+ - lib/source_monitor/fetching/cloudflare_bypass.rb
541
557
  - lib/source_monitor/fetching/completion/event_publisher.rb
542
558
  - lib/source_monitor/fetching/completion/follow_up_handler.rb
543
559
  - lib/source_monitor/fetching/completion/retention_handler.rb