source_monitor 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +15 -0
- data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +3 -3
- data/.claude/skills/sm-configure/reference/configuration-reference.md +3 -3
- data/.claude/skills/sm-domain-model/SKILL.md +2 -2
- data/.claude/skills/sm-domain-model/reference/table-structure.md +3 -1
- data/.claude/skills/sm-engine-migration/SKILL.md +1 -1
- data/.claude/skills/sm-engine-migration/reference/migration-conventions.md +1 -1
- data/.claude/skills/sm-health-rule/SKILL.md +18 -21
- data/.claude/skills/sm-health-rule/reference/health-system.md +1 -1
- data/.claude/skills/sm-host-setup/reference/initializer-template.md +2 -2
- data/.claude/skills/sm-upgrade/reference/version-history.md +17 -12
- data/CHANGELOG.md +42 -0
- data/CLAUDE.md +2 -2
- data/Gemfile +1 -0
- data/Gemfile.lock +4 -1
- data/README.md +3 -3
- data/VERSION +1 -1
- data/app/assets/builds/source_monitor/application.css +132 -12
- data/app/assets/builds/source_monitor/application.js +25 -1
- data/app/assets/builds/source_monitor/application.js.map +2 -2
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +8 -0
- data/app/assets/javascripts/source_monitor/controllers/select_all_controller.js +22 -2
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +1 -1
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +57 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +10 -1
- data/app/controllers/source_monitor/import_history_dismissals_controller.rb +20 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +10 -2
- data/app/controllers/source_monitor/source_scrape_tests_controller.rb +73 -0
- data/app/controllers/source_monitor/sources_controller.rb +51 -9
- data/app/helpers/source_monitor/application_helper.rb +24 -0
- data/app/helpers/source_monitor/health_badge_helper.rb +7 -20
- data/app/jobs/source_monitor/fetch_feed_job.rb +32 -3
- data/app/jobs/source_monitor/source_health_check_job.rb +1 -1
- data/app/models/source_monitor/fetch_log.rb +4 -0
- data/app/models/source_monitor/import_history.rb +2 -0
- data/app/models/source_monitor/source.rb +47 -2
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +94 -68
- data/app/views/source_monitor/dashboard/_scrape_recommendations.html.erb +17 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +19 -0
- data/app/views/source_monitor/dashboard/index.html.erb +7 -1
- data/app/views/source_monitor/import_sessions/health_check/_row.html.erb +2 -2
- data/app/views/source_monitor/shared/_pagination.html.erb +74 -0
- data/app/views/source_monitor/source_scrape_tests/_result.html.erb +81 -0
- data/app/views/source_monitor/source_scrape_tests/show.html.erb +60 -0
- data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +29 -0
- data/app/views/source_monitor/sources/_details.html.erb +19 -1
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +1 -1
- data/app/views/source_monitor/sources/_import_history_panel.html.erb +12 -5
- data/app/views/source_monitor/sources/_row.html.erb +34 -6
- data/app/views/source_monitor/sources/index.html.erb +184 -132
- data/config/brakeman.ignore +11 -1
- data/config/routes.rb +5 -0
- data/db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb +7 -0
- data/db/migrate/20260306233004_add_error_category_to_fetch_logs.rb +8 -0
- data/db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb +11 -0
- data/db/migrate/20260312120000_simplify_health_status_values.rb +20 -0
- data/docs/configuration.md +9 -1
- data/docs/troubleshooting.md +9 -0
- data/docs/upgrade.md +31 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +2 -3
- data/lib/source_monitor/analytics/scrape_recommendations.rb +27 -0
- data/lib/source_monitor/configuration/health_settings.rb +0 -2
- data/lib/source_monitor/configuration/scraping_settings.rb +8 -1
- data/lib/source_monitor/dashboard/queries/stats_query.rb +12 -1
- data/lib/source_monitor/dashboard/queries.rb +6 -3
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +6 -5
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +40 -54
- data/lib/source_monitor/favicons/discoverer.rb +16 -0
- data/lib/source_monitor/favicons/svg_converter.rb +60 -0
- data/lib/source_monitor/fetching/cloudflare_bypass.rb +79 -0
- data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +82 -2
- data/lib/source_monitor/fetching/feed_fetcher.rb +55 -1
- data/lib/source_monitor/fetching/fetch_error.rb +27 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +4 -0
- data/lib/source_monitor/fetching/retry_policy.rb +4 -0
- data/lib/source_monitor/health/import_source_health_check.rb +3 -3
- data/lib/source_monitor/health/source_health_monitor.rb +9 -14
- data/lib/source_monitor/health/source_health_reset.rb +1 -1
- data/lib/source_monitor/pagination/paginator.rb +18 -1
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +3 -0
- metadata +17 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 522d0a31c2068abc659c59ab47fb1b96cd4855729257a823aa08877c3593552c
|
|
4
|
+
data.tar.gz: 0e4278b1f628e7439befb9433f67618eb587f8f24860eef2e5f7db92207affd7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 65e8e108bb043d0065a0fed294f02d5b3d232ceab7b9bb153ba1d3130cdad8ae5dfd1b0aa6a3d9c7bd172dfbd593bfc015a2d9b6b5c665435452586ed2209ba0
|
|
7
|
+
data.tar.gz: e691d60468521daccab51c9ae8a71ddfbb695edbc62df88401a93252250de68cfd9a36d33e3792ce3026d4e9a666fb2be8b8707b3910e83f2c21efa2cd32c2da
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Debugger Memory
|
|
2
|
+
|
|
3
|
+
## Rails Association Cache Pollution Pattern
|
|
4
|
+
- `source.items.new` AND `Item.new(source: source)` both add to the loaded association cache via inverse_of
|
|
5
|
+
- Only `Item.new(source_id: source.id)` truly bypasses inverse_of and avoids cache pollution
|
|
6
|
+
- When unsaved/invalid records are in a loaded has_many cache, `parent.update!` triggers auto-save and fails with `RecordInvalid: Items is invalid`
|
|
7
|
+
- `update_columns` bypasses all callbacks and auto-save, so it is safe to use with polluted caches
|
|
8
|
+
- After `update_columns`, call `reload` so the in-memory object reflects DB state
|
|
9
|
+
|
|
10
|
+
## Test Patterns
|
|
11
|
+
- Use `clean_source_monitor_tables!` in setup for blank-slate DB
|
|
12
|
+
- `create_source!` is the factory helper (in test_helper.rb)
|
|
13
|
+
- WebMock stubs + VCR cassettes for HTTP; `stub_request(:get, url)`
|
|
14
|
+
- Stub class methods with `singleton_class.define_method` pattern
|
|
15
|
+
- Always restore stubs in `ensure` block
|
|
@@ -75,8 +75,7 @@ Has `reset!` method. All attributes are plain `attr_accessor`.
|
|
|
75
75
|
| Attribute | Type | Default | Description |
|
|
76
76
|
|-----------|------|---------|-------------|
|
|
77
77
|
| `window_size` | Integer | `20` | Rolling window of fetches for health calc |
|
|
78
|
-
| `healthy_threshold` | Float | `0.8` | Success rate above =
|
|
79
|
-
| `warning_threshold` | Float | `0.5` | Success rate above = warning |
|
|
78
|
+
| `healthy_threshold` | Float | `0.8` | Success rate above = working |
|
|
80
79
|
| `auto_pause_threshold` | Float | `0.2` | Success rate below = auto-pause |
|
|
81
80
|
| `auto_resume_threshold` | Float | `0.6` | Success rate above = auto-resume |
|
|
82
81
|
| `auto_pause_cooldown_minutes` | Integer | `60` | Cooldown before auto-resume check |
|
|
@@ -91,8 +90,9 @@ Has `reset!` method. All attributes are plain `attr_accessor`.
|
|
|
91
90
|
|
|
92
91
|
| Attribute | Type | Default | Description |
|
|
93
92
|
|-----------|------|---------|-------------|
|
|
94
|
-
| `max_in_flight_per_source` | Integer/nil | `
|
|
93
|
+
| `max_in_flight_per_source` | Integer/nil | `nil` | Max concurrent scrape jobs per source |
|
|
95
94
|
| `max_bulk_batch_size` | Integer/nil | `100` | Max items in a bulk scrape batch |
|
|
95
|
+
| `scrape_recommendation_threshold` | Integer/nil | `200` | Avg feed word count threshold for scrape recommendations |
|
|
96
96
|
|
|
97
97
|
Has `reset!` method. Custom setters normalize values:
|
|
98
98
|
- `nil` -> `nil`
|
|
@@ -95,8 +95,7 @@ Tunes automatic pause/resume heuristics per source.
|
|
|
95
95
|
| Setting | Type | Default | Description |
|
|
96
96
|
|---|---|---|---|
|
|
97
97
|
| `window_size` | Integer | `20` | Number of fetch attempts to evaluate |
|
|
98
|
-
| `healthy_threshold` | Float | `0.8` | Success ratio for "
|
|
99
|
-
| `warning_threshold` | Float | `0.5` | Success ratio for "warning" badge |
|
|
98
|
+
| `healthy_threshold` | Float | `0.8` | Success ratio for "working" status |
|
|
100
99
|
| `auto_pause_threshold` | Float | `0.2` | Auto-pause source below this ratio |
|
|
101
100
|
| `auto_resume_threshold` | Float | `0.6` | Auto-resume source above this ratio |
|
|
102
101
|
| `auto_pause_cooldown_minutes` | Integer | `60` | Grace period before re-enabling |
|
|
@@ -162,8 +161,9 @@ Class: `SourceMonitor::Configuration::ScrapingSettings`
|
|
|
162
161
|
|
|
163
162
|
| Setting | Type | Default | Description |
|
|
164
163
|
|---|---|---|---|
|
|
165
|
-
| `max_in_flight_per_source` | Integer/nil | `
|
|
164
|
+
| `max_in_flight_per_source` | Integer/nil | `nil` | Max concurrent scrapes per source |
|
|
166
165
|
| `max_bulk_batch_size` | Integer/nil | `100` | Max items per bulk scrape enqueue |
|
|
166
|
+
| `scrape_recommendation_threshold` | Integer/nil | `200` | Average feed word count threshold; sources whose average falls below it are recommended for scraping |
|
|
167
167
|
|
|
168
168
|
Values are normalized to positive integers. Set to `nil` to disable limits.
|
|
169
169
|
|
|
@@ -92,7 +92,7 @@ URL normalization and format validation. Used by Source and Item.
|
|
|
92
92
|
| Field | Values | Notes |
|
|
93
93
|
|-------|--------|-------|
|
|
94
94
|
| `fetch_status` | `idle`, `queued`, `fetching`, `failed`, `invalid` | DB CHECK constraint |
|
|
95
|
-
| `health_status` | `
|
|
95
|
+
| `health_status` | `working` (default) | Values: working, declining, improving, failing |
|
|
96
96
|
| `active` | `true`/`false` | Boolean toggle |
|
|
97
97
|
|
|
98
98
|
### Key Scopes
|
|
@@ -101,7 +101,7 @@ URL normalization and format validation. Used by Source and Item.
|
|
|
101
101
|
|-------|---------|
|
|
102
102
|
| `active` | `WHERE active = true` |
|
|
103
103
|
| `failed` | `failure_count > 0 OR last_error IS NOT NULL OR last_error_at IS NOT NULL` |
|
|
104
|
-
| `healthy` | `active AND failure_count = 0 AND last_error IS NULL AND last_error_at IS NULL` |
|
|
104
|
+
| `healthy` | `active AND failure_count = 0 AND last_error IS NULL AND last_error_at IS NULL` (Note: scope name preserved for backward compat; health_status uses "working") |
|
|
105
105
|
| `due_for_fetch(reference_time:)` | Class method. Active sources where `next_fetch_at IS NULL OR <= reference_time` |
|
|
106
106
|
|
|
107
107
|
### Validations
|
|
@@ -44,10 +44,11 @@ Core feed source configuration and state.
|
|
|
44
44
|
| adaptive_fetching_enabled | boolean | NO | true | Adaptive interval toggle |
|
|
45
45
|
| feed_content_readability_enabled | boolean | NO | false | Process feed content through readability |
|
|
46
46
|
| rolling_success_rate | decimal(5,4) | YES | | Rolling success rate (0.0-1.0) |
|
|
47
|
-
| health_status | string | NO | "
|
|
47
|
+
| health_status | string | NO | "working" | Health status (working/declining/improving/failing) |
|
|
48
48
|
| health_status_changed_at | datetime | YES | | Last health status change |
|
|
49
49
|
| auto_paused_at | datetime | YES | | When source was auto-paused |
|
|
50
50
|
| auto_paused_until | datetime | YES | | Auto-pause expiry |
|
|
51
|
+
| consecutive_fetch_failures | integer | NO | 0 | Consecutive fetch failure streak count |
|
|
51
52
|
| health_auto_pause_threshold | decimal(5,4) | YES | | Custom pause threshold (0.0-1.0) |
|
|
52
53
|
| created_at | datetime | NO | | |
|
|
53
54
|
| updated_at | datetime | NO | | |
|
|
@@ -170,6 +171,7 @@ Records of feed fetch attempts.
|
|
|
170
171
|
| feed_size_bytes | integer | YES | | Feed body size |
|
|
171
172
|
| items_in_feed | integer | YES | | Total entries in feed |
|
|
172
173
|
| job_id | string | YES | | ActiveJob ID |
|
|
174
|
+
| error_category | string | YES | | Failure classification (e.g., timeout, dns, blocked) |
|
|
173
175
|
| metadata | jsonb | NO | {} | Extra metadata (parser, errors) |
|
|
174
176
|
| created_at | datetime | NO | | |
|
|
175
177
|
| updated_at | datetime | NO | | |
|
|
@@ -338,7 +338,7 @@ class AddHealthFieldsToSources < ActiveRecord::Migration[8.0]
|
|
|
338
338
|
def change
|
|
339
339
|
change_table :sourcemon_sources, bulk: true do |t|
|
|
340
340
|
t.decimal :rolling_success_rate, precision: 5, scale: 4
|
|
341
|
-
t.string :health_status, null: false, default: "
|
|
341
|
+
t.string :health_status, null: false, default: "working"
|
|
342
342
|
t.datetime :health_status_changed_at
|
|
343
343
|
t.datetime :auto_paused_at
|
|
344
344
|
t.datetime :auto_paused_until
|
|
@@ -41,7 +41,7 @@ Created in: `20241008120000_create_source_monitor_sources.rb`
|
|
|
41
41
|
| `fetch_circuit_opened_at` | datetime | - | - |
|
|
42
42
|
| `fetch_circuit_until` | datetime | - | - |
|
|
43
43
|
| `rolling_success_rate` | decimal(5,4) | - | - |
|
|
44
|
-
| `health_status` | string | NOT NULL, indexed | `"
|
|
44
|
+
| `health_status` | string | NOT NULL, indexed | `"working"` |
|
|
45
45
|
| `health_status_changed_at` | datetime | - | - |
|
|
46
46
|
| `auto_paused_at` | datetime | - | - |
|
|
47
47
|
| `auto_paused_until` | datetime | indexed | - |
|
|
@@ -35,7 +35,7 @@ Health Module (setup!)
|
|
|
35
35
|
|
|
|
36
36
|
+-- SourceHealthReset (manual reset)
|
|
37
37
|
+-- Clears all health state
|
|
38
|
-
+-- Resets to "
|
|
38
|
+
+-- Resets to "working" status
|
|
39
39
|
```
|
|
40
40
|
|
|
41
41
|
## Key Files
|
|
@@ -55,39 +55,37 @@ Health Module (setup!)
|
|
|
55
55
|
|
|
56
56
|
| Status | Meaning | Trigger |
|
|
57
57
|
|--------|---------|---------|
|
|
58
|
-
| `
|
|
59
|
-
| `warning` | Some failures occurring | success_rate >= warning_threshold (0.5) but < healthy |
|
|
60
|
-
| `critical` | High failure rate | success_rate < warning_threshold |
|
|
58
|
+
| `working` | Source is reliable | success_rate >= healthy_threshold (0.8) |
|
|
61
59
|
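| `declining` | Consecutive failures, or an intermediate success rate | >= 3 consecutive failures in recent logs; also the fallback when success_rate is at or above auto_pause_threshold but below healthy_threshold and no improving streak applies |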
|
|
62
60
|
| `improving` | Recovery in progress | >= 2 consecutive successes after a failure |
|
|
63
|
-
| `
|
|
61
|
+
| `failing` | High failure rate | success_rate < auto_pause_threshold (0.2) |
|
|
64
62
|
|
|
65
|
-
### Status
|
|
63
|
+
### Status Decision Tree
|
|
66
64
|
|
|
67
65
|
```
|
|
68
|
-
|
|
66
|
+
failing > declining > improving > working
|
|
69
67
|
```
|
|
70
68
|
|
|
71
69
|
Determination logic:
|
|
72
70
|
|
|
73
71
|
```ruby
|
|
74
|
-
def determine_status(rate,
|
|
75
|
-
if
|
|
76
|
-
"
|
|
72
|
+
def determine_status(rate, logs)
|
|
73
|
+
if rate < auto_pause_threshold
|
|
74
|
+
"failing"
|
|
77
75
|
elsif consecutive_failures(logs) >= 3
|
|
78
76
|
"declining"
|
|
79
77
|
elsif improving_streak?(logs)
|
|
80
78
|
"improving"
|
|
81
79
|
elsif rate >= healthy_threshold
|
|
82
|
-
"
|
|
83
|
-
elsif rate >= warning_threshold
|
|
84
|
-
"warning"
|
|
80
|
+
"working"
|
|
85
81
|
else
|
|
86
|
-
"
|
|
82
|
+
"declining" # fallback for intermediate rates
|
|
87
83
|
end
|
|
88
84
|
end
|
|
89
85
|
```
|
|
90
86
|
|
|
87
|
+
Note: Auto-pause is tracked separately as operational state (`auto_paused_at`/`auto_paused_until`), not as a health status value. A source can be both "failing" and auto-paused.
|
|
88
|
+
|
|
91
89
|
## Health Configuration
|
|
92
90
|
|
|
93
91
|
### Default Settings
|
|
@@ -95,8 +93,7 @@ end
|
|
|
95
93
|
| Setting | Default | Purpose |
|
|
96
94
|
|---------|---------|---------|
|
|
97
95
|
| `window_size` | 20 | Number of recent fetch logs to evaluate |
|
|
98
|
-
| `healthy_threshold` | 0.8 | Success rate for "
|
|
99
|
-
| `warning_threshold` | 0.5 | Success rate for "warning" status |
|
|
96
|
+
| `healthy_threshold` | 0.8 | Success rate for "working" status |
|
|
100
97
|
| `auto_pause_threshold` | 0.2 | Below this, source is auto-paused |
|
|
101
98
|
| `auto_resume_threshold` | 0.6 | Above this, auto-pause is lifted |
|
|
102
99
|
| `auto_pause_cooldown_minutes` | 60 | Minimum pause duration |
|
|
@@ -211,7 +208,7 @@ SourceMonitor::Health::SourceHealthReset.call(source: source)
|
|
|
211
208
|
```
|
|
212
209
|
|
|
213
210
|
Resets:
|
|
214
|
-
- `health_status` -> "
|
|
211
|
+
- `health_status` -> "working"
|
|
215
212
|
- `auto_paused_at`, `auto_paused_until` -> nil
|
|
216
213
|
- `rolling_success_rate` -> nil
|
|
217
214
|
- `failure_count` -> 0
|
|
@@ -230,7 +227,7 @@ Lightweight health check for import candidates (no Source record needed):
|
|
|
230
227
|
|
|
231
228
|
```ruby
|
|
232
229
|
result = Health::ImportSourceHealthCheck.new(feed_url: url).call
|
|
233
|
-
result.status # => "
|
|
230
|
+
result.status # => "working" or "failing"
|
|
234
231
|
result.error_message # => nil or error description
|
|
235
232
|
result.http_status # => HTTP status code
|
|
236
233
|
```
|
|
@@ -255,9 +252,9 @@ end
|
|
|
255
252
|
Add the condition to `determine_status`:
|
|
256
253
|
|
|
257
254
|
```ruby
|
|
258
|
-
def determine_status(rate,
|
|
259
|
-
if
|
|
260
|
-
"
|
|
255
|
+
def determine_status(rate, logs)
|
|
256
|
+
if rate < auto_pause_threshold
|
|
257
|
+
"failing"
|
|
261
258
|
elsif my_custom_condition?(logs) # Add here
|
|
262
259
|
"my_custom_status"
|
|
263
260
|
elsif consecutive_failures(logs) >= 3
|
|
@@ -206,7 +206,7 @@ From migration `20251012090000_add_health_fields_to_sources`:
|
|
|
206
206
|
|
|
207
207
|
```ruby
|
|
208
208
|
# Health status tracking
|
|
209
|
-
:health_status # string, default: "
|
|
209
|
+
:health_status # string, default: "working" (values: working, declining, improving, failing)
|
|
210
210
|
:health_status_changed_at # datetime
|
|
211
211
|
:rolling_success_rate # float
|
|
212
212
|
:health_auto_pause_threshold # float (per-source override)
|
|
@@ -108,8 +108,7 @@ SourceMonitor.configure do |config|
|
|
|
108
108
|
# ===========================================================================
|
|
109
109
|
|
|
110
110
|
config.health.window_size = 20 # Fetch attempts to evaluate
|
|
111
|
-
config.health.healthy_threshold = 0.8 # Ratio for "
|
|
112
|
-
config.health.warning_threshold = 0.5 # Ratio for "warning" badge
|
|
111
|
+
config.health.healthy_threshold = 0.8 # Ratio for "working" status
|
|
113
112
|
config.health.auto_pause_threshold = 0.2 # Auto-pause below this
|
|
114
113
|
config.health.auto_resume_threshold = 0.6 # Auto-resume above this
|
|
115
114
|
config.health.auto_pause_cooldown_minutes = 60 # Grace period before re-enable
|
|
@@ -137,6 +136,7 @@ SourceMonitor.configure do |config|
|
|
|
137
136
|
|
|
138
137
|
# config.scraping.max_in_flight_per_source = 25 # Concurrent scrapes per source
|
|
139
138
|
# config.scraping.max_bulk_batch_size = 100 # Max bulk enqueue size
|
|
139
|
+
# config.scraping.scrape_recommendation_threshold = 200
|
|
140
140
|
|
|
141
141
|
# ===========================================================================
|
|
142
142
|
# Event Callbacks
|
|
@@ -2,26 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
Version-specific migration notes for each major/minor version transition. Agents should reference this file when guiding users through multi-version upgrades.
|
|
4
4
|
|
|
5
|
-
## 0.
|
|
5
|
+
## 0.10.2 to 0.11.0
|
|
6
6
|
|
|
7
7
|
**Key changes:**
|
|
8
|
-
-
|
|
9
|
-
- `
|
|
10
|
-
- `
|
|
11
|
-
- `config.
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
8
|
+
- Health status simplified from 7 values to 4 (`working`, `declining`, `improving`, `failing`). Auto-pause tracked as operational state via `auto_paused_at`/`auto_paused_until` columns.
|
|
9
|
+
- New `consecutive_fetch_failures` column (integer, NOT NULL, default 0) on `sourcemon_sources` for streak-based health detection.
|
|
10
|
+
- New `error_category` column (string, nullable) on `sourcemon_fetch_logs` for classifying failure types.
|
|
11
|
+
- New `config.scraping.scrape_recommendation_threshold` (default 200) controls the word-count threshold for dashboard scrape recommendations.
|
|
12
|
+
- Dashboard pagination for sources and items lists.
|
|
13
|
+
- Automatic Cloudflare bypass via cookie replay and UA rotation (no configuration needed).
|
|
14
|
+
- Smart scrape recommendations widget on the dashboard highlights sources that may benefit from scraping.
|
|
15
|
+
- New third queue: `source_monitor_maintenance` for non-fetch jobs (health checks, cleanup, favicon, images, OPML import).
|
|
16
|
+
- Fetch tuning settings: `config.fetching.scheduler_batch_size` defaults to `25` (was hardcoded `100`) and `config.fetching.stale_timeout_minutes` defaults to `5` (was `10`).
|
|
17
|
+
- Fixed-interval sources now get +/-10% jitter on `next_fetch_at`.
|
|
18
|
+
- Fetch pipeline error handling hardened: DB errors propagate, `ensure` block guarantees status reset.
|
|
15
19
|
|
|
16
20
|
**Action items:**
|
|
17
|
-
1. **Action required:**
|
|
21
|
+
1. **Action required:** If your initializer sets `config.health.warning_threshold`, remove that line. The setting no longer exists.
|
|
22
|
+
2. **Action required:** Add the maintenance queue to your `solid_queue.yml`:
|
|
18
23
|
```yaml
|
|
19
24
|
source_monitor_maintenance:
|
|
20
25
|
concurrency: <%= ENV.fetch("SOURCE_MONITOR_MAINTENANCE_CONCURRENCY", 1) %>
|
|
21
26
|
```
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
27
|
+
3. If your host app queries `health_status` directly (e.g., `Source.where(health_status: "healthy")`), update to use the new values (`working`, `declining`, `improving`, `failing`).
|
|
28
|
+
4. If you have many overdue sources after upgrading, run `bin/rails source_monitor:maintenance:stagger_fetch_times` to break the thundering herd.
|
|
29
|
+
5. All existing configuration (except `warning_threshold`) remains valid.
|
|
25
30
|
|
|
26
31
|
## 0.7.x to 0.8.0
|
|
27
32
|
|
data/CHANGELOG.md
CHANGED
|
@@ -15,6 +15,48 @@ All notable changes to this project are documented below. The format follows [Ke
|
|
|
15
15
|
|
|
16
16
|
- No unreleased changes yet.
|
|
17
17
|
|
|
18
|
+
## [0.11.0] - 2026-03-13
|
|
19
|
+
|
|
20
|
+
### Breaking Changes
|
|
21
|
+
|
|
22
|
+
- **Health status model simplified from 7 values to 4.** `healthy`/`auto_paused`/`unknown` → `working`, `warning`/`critical` → `failing`. `declining` and `improving` unchanged. Requires migration: `bin/rails source_monitor:upgrade && bin/rails db:migrate`.
|
|
23
|
+
- **`config.health.warning_threshold` removed.** Delete this line from your initializer if present.
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- **Smart scrape recommendations.** Dashboard widget and sources index badge identify sources with low feed word counts that would benefit from scraping. Configurable threshold via `config.scraping.scrape_recommendation_threshold`.
|
|
28
|
+
- **Bulk scrape enablement.** Select multiple recommended sources and enable scraping in one action with confirmation modal.
|
|
29
|
+
- **Test scrape with modal results.** "Test Scrape" button on source detail page runs a synchronous scrape of the latest item and shows feed vs scraped word count comparison in a modal with "Enable Auto-Scraping" action.
|
|
30
|
+
- **Cloudflare bypass techniques.** Cookie replay and UA rotation for Cloudflare-challenged feeds with clear "Blocked" badge on affected sources.
|
|
31
|
+
- **Consecutive failure auto-pause.** Sources that fail several fetches in a row are automatically paused; the streak is tracked in the `consecutive_fetch_failures` counter and compared against a configurable threshold.
|
|
32
|
+
- **Error categorization on fetch logs.** New `error_category` column classifies fetch failures (blocked, auth, timeout, parse, etc.) for structured diagnostics.
|
|
33
|
+
- **Dashboard pagination.** Turbo Frame pagination for sources list, fetch schedule buckets, and health distribution badges. Handles 100+ sources efficiently.
|
|
34
|
+
- **Health distribution badges.** Dashboard shows working/declining/improving/failing source counts below stats cards.
|
|
35
|
+
- **Sortable computed columns.** New Items/Day, Avg Feed Words, and Avg Scraped Words columns sortable on sources index.
|
|
36
|
+
- **Dismissible OPML import banner.** Import history banner includes a dismiss button.
|
|
37
|
+
- **SVG favicon conversion.** SVG favicons auto-converted to PNG via MiniMagick.
|
|
38
|
+
|
|
39
|
+
### Changed
|
|
40
|
+
|
|
41
|
+
- Health badge colors: working (green), declining (yellow), improving (sky), failing (rose).
|
|
42
|
+
- Sources index splits Health and Fetch Status into separate columns.
|
|
43
|
+
- Fetch interval shown as rate "(X.Xx / day)" under fetch status badge instead of separate column.
|
|
44
|
+
- Scrape recommended badge positioned below source URL for better wrapping.
|
|
45
|
+
- Enabling auto-scrape now sets both `scraping_enabled` and `auto_scrape`, and queues existing unscraped items.
|
|
46
|
+
- Bulk scrape enablement also sets `auto_scrape` alongside `scraping_enabled`.
|
|
47
|
+
|
|
48
|
+
### Fixed
|
|
49
|
+
|
|
50
|
+
- Dashboard fetch schedule source links no longer show "Content missing" (turbo_frame _top added).
|
|
51
|
+
- Force-fetch lock contention handled gracefully (no permanent failure after retries).
|
|
52
|
+
- Blocked response detection via HTML body sniffing.
|
|
53
|
+
|
|
54
|
+
### Migration Required
|
|
55
|
+
|
|
56
|
+
- Run `bin/rails source_monitor:upgrade` then `bin/rails db:migrate`.
|
|
57
|
+
- New columns: `consecutive_fetch_failures` on sources, `error_category` on fetch logs.
|
|
58
|
+
- Health status value migration automatically remaps existing records.
|
|
59
|
+
|
|
18
60
|
## [0.10.2] - 2026-02-26
|
|
19
61
|
|
|
20
62
|
### Fixed
|
data/CLAUDE.md
CHANGED
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
## Active Context
|
|
6
6
|
|
|
7
|
-
**Last shipped:**
|
|
8
|
-
**Next action:** /vbw:vibe to
|
|
7
|
+
**Last shipped:** ui-fixes-and-smart-scraping (5 phases, 22 plans)
|
|
8
|
+
**Next action:** /vbw:vibe to plan Phase 01 (Ultimate Turbo Modal Integration)
|
|
9
9
|
|
|
10
10
|
## Key Decisions
|
|
11
11
|
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
source_monitor (0.
|
|
4
|
+
source_monitor (0.11.0)
|
|
5
5
|
cssbundling-rails (~> 1.4)
|
|
6
6
|
faraday (~> 2.9)
|
|
7
7
|
faraday-follow_redirects (~> 0.4)
|
|
@@ -177,6 +177,8 @@ GEM
|
|
|
177
177
|
net-smtp
|
|
178
178
|
marcel (1.1.0)
|
|
179
179
|
matrix (0.4.3)
|
|
180
|
+
mini_magick (5.3.1)
|
|
181
|
+
logger
|
|
180
182
|
mini_mime (1.1.5)
|
|
181
183
|
mini_portile2 (2.8.9)
|
|
182
184
|
minitest (6.0.1)
|
|
@@ -404,6 +406,7 @@ DEPENDENCIES
|
|
|
404
406
|
cgi
|
|
405
407
|
digest
|
|
406
408
|
json
|
|
409
|
+
mini_magick
|
|
407
410
|
minitest-mock
|
|
408
411
|
ostruct
|
|
409
412
|
pg
|
data/README.md
CHANGED
|
@@ -9,8 +9,8 @@ SourceMonitor is a production-ready Rails 8 mountable engine for ingesting, norm
|
|
|
9
9
|
In your host Rails app:
|
|
10
10
|
|
|
11
11
|
```bash
|
|
12
|
-
bundle add source_monitor --version "~> 0.
|
|
13
|
-
# or add `gem "source_monitor", "~> 0.
|
|
12
|
+
bundle add source_monitor --version "~> 0.11.0"
|
|
13
|
+
# or add `gem "source_monitor", "~> 0.11.0"` manually, then run:
|
|
14
14
|
bundle install
|
|
15
15
|
```
|
|
16
16
|
|
|
@@ -43,7 +43,7 @@ This exposes `bin/source_monitor` (via Bundler binstubs) so you can run the guid
|
|
|
43
43
|
Before running any SourceMonitor commands inside your host app, add the gem and install dependencies:
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
bundle add source_monitor --version "~> 0.
|
|
46
|
+
bundle add source_monitor --version "~> 0.11.0"
|
|
47
47
|
# or edit your Gemfile, then run
|
|
48
48
|
bundle install
|
|
49
49
|
```
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.
|
|
1
|
+
0.11.0
|