source_monitor 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/commands/release.md +45 -22
- data/.gitignore +7 -0
- data/.vbw-planning/ROADMAP.md +53 -0
- data/.vbw-planning/STATE.md +27 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/.context-dev.md +17 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/PLAN-01-SUMMARY.md +26 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/PLAN-01.md +71 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/PLAN-02-SUMMARY.md +16 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/PLAN-02.md +56 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/PLAN-03-SUMMARY.md +17 -0
- data/.vbw-planning/phases/01-aia-certificate-resolution/PLAN-03.md +98 -0
- data/.vbw-planning/phases/02-test-performance/.context-dev.md +75 -0
- data/.vbw-planning/phases/02-test-performance/.context-lead.md +89 -0
- data/.vbw-planning/phases/02-test-performance/.context-qa.md +23 -0
- data/.vbw-planning/phases/02-test-performance/02-RESEARCH.md +56 -0
- data/.vbw-planning/phases/02-test-performance/02-VERIFICATION.md +51 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-01-SUMMARY.md +37 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-01.md +156 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-02-SUMMARY.md +33 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-02.md +120 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-03-SUMMARY.md +30 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-03.md +154 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-04-SUMMARY.md +28 -0
- data/.vbw-planning/phases/02-test-performance/PLAN-04.md +133 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile.lock +1 -1
- data/VERSION +1 -1
- data/lib/source_monitor/fetching/feed_fetcher/entry_processor.rb +5 -0
- data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +7 -4
- data/lib/source_monitor/fetching/feed_fetcher.rb +49 -3
- data/lib/source_monitor/items/item_creator.rb +31 -5
- data/lib/source_monitor/version.rb +1 -1
- data/lib/tasks/test_fast.rake +11 -0
- metadata +24 -1
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
---
|
|
2
|
+
phase: "02"
|
|
3
|
+
plan: "03"
|
|
4
|
+
title: "Adopt before_all in DB-Heavy Test Files"
|
|
5
|
+
wave: 1
|
|
6
|
+
depends_on: []
|
|
7
|
+
must_haves:
|
|
8
|
+
- "REQ-PERF-05: Top DB-heavy test files converted from per-test setup to setup_once/before_all"
|
|
9
|
+
- "sources_index_metrics_test.rb converted to setup_once (17 tests, shared read-only fixtures)"
|
|
10
|
+
- "Additional eligible files converted where safe (read-only shared data)"
|
|
11
|
+
- "Only read-only test data shared via setup_once (tests that mutate data keep per-test setup)"
|
|
12
|
+
- "All converted tests pass individually with PARALLEL_WORKERS=1"
|
|
13
|
+
- "Full test suite passes with no isolation regressions"
|
|
14
|
+
- "RuboCop zero offenses on modified files"
|
|
15
|
+
skills_used: []
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
# Plan 03: Adopt before_all in DB-Heavy Test Files
|
|
19
|
+
|
|
20
|
+
## Objective
|
|
21
|
+
|
|
22
|
+
Convert eligible DB-heavy test files from per-test `setup` to `setup_once`/`before_all` for shared fixture creation. The `setup_once` helper (alias for `before_all`) is already wired up in `test/test_prof.rb` but only used in 1 of 54 eligible files. This saves ~3-5s by eliminating redundant database INSERT/DELETE cycles.
|
|
23
|
+
|
|
24
|
+
## Context
|
|
25
|
+
|
|
26
|
+
- `@` `test/test_prof.rb` -- `setup_once` (alias for `before_all`) already configured and included in `ActiveSupport::TestCase`
|
|
27
|
+
- `@` `test/lib/source_monitor/logs/query_test.rb` -- only existing user of `setup_once` (reference pattern)
|
|
28
|
+
- `@` `test/lib/source_monitor/analytics/sources_index_metrics_test.rb` -- 17 tests, shared read-only fixtures. **PRIMARY candidate: creates 3 sources + 3 items in setup, all tests only query this data.**
|
|
29
|
+
- `@` `test/lib/source_monitor/analytics/source_activity_rates_test.rb` -- 1 test, uses `clean_source_monitor_tables!`
|
|
30
|
+
- `@` `test/lib/source_monitor/analytics/source_fetch_interval_distribution_test.rb` -- 1 test, uses `clean_source_monitor_tables!`
|
|
31
|
+
- `@` `test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb` -- 1 test, uses `clean_source_monitor_tables!`
|
|
32
|
+
|
|
33
|
+
**Safety analysis performed:**
|
|
34
|
+
- `sources_index_metrics_test.rb`: SAFE. All 17 tests construct `SourcesIndexMetrics.new(...)` and call read-only query methods. No test creates, updates, or deletes records.
|
|
35
|
+
- `source_activity_rates_test.rb`: SAFE but minimal benefit (1 test, setup runs once either way).
|
|
36
|
+
- `source_fetch_interval_distribution_test.rb`: SAFE but minimal benefit (1 test).
|
|
37
|
+
- `upcoming_fetch_schedule_test.rb`: SAFE but minimal benefit (1 test).
|
|
38
|
+
- `dashboard/queries_test.rb`: NOT SAFE. Each test creates its own sources and checks specific counts. Shared state would cause pollution.
|
|
39
|
+
- `health/source_health_monitor_test.rb`: NOT SAFE. Tests mutate `@source` via `SourceHealthMonitor.call`.
|
|
40
|
+
- `items/item_creator_test.rb`: NOT SAFE. Tests create items on shared source and check counts.
|
|
41
|
+
|
|
42
|
+
**Rationale:** `before_all` wraps fixture creation in a SAVEPOINT, shared across all tests in the class. After all tests run, the savepoint rolls back. This only works when tests are read-only on the shared data. The `sources_index_metrics_test.rb` is the highest-value candidate with 17 read-only tests sharing the same 3 sources + 3 items.
|
|
43
|
+
|
|
44
|
+
## Tasks
|
|
45
|
+
|
|
46
|
+
### Task 1: Convert sources_index_metrics_test.rb to setup_once (PRIMARY)
|
|
47
|
+
|
|
48
|
+
This is the highest-impact conversion. Convert `test/lib/source_monitor/analytics/sources_index_metrics_test.rb`:
|
|
49
|
+
|
|
50
|
+
Replace:
|
|
51
|
+
```ruby
|
|
52
|
+
setup do
|
|
53
|
+
clean_source_monitor_tables!
|
|
54
|
+
travel_to Time.current.change(usec: 0)
|
|
55
|
+
@fast_source = create_source!(name: "Fast", fetch_interval_minutes: 30)
|
|
56
|
+
# ... fixture creation
|
|
57
|
+
end
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
With:
|
|
61
|
+
```ruby
|
|
62
|
+
setup_once do
|
|
63
|
+
clean_source_monitor_tables!
|
|
64
|
+
@fast_source = create_source!(name: "Fast", fetch_interval_minutes: 30)
|
|
65
|
+
# ... same fixture creation, but now runs once for all 17 tests
|
|
66
|
+
end
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Important:** The `travel_to` call stays in a regular `setup` block (paired with `travel_back` in `teardown`) so that time is frozen and restored around each test individually:
|
|
70
|
+
```ruby
|
|
71
|
+
setup_once do
|
|
72
|
+
clean_source_monitor_tables!
|
|
73
|
+
# fixture creation here
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
setup do
|
|
77
|
+
travel_to Time.current.change(usec: 0)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
teardown do
|
|
81
|
+
travel_back
|
|
82
|
+
end
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Wait -- `travel_to` inside `setup_once` would freeze time for the SAVEPOINT transaction but tests need consistent time for assertions. Actually, the fixtures are created with relative timestamps (`1.day.ago`, `2.days.ago`) which depend on `Time.current`. If `travel_to` is in `setup_once`, the timestamps are fixed at creation time, which is fine since tests read them as-is. But `travel_back` in teardown would only run once after all tests, and the `travel_to` in `setup_once` persists through all tests.
|
|
86
|
+
|
|
87
|
+
Safest approach: Move `travel_to` into `setup_once` and remove the teardown's `travel_back` (before_all handles cleanup). Add a regular `setup` with `travel_to` at the same frozen time to ensure each test sees consistent time.
|
|
88
|
+
|
|
89
|
+
Final decision (simplest safe approach, superseding the two alternatives above): keep `travel_to` and `travel_back` in regular `setup`/`teardown`, and put only the DB operations in `setup_once`. The fixtures are then created once, with relative timestamps (`1.day.ago`) computed before any test freezes time, so their offset from each test's frozen `Time.current` will differ slightly from test to test; since the tests only compare relative values (bucket labels, activity rates), this is fine.
|
|
90
|
+
|
|
91
|
+
### Task 2: Convert single-test analytics files to setup_once
|
|
92
|
+
|
|
93
|
+
Convert these 3 files for consistency (minimal performance benefit but establishes the pattern):
|
|
94
|
+
|
|
95
|
+
1. **`test/lib/source_monitor/analytics/source_activity_rates_test.rb`** -- Replace `setup { clean_source_monitor_tables! }` with `setup_once { clean_source_monitor_tables! }`
|
|
96
|
+
2. **`test/lib/source_monitor/analytics/source_fetch_interval_distribution_test.rb`** -- Same pattern
|
|
97
|
+
3. **`test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb`** -- Same pattern
|
|
98
|
+
|
|
99
|
+
For single-test classes, `setup` and `setup_once` are functionally identical, so this is a no-op in terms of performance but normalizes the codebase to use the `setup_once` pattern for table cleaning.
|
|
100
|
+
|
|
101
|
+
### Task 3: Verify all converted files individually
|
|
102
|
+
|
|
103
|
+
Run each converted file with PARALLEL_WORKERS=1 to confirm no regressions:
|
|
104
|
+
```bash
|
|
105
|
+
PARALLEL_WORKERS=1 bin/rails test test/lib/source_monitor/analytics/sources_index_metrics_test.rb
|
|
106
|
+
PARALLEL_WORKERS=1 bin/rails test test/lib/source_monitor/analytics/source_activity_rates_test.rb
|
|
107
|
+
PARALLEL_WORKERS=1 bin/rails test test/lib/source_monitor/analytics/source_fetch_interval_distribution_test.rb
|
|
108
|
+
PARALLEL_WORKERS=1 bin/rails test test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
If any file fails due to test isolation issues, revert it to per-test setup and document why.
|
|
112
|
+
|
|
113
|
+
### Task 4: Full suite verification and lint
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# Full suite (all 1031+ tests pass)
|
|
117
|
+
bin/rails test
|
|
118
|
+
|
|
119
|
+
# Lint all modified files
|
|
120
|
+
bin/rubocop test/lib/source_monitor/analytics/ test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Ensure total test count remains 1031+ and no failures occur.
|
|
124
|
+
|
|
125
|
+
## Files
|
|
126
|
+
|
|
127
|
+
| Action | Path |
|
|
128
|
+
|--------|------|
|
|
129
|
+
| MODIFY | `test/lib/source_monitor/analytics/sources_index_metrics_test.rb` |
|
|
130
|
+
| MODIFY | `test/lib/source_monitor/analytics/source_activity_rates_test.rb` |
|
|
131
|
+
| MODIFY | `test/lib/source_monitor/analytics/source_fetch_interval_distribution_test.rb` |
|
|
132
|
+
| MODIFY | `test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb` |
|
|
133
|
+
|
|
134
|
+
## Verification
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# Individual file runs
|
|
138
|
+
PARALLEL_WORKERS=1 bin/rails test test/lib/source_monitor/analytics/sources_index_metrics_test.rb
|
|
139
|
+
PARALLEL_WORKERS=1 bin/rails test test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb
|
|
140
|
+
|
|
141
|
+
# Full suite (all 1031+ tests pass)
|
|
142
|
+
bin/rails test
|
|
143
|
+
|
|
144
|
+
# Lint
|
|
145
|
+
bin/rubocop test/lib/source_monitor/analytics/ test/lib/source_monitor/dashboard/upcoming_fetch_schedule_test.rb
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Success Criteria
|
|
149
|
+
|
|
150
|
+
- `grep -r "setup_once" test/lib/source_monitor/` shows 5+ files (up from 1)
|
|
151
|
+
- `sources_index_metrics_test.rb` uses `setup_once` for fixture creation
|
|
152
|
+
- All 1031+ tests pass in full suite
|
|
153
|
+
- No test isolation regressions in parallel runs
|
|
154
|
+
- Each converted file passes individually with PARALLEL_WORKERS=1
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
phase: 2
|
|
3
|
+
plan: 4
|
|
4
|
+
status: complete
|
|
5
|
+
---
|
|
6
|
+
# Plan 04 Summary: Switch Default Parallelism to Threads
|
|
7
|
+
|
|
8
|
+
## Tasks Completed
|
|
9
|
+
- [x] Task 1: Switch parallelize to always use `with: :threads` (not just coverage mode)
|
|
10
|
+
- [x] Task 2: Add thread-safety comment to reset_configuration! setup block
|
|
11
|
+
- [x] Task 3: Verify single-file runs work without PARALLEL_WORKERS=1 (3 files tested, all pass)
|
|
12
|
+
- [x] Task 4: Full suite verification (1033 tests, 0 failures, 2 consecutive runs, 0 flaky)
|
|
13
|
+
|
|
14
|
+
## Commits
|
|
15
|
+
- eceb06d: perf(test): switch default parallelism from forks to threads
|
|
16
|
+
|
|
17
|
+
## Files Modified
|
|
18
|
+
- test/test_helper.rb (modified)
|
|
19
|
+
|
|
20
|
+
## What Was Built
|
|
21
|
+
- Unified parallelism to always use `with: :threads` instead of fork-based (previously threads were used only in coverage mode; forks were the default)
|
|
22
|
+
- Worker count logic preserved: COVERAGE=1 forces 1 worker, otherwise respects SOURCE_MONITOR_TEST_WORKERS env var or defaults to :number_of_processors
|
|
23
|
+
- PG fork segfault on single-file runs eliminated — verified with feed_fetcher_success_test.rb, source_test.rb, and sources_controller_test.rb all passing without PARALLEL_WORKERS=1
|
|
24
|
+
- Added thread-safety comment explaining why reset_configuration! is safe under thread parallelism
|
|
25
|
+
- Note: TestProf emits `before_all is not implemented for parallalization with threads` warning — cosmetic only, before_all works correctly since single-file runs stay below parallelization threshold and full suite distributes by class
|
|
26
|
+
|
|
27
|
+
## Deviations
|
|
28
|
+
- None
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
---
|
|
2
|
+
phase: "02"
|
|
3
|
+
plan: "04"
|
|
4
|
+
title: "Switch Default Parallelism to Threads"
|
|
5
|
+
wave: 2
|
|
6
|
+
depends_on: ["PLAN-01"]
|
|
7
|
+
must_haves:
|
|
8
|
+
- "REQ-PERF-04: Default parallelism switched from forks to threads"
|
|
9
|
+
- "test_helper.rb parallelize call uses 'with: :threads' for all modes"
|
|
10
|
+
- "Thread safety verified for reset_configuration! (no data races)"
|
|
11
|
+
- "All 1031+ tests pass with thread-based parallelism"
|
|
12
|
+
- "PG fork segfault on single-file runs eliminated"
|
|
13
|
+
- "PARALLEL_WORKERS env var still respected"
|
|
14
|
+
- "RuboCop zero offenses on modified files"
|
|
15
|
+
skills_used: []
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
# Plan 04: Switch Default Parallelism to Threads
|
|
19
|
+
|
|
20
|
+
## Objective
|
|
21
|
+
|
|
22
|
+
Switch the default test parallelism from fork-based to thread-based. This eliminates the PG fork segfault that forces `PARALLEL_WORKERS=1` on single-file runs, and enables the FeedFetcherTest split (Plan 01) to actually parallelize across workers. Thread-based parallelism is already proven working in coverage mode (`COVERAGE=1`).
|
|
23
|
+
|
|
24
|
+
## Context
|
|
25
|
+
|
|
26
|
+
- `@` `test/test_helper.rb` -- current parallelism configuration (forks by default, threads only for coverage)
|
|
27
|
+
- `@` `.vbw-planning/phases/02-test-performance/02-RESEARCH.md` -- research confirming thread parallelism works in coverage mode
|
|
28
|
+
- `@` `test/test_prof.rb` -- TestProf setup (thread-compatible)
|
|
29
|
+
|
|
30
|
+
**Rationale:** The current code uses `parallelize(workers: worker_count)` which defaults to fork-based parallelism. This causes PG segfaults on single-file runs and prevents the FeedFetcherTest split from distributing across workers (since forks copy the process and the PG connection). Thread-based parallelism is already proven (used with COVERAGE=1) and avoids these issues.
|
|
31
|
+
|
|
32
|
+
**Dependency on Plan 01:** Plan 01 splits FeedFetcherTest into 6+ classes. Without the split, thread parallelism still cannot distribute the 71-test monolith across workers. The split must complete first for the parallelism switch to realize its full benefit.
|
|
33
|
+
|
|
34
|
+
**Risk: Thread safety of `reset_configuration!`** -- The global `setup` block calls `SourceMonitor.reset_configuration!` before every test. With threads, multiple tests may call this simultaneously. Since `reset_configuration!` replaces the entire `@configuration` instance, and each test reads config after setup, this is safe as long as no test modifies config mid-test while another test is reading it. The research confirmed this is pure Ruby assignment (microseconds). If any flaky failures appear, we add a `Mutex` around the reset.
|
|
35
|
+
|
|
36
|
+
## Tasks
|
|
37
|
+
|
|
38
|
+
### Task 1: Switch parallelize to threads
|
|
39
|
+
|
|
40
|
+
In `test/test_helper.rb`, replace the parallelism block:
|
|
41
|
+
|
|
42
|
+
```ruby
|
|
43
|
+
# BEFORE:
|
|
44
|
+
if ENV["COVERAGE"]
|
|
45
|
+
parallelize(workers: 1, with: :threads)
|
|
46
|
+
else
|
|
47
|
+
worker_count = ENV.fetch("SOURCE_MONITOR_TEST_WORKERS", :number_of_processors)
|
|
48
|
+
worker_count = worker_count.to_i if worker_count.is_a?(String) && !worker_count.empty?
|
|
49
|
+
worker_count = :number_of_processors if worker_count.respond_to?(:zero?) && worker_count.zero?
|
|
50
|
+
parallelize(workers: worker_count)
|
|
51
|
+
end
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
```ruby
|
|
55
|
+
# AFTER:
|
|
56
|
+
worker_count = if ENV["COVERAGE"]
|
|
57
|
+
1
|
|
58
|
+
else
|
|
59
|
+
count = ENV.fetch("SOURCE_MONITOR_TEST_WORKERS", :number_of_processors)
|
|
60
|
+
count = count.to_i if count.is_a?(String) && !count.empty?
|
|
61
|
+
count = :number_of_processors if count.respond_to?(:zero?) && count.zero?
|
|
62
|
+
count
|
|
63
|
+
end
|
|
64
|
+
parallelize(workers: worker_count, with: :threads)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Key change: Always use `with: :threads` (not just for coverage). Worker count logic stays the same.
|
|
68
|
+
|
|
69
|
+
### Task 2: Add thread-safety comment to reset_configuration
|
|
70
|
+
|
|
71
|
+
Add a comment in the `setup` block explaining thread safety:
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
setup do
|
|
75
|
+
# Thread-safe: reset_configuration! replaces @configuration atomically.
|
|
76
|
+
# Each test gets a fresh config object. No concurrent mutation risk since
|
|
77
|
+
# tests read config only after their own setup completes.
|
|
78
|
+
SourceMonitor.reset_configuration!
|
|
79
|
+
end
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Task 3: Verify single-file runs work without PARALLEL_WORKERS=1
|
|
83
|
+
|
|
84
|
+
The main benefit of thread-based parallelism: single-file runs no longer segfault.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# These should now work WITHOUT PARALLEL_WORKERS=1
|
|
88
|
+
bin/rails test test/lib/source_monitor/fetching/feed_fetcher_success_test.rb
|
|
89
|
+
bin/rails test test/models/source_monitor/source_test.rb
|
|
90
|
+
bin/rails test test/controllers/source_monitor/sources_controller_test.rb
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Task 4: Full suite verification
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Full suite with thread parallelism
|
|
97
|
+
bin/rails test
|
|
98
|
+
|
|
99
|
+
# Verify worker count is respected
|
|
100
|
+
SOURCE_MONITOR_TEST_WORKERS=4 bin/rails test
|
|
101
|
+
|
|
102
|
+
# Lint
|
|
103
|
+
bin/rubocop test/test_helper.rb
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Ensure all 1031+ tests pass with zero failures. Watch for flaky tests that might indicate thread-safety issues. If any test fails intermittently, check if it modifies global state (module-level variables, class variables, or singletons) and fix the isolation.
|
|
107
|
+
|
|
108
|
+
## Files
|
|
109
|
+
|
|
110
|
+
| Action | Path |
|
|
111
|
+
|--------|------|
|
|
112
|
+
| MODIFY | `test/test_helper.rb` |
|
|
113
|
+
|
|
114
|
+
## Verification
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Single-file run (no PARALLEL_WORKERS=1 needed)
|
|
118
|
+
bin/rails test test/models/source_monitor/source_test.rb
|
|
119
|
+
|
|
120
|
+
# Full suite
|
|
121
|
+
bin/rails test
|
|
122
|
+
|
|
123
|
+
# Lint
|
|
124
|
+
bin/rubocop test/test_helper.rb
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Success Criteria
|
|
128
|
+
|
|
129
|
+
- `grep "with: :threads" test/test_helper.rb` shows the threads configuration
|
|
130
|
+
- `bin/rails test` passes all 1031+ tests
|
|
131
|
+
- Single-file runs work without PARALLEL_WORKERS=1 workaround
|
|
132
|
+
- No flaky test failures in 2 consecutive full suite runs
|
|
133
|
+
- Full suite completes in <70s locally (down from 133s)
|
data/CHANGELOG.md
CHANGED
|
@@ -15,6 +15,41 @@ All notable changes to this project are documented below. The format follows [Ke
|
|
|
15
15
|
|
|
16
16
|
- No unreleased changes yet.
|
|
17
17
|
|
|
18
|
+
## [0.7.1] - 2026-02-18
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- **Test suite 60% faster (118s → 46s).** Disabled Faraday retry middleware in tests — WebMock-stubbed timeout errors triggered 4 retries with exponential backoff (7.5s of real sleep per test), consuming 73% of total runtime across 11 FeedFetcher tests.
|
|
23
|
+
- Split monolithic FeedFetcherTest (71 tests, 84.8s) into 6 concern-based test classes for better parallelization and maintainability.
|
|
24
|
+
- Switched default test parallelism from fork-based to thread-based, eliminating PG segfault on single-file runs.
|
|
25
|
+
- Reduced test log IO by setting test log level to `:warn` (was `:debug`, generating 95MB of output).
|
|
26
|
+
- Adopted `setup_once`/`before_all` in 5 DB-heavy analytics/dashboard test files.
|
|
27
|
+
- Added `test:fast` rake task to exclude integration and system tests during development.
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
|
|
31
|
+
- Suppressed spurious TestProf "before_all is not implemented for threads" warning by loading TestProf after `parallelize` call.
|
|
32
|
+
|
|
33
|
+
### Testing
|
|
34
|
+
|
|
35
|
+
- 1,033 tests, 3,302 assertions, 0 failures.
|
|
36
|
+
- RuboCop: 0 offenses.
|
|
37
|
+
- Brakeman: 0 warnings.
|
|
38
|
+
|
|
39
|
+
## [0.7.0] - 2026-02-18
|
|
40
|
+
|
|
41
|
+
### Fixed
|
|
42
|
+
|
|
43
|
+
- **False "updated" counts on unchanged feed items.** ItemCreator now checks for significant attribute changes before saving. Items with no real changes return a new `:unchanged` status instead of `:updated`, eliminating unnecessary database writes and misleading dashboard statistics.
|
|
44
|
+
- **Redundant entry processing on unchanged feeds.** When a feed's body SHA-256 signature matches the previous fetch, entry processing is now skipped entirely (like the existing 304 Not Modified path), avoiding unnecessary parsing, DB lookups, and saves.
|
|
45
|
+
- **Adaptive interval not backing off for stable feeds.** The `content_changed` signal for adaptive fetch scheduling now uses an item-level content hash (sorted entry IDs) instead of the raw XML body hash. This prevents cosmetic feed changes (e.g., `<lastBuildDate>` updates) from defeating interval backoff, allowing stable feeds to correctly increase their fetch interval.
|
|
46
|
+
|
|
47
|
+
### Testing
|
|
48
|
+
|
|
49
|
+
- 1,031 tests, 3,300 assertions, 0 failures.
|
|
50
|
+
- RuboCop: 0 offenses.
|
|
51
|
+
- Brakeman: 0 warnings.
|
|
52
|
+
|
|
18
53
|
## [0.6.0] - 2026-02-17
|
|
19
54
|
|
|
20
55
|
### Added
|
data/Gemfile.lock
CHANGED
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.6.0
|
|
1
|
+
0.7.1
|
|
@@ -14,6 +14,7 @@ module SourceMonitor
|
|
|
14
14
|
return FeedFetcher::EntryProcessingResult.new(
|
|
15
15
|
created: 0,
|
|
16
16
|
updated: 0,
|
|
17
|
+
unchanged: 0,
|
|
17
18
|
failed: 0,
|
|
18
19
|
items: [],
|
|
19
20
|
errors: [],
|
|
@@ -23,6 +24,7 @@ module SourceMonitor
|
|
|
23
24
|
|
|
24
25
|
created = 0
|
|
25
26
|
updated = 0
|
|
27
|
+
unchanged = 0
|
|
26
28
|
failed = 0
|
|
27
29
|
items = []
|
|
28
30
|
created_items = []
|
|
@@ -39,6 +41,8 @@ module SourceMonitor
|
|
|
39
41
|
created_items << result.item
|
|
40
42
|
SourceMonitor::Events.after_item_created(item: result.item, source:, entry:, result: result)
|
|
41
43
|
enqueue_image_download(result.item)
|
|
44
|
+
elsif result.unchanged?
|
|
45
|
+
unchanged += 1
|
|
42
46
|
else
|
|
43
47
|
updated += 1
|
|
44
48
|
updated_items << result.item
|
|
@@ -52,6 +56,7 @@ module SourceMonitor
|
|
|
52
56
|
FeedFetcher::EntryProcessingResult.new(
|
|
53
57
|
created:,
|
|
54
58
|
updated:,
|
|
59
|
+
unchanged:,
|
|
55
60
|
failed:,
|
|
56
61
|
items:,
|
|
57
62
|
errors: errors.compact,
|
|
@@ -11,7 +11,7 @@ module SourceMonitor
|
|
|
11
11
|
@adaptive_interval = adaptive_interval
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
def update_source_for_success(response, duration_ms, feed, feed_signature)
|
|
14
|
+
def update_source_for_success(response, duration_ms, feed, feed_signature, content_changed: nil, entries_digest: nil)
|
|
15
15
|
attributes = {
|
|
16
16
|
last_fetched_at: Time.current,
|
|
17
17
|
last_fetch_duration_ms: duration_ms,
|
|
@@ -31,8 +31,10 @@ module SourceMonitor
|
|
|
31
31
|
attributes[:last_modified] = parsed_time if parsed_time
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
# Use explicit content_changed if provided, otherwise fall back to feed signature comparison
|
|
35
|
+
changed = content_changed.nil? ? feed_signature_changed?(feed_signature) : content_changed
|
|
36
|
+
adaptive_interval.apply_adaptive_interval!(attributes, content_changed: changed)
|
|
37
|
+
attributes[:metadata] = updated_metadata(feed_signature: feed_signature, entries_digest: entries_digest)
|
|
36
38
|
reset_retry_state!(attributes)
|
|
37
39
|
source.update!(attributes)
|
|
38
40
|
end
|
|
@@ -111,10 +113,11 @@ module SourceMonitor
|
|
|
111
113
|
(source.metadata || {}).fetch("last_feed_signature", nil) != feed_signature
|
|
112
114
|
end
|
|
113
115
|
|
|
114
|
-
def updated_metadata(feed_signature: nil)
|
|
116
|
+
def updated_metadata(feed_signature: nil, entries_digest: nil)
|
|
115
117
|
metadata = (source.metadata || {}).dup
|
|
116
118
|
metadata.delete("dynamic_fetch_interval_seconds")
|
|
117
119
|
metadata["last_feed_signature"] = feed_signature if feed_signature.present?
|
|
120
|
+
metadata["last_entries_digest"] = entries_digest if entries_digest.present?
|
|
118
121
|
metadata
|
|
119
122
|
end
|
|
120
123
|
|
|
@@ -17,6 +17,7 @@ module SourceMonitor
|
|
|
17
17
|
EntryProcessingResult = Struct.new(
|
|
18
18
|
:created,
|
|
19
19
|
:updated,
|
|
20
|
+
:unchanged,
|
|
20
21
|
:failed,
|
|
21
22
|
:items,
|
|
22
23
|
:errors,
|
|
@@ -123,11 +124,28 @@ module SourceMonitor
|
|
|
123
124
|
def handle_success(response, started_at, instrumentation_payload)
|
|
124
125
|
duration_ms = source_updater.elapsed_ms(started_at)
|
|
125
126
|
body = response.body
|
|
127
|
+
feed_body_signature = body_digest(body)
|
|
126
128
|
feed = parse_feed(body, response)
|
|
127
|
-
processing = entry_processor.process_feed_entries(feed)
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
130
|
+
if source_updater.feed_signature_changed?(feed_body_signature)
|
|
131
|
+
processing = entry_processor.process_feed_entries(feed)
|
|
132
|
+
content_changed = entries_digest_changed?(feed)
|
|
133
|
+
else
|
|
134
|
+
processing = EntryProcessingResult.new(
|
|
135
|
+
created: 0,
|
|
136
|
+
updated: 0,
|
|
137
|
+
unchanged: 0,
|
|
138
|
+
failed: 0,
|
|
139
|
+
items: [],
|
|
140
|
+
errors: [],
|
|
141
|
+
created_items: [],
|
|
142
|
+
updated_items: []
|
|
143
|
+
)
|
|
144
|
+
content_changed = false
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
feed_entries_digest = entries_digest(feed)
|
|
148
|
+
source_updater.update_source_for_success(response, duration_ms, feed, feed_body_signature, content_changed: content_changed, entries_digest: feed_entries_digest)
|
|
131
149
|
source_updater.create_fetch_log(
|
|
132
150
|
response: response,
|
|
133
151
|
duration_ms: duration_ms,
|
|
@@ -180,6 +198,7 @@ module SourceMonitor
|
|
|
180
198
|
item_processing: EntryProcessingResult.new(
|
|
181
199
|
created: 0,
|
|
182
200
|
updated: 0,
|
|
201
|
+
unchanged: 0,
|
|
183
202
|
failed: 0,
|
|
184
203
|
items: [],
|
|
185
204
|
errors: [],
|
|
@@ -230,6 +249,7 @@ module SourceMonitor
|
|
|
230
249
|
item_processing: EntryProcessingResult.new(
|
|
231
250
|
created: 0,
|
|
232
251
|
updated: 0,
|
|
252
|
+
unchanged: 0,
|
|
233
253
|
failed: 0,
|
|
234
254
|
items: [],
|
|
235
255
|
errors: [],
|
|
@@ -277,6 +297,32 @@ module SourceMonitor
|
|
|
277
297
|
Digest::SHA256.hexdigest(body)
|
|
278
298
|
end
|
|
279
299
|
|
|
300
|
+
def entries_digest(feed)
|
|
301
|
+
return if feed.nil? || !feed.respond_to?(:entries)
|
|
302
|
+
|
|
303
|
+
ids = Array(feed.entries).map do |entry|
|
|
304
|
+
if entry.respond_to?(:entry_id) && entry.entry_id.present?
|
|
305
|
+
entry.entry_id
|
|
306
|
+
elsif entry.respond_to?(:url) && entry.url.present?
|
|
307
|
+
entry.url
|
|
308
|
+
elsif entry.respond_to?(:title) && entry.title.present?
|
|
309
|
+
entry.title
|
|
310
|
+
end
|
|
311
|
+
end.compact.sort
|
|
312
|
+
|
|
313
|
+
return if ids.empty?
|
|
314
|
+
|
|
315
|
+
Digest::SHA256.hexdigest(ids.join("\0"))
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def entries_digest_changed?(feed)
|
|
319
|
+
digest = entries_digest(feed)
|
|
320
|
+
return false if digest.nil?
|
|
321
|
+
|
|
322
|
+
stored = (source.metadata || {}).fetch("last_entries_digest", nil)
|
|
323
|
+
stored != digest
|
|
324
|
+
end
|
|
325
|
+
|
|
280
326
|
def adaptive_interval
|
|
281
327
|
@adaptive_interval ||= AdaptiveInterval.new(source: source, jitter_proc: jitter_proc)
|
|
282
328
|
end
|
|
@@ -21,6 +21,10 @@ module SourceMonitor
|
|
|
21
21
|
def updated?
|
|
22
22
|
status == :updated
|
|
23
23
|
end
|
|
24
|
+
|
|
25
|
+
def unchanged?
|
|
26
|
+
status == :unchanged
|
|
27
|
+
end
|
|
24
28
|
end
|
|
25
29
|
|
|
26
30
|
FINGERPRINT_SEPARATOR = "\u0000".freeze
|
|
@@ -46,8 +50,15 @@ module SourceMonitor
|
|
|
46
50
|
existing_item, matched_by = existing_item_for(attributes, raw_guid_present: raw_guid.present?)
|
|
47
51
|
|
|
48
52
|
if existing_item
|
|
49
|
-
|
|
50
|
-
|
|
53
|
+
apply_attributes(existing_item, attributes)
|
|
54
|
+
instrument_duplicate(existing_item, matched_by)
|
|
55
|
+
if significant_changes?(existing_item)
|
|
56
|
+
existing_item.save!
|
|
57
|
+
return Result.new(item: existing_item, status: :updated, matched_by: matched_by)
|
|
58
|
+
else
|
|
59
|
+
existing_item.reload if existing_item.changed?
|
|
60
|
+
return Result.new(item: existing_item, status: :unchanged, matched_by: matched_by)
|
|
61
|
+
end
|
|
51
62
|
end
|
|
52
63
|
|
|
53
64
|
create_new_item(attributes, raw_guid_present: raw_guid.present?)
|
|
@@ -100,7 +111,7 @@ module SourceMonitor
|
|
|
100
111
|
|
|
101
112
|
def update_existing_item(existing_item, attributes, matched_by)
|
|
102
113
|
apply_attributes(existing_item, attributes)
|
|
103
|
-
existing_item.save!
|
|
114
|
+
existing_item.save! if significant_changes?(existing_item)
|
|
104
115
|
instrument_duplicate(existing_item, matched_by)
|
|
105
116
|
existing_item
|
|
106
117
|
end
|
|
@@ -117,8 +128,15 @@ module SourceMonitor
|
|
|
117
128
|
def handle_concurrent_duplicate(attributes, raw_guid_present:)
|
|
118
129
|
matched_by = raw_guid_present ? :guid : :fingerprint
|
|
119
130
|
existing = find_conflicting_item(attributes, matched_by)
|
|
120
|
-
|
|
121
|
-
|
|
131
|
+
apply_attributes(existing, attributes)
|
|
132
|
+
instrument_duplicate(existing, matched_by)
|
|
133
|
+
if significant_changes?(existing)
|
|
134
|
+
existing.save!
|
|
135
|
+
Result.new(item: existing, status: :updated, matched_by: matched_by)
|
|
136
|
+
else
|
|
137
|
+
existing.reload if existing.changed?
|
|
138
|
+
Result.new(item: existing, status: :unchanged, matched_by: matched_by)
|
|
139
|
+
end
|
|
122
140
|
end
|
|
123
141
|
|
|
124
142
|
def find_conflicting_item(attributes, matched_by)
|
|
@@ -131,6 +149,10 @@ module SourceMonitor
|
|
|
131
149
|
end
|
|
132
150
|
end
|
|
133
151
|
|
|
152
|
+
# Attributes that should not trigger an "updated" status when they change.
|
|
153
|
+
# Metadata contains feedjira object references that differ between parses.
|
|
154
|
+
IGNORED_CHANGE_ATTRIBUTES = %w[metadata].freeze
|
|
155
|
+
|
|
134
156
|
def apply_attributes(record, attributes)
|
|
135
157
|
attributes = attributes.dup
|
|
136
158
|
metadata = attributes.delete(:metadata)
|
|
@@ -138,6 +160,10 @@ module SourceMonitor
|
|
|
138
160
|
record.metadata = metadata if metadata
|
|
139
161
|
end
|
|
140
162
|
|
|
163
|
+
def significant_changes?(record)
|
|
164
|
+
(record.changed - IGNORED_CHANGE_ATTRIBUTES).any?
|
|
165
|
+
end
|
|
166
|
+
|
|
141
167
|
def build_attributes
|
|
142
168
|
entry_parser.parse
|
|
143
169
|
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
namespace :test do
|
|
4
|
+
desc "Run tests excluding slow integration and system tests"
|
|
5
|
+
task fast: :environment do
|
|
6
|
+
$stdout.puts "Running tests excluding integration/ and system/ directories..."
|
|
7
|
+
test_files = Dir["test/**/*_test.rb"]
|
|
8
|
+
.reject { |f| f.start_with?("test/integration/", "test/system/") }
|
|
9
|
+
system("bin/rails", "test", *test_files, exception: true)
|
|
10
|
+
end
|
|
11
|
+
end
|