source_monitor 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/VERSION +1 -1
- data/app/jobs/source_monitor/download_content_images_job.rb +2 -0
- data/app/jobs/source_monitor/favicon_fetch_job.rb +2 -0
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +17 -9
- data/lib/tasks/source_monitor_tasks.rake +67 -15
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ff36371d856b56cbafa201913319fda084fa7083f1483c7c3051bb595055efbe
|
|
4
|
+
data.tar.gz: 54dfba3bb65b0577d09ecda2d2bba42caa27e0f2c8abfaad35e8a7e122042cef
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 18ff88e88b9d3d6db95f1f5048b3d4d05eef1f93d7620432681dc12f64dbc4343df1400595fa81841ab5f7a2e294e41c378f4c6c989c9d209062fa99f19038f3
|
|
7
|
+
data.tar.gz: a29a25f83a159a634eb68871396695b3309f4ca7a50d23a37b0d775ac28c5f2443dcf3201de917e5c5d6eb0357cbb00688cc359e52661c86a822bdbc0aece365
|
data/CHANGELOG.md
CHANGED
|
@@ -15,6 +15,16 @@ All notable changes to this project are documented below. The format follows [Ke
|
|
|
15
15
|
|
|
16
16
|
- No unreleased changes yet.
|
|
17
17
|
|
|
18
|
+
## [0.10.1] - 2026-02-25
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
|
|
22
|
+
- **Backfill word counts rake task optimized for large datasets.** Replaced row-by-row saves with `insert_all` (Phase 1) and `upsert_all` (Phase 2), eliminating N+1 queries and `touch` cascades. ~1000x query reduction for large datasets.
|
|
23
|
+
- **ActiveRecord::Deadlocked no longer silently swallowed in jobs.** `DownloadContentImagesJob` and `FaviconFetchJob` previously caught all `StandardError` including database deadlocks, causing Active Storage operations to fail silently during concurrent access. Deadlocks now propagate so the job framework can retry.
|
|
24
|
+
- **Thread-safe configuration access.** `SourceMonitor.configure`, `.config`, and `.reset_configuration!` now synchronize via `Monitor` to prevent race conditions during parallel test execution.
|
|
25
|
+
- **Flaky seed-dependent test failures resolved.** Added `clean_source_monitor_tables!` to `StaggerFetchTimesTaskTest` to prevent cross-test database contamination from `setup_once` records leaking via test-prof with thread-based parallelism.
|
|
26
|
+
- **Suppressed spurious DeprecationRegistry warning in test output.** The "http.timeout already exists" warning from the deprecation skip-path test no longer leaks to stderr.
|
|
27
|
+
|
|
18
28
|
## [0.10.0] - 2026-02-24
|
|
19
29
|
|
|
20
30
|
### Added
|
data/Gemfile.lock
CHANGED
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.10.0
|
|
1
|
+
0.10.1
|
data/app/jobs/source_monitor/download_content_images_job.rb
CHANGED
|
@@ -60,6 +60,8 @@ module SourceMonitor
|
|
|
60
60
|
|
|
61
61
|
# Generate a serving URL for the blob
|
|
62
62
|
url_mapping[image_url] = Rails.application.routes.url_helpers.rails_blob_path(blob, only_path: true)
|
|
63
|
+
rescue ActiveRecord::Deadlocked
|
|
64
|
+
raise # let job framework retry on database deadlock
|
|
63
65
|
rescue StandardError
|
|
64
66
|
# Individual image failure should not block others.
|
|
65
67
|
# Original URL will be preserved (graceful fallback).
|
data/app/jobs/source_monitor/favicon_fetch_job.rb
CHANGED
|
@@ -23,6 +23,8 @@ module SourceMonitor
|
|
|
23
23
|
else
|
|
24
24
|
record_failed_attempt(source)
|
|
25
25
|
end
|
|
26
|
+
rescue ActiveRecord::Deadlocked
|
|
27
|
+
raise # let job framework retry on database deadlock
|
|
26
28
|
rescue StandardError => error
|
|
27
29
|
record_failed_attempt(source) if source
|
|
28
30
|
log_error(source, error)
|
data/lib/source_monitor.rb
CHANGED
|
@@ -24,6 +24,7 @@ rescue LoadError
|
|
|
24
24
|
# Ransack powers search forms when available.
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
require "monitor"
|
|
27
28
|
require "source_monitor/version"
|
|
28
29
|
require "active_support/core_ext/module/redefine_method"
|
|
29
30
|
|
|
@@ -198,15 +199,20 @@ module SourceMonitor
|
|
|
198
199
|
autoload :StreamResponder, "source_monitor/turbo_streams/stream_responder"
|
|
199
200
|
end
|
|
200
201
|
|
|
202
|
+
CONFIG_MONITOR = Monitor.new
|
|
203
|
+
private_constant :CONFIG_MONITOR
|
|
204
|
+
|
|
201
205
|
class << self
|
|
202
206
|
def configure
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
207
|
+
CONFIG_MONITOR.synchronize do
|
|
208
|
+
yield config
|
|
209
|
+
config.check_deprecations!
|
|
210
|
+
SourceMonitor::ModelExtensions.reload!
|
|
211
|
+
end
|
|
206
212
|
end
|
|
207
213
|
|
|
208
214
|
def config
|
|
209
|
-
@config ||= Configuration.new
|
|
215
|
+
@config ||= CONFIG_MONITOR.synchronize { @config ||= Configuration.new }
|
|
210
216
|
end
|
|
211
217
|
|
|
212
218
|
def events
|
|
@@ -214,11 +220,13 @@ module SourceMonitor
|
|
|
214
220
|
end
|
|
215
221
|
|
|
216
222
|
def reset_configuration!
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
223
|
+
CONFIG_MONITOR.synchronize do
|
|
224
|
+
@config = Configuration.new
|
|
225
|
+
SourceMonitor::ModelExtensions.reload!
|
|
226
|
+
SourceMonitor::Health.setup!
|
|
227
|
+
SourceMonitor::Realtime.setup!
|
|
228
|
+
SourceMonitor::Dashboard::TurboBroadcaster.setup!
|
|
229
|
+
end
|
|
222
230
|
end
|
|
223
231
|
|
|
224
232
|
def queue_name(role)
|
data/lib/tasks/source_monitor_tasks.rake
CHANGED
|
@@ -1,32 +1,84 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
def backfill_resolve_source_scope
|
|
4
|
+
if ENV["SOURCE_IDS"].present?
|
|
5
|
+
ENV["SOURCE_IDS"].split(",").map(&:strip).map(&:to_i).reject(&:zero?)
|
|
6
|
+
elsif ENV["SOURCE_ID"].present?
|
|
7
|
+
id = ENV["SOURCE_ID"].to_i
|
|
8
|
+
id.positive? ? [ id ] : nil
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
|
|
3
12
|
namespace :source_monitor do
|
|
4
|
-
desc "Backfill word counts for existing item_content records."
|
|
13
|
+
desc "Backfill word counts for existing item_content records. " \
|
|
14
|
+
"Env vars: SOURCE_ID, SOURCE_IDS (comma-separated), BATCH_SIZE (default 500)."
|
|
5
15
|
task backfill_word_counts: :environment do
|
|
6
|
-
|
|
7
|
-
|
|
16
|
+
batch_size = (ENV["BATCH_SIZE"] || 500).to_i
|
|
17
|
+
source_ids = backfill_resolve_source_scope
|
|
18
|
+
sanitizer = ActionView::Base.full_sanitizer
|
|
19
|
+
|
|
20
|
+
# Phase 1: Batch-create ItemContent for items with feed content but no ItemContent
|
|
21
|
+
items_scope = SourceMonitor::Item
|
|
8
22
|
.where.not(content: [ nil, "" ])
|
|
9
23
|
.where.missing(:item_content)
|
|
24
|
+
items_scope = items_scope.where(source_id: source_ids) if source_ids
|
|
10
25
|
|
|
11
26
|
created = 0
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
27
|
+
now = Time.current
|
|
28
|
+
|
|
29
|
+
items_scope.select(:id, :content).find_in_batches(batch_size: batch_size) do |batch|
|
|
30
|
+
records = batch.filter_map do |item|
|
|
31
|
+
next if item.content.blank?
|
|
32
|
+
|
|
33
|
+
stripped = sanitizer.sanitize(item.content)
|
|
34
|
+
word_count = stripped.present? ? stripped.split.size : nil
|
|
35
|
+
|
|
36
|
+
{ item_id: item.id, feed_word_count: word_count, created_at: now, updated_at: now }
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
SourceMonitor::ItemContent.insert_all(records) if records.any?
|
|
40
|
+
created += records.size
|
|
41
|
+
puts "Phase 1: created #{created} ItemContent records..."
|
|
16
42
|
end
|
|
17
|
-
puts "
|
|
43
|
+
puts "Phase 1 complete: #{created} records created." if created > 0
|
|
18
44
|
|
|
19
|
-
# Phase 2:
|
|
20
|
-
|
|
45
|
+
# Phase 2: Batch-recompute word counts for existing ItemContent
|
|
46
|
+
contents_scope = SourceMonitor::ItemContent
|
|
47
|
+
if source_ids
|
|
48
|
+
contents_scope = contents_scope.joins(:item).where(SourceMonitor::Item.table_name => { source_id: source_ids })
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
total = contents_scope.count
|
|
21
52
|
processed = 0
|
|
53
|
+
updated = 0
|
|
54
|
+
|
|
55
|
+
contents_scope.includes(:item).find_in_batches(batch_size: batch_size) do |batch|
|
|
56
|
+
updates = batch.filter_map do |content|
|
|
57
|
+
feed_content = content.item&.content
|
|
58
|
+
scraped = content.scraped_content
|
|
59
|
+
|
|
60
|
+
feed_wc = if feed_content.present?
|
|
61
|
+
stripped = sanitizer.sanitize(feed_content)
|
|
62
|
+
stripped.present? ? stripped.split.size : nil
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
scraped_wc = scraped.present? ? scraped.split.size : nil
|
|
66
|
+
|
|
67
|
+
next if content.feed_word_count == feed_wc && content.scraped_word_count == scraped_wc
|
|
68
|
+
|
|
69
|
+
{ id: content.id, item_id: content.item_id, feed_word_count: feed_wc, scraped_word_count: scraped_wc }
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
if updates.any?
|
|
73
|
+
SourceMonitor::ItemContent.upsert_all(updates, unique_by: :id, update_only: %i[feed_word_count scraped_word_count])
|
|
74
|
+
updated += updates.size
|
|
75
|
+
end
|
|
22
76
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
processed += 1
|
|
26
|
-
puts "Processed #{processed}/#{total} records..." if (processed % 100).zero?
|
|
77
|
+
processed += batch.size
|
|
78
|
+
puts "Phase 2: #{processed}/#{total} checked, #{updated} updated..."
|
|
27
79
|
end
|
|
28
80
|
|
|
29
|
-
puts "Done.
|
|
81
|
+
puts "Done. Phase 1: #{created} created. Phase 2: #{updated}/#{total} updated."
|
|
30
82
|
end
|
|
31
83
|
|
|
32
84
|
namespace :cleanup do
|