source_monitor 0.10.0 → 0.10.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 303d253e46391a54167ab1396f8f855228fb4cd867dbcf22614c9aa75b9b2e30
4
- data.tar.gz: 19b54173bc76cb68615b44dd93fe1ac525e9260da83e4dbfa5311e9c71ccb73a
3
+ metadata.gz: ff36371d856b56cbafa201913319fda084fa7083f1483c7c3051bb595055efbe
4
+ data.tar.gz: 54dfba3bb65b0577d09ecda2d2bba42caa27e0f2c8abfaad35e8a7e122042cef
5
5
  SHA512:
6
- metadata.gz: 2ff0ad53a04b7685490ec6d0ae39d48906dcc92a9b16062a7cc056316dcef88c38a039372551f8a9ef2e0f3c9236a3d0a66aa150127be4d902d85c4d35a42230
7
- data.tar.gz: 11c424108aece6ae5b5866bebc79df7d1972f5df2c0091a3fd4720e4779284cefbb7222ff7fa31deae90b5777650ef5593ade5f0140aa9699e314a0b36e082a2
6
+ metadata.gz: 18ff88e88b9d3d6db95f1f5048b3d4d05eef1f93d7620432681dc12f64dbc4343df1400595fa81841ab5f7a2e294e41c378f4c6c989c9d209062fa99f19038f3
7
+ data.tar.gz: a29a25f83a159a634eb68871396695b3309f4ca7a50d23a37b0d775ac28c5f2443dcf3201de917e5c5d6eb0357cbb00688cc359e52661c86a822bdbc0aece365
data/CHANGELOG.md CHANGED
@@ -15,6 +15,16 @@ All notable changes to this project are documented below. The format follows [Ke
15
15
 
16
16
  - No unreleased changes yet.
17
17
 
18
+ ## [0.10.1] - 2026-02-25
19
+
20
+ ### Fixed
21
+
22
+ - **Backfill word counts rake task optimized for large datasets.** Replaced row-by-row saves with `insert_all` (Phase 1) and `upsert_all` (Phase 2), eliminating N+1 queries and `touch` cascades. ~1000x query reduction for large datasets.
23
+ - **ActiveRecord::Deadlocked no longer silently swallowed in jobs.** `DownloadContentImagesJob` and `FaviconFetchJob` previously caught all `StandardError` including database deadlocks, causing Active Storage operations to fail silently during concurrent access. Deadlocks now propagate so the job framework can retry.
24
+ - **Thread-safe configuration access.** `SourceMonitor.configure`, `.config`, and `.reset_configuration!` now synchronize via `Monitor` to prevent race conditions during parallel test execution.
25
+ - **Flaky seed-dependent test failures resolved.** Added `clean_source_monitor_tables!` to `StaggerFetchTimesTaskTest` to prevent cross-test database contamination from `setup_once` records leaking via test-prof with thread-based parallelism.
26
+ - **Suppressed spurious DeprecationRegistry warning in test output.** The "http.timeout already exists" warning from the deprecation skip-path test no longer leaks to stderr.
27
+
18
28
  ## [0.10.0] - 2026-02-24
19
29
 
20
30
  ### Added
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- source_monitor (0.10.0)
4
+ source_monitor (0.10.1)
5
5
  cssbundling-rails (~> 1.4)
6
6
  faraday (~> 2.9)
7
7
  faraday-follow_redirects (~> 0.4)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.0
1
+ 0.10.1
@@ -60,6 +60,8 @@ module SourceMonitor
60
60
 
61
61
  # Generate a serving URL for the blob
62
62
  url_mapping[image_url] = Rails.application.routes.url_helpers.rails_blob_path(blob, only_path: true)
63
+ rescue ActiveRecord::Deadlocked
64
+ raise # let job framework retry on database deadlock
63
65
  rescue StandardError
64
66
  # Individual image failure should not block others.
65
67
  # Original URL will be preserved (graceful fallback).
@@ -23,6 +23,8 @@ module SourceMonitor
23
23
  else
24
24
  record_failed_attempt(source)
25
25
  end
26
+ rescue ActiveRecord::Deadlocked
27
+ raise # let job framework retry on database deadlock
26
28
  rescue StandardError => error
27
29
  record_failed_attempt(source) if source
28
30
  log_error(source, error)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SourceMonitor
4
- VERSION = "0.10.0"
4
+ VERSION = "0.10.1"
5
5
  end
@@ -24,6 +24,7 @@ rescue LoadError
24
24
  # Ransack powers search forms when available.
25
25
  end
26
26
 
27
+ require "monitor"
27
28
  require "source_monitor/version"
28
29
  require "active_support/core_ext/module/redefine_method"
29
30
 
@@ -198,15 +199,20 @@ module SourceMonitor
198
199
  autoload :StreamResponder, "source_monitor/turbo_streams/stream_responder"
199
200
  end
200
201
 
202
+ CONFIG_MONITOR = Monitor.new
203
+ private_constant :CONFIG_MONITOR
204
+
201
205
  class << self
202
206
  def configure
203
- yield config
204
- config.check_deprecations!
205
- SourceMonitor::ModelExtensions.reload!
207
+ CONFIG_MONITOR.synchronize do
208
+ yield config
209
+ config.check_deprecations!
210
+ SourceMonitor::ModelExtensions.reload!
211
+ end
206
212
  end
207
213
 
208
214
  def config
209
- @config ||= Configuration.new
215
+ @config ||= CONFIG_MONITOR.synchronize { @config ||= Configuration.new }
210
216
  end
211
217
 
212
218
  def events
@@ -214,11 +220,13 @@ module SourceMonitor
214
220
  end
215
221
 
216
222
  def reset_configuration!
217
- @config = Configuration.new
218
- SourceMonitor::ModelExtensions.reload!
219
- SourceMonitor::Health.setup!
220
- SourceMonitor::Realtime.setup!
221
- SourceMonitor::Dashboard::TurboBroadcaster.setup!
223
+ CONFIG_MONITOR.synchronize do
224
+ @config = Configuration.new
225
+ SourceMonitor::ModelExtensions.reload!
226
+ SourceMonitor::Health.setup!
227
+ SourceMonitor::Realtime.setup!
228
+ SourceMonitor::Dashboard::TurboBroadcaster.setup!
229
+ end
222
230
  end
223
231
 
224
232
  def queue_name(role)
@@ -1,32 +1,84 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ def backfill_resolve_source_scope
4
+ if ENV["SOURCE_IDS"].present?
5
+ ENV["SOURCE_IDS"].split(",").map(&:strip).map(&:to_i).reject(&:zero?)
6
+ elsif ENV["SOURCE_ID"].present?
7
+ id = ENV["SOURCE_ID"].to_i
8
+ id.positive? ? [ id ] : nil
9
+ end
10
+ end
11
+
3
12
  namespace :source_monitor do
4
- desc "Backfill word counts for existing item_content records."
13
+ desc "Backfill word counts for existing item_content records. " \
14
+ "Env vars: SOURCE_ID, SOURCE_IDS (comma-separated), BATCH_SIZE (default 500)."
5
15
  task backfill_word_counts: :environment do
6
- # Phase 1: Create ItemContent for items with feed content but no ItemContent
7
- items_needing_content = SourceMonitor::Item
16
+ batch_size = (ENV["BATCH_SIZE"] || 500).to_i
17
+ source_ids = backfill_resolve_source_scope
18
+ sanitizer = ActionView::Base.full_sanitizer
19
+
20
+ # Phase 1: Batch-create ItemContent for items with feed content but no ItemContent
21
+ items_scope = SourceMonitor::Item
8
22
  .where.not(content: [ nil, "" ])
9
23
  .where.missing(:item_content)
24
+ items_scope = items_scope.where(source_id: source_ids) if source_ids
10
25
 
11
26
  created = 0
12
- items_needing_content.find_each do |item|
13
- item.ensure_feed_content_record
14
- created += 1
15
- puts "Created #{created} missing ItemContent records..." if (created % 100).zero?
27
+ now = Time.current
28
+
29
+ items_scope.select(:id, :content).find_in_batches(batch_size: batch_size) do |batch|
30
+ records = batch.filter_map do |item|
31
+ next if item.content.blank?
32
+
33
+ stripped = sanitizer.sanitize(item.content)
34
+ word_count = stripped.present? ? stripped.split.size : nil
35
+
36
+ { item_id: item.id, feed_word_count: word_count, created_at: now, updated_at: now }
37
+ end
38
+
39
+ SourceMonitor::ItemContent.insert_all(records) if records.any?
40
+ created += records.size
41
+ puts "Phase 1: created #{created} ItemContent records..."
16
42
  end
17
- puts "Created #{created} ItemContent records for feed-only items." if created > 0
43
+ puts "Phase 1 complete: #{created} records created." if created > 0
18
44
 
19
- # Phase 2: Recompute word counts for all existing ItemContent
20
- total = SourceMonitor::ItemContent.count
45
+ # Phase 2: Batch-recompute word counts for existing ItemContent
46
+ contents_scope = SourceMonitor::ItemContent
47
+ if source_ids
48
+ contents_scope = contents_scope.joins(:item).where(SourceMonitor::Item.table_name => { source_id: source_ids })
49
+ end
50
+
51
+ total = contents_scope.count
21
52
  processed = 0
53
+ updated = 0
54
+
55
+ contents_scope.includes(:item).find_in_batches(batch_size: batch_size) do |batch|
56
+ updates = batch.filter_map do |content|
57
+ feed_content = content.item&.content
58
+ scraped = content.scraped_content
59
+
60
+ feed_wc = if feed_content.present?
61
+ stripped = sanitizer.sanitize(feed_content)
62
+ stripped.present? ? stripped.split.size : nil
63
+ end
64
+
65
+ scraped_wc = scraped.present? ? scraped.split.size : nil
66
+
67
+ next if content.feed_word_count == feed_wc && content.scraped_word_count == scraped_wc
68
+
69
+ { id: content.id, item_id: content.item_id, feed_word_count: feed_wc, scraped_word_count: scraped_wc }
70
+ end
71
+
72
+ if updates.any?
73
+ SourceMonitor::ItemContent.upsert_all(updates, unique_by: :id, update_only: %i[feed_word_count scraped_word_count])
74
+ updated += updates.size
75
+ end
22
76
 
23
- SourceMonitor::ItemContent.find_each do |content|
24
- content.save!
25
- processed += 1
26
- puts "Processed #{processed}/#{total} records..." if (processed % 100).zero?
77
+ processed += batch.size
78
+ puts "Phase 2: #{processed}/#{total} checked, #{updated} updated..."
27
79
  end
28
80
 
29
- puts "Done. Backfilled word counts for #{processed} records (#{created} newly created)."
81
+ puts "Done. Phase 1: #{created} created. Phase 2: #{updated}/#{total} updated."
30
82
  end
31
83
 
32
84
  namespace :cleanup do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: source_monitor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - dchuk