source_monitor 0.13.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +4 -4
  2. data/.claude/skills/sm-event-handler/SKILL.md +1 -1
  3. data/.claude/skills/sm-event-handler/reference/events-api.md +1 -1
  4. data/CHANGELOG.md +5 -1
  5. data/Gemfile.lock +1 -1
  6. data/README.md +3 -3
  7. data/app/assets/builds/source_monitor/application.css +4 -0
  8. data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +1 -1
  9. data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +3 -1
  10. data/app/controllers/source_monitor/import_sessions_controller.rb +118 -72
  11. data/app/controllers/source_monitor/sources_controller.rb +4 -18
  12. data/app/models/source_monitor/source.rb +1 -1
  13. data/docs/setup.md +2 -2
  14. data/docs/upgrade.md +14 -0
  15. data/lib/source_monitor/analytics/scrape_recommendations.rb +21 -2
  16. data/lib/source_monitor/fetching/feed_fetcher/failure_outcome.rb +85 -0
  17. data/lib/source_monitor/fetching/feed_fetcher/success_outcome.rb +85 -0
  18. data/lib/source_monitor/fetching/feed_fetcher.rb +27 -88
  19. data/lib/source_monitor/fetching/fetch_runner.rb +12 -5
  20. data/lib/source_monitor/import_sessions/wizard.rb +612 -0
  21. data/lib/source_monitor/items/batch_item_creator.rb +7 -6
  22. data/lib/source_monitor/items/item_creator.rb +7 -14
  23. data/lib/source_monitor/items/normalized_entry.rb +61 -0
  24. data/lib/source_monitor/version.rb +1 -1
  25. data/lib/source_monitor.rb +2 -0
  26. metadata +5 -4
  27. data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +0 -187
  28. data/app/controllers/source_monitor/import_sessions/health_check_management.rb +0 -112
  29. data/app/controllers/source_monitor/import_sessions/opml_parser.rb +0 -130
@@ -9,11 +9,13 @@ require "source_monitor/items/item_creator"
9
9
  require "source_monitor/fetching/feed_fetcher/adaptive_interval"
10
10
  require "source_monitor/fetching/feed_fetcher/source_updater"
11
11
  require "source_monitor/fetching/feed_fetcher/entry_processor"
12
+ require "source_monitor/fetching/feed_fetcher/success_outcome"
13
+ require "source_monitor/fetching/feed_fetcher/failure_outcome"
12
14
 
13
15
  module SourceMonitor
14
16
  module Fetching
15
17
  class FeedFetcher
16
- Result = Struct.new(:status, :feed, :response, :body, :error, :item_processing, :retry_decision, keyword_init: true)
18
+ Result = Struct.new(:status, :feed, :response, :body, :error, :item_processing, :retry_decision, :outcome, keyword_init: true)
17
19
  EntryProcessingResult = Struct.new(
18
20
  :created,
19
21
  :updated,
@@ -24,7 +26,20 @@ module SourceMonitor
24
26
  :created_items,
25
27
  :updated_items,
26
28
  keyword_init: true
27
- )
29
+ ) do
30
+ def self.empty
31
+ new(
32
+ created: 0,
33
+ updated: 0,
34
+ unchanged: 0,
35
+ failed: 0,
36
+ items: [],
37
+ errors: [],
38
+ created_items: [],
39
+ updated_items: []
40
+ )
41
+ end
42
+ end
28
43
  ResponseWrapper = Struct.new(:status, :headers, :body, keyword_init: true)
29
44
 
30
45
  attr_reader :source, :client, :jitter_proc
@@ -116,7 +131,6 @@ module SourceMonitor
116
131
  end
117
132
 
118
133
  def handle_success(response, started_at, instrumentation_payload)
119
- duration_ms = source_updater.elapsed_ms(started_at)
120
134
  body = response.body
121
135
  feed_body_signature = body_digest(body)
122
136
  feed = parse_feed(body, response)
@@ -125,45 +139,19 @@ module SourceMonitor
125
139
  processing = entry_processor.process_feed_entries(feed)
126
140
  content_changed = entries_digest_changed?(feed)
127
141
  else
128
- processing = EntryProcessingResult.new(
129
- created: 0,
130
- updated: 0,
131
- unchanged: 0,
132
- failed: 0,
133
- items: [],
134
- errors: [],
135
- created_items: [],
136
- updated_items: []
137
- )
142
+ processing = EntryProcessingResult.empty
138
143
  content_changed = false
139
144
  end
140
145
 
141
- feed_entries_digest = entries_digest(feed)
142
- source_updater.update_source_for_success(response, duration_ms, feed, feed_body_signature, content_changed: content_changed, entries_digest: feed_entries_digest)
143
- source_updater.create_fetch_log(
146
+ SuccessOutcome.new(
144
147
  response: response,
145
- duration_ms: duration_ms,
146
- started_at: started_at,
147
- feed: feed,
148
- success: true,
149
148
  body: body,
149
+ feed: feed,
150
+ item_processing: processing,
150
151
  feed_signature: feed_body_signature,
151
- items_created: processing.created,
152
- items_updated: processing.updated,
153
- items_failed: processing.failed,
154
- item_errors: processing.errors
155
- )
156
-
157
- instrumentation_payload[:success] = true
158
- instrumentation_payload[:status] = :fetched
159
- instrumentation_payload[:http_status] = response.status
160
- instrumentation_payload[:parser] = feed.class.name if feed
161
- instrumentation_payload[:items_created] = processing.created
162
- instrumentation_payload[:items_updated] = processing.updated
163
- instrumentation_payload[:items_failed] = processing.failed
164
- instrumentation_payload[:retry_attempt] = 0
165
-
166
- Result.new(status: :fetched, feed:, response:, body:, item_processing: processing)
152
+ content_changed: content_changed,
153
+ entries_digest: entries_digest(feed)
154
+ ).apply(source_updater: source_updater, started_at: started_at, instrumentation_payload: instrumentation_payload)
167
155
  end
168
156
 
169
157
  def handle_not_modified(response, started_at, instrumentation_payload)
@@ -189,16 +177,7 @@ module SourceMonitor
189
177
  status: :not_modified,
190
178
  response: response,
191
179
  body: nil,
192
- item_processing: EntryProcessingResult.new(
193
- created: 0,
194
- updated: 0,
195
- unchanged: 0,
196
- failed: 0,
197
- items: [],
198
- errors: [],
199
- created_items: [],
200
- updated_items: []
201
- )
180
+ item_processing: EntryProcessingResult.empty
202
181
  )
203
182
  end
204
183
 
@@ -263,48 +242,8 @@ module SourceMonitor
263
242
  end
264
243
 
265
244
  def handle_failure(error, started_at:, instrumentation_payload:)
266
- response = error.response
267
- body = response&.body
268
- duration_ms = source_updater.elapsed_ms(started_at)
269
-
270
- retry_decision = source_updater.update_source_for_failure(error, duration_ms)
271
- source_updater.create_fetch_log(
272
- response: response,
273
- duration_ms: duration_ms,
274
- started_at: started_at,
275
- success: false,
276
- error: error,
277
- body: body
278
- )
279
-
280
- instrumentation_payload[:success] = false
281
- instrumentation_payload[:status] = :failed
282
- instrumentation_payload[:error_class] = error.class.name
283
- instrumentation_payload[:error_message] = error.message
284
- instrumentation_payload[:http_status] = error.http_status if error.http_status
285
- instrumentation_payload[:error_code] = error.code if error.respond_to?(:code)
286
- instrumentation_payload[:items_created] = 0
287
- instrumentation_payload[:items_updated] = 0
288
- instrumentation_payload[:items_failed] = 0
289
- instrumentation_payload[:retry_attempt] = retry_decision&.next_attempt ? retry_decision.next_attempt : 0
290
-
291
- Result.new(
292
- status: :failed,
293
- response: response,
294
- body: body,
295
- error: error,
296
- retry_decision: retry_decision,
297
- item_processing: EntryProcessingResult.new(
298
- created: 0,
299
- updated: 0,
300
- unchanged: 0,
301
- failed: 0,
302
- items: [],
303
- errors: [],
304
- created_items: [],
305
- updated_items: []
306
- )
307
- )
245
+ FailureOutcome.new(error: error)
246
+ .apply(source_updater: source_updater, started_at: started_at, instrumentation_payload: instrumentation_payload)
308
247
  end
309
248
 
310
249
  def attempt_aia_recovery(_error, started_at, instrumentation_payload)
@@ -78,16 +78,17 @@ module SourceMonitor
78
78
  end
79
79
 
80
80
  # Phase 3: Post-fetch DB writes under the advisory lock (still held).
81
+ completion_result = completion_result_for(result)
81
82
  begin
82
- log_handler_result("RetentionHandler", retention_handler.call(source:, result:))
83
- log_handler_result("FollowUpHandler", follow_up_handler.call(source:, result:))
84
- schedule_retry_if_needed(result)
85
- mark_complete!(result)
83
+ log_handler_result("RetentionHandler", retention_handler.call(source:, result: completion_result))
84
+ log_handler_result("FollowUpHandler", follow_up_handler.call(source:, result: completion_result))
85
+ schedule_retry_if_needed(completion_result)
86
+ mark_complete!(completion_result)
86
87
  ensure
87
88
  lock.release!
88
89
  end
89
90
 
90
- log_handler_result("EventPublisher", event_publisher.call(source:, result:))
91
+ log_handler_result("EventPublisher", event_publisher.call(source:, result: result))
91
92
  result
92
93
  rescue SourceMonitor::Fetching::AdvisoryLock::NotAcquiredError => error
93
94
  raise ConcurrencyError, error.message
@@ -156,6 +157,12 @@ module SourceMonitor
156
157
  update_source_state(fetch_status: "failed")
157
158
  end
158
159
 
160
+ def completion_result_for(result)
161
+ return result unless result.respond_to?(:outcome)
162
+
163
+ result.outcome || result
164
+ end
165
+
159
166
  def update_source_state(attrs)
160
167
  self.class.send(:update_source_state!, source, attrs)
161
168
  end