source_monitor 0.13.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/skills/sm-event-handler/SKILL.md +1 -1
- data/.claude/skills/sm-event-handler/reference/events-api.md +1 -1
- data/CHANGELOG.md +5 -1
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/app/assets/builds/source_monitor/application.css +4 -0
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +1 -1
- data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +3 -1
- data/app/controllers/source_monitor/import_sessions_controller.rb +118 -72
- data/app/controllers/source_monitor/sources_controller.rb +4 -18
- data/app/models/source_monitor/source.rb +1 -1
- data/docs/setup.md +2 -2
- data/docs/upgrade.md +14 -0
- data/lib/source_monitor/analytics/scrape_recommendations.rb +21 -2
- data/lib/source_monitor/fetching/feed_fetcher/failure_outcome.rb +85 -0
- data/lib/source_monitor/fetching/feed_fetcher/success_outcome.rb +85 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +27 -88
- data/lib/source_monitor/fetching/fetch_runner.rb +12 -5
- data/lib/source_monitor/import_sessions/wizard.rb +612 -0
- data/lib/source_monitor/items/batch_item_creator.rb +7 -6
- data/lib/source_monitor/items/item_creator.rb +7 -14
- data/lib/source_monitor/items/normalized_entry.rb +61 -0
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +2 -0
- metadata +5 -4
- data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +0 -187
- data/app/controllers/source_monitor/import_sessions/health_check_management.rb +0 -112
- data/app/controllers/source_monitor/import_sessions/opml_parser.rb +0 -130
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d863378a0fb5338b1b1fe89c40c0c0ab9705b3ce60c45889eabf1ed626d27cf1
|
|
4
|
+
data.tar.gz: d3a56da362430857991b85ab2b24ef10b5e2641dc312ecbe2a302bdca1a8352b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 885cf285ecc91bf09f2bca2bd9f384cea7f36e1c966a0a74a20d7f87a9df8f69da79dd8363762111c3e206dfe1668ddbefc46cdda71c4e83eaeb138099e74a25
|
|
7
|
+
data.tar.gz: b38579d624e4e66f5f051ce3aab2e61249fb549310e73e1959cdc0d67aef7fb63ff7c18232563269b2e826c5eb08f2a01bb5cf78f992df90bba3a0649d4d7a21
|
|
@@ -98,7 +98,7 @@ Fires after a feed fetch finishes (success or failure).
|
|
|
98
98
|
| Field | Type | Description |
|
|
99
99
|
|---|---|---|
|
|
100
100
|
| `source` | `SourceMonitor::Source` | The fetched source |
|
|
101
|
-
| `result` |
|
|
101
|
+
| `result` | `SourceMonitor::Fetching::FeedFetcher::Result` | The fetch result |
|
|
102
102
|
| `status` | String | Result status |
|
|
103
103
|
| `occurred_at` | Time | When the event fired |
|
|
104
104
|
|
|
@@ -112,7 +112,7 @@ Fired by `Events.after_fetch_completed` after a feed fetch finishes.
|
|
|
112
112
|
```ruby
|
|
113
113
|
FetchCompletedEvent = Struct.new(
|
|
114
114
|
:source, # SourceMonitor::Source - the fetched source
|
|
115
|
-
:result, #
|
|
115
|
+
:result, # SourceMonitor::Fetching::FeedFetcher::Result - fetch result
|
|
116
116
|
:status, # String - result status
|
|
117
117
|
:occurred_at, # Time - when the event fired
|
|
118
118
|
keyword_init: true
|
data/CHANGELOG.md
CHANGED
|
@@ -13,7 +13,11 @@ All notable changes to this project are documented below. The format follows [Ke
|
|
|
13
13
|
|
|
14
14
|
## [Unreleased]
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
## [0.13.1] - 2026-05-28
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
- Preserve the public `after_fetch_completed` payload as `FeedFetcher::Result`; the PR #118 fetch outcome objects remain an internal refactor detail.
|
|
20
|
+
- Keep inactive sources out of sources-index scrape recommendation badges, matching the shared recommendation query used by dashboard and bulk enablement.
|
|
17
21
|
|
|
18
22
|
## [0.13.0] - 2026-03-24
|
|
19
23
|
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -9,8 +9,8 @@ SourceMonitor is a production-ready Rails 8 mountable engine for ingesting, norm
|
|
|
9
9
|
In your host Rails app:
|
|
10
10
|
|
|
11
11
|
```bash
|
|
12
|
-
bundle add source_monitor --version "~> 0.13.0"
|
|
13
|
-
# or add `gem "source_monitor", "~> 0.13.0"` manually, then run:
|
|
12
|
+
bundle add source_monitor --version "~> 0.13.1"
|
|
13
|
+
# or add `gem "source_monitor", "~> 0.13.1"` manually, then run:
|
|
14
14
|
bundle install
|
|
15
15
|
```
|
|
16
16
|
|
|
@@ -46,7 +46,7 @@ This exposes `bin/source_monitor` (via Bundler binstubs) so you can run the guid
|
|
|
46
46
|
Before running any SourceMonitor commands inside your host app, add the gem and install dependencies:
|
|
47
47
|
|
|
48
48
|
```bash
|
|
49
|
-
bundle add source_monitor --version "~> 0.13.0"
|
|
49
|
+
bundle add source_monitor --version "~> 0.13.1"
|
|
50
50
|
# or edit your Gemfile, then run
|
|
51
51
|
bundle install
|
|
52
52
|
```
|
|
@@ -1927,6 +1927,10 @@ video {
|
|
|
1927
1927
|
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
|
|
1928
1928
|
}
|
|
1929
1929
|
|
|
1930
|
+
.fm-admin .outline {
|
|
1931
|
+
outline-style: solid;
|
|
1932
|
+
}
|
|
1933
|
+
|
|
1930
1934
|
.fm-admin .ring-1 {
|
|
1931
1935
|
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
|
|
1932
1936
|
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(1px + var(--tw-ring-offset-width)) var(--tw-ring-color);
|
|
@@ -33,7 +33,7 @@ module SourceMonitor
|
|
|
33
33
|
|
|
34
34
|
def resolve_source_ids
|
|
35
35
|
if params.dig(:bulk_scrape_enablement, :select_all_pages) == "true"
|
|
36
|
-
|
|
36
|
+
SourceMonitor::Analytics::ScrapeRecommendations.new.candidate_ids
|
|
37
37
|
else
|
|
38
38
|
raw_ids = Array(params.dig(:bulk_scrape_enablement, :source_ids))
|
|
39
39
|
raw_ids.map(&:to_i).reject(&:zero?)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "source_monitor/import_sessions/entry_normalizer"
|
|
4
|
+
|
|
3
5
|
module SourceMonitor
|
|
4
6
|
module ImportSessions
|
|
5
7
|
module BulkConfiguration
|
|
@@ -33,7 +35,7 @@ module SourceMonitor
|
|
|
33
35
|
entry = selected_entries_for_identity.first
|
|
34
36
|
return fallback_identity unless entry
|
|
35
37
|
|
|
36
|
-
normalized =
|
|
38
|
+
normalized = SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry)
|
|
37
39
|
{
|
|
38
40
|
name: normalized[:title].presence || normalized[:feed_url] || fallback_identity[:name],
|
|
39
41
|
feed_url: normalized[:feed_url].presence || fallback_identity[:feed_url],
|
|
@@ -1,15 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
4
|
-
require "uri"
|
|
5
|
-
require "source_monitor/import_sessions/entry_normalizer"
|
|
3
|
+
require "source_monitor/import_sessions/wizard"
|
|
6
4
|
require "source_monitor/sources/params"
|
|
7
5
|
|
|
8
6
|
module SourceMonitor
|
|
9
7
|
class ImportSessionsController < ApplicationController
|
|
10
|
-
include SourceMonitor::ImportSessions::OpmlParser
|
|
11
|
-
include SourceMonitor::ImportSessions::EntryAnnotation
|
|
12
|
-
include SourceMonitor::ImportSessions::HealthCheckManagement
|
|
13
8
|
include SourceMonitor::ImportSessions::BulkConfiguration
|
|
14
9
|
|
|
15
10
|
STEP_HANDLERS = {
|
|
@@ -86,92 +81,60 @@ module SourceMonitor
|
|
|
86
81
|
def persist_step!
|
|
87
82
|
return if @import_session.current_step == @current_step
|
|
88
83
|
|
|
89
|
-
deactivate_health_checks
|
|
84
|
+
import_session_wizard.deactivate_health_checks if @current_step != "health_check"
|
|
90
85
|
@import_session.update_column(:current_step, @current_step)
|
|
91
86
|
end
|
|
92
87
|
|
|
93
88
|
def handle_health_check_step
|
|
94
|
-
|
|
95
|
-
@
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
89
|
+
result = import_session_wizard.handle_health_check
|
|
90
|
+
@selected_source_ids = result.selected_source_ids
|
|
91
|
+
|
|
92
|
+
if result.blocked?
|
|
93
|
+
@selection_error = result.selection_error
|
|
94
|
+
apply_health_check_context(result.health_check_context)
|
|
99
95
|
render :show, status: :unprocessable_entity
|
|
100
96
|
return
|
|
101
97
|
end
|
|
102
98
|
|
|
103
|
-
@current_step =
|
|
104
|
-
|
|
105
|
-
@import_session.update_column(:current_step, @current_step) if @import_session.current_step != @current_step
|
|
106
|
-
prepare_health_check_context if @current_step == "health_check"
|
|
99
|
+
@current_step = result.current_step
|
|
100
|
+
apply_health_check_context(result.health_check_context) if @current_step == "health_check"
|
|
107
101
|
redirect_to source_monitor.step_import_session_path(@import_session, step: @current_step), allow_other_host: false
|
|
108
102
|
end
|
|
109
103
|
|
|
110
104
|
def handle_upload_step
|
|
111
|
-
|
|
105
|
+
result = import_session_wizard.handle_upload
|
|
106
|
+
@upload_errors = result.errors
|
|
112
107
|
if @upload_errors.any?
|
|
113
108
|
render :show, status: :unprocessable_entity
|
|
114
109
|
return
|
|
115
110
|
end
|
|
116
111
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
if valid_entries.empty?
|
|
120
|
-
@upload_errors = [ "We couldn't find any valid feeds in that OPML file. Check the file and try again." ]
|
|
121
|
-
@import_session.update!(opml_file_metadata: build_file_metadata, parsed_sources: parsed_entries, current_step: "upload")
|
|
122
|
-
render :show, status: :unprocessable_entity
|
|
123
|
-
return
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
@import_session.update!(
|
|
127
|
-
opml_file_metadata: build_file_metadata.merge("uploaded_at" => Time.current),
|
|
128
|
-
parsed_sources: parsed_entries,
|
|
129
|
-
current_step: target_step
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
@current_step = target_step
|
|
133
|
-
prepare_preview_context(skip_default: true) if @current_step == "preview"
|
|
112
|
+
@current_step = result.current_step
|
|
113
|
+
apply_preview_context(result.preview_context) if @current_step == "preview"
|
|
134
114
|
|
|
135
115
|
respond_to do |format|
|
|
136
116
|
format.turbo_stream { render :show }
|
|
137
117
|
format.html { redirect_to source_monitor.step_import_session_path(@import_session, step: @current_step) }
|
|
138
118
|
end
|
|
139
|
-
rescue UploadError => error
|
|
140
|
-
@upload_errors = [ error.message ]
|
|
141
|
-
render :show, status: :unprocessable_entity
|
|
142
119
|
end
|
|
143
120
|
|
|
144
121
|
def handle_preview_step
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if params.dig(:import_session, :select_all).present?
|
|
148
|
-
@selected_source_ids = selectable_entries.map { |entry| entry[:id] }
|
|
149
|
-
@import_session.update_column(:selected_source_ids, @selected_source_ids)
|
|
150
|
-
valid_ids = @selected_source_ids
|
|
151
|
-
elsif params.dig(:import_session, :select_none).present?
|
|
152
|
-
@selected_source_ids = []
|
|
153
|
-
@import_session.update_column(:selected_source_ids, @selected_source_ids)
|
|
154
|
-
valid_ids = []
|
|
155
|
-
else
|
|
156
|
-
@selected_source_ids = build_selection_from_params
|
|
157
|
-
valid_ids = selectable_entries.index_by { |entry| entry[:id] }.slice(*@selected_source_ids).keys
|
|
158
|
-
@import_session.update!(selected_source_ids: valid_ids)
|
|
159
|
-
end
|
|
122
|
+
result = import_session_wizard.handle_preview
|
|
123
|
+
@selected_source_ids = result.selected_source_ids
|
|
160
124
|
|
|
161
|
-
if
|
|
162
|
-
@selection_error =
|
|
163
|
-
|
|
125
|
+
if result.blocked?
|
|
126
|
+
@selection_error = result.selection_error
|
|
127
|
+
apply_preview_context(result.preview_context)
|
|
164
128
|
render :show, status: :unprocessable_entity
|
|
165
129
|
return
|
|
166
130
|
end
|
|
167
131
|
|
|
168
|
-
@current_step =
|
|
169
|
-
@import_session.update_column(:current_step, @current_step) if @import_session.current_step != @current_step
|
|
132
|
+
@current_step = result.current_step
|
|
170
133
|
|
|
171
134
|
if @current_step == "health_check"
|
|
172
135
|
prepare_health_check_context
|
|
173
136
|
else
|
|
174
|
-
|
|
137
|
+
apply_preview_context(result.preview_context)
|
|
175
138
|
end
|
|
176
139
|
|
|
177
140
|
respond_to do |format|
|
|
@@ -200,31 +163,25 @@ module SourceMonitor
|
|
|
200
163
|
end
|
|
201
164
|
|
|
202
165
|
def handle_confirm_step
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
166
|
+
result = import_session_wizard.handle_confirm
|
|
167
|
+
apply_confirm_context(result)
|
|
168
|
+
|
|
169
|
+
if result.blocked?
|
|
170
|
+
@selection_error = result.selection_error
|
|
208
171
|
render :show, status: :unprocessable_entity
|
|
209
172
|
return
|
|
210
173
|
end
|
|
211
|
-
|
|
212
|
-
user_id: @import_session.user_id,
|
|
213
|
-
bulk_settings: @import_session.bulk_settings
|
|
214
|
-
)
|
|
215
|
-
SourceMonitor::ImportOpmlJob.perform_later(@import_session.id, history.id)
|
|
216
|
-
@import_session.update_column(:current_step, "confirm") if @import_session.current_step != "confirm"
|
|
217
|
-
message = "Import started for #{@selected_entries.size} sources."
|
|
174
|
+
|
|
218
175
|
respond_to do |format|
|
|
219
176
|
format.turbo_stream do
|
|
220
177
|
responder = SourceMonitor::TurboStreams::StreamResponder.new
|
|
221
|
-
responder.toast(message
|
|
178
|
+
responder.toast(message: result.message, level: :success)
|
|
222
179
|
responder.redirect(source_monitor.sources_path)
|
|
223
180
|
render turbo_stream: responder.render(view_context)
|
|
224
181
|
end
|
|
225
182
|
|
|
226
183
|
format.html do
|
|
227
|
-
redirect_to source_monitor.sources_path, notice: message
|
|
184
|
+
redirect_to source_monitor.sources_path, notice: result.message
|
|
228
185
|
end
|
|
229
186
|
end
|
|
230
187
|
end
|
|
@@ -302,6 +259,95 @@ module SourceMonitor
|
|
|
302
259
|
end
|
|
303
260
|
# :nocov:
|
|
304
261
|
|
|
262
|
+
def import_session_wizard
|
|
263
|
+
SourceMonitor::ImportSessions::Wizard.new(
|
|
264
|
+
import_session: @import_session,
|
|
265
|
+
params: params,
|
|
266
|
+
current_step: @current_step
|
|
267
|
+
)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
def permitted_step(value)
|
|
271
|
+
step = value.to_s.presence
|
|
272
|
+
return unless step
|
|
273
|
+
|
|
274
|
+
ImportSession::STEP_ORDER.find { |candidate| candidate == step }
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
def target_step
|
|
278
|
+
permitted_step(import_session_state_params[:next_step]) || @current_step || ImportSession.default_step
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
def session_attributes
|
|
282
|
+
attrs = import_session_state_params.except(:next_step, :current_step, "next_step", "current_step")
|
|
283
|
+
attrs[:current_step] = target_step
|
|
284
|
+
attrs
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
def import_session_state_params
|
|
288
|
+
@import_session_state_params ||= begin
|
|
289
|
+
raw = params[:import_session] || params["import_session"] || {}
|
|
290
|
+
permitted = if raw.respond_to?(:permit)
|
|
291
|
+
raw.permit(
|
|
292
|
+
:current_step,
|
|
293
|
+
:next_step,
|
|
294
|
+
:select_all,
|
|
295
|
+
:select_none,
|
|
296
|
+
parsed_sources: [],
|
|
297
|
+
selected_source_ids: [],
|
|
298
|
+
bulk_settings: {},
|
|
299
|
+
opml_file_metadata: {}
|
|
300
|
+
)
|
|
301
|
+
else
|
|
302
|
+
raw.to_h
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
SourceMonitor::Security::ParameterSanitizer.sanitize(permitted.to_h).with_indifferent_access
|
|
306
|
+
end
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
def prepare_preview_context(skip_default: false)
|
|
310
|
+
context = if skip_default
|
|
311
|
+
import_session_wizard.preview_context
|
|
312
|
+
else
|
|
313
|
+
import_session_wizard.preview_context_with_default_selection
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
apply_preview_context(context)
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
def prepare_health_check_context
|
|
320
|
+
apply_health_check_context(import_session_wizard.health_check_context)
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
def prepare_confirm_context
|
|
324
|
+
apply_confirm_context(import_session_wizard.confirm_context)
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
def apply_preview_context(context)
|
|
328
|
+
@filter = context.filter
|
|
329
|
+
@page = context.page
|
|
330
|
+
@selected_source_ids = context.selected_source_ids
|
|
331
|
+
@preview_entries = context.preview_entries
|
|
332
|
+
@filtered_entries = context.filtered_entries
|
|
333
|
+
@paginated_entries = context.paginated_entries
|
|
334
|
+
@has_next_page = context.has_next_page
|
|
335
|
+
@has_previous_page = context.has_previous_page
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
def apply_health_check_context(context)
|
|
339
|
+
@selected_source_ids = context.selected_source_ids
|
|
340
|
+
@health_check_entries = context.health_check_entries
|
|
341
|
+
@health_check_target_ids = context.health_check_target_ids
|
|
342
|
+
@health_progress = context.health_progress
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def apply_confirm_context(context)
|
|
346
|
+
@selected_source_ids = context.selected_source_ids
|
|
347
|
+
@selected_entries = context.selected_entries
|
|
348
|
+
@bulk_settings = context.bulk_settings
|
|
349
|
+
end
|
|
350
|
+
|
|
305
351
|
def authorize_import_session!
|
|
306
352
|
return if !SourceMonitor::Security::Authentication.authentication_configured?
|
|
307
353
|
|
|
@@ -52,8 +52,9 @@ module SourceMonitor
|
|
|
52
52
|
@avg_feed_word_counts = word_counts[:feed]
|
|
53
53
|
@avg_scraped_word_counts = word_counts[:scraped]
|
|
54
54
|
|
|
55
|
-
@
|
|
56
|
-
@
|
|
55
|
+
@scrape_recommendations = SourceMonitor::Analytics::ScrapeRecommendations.new
|
|
56
|
+
@scrape_candidate_ids = Set.new(@scrape_recommendations.candidate_ids_for(source_ids))
|
|
57
|
+
@total_scrape_candidate_count = @scrape_recommendations.candidates_count
|
|
57
58
|
|
|
58
59
|
# Row partial preload requirements (V3): item_activity_rates,
|
|
59
60
|
# avg_feed_word_counts, avg_scraped_word_counts are pre-computed above
|
|
@@ -188,23 +189,8 @@ module SourceMonitor
|
|
|
188
189
|
def expand_scrape_recommendation_filter
|
|
189
190
|
return unless @search_params["scraping_enabled_eq"] == "recommend"
|
|
190
191
|
|
|
191
|
-
threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold
|
|
192
192
|
@search_params.delete("scraping_enabled_eq")
|
|
193
|
-
@search_params
|
|
194
|
-
@search_params["active_eq"] = "true"
|
|
195
|
-
@search_params["avg_feed_words_lt"] = threshold.to_s
|
|
196
|
-
end
|
|
197
|
-
|
|
198
|
-
def compute_scrape_candidate_ids
|
|
199
|
-
threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold
|
|
200
|
-
return Set.new if threshold.nil? || threshold <= 0
|
|
201
|
-
|
|
202
|
-
candidate_ids = @sources.select do |source|
|
|
203
|
-
avg = @avg_feed_word_counts[source.id]
|
|
204
|
-
avg.present? && avg < threshold && !source.scraping_enabled?
|
|
205
|
-
end.map(&:id)
|
|
206
|
-
|
|
207
|
-
Set.new(candidate_ids)
|
|
193
|
+
@search_params.merge!(SourceMonitor::Analytics::ScrapeRecommendations.new.filter_params)
|
|
208
194
|
end
|
|
209
195
|
|
|
210
196
|
def enqueue_unscraped_items(source)
|
|
@@ -66,7 +66,7 @@ module SourceMonitor
|
|
|
66
66
|
end
|
|
67
67
|
|
|
68
68
|
def scrape_candidates(threshold: SourceMonitor.config.scraping.scrape_recommendation_threshold)
|
|
69
|
-
SourceMonitor::
|
|
69
|
+
SourceMonitor::Analytics::ScrapeRecommendations.new(threshold:).relation
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
# Bulk-enable scraping for sources that don't already have it enabled.
|
data/docs/setup.md
CHANGED
|
@@ -18,8 +18,8 @@ This guide consolidates the new guided installer, verification commands, and rol
|
|
|
18
18
|
Run these commands inside your host Rails application before invoking the guided workflow:
|
|
19
19
|
|
|
20
20
|
```bash
|
|
21
|
-
bundle add source_monitor --version "~> 0.13.0"
|
|
22
|
-
# or add gem "source_monitor", "~> 0.13.0" to Gemfile manually
|
|
21
|
+
bundle add source_monitor --version "~> 0.13.1"
|
|
22
|
+
# or add gem "source_monitor", "~> 0.13.1" to Gemfile manually
|
|
23
23
|
bundle install
|
|
24
24
|
```
|
|
25
25
|
|
data/docs/upgrade.md
CHANGED
|
@@ -46,6 +46,20 @@ If a removed option raises an error (`SourceMonitor::DeprecatedOptionError`), yo
|
|
|
46
46
|
|
|
47
47
|
## Version-Specific Notes
|
|
48
48
|
|
|
49
|
+
### Upgrading to 0.13.1
|
|
50
|
+
|
|
51
|
+
**What changed:**
|
|
52
|
+
- **Compatibility:** `after_fetch_completed` callbacks continue to receive `FeedFetcher::Result`; the fetch outcome extraction remains internal.
|
|
53
|
+
- **Scrape recommendations:** Inactive sources stay excluded from source-index recommendation badges, matching dashboard and bulk enablement behavior.
|
|
54
|
+
|
|
55
|
+
**Upgrade steps:**
|
|
56
|
+
```bash
|
|
57
|
+
bundle update source_monitor
|
|
58
|
+
bin/rails source_monitor:upgrade
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
No migrations, configuration changes, or breaking changes required.
|
|
62
|
+
|
|
49
63
|
### Upgrading to 0.13.0
|
|
50
64
|
|
|
51
65
|
**What changed:**
|
|
@@ -7,18 +7,37 @@ module SourceMonitor
|
|
|
7
7
|
@threshold = threshold.to_i
|
|
8
8
|
end
|
|
9
9
|
|
|
10
|
+
def relation
|
|
11
|
+
@relation ||= SourceMonitor::Queries::ScrapeCandidatesQuery.new(threshold: threshold).call
|
|
12
|
+
end
|
|
13
|
+
|
|
10
14
|
def candidates_count
|
|
11
|
-
@candidates_count ||=
|
|
15
|
+
@candidates_count ||= relation.count
|
|
12
16
|
end
|
|
13
17
|
|
|
14
18
|
def candidate_ids
|
|
15
|
-
@candidate_ids ||=
|
|
19
|
+
@candidate_ids ||= relation.pluck(:id)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def candidate_ids_for(source_ids)
|
|
23
|
+
ids = Array(source_ids).map { |source_id| source_id.respond_to?(:id) ? source_id.id : source_id }.compact
|
|
24
|
+
return [] if ids.empty?
|
|
25
|
+
|
|
26
|
+
relation.where(id: ids).pluck(:id)
|
|
16
27
|
end
|
|
17
28
|
|
|
18
29
|
def candidate?(source_id)
|
|
19
30
|
candidate_ids.include?(source_id)
|
|
20
31
|
end
|
|
21
32
|
|
|
33
|
+
def filter_params
|
|
34
|
+
{
|
|
35
|
+
"scraping_enabled_eq" => "false",
|
|
36
|
+
"active_eq" => "true",
|
|
37
|
+
"avg_feed_words_lt" => threshold.to_s
|
|
38
|
+
}
|
|
39
|
+
end
|
|
40
|
+
|
|
22
41
|
private
|
|
23
42
|
|
|
24
43
|
attr_reader :threshold
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Fetching
|
|
5
|
+
class FeedFetcher
|
|
6
|
+
class FailureOutcome
|
|
7
|
+
def initialize(error:)
|
|
8
|
+
@error = error
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
attr_reader :error, :retry_decision
|
|
12
|
+
|
|
13
|
+
def apply(source_updater:, started_at:, instrumentation_payload:)
|
|
14
|
+
duration_ms = source_updater.elapsed_ms(started_at)
|
|
15
|
+
@retry_decision = update_source(source_updater, duration_ms)
|
|
16
|
+
create_fetch_log(source_updater, duration_ms, started_at)
|
|
17
|
+
apply_instrumentation(instrumentation_payload)
|
|
18
|
+
result
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def status
|
|
22
|
+
:failed
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def response
|
|
26
|
+
error.response
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def body
|
|
30
|
+
response&.body
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def feed
|
|
34
|
+
nil
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def item_processing
|
|
38
|
+
@item_processing ||= EntryProcessingResult.empty
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def update_source(source_updater, duration_ms)
|
|
44
|
+
source_updater.update_source_for_failure(error, duration_ms)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def create_fetch_log(source_updater, duration_ms, started_at)
|
|
48
|
+
source_updater.create_fetch_log(
|
|
49
|
+
response: response,
|
|
50
|
+
duration_ms: duration_ms,
|
|
51
|
+
started_at: started_at,
|
|
52
|
+
success: false,
|
|
53
|
+
error: error,
|
|
54
|
+
body: body
|
|
55
|
+
)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def apply_instrumentation(instrumentation_payload)
|
|
59
|
+
instrumentation_payload[:success] = false
|
|
60
|
+
instrumentation_payload[:status] = status
|
|
61
|
+
instrumentation_payload[:error_class] = error.class.name
|
|
62
|
+
instrumentation_payload[:error_message] = error.message
|
|
63
|
+
instrumentation_payload[:http_status] = error.http_status if error.http_status
|
|
64
|
+
instrumentation_payload[:error_code] = error.code if error.respond_to?(:code)
|
|
65
|
+
instrumentation_payload[:items_created] = 0
|
|
66
|
+
instrumentation_payload[:items_updated] = 0
|
|
67
|
+
instrumentation_payload[:items_failed] = 0
|
|
68
|
+
instrumentation_payload[:retry_attempt] = retry_decision&.next_attempt ? retry_decision.next_attempt : 0
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def result
|
|
72
|
+
Result.new(
|
|
73
|
+
status: status,
|
|
74
|
+
response: response,
|
|
75
|
+
body: body,
|
|
76
|
+
error: error,
|
|
77
|
+
retry_decision: retry_decision,
|
|
78
|
+
item_processing: item_processing,
|
|
79
|
+
outcome: self
|
|
80
|
+
)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
module Fetching
|
|
5
|
+
class FeedFetcher
|
|
6
|
+
class SuccessOutcome
|
|
7
|
+
def initialize(response:, body:, feed:, item_processing:, feed_signature:, content_changed:, entries_digest:)
|
|
8
|
+
@response = response
|
|
9
|
+
@body = body
|
|
10
|
+
@feed = feed
|
|
11
|
+
@item_processing = item_processing
|
|
12
|
+
@feed_signature = feed_signature
|
|
13
|
+
@content_changed = content_changed
|
|
14
|
+
@entries_digest = entries_digest
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
attr_reader :response, :body, :feed, :item_processing, :feed_signature, :content_changed, :entries_digest
|
|
18
|
+
|
|
19
|
+
def apply(source_updater:, started_at:, instrumentation_payload:)
|
|
20
|
+
duration_ms = source_updater.elapsed_ms(started_at)
|
|
21
|
+
update_source(source_updater, duration_ms)
|
|
22
|
+
create_fetch_log(source_updater, duration_ms, started_at)
|
|
23
|
+
apply_instrumentation(instrumentation_payload)
|
|
24
|
+
result
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def status
|
|
28
|
+
:fetched
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def error
|
|
32
|
+
nil
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def retry_decision
|
|
36
|
+
nil
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def result
|
|
40
|
+
Result.new(status: status, feed: feed, response: response, body: body, item_processing: item_processing, outcome: self)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
def update_source(source_updater, duration_ms)
|
|
46
|
+
source_updater.update_source_for_success(
|
|
47
|
+
response,
|
|
48
|
+
duration_ms,
|
|
49
|
+
feed,
|
|
50
|
+
feed_signature,
|
|
51
|
+
content_changed: content_changed,
|
|
52
|
+
entries_digest: entries_digest
|
|
53
|
+
)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def create_fetch_log(source_updater, duration_ms, started_at)
|
|
57
|
+
source_updater.create_fetch_log(
|
|
58
|
+
response: response,
|
|
59
|
+
duration_ms: duration_ms,
|
|
60
|
+
started_at: started_at,
|
|
61
|
+
feed: feed,
|
|
62
|
+
success: true,
|
|
63
|
+
body: body,
|
|
64
|
+
feed_signature: feed_signature,
|
|
65
|
+
items_created: item_processing.created,
|
|
66
|
+
items_updated: item_processing.updated,
|
|
67
|
+
items_failed: item_processing.failed,
|
|
68
|
+
item_errors: item_processing.errors
|
|
69
|
+
)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def apply_instrumentation(instrumentation_payload)
|
|
73
|
+
instrumentation_payload[:success] = true
|
|
74
|
+
instrumentation_payload[:status] = :fetched
|
|
75
|
+
instrumentation_payload[:http_status] = response.status
|
|
76
|
+
instrumentation_payload[:parser] = feed.class.name if feed
|
|
77
|
+
instrumentation_payload[:items_created] = item_processing.created
|
|
78
|
+
instrumentation_payload[:items_updated] = item_processing.updated
|
|
79
|
+
instrumentation_payload[:items_failed] = item_processing.failed
|
|
80
|
+
instrumentation_payload[:retry_attempt] = 0
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|