source_monitor 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +1 -0
- data/.claude/skills/sm-configure/SKILL.md +8 -1
- data/.claude/skills/sm-configure/reference/configuration-reference.md +11 -0
- data/.claude/skills/sm-event-handler/SKILL.md +1 -1
- data/.claude/skills/sm-event-handler/reference/events-api.md +1 -1
- data/.claude/skills/sm-host-setup/SKILL.md +13 -3
- data/.claude/skills/sm-host-setup/reference/initializer-template.md +11 -0
- data/.claude/skills/sm-host-setup/reference/setup-checklist.md +9 -1
- data/.claude/skills/sm-upgrade/reference/version-history.md +12 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/VERSION +1 -1
- data/app/assets/builds/source_monitor/application.css +4 -0
- data/app/controllers/source_monitor/application_controller.rb +73 -14
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +1 -1
- data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +3 -1
- data/app/controllers/source_monitor/import_sessions_controller.rb +118 -72
- data/app/controllers/source_monitor/sources_controller.rb +4 -18
- data/app/models/source_monitor/source.rb +1 -1
- data/app/views/layouts/source_monitor/application.html.erb +6 -0
- data/docs/configuration.md +18 -1
- data/docs/deployment.md +1 -1
- data/docs/goals/engine-hardening/.goalbuddy-board/app.js +543 -0
- data/docs/goals/engine-hardening/.goalbuddy-board/goalbuddy-mark.png +0 -0
- data/docs/goals/engine-hardening/.goalbuddy-board/index.html +111 -0
- data/docs/goals/engine-hardening/.goalbuddy-board/styles.css +991 -0
- data/docs/goals/engine-hardening/goal.md +97 -0
- data/docs/goals/engine-hardening/notes/T001-spec-validation.md +37 -0
- data/docs/goals/engine-hardening/state.yaml +324 -0
- data/docs/setup.md +3 -3
- data/docs/upgrade.md +41 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +10 -0
- data/lib/source_monitor/analytics/scrape_recommendations.rb +21 -2
- data/lib/source_monitor/configuration/authentication_settings.rb +5 -1
- data/lib/source_monitor/fetching/feed_fetcher/failure_outcome.rb +85 -0
- data/lib/source_monitor/fetching/feed_fetcher/success_outcome.rb +85 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +27 -88
- data/lib/source_monitor/fetching/fetch_runner.rb +12 -5
- data/lib/source_monitor/import_sessions/wizard.rb +612 -0
- data/lib/source_monitor/items/batch_item_creator.rb +7 -6
- data/lib/source_monitor/items/item_creator.rb +7 -14
- data/lib/source_monitor/items/normalized_entry.rb +61 -0
- data/lib/source_monitor/security/authentication.rb +10 -0
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +2 -0
- data/source_monitor.gemspec +7 -2
- metadata +12 -68
- data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +0 -15
- data/.claude/agent-memory/vbw-vbw-dev/MEMORY.md +0 -34
- data/.claude/agent-memory/vbw-vbw-lead/MEMORY.md +0 -49
- data/.claude/agents/rails-concern.md +0 -464
- data/.claude/agents/rails-controller.md +0 -424
- data/.claude/agents/rails-hotwire.md +0 -446
- data/.claude/agents/rails-implement.md +0 -374
- data/.claude/agents/rails-job.md +0 -334
- data/.claude/agents/rails-lint.md +0 -294
- data/.claude/agents/rails-mailer.md +0 -371
- data/.claude/agents/rails-migration.md +0 -449
- data/.claude/agents/rails-model.md +0 -420
- data/.claude/agents/rails-policy.md +0 -443
- data/.claude/agents/rails-presenter.md +0 -427
- data/.claude/agents/rails-query.md +0 -412
- data/.claude/agents/rails-review.md +0 -490
- data/.claude/agents/rails-service.md +0 -458
- data/.claude/agents/rails-state-records.md +0 -465
- data/.claude/agents/rails-tdd.md +0 -314
- data/.claude/agents/rails-test.md +0 -441
- data/.claude/agents/rails-view-component.md +0 -418
- data/.claude/commands/rails-audit.md +0 -77
- data/.claude/commands/release.md +0 -366
- data/.claude/hooks/block-secrets.sh +0 -52
- data/.claude/settings.json +0 -85
- data/.claude/skills/action-cable-patterns/SKILL.md +0 -296
- data/.claude/skills/action-mailer-patterns/SKILL.md +0 -295
- data/.claude/skills/active-storage-setup/SKILL.md +0 -311
- data/.claude/skills/api-versioning/SKILL.md +0 -294
- data/.claude/skills/authentication-flow/SKILL.md +0 -335
- data/.claude/skills/authentication-flow/reference/current.md +0 -248
- data/.claude/skills/authentication-flow/reference/passwordless.md +0 -253
- data/.claude/skills/authentication-flow/reference/sessions.md +0 -201
- data/.claude/skills/authorization-pundit/SKILL.md +0 -462
- data/.claude/skills/caching-strategies/SKILL.md +0 -350
- data/.claude/skills/database-migrations/SKILL.md +0 -354
- data/.claude/skills/form-object-patterns/SKILL.md +0 -399
- data/.claude/skills/hotwire-patterns/SKILL.md +0 -247
- data/.claude/skills/hotwire-patterns/reference/stimulus.md +0 -307
- data/.claude/skills/hotwire-patterns/reference/tailwind-integration.md +0 -112
- data/.claude/skills/hotwire-patterns/reference/turbo-frames.md +0 -158
- data/.claude/skills/hotwire-patterns/reference/turbo-streams.md +0 -218
- data/.claude/skills/i18n-patterns/SKILL.md +0 -320
- data/.claude/skills/install/SKILL.md +0 -367
- data/.claude/skills/performance-optimization/SKILL.md +0 -311
- data/.claude/skills/rails-architecture/SKILL.md +0 -259
- data/.claude/skills/rails-architecture/reference/error-handling.md +0 -333
- data/.claude/skills/rails-architecture/reference/event-tracking.md +0 -142
- data/.claude/skills/rails-architecture/reference/layer-interactions.md +0 -417
- data/.claude/skills/rails-architecture/reference/multi-tenancy.md +0 -152
- data/.claude/skills/rails-architecture/reference/query-patterns.md +0 -342
- data/.claude/skills/rails-architecture/reference/service-patterns.md +0 -286
- data/.claude/skills/rails-architecture/reference/state-records.md +0 -250
- data/.claude/skills/rails-architecture/reference/testing-strategy.md +0 -326
- data/.claude/skills/rails-concern/SKILL.md +0 -399
- data/.claude/skills/rails-controller/SKILL.md +0 -336
- data/.claude/skills/rails-model-generator/SKILL.md +0 -321
- data/.claude/skills/rails-model-generator/reference/validations.md +0 -298
- data/.claude/skills/rails-presenter/SKILL.md +0 -274
- data/.claude/skills/rails-query-object/SKILL.md +0 -289
- data/.claude/skills/rails-service-object/SKILL.md +0 -349
- data/.claude/skills/solid-queue-setup/SKILL.md +0 -307
- data/.claude/skills/tdd-cycle/SKILL.md +0 -359
- data/.claude/skills/viewcomponent-patterns/SKILL.md +0 -333
- data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +0 -187
- data/app/controllers/source_monitor/import_sessions/health_check_management.rb +0 -112
- data/app/controllers/source_monitor/import_sessions/opml_parser.rb +0 -130
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "active_support/core_ext/object/blank"
|
|
6
|
+
require "source_monitor/import_sessions/entry_normalizer"
|
|
7
|
+
|
|
8
|
+
module SourceMonitor
  module ImportSessions
    # Backend for the multi-step OPML import wizard.
    #
    # One instance is built per request from the persisted import session, the
    # request params and the step currently being processed. Each +handle_*+
    # method processes a single step, persists progress on the import session
    # and returns a result struct; the +*_context+ methods assemble the data
    # belonging to a step (entries, selection, pagination, health progress).
    class Wizard
      # Content types accepted as OPML/XML uploads.
      ALLOWED_CONTENT_TYPES = %w[text/xml application/xml text/x-opml application/opml].freeze
      # Generic binary content types that are not rejected by the type check in
      # #validate_upload (the XML parser is the final arbiter for those).
      GENERIC_CONTENT_TYPES = %w[application/octet-stream binary/octet-stream].freeze

      # Raised by #parse_opml_file for unreadable uploads; rescued in #handle_upload.
      class UploadError < StandardError; end

      # Raw param values treated as "true" for the select_all / select_none flags.
      TRUE_PARAM_VALUES = [ true, "true", "1", 1, "on" ].freeze

      # Shared predicate for step results that can refuse to advance.
      module Blockable
        # @return [Boolean] true when the step could not advance
        def blocked?
          status == :blocked
        end
      end

      UploadResult = Struct.new(:status, :errors, :current_step, :preview_context, keyword_init: true)

      PreviewResult = Struct.new(
        :status,
        :selected_source_ids,
        :valid_ids,
        :current_step,
        :selection_error,
        :preview_context,
        keyword_init: true
      ) do
        include Blockable
      end

      PreviewContext = Struct.new(
        :filter,
        :page,
        :selected_source_ids,
        :preview_entries,
        :filtered_entries,
        :paginated_entries,
        :has_next_page,
        :has_previous_page,
        keyword_init: true
      )

      HealthCheckResult = Struct.new(
        :status,
        :selected_source_ids,
        :current_step,
        :selection_error,
        :health_check_context,
        keyword_init: true
      ) do
        include Blockable
      end

      HealthCheckContext = Struct.new(
        :selected_source_ids,
        :health_check_entries,
        :health_check_target_ids,
        :health_progress,
        keyword_init: true
      )

      ConfirmResult = Struct.new(
        :status,
        :selected_source_ids,
        :selected_entries,
        :bulk_settings,
        :selection_error,
        :message,
        :history,
        keyword_init: true
      ) do
        include Blockable
      end

      ConfirmContext = Struct.new(:selected_source_ids, :selected_entries, :bulk_settings, keyword_init: true)

      # @param import_session [Object] persisted import session being advanced
      # @param params [#[]] request params (reads :opml_file, :filter, :page, :import_session)
      # @param current_step [String, nil] step the request is being processed for
      # @param now [Time] timestamp used for upload / health-check bookkeeping
      def initialize(import_session:, params:, current_step:, now: Time.current)
        @import_session = import_session
        @params = params
        @current_step = current_step
        @now = now
      end

      # Validates and parses the uploaded OPML file and stores the parsed
      # entries on the import session.
      #
      # @return [UploadResult] +:invalid+ with error messages when validation or
      #   parsing fails or no valid feed was found; otherwise +:success+ with the
      #   next step and a preview context
      def handle_upload
        errors = validate_upload
        return UploadResult.new(status: :invalid, errors: errors, current_step: current_step) if errors.any?

        parsed_entries = parse_opml_file(opml_file)

        if parsed_entries.none? { |entry| entry[:status] == "valid" }
          # Keep what was parsed so the malformed rows remain on the session,
          # but stay on the upload step.
          import_session.update!(
            opml_file_metadata: build_file_metadata,
            parsed_sources: parsed_entries,
            current_step: "upload"
          )

          return UploadResult.new(
            status: :invalid,
            errors: [ "We couldn't find any valid feeds in that OPML file. Check the file and try again." ],
            current_step: "upload"
          )
        end

        next_step = target_step
        import_session.update!(
          opml_file_metadata: build_file_metadata.merge("uploaded_at" => now),
          parsed_sources: parsed_entries,
          current_step: next_step
        )

        UploadResult.new(
          status: :success,
          errors: [],
          current_step: next_step,
          preview_context: preview_context
        )
      rescue UploadError => error
        UploadResult.new(status: :invalid, errors: [ error.message ], current_step: current_step)
      end

      # Persists the selection made on the preview step (select all, select
      # none, or the explicit ids from params) and advances when allowed.
      #
      # @return [PreviewResult] +:blocked+ when the user tries to leave the
      #   preview step with nothing selected, +:success+ otherwise
      def handle_preview
        selected_source_ids = Array(import_session.selected_source_ids).map(&:to_s)
        selectable_entries = annotated_entries(selected_source_ids).select { |entry| entry[:selectable] }

        valid_ids = if select_all_requested?
          ids = selectable_entries.map { |entry| entry[:id] }
          import_session.update_column(:selected_source_ids, ids)
          ids
        elsif select_none_requested?
          import_session.update_column(:selected_source_ids, [])
          []
        else
          # Already restricted to selectable ids, in request order.
          ids = build_selection_from_params(selectable_entries)
          import_session.update!(selected_source_ids: ids)
          ids
        end

        if advancing_from_preview? && valid_ids.empty?
          return PreviewResult.new(
            status: :blocked,
            selected_source_ids: valid_ids,
            valid_ids: valid_ids,
            current_step: current_step,
            selection_error: "Select at least one new source to continue.",
            preview_context: preview_context(selected_source_ids: valid_ids)
          )
        end

        next_step = target_step
        import_session.update_column(:current_step, next_step) if import_session.current_step != next_step

        PreviewResult.new(
          status: :success,
          selected_source_ids: valid_ids,
          valid_ids: valid_ids,
          current_step: next_step,
          preview_context: preview_context(selected_source_ids: valid_ids)
        )
      end

      # Persists the selection made on the health-check step and advances when
      # allowed. The selection is saved before the "blocked" check runs, so an
      # empty selection is persisted even when the step does not advance.
      #
      # @return [HealthCheckResult] +:blocked+ when leaving the step with an
      #   empty selection, +:success+ otherwise (the context is only included
      #   when the wizard stays on the health-check step)
      def handle_health_check
        selected_source_ids = health_check_selection_from_params
        import_session.update!(selected_source_ids: selected_source_ids)

        if advancing_from_health_check? && selected_source_ids.blank?
          deactivate_health_checks

          return HealthCheckResult.new(
            status: :blocked,
            selected_source_ids: selected_source_ids,
            current_step: current_step,
            selection_error: "Select at least one source to continue.",
            health_check_context: health_check_context
          )
        end

        next_step = target_step
        deactivate_health_checks if next_step != "health_check"
        import_session.update_column(:current_step, next_step) if import_session.current_step != next_step

        HealthCheckResult.new(
          status: :success,
          selected_source_ids: selected_source_ids,
          current_step: next_step,
          health_check_context: (health_check_context if next_step == "health_check")
        )
      end

      # Creates an import history record and enqueues the import job for the
      # current selection.
      #
      # @return [ConfirmResult] +:blocked+ when nothing is selected, otherwise
      #   +:success+ with the created history and a confirmation message
      def handle_confirm
        context = confirm_context

        if context.selected_entries.empty?
          return ConfirmResult.new(
            status: :blocked,
            selected_source_ids: context.selected_source_ids,
            selected_entries: context.selected_entries,
            bulk_settings: context.bulk_settings,
            selection_error: "Select at least one source to import."
          )
        end

        history = SourceMonitor::ImportHistory.create!(
          user_id: import_session.user_id,
          bulk_settings: import_session.bulk_settings
        )
        SourceMonitor::ImportOpmlJob.perform_later(import_session.id, history.id)
        import_session.update_column(:current_step, "confirm") if import_session.current_step != "confirm"

        ConfirmResult.new(
          status: :success,
          selected_source_ids: context.selected_source_ids,
          selected_entries: context.selected_entries,
          bulk_settings: context.bulk_settings,
          history: history,
          message: "Import started for #{context.selected_entries.size} sources."
        )
      end

      # Builds the filtered, paginated entry list for the preview step.
      #
      # @param selected_source_ids [Array, nil] selection to mark; falls back to
      #   the selection stored on the import session
      # @return [PreviewContext]
      def preview_context(selected_source_ids: nil)
        filter = permitted_filter(params[:filter]) || "all"
        page = normalize_page_param(params[:page])
        selected_source_ids = Array(selected_source_ids || import_session.selected_source_ids).map(&:to_s)
        preview_entries = annotated_entries(selected_source_ids)

        filtered_entries = filter_entries(preview_entries, filter)
        paginator = SourceMonitor::Pagination::Paginator.new(
          scope: filtered_entries,
          page: page,
          per_page: preview_per_page
        ).paginate

        PreviewContext.new(
          filter: filter,
          page: paginator.page,
          selected_source_ids: selected_source_ids,
          preview_entries: preview_entries,
          filtered_entries: filtered_entries,
          paginated_entries: paginator.records,
          has_next_page: paginator.has_next_page,
          has_previous_page: paginator.has_previous_page
        )
      end

      # Like #preview_context, but when nothing is selected yet it first selects
      # (and persists) every selectable entry.
      #
      # @return [PreviewContext]
      def preview_context_with_default_selection
        selected_source_ids = Array(import_session.selected_source_ids).map(&:to_s)
        preview_entries = annotated_entries(selected_source_ids)

        if selected_source_ids.blank? && preview_entries.present?
          selected_source_ids = preview_entries.select { |entry| entry[:selectable] }.map { |entry| entry[:id] }
          import_session.update_column(:selected_source_ids, selected_source_ids)
        end

        preview_context(selected_source_ids: selected_source_ids)
      end

      # Starts health checks when needed (see #start_health_checks_if_needed)
      # and returns the entries, targets and progress for the health-check step.
      #
      # @return [HealthCheckContext]
      def health_check_context
        start_health_checks_if_needed

        selected_source_ids = Array(import_session.selected_source_ids).map(&:to_s)
        entries = health_check_entries(selected_source_ids)
        target_ids = health_check_targets

        HealthCheckContext.new(
          selected_source_ids: selected_source_ids,
          health_check_entries: entries,
          health_check_target_ids: target_ids,
          health_progress: health_check_progress(entries)
        )
      end

      # Marks health checks as finished on the import session; no-op when they
      # are not currently active.
      #
      # @return [void]
      def deactivate_health_checks
        return unless import_session.health_checks_active?

        import_session.update_columns(
          health_checks_active: false,
          health_check_completed_at: now
        )
      end

      # Collects the selected entries and bulk settings for the confirm step.
      #
      # @return [ConfirmContext]
      def confirm_context
        selected_source_ids = Array(import_session.selected_source_ids).map(&:to_s)
        # #annotated_entries already flags membership in the selection.
        selected_entries = annotated_entries(selected_source_ids).select { |entry| entry[:selected] }

        ConfirmContext.new(
          selected_source_ids: selected_source_ids,
          selected_entries: selected_entries,
          bulk_settings: import_session.bulk_settings || {}
        )
      end

      private

      attr_reader :import_session, :params, :current_step, :now

      # Checks that an upload is present, readable, non-empty and of an
      # acceptable content type.
      #
      # A value that is present but not readable (for example a plain string
      # submitted under the file field) is treated as a missing upload instead
      # of raising later when file methods are called on it.
      #
      # @return [Array<String>] user-facing error messages (empty when valid)
      def validate_upload
        return [ "Upload an OPML file to continue." ] unless opml_file.present? && opml_file.respond_to?(:read)

        errors = []
        errors << "The uploaded file is empty. Choose another OPML file." if opml_file.size.to_i <= 0

        content_type = opml_file.content_type if opml_file.respond_to?(:content_type)
        if content_type.present? && !content_type_allowed?(content_type) && !generic_content_type?(content_type)
          errors << "Upload must be an OPML or XML file."
        end

        errors
      end

      # @return [Object, nil] the raw :opml_file param
      def opml_file
        params[:opml_file]
      end

      # @return [Hash{String => Object}] filename, byte size and content type of
      #   the upload; empty when the param is not an uploaded-file-like object
      def build_file_metadata
        return {} unless opml_file.respond_to?(:original_filename)

        {
          "filename" => opml_file.original_filename,
          "byte_size" => opml_file.size,
          "content_type" => opml_file.content_type
        }
      end

      # @param content_type [String, nil]
      # @return [Boolean] whether the media type is one of ALLOWED_CONTENT_TYPES
      def content_type_allowed?(content_type)
        ALLOWED_CONTENT_TYPES.include?(normalize_content_type(content_type))
      end

      # @param content_type [String, nil]
      # @return [Boolean] whether the media type is one of GENERIC_CONTENT_TYPES
      def generic_content_type?(content_type)
        GENERIC_CONTENT_TYPES.include?(normalize_content_type(content_type))
      end

      # Reduces a Content-Type value to its bare, lower-cased media type so that
      # "Text/XML; charset=utf-8" compares equal to "text/xml".
      #
      # @param content_type [String, nil]
      # @return [String] normalised media type ("" when blank)
      def normalize_content_type(content_type)
        content_type.to_s.split(";", 2).first.to_s.strip.downcase
      end

      # Reads the upload and converts every <outline> carrying an xmlUrl
      # attribute (matched case-insensitively) into an entry hash.
      #
      # @param file [#read] uploaded file
      # @return [Array<Hash>] parsed entries, valid and malformed
      # @raise [UploadError] when the content is blank or not parseable XML
      def parse_opml_file(file)
        content = file.read
        file.rewind if file.respond_to?(:rewind)

        raise UploadError, "The uploaded file appears to be empty." if content.blank?

        # strict: raise on malformed XML; nonet: never fetch external resources.
        document = Nokogiri::XML(content) { |config| config.strict.nonet }
        raise UploadError, "The uploaded file is not valid XML or OPML." if document.root.nil?

        # The index counts every <outline>, including the ones skipped below,
        # so entry ids reflect the position in the document.
        document.xpath("//outline").each_with_index.filter_map do |outline, index|
          next unless outline.attribute_nodes.any? { |attr| attr.name.casecmp("xmlurl").zero? }

          build_entry(outline, index)
        end
      rescue Nokogiri::XML::SyntaxError => error
        raise UploadError, "We couldn't parse that OPML file: #{error.message}"
      end

      # Builds the entry hash for one outline, marking it malformed when the
      # feed URL is missing or not an HTTP(S) URL.
      #
      # @param outline [Nokogiri::XML::Node]
      # @param index [Integer] position of the outline in the document
      # @return [Hash]
      def build_entry(outline, index)
        feed_url = outline_attribute(outline, "xmlUrl")
        website_url = outline_attribute(outline, "htmlUrl")
        title = outline_attribute(outline, "title") || outline_attribute(outline, "text")

        if feed_url.blank?
          return malformed_entry(index, feed_url, title, website_url, "Missing feed URL")
        end

        unless valid_feed_url?(feed_url)
          return malformed_entry(index, feed_url, title, website_url, "Feed URL must be HTTP or HTTPS")
        end

        {
          id: "outline-#{index}",
          raw_outline_index: index,
          feed_url: feed_url,
          title: title,
          website_url: website_url,
          status: "valid",
          error: nil,
          health_status: nil,
          health_error: nil
        }
      end

      # @return [Hash] entry hash with status "malformed" and the given error
      def malformed_entry(index, feed_url, title, website_url, error)
        {
          id: "outline-#{index}",
          raw_outline_index: index,
          feed_url: feed_url.presence,
          title: title,
          website_url: website_url,
          status: "malformed",
          error: error,
          health_status: nil,
          health_error: nil
        }
      end

      # Looks up an attribute by case-insensitive name.
      #
      # @return [String, nil] the attribute value, or nil when absent or blank
      def outline_attribute(outline, name)
        attribute = outline.attribute_nodes.find { |attr| attr.name.casecmp(name).zero? }
        attribute&.value.to_s.presence
      end

      # @param url [String]
      # @return [Boolean] true for parseable http/https URLs that have a host
      def valid_feed_url?(url)
        parsed = URI.parse(url)
        # URI::HTTPS is a subclass of URI::HTTP, so this covers both schemes.
        parsed.is_a?(URI::HTTP) && parsed.host.present?
      rescue URI::InvalidURIError
        false
      end

      # Normalises the parsed entries and annotates each with +:duplicate+
      # (a source with the same feed URL already exists, compared
      # case-insensitively), +:selectable+ (valid and not a duplicate) and
      # +:selected+ (its id is in +selected_ids+).
      #
      # @param selected_ids [Array<String>, nil]
      # @return [Array<Hash>]
      def annotated_entries(selected_ids)
        entries = Array(import_session.parsed_sources)
        return [] if entries.blank?

        # Hash lookups keep annotation linear in the number of entries.
        selected_lookup = Array(selected_ids).to_h { |id| [ id, true ] }
        normalized = entries.map { |entry| normalize_entry(entry) }
        feed_urls = normalized.filter_map { |entry| entry[:feed_url]&.downcase }.uniq
        existing_urls = existing_feed_url_lookup(feed_urls)

        normalized.map do |entry|
          duplicate = entry[:feed_url].present? && existing_urls.key?(entry[:feed_url].downcase)
          entry.merge(
            duplicate: duplicate,
            selectable: entry[:status] == "valid" && !duplicate,
            selected: selected_lookup.key?(entry[:id])
          )
        end
      end

      # @param feed_urls [Array<String>] lower-cased feed URLs
      # @return [Hash{String => true}] lower-cased feed URLs of existing sources
      #   that match any of +feed_urls+; no query is issued for an empty list
      def existing_feed_url_lookup(feed_urls)
        return {} if feed_urls.blank?

        SourceMonitor::Source
          .where("LOWER(feed_url) IN (?)", feed_urls)
          .pluck(:feed_url)
          .to_h { |feed_url| [ feed_url.downcase, true ] }
      end

      # Delegates to EntryNormalizer.
      def normalize_entry(entry)
        SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry)
      end

      # @param entries [Array<Hash>] annotated entries
      # @param filter [String] "new", "existing", or anything else for all
      # @return [Array<Hash>]
      def filter_entries(entries, filter)
        case filter
        when "new"
          entries.select { |entry| entry[:selectable] }
        when "existing"
          entries.select { |entry| entry[:duplicate] }
        else
          entries
        end
      end

      # @param selectable_entries [Array<Hash>]
      # @return [Array<String>] unique requested ids that belong to selectable
      #   entries, in request order; empty when no ids were submitted
      def build_selection_from_params(selectable_entries)
        ids = import_session_params[:selected_source_ids]
        return [] unless ids

        Array(ids).map(&:to_s).uniq & selectable_entries.map { |entry| entry[:id] }
      end

      # @return [Boolean] whether the request moves away from the preview step
      def advancing_from_preview?
        target_step != "preview"
      end

      # Resolves the health-check selection: all targets, none, the submitted
      # ids restricted to the targets, or (when no ids were submitted) the
      # selection already stored on the session.
      #
      # @return [Array<String>]
      def health_check_selection_from_params
        return health_check_targets.dup if select_all_requested?
        return [] if select_none_requested?

        ids = import_session_params[:selected_source_ids]
        return Array(import_session.selected_source_ids).map(&:to_s) unless ids

        Array(ids).map(&:to_s).uniq & health_check_targets
      end

      # @return [Boolean] whether the request moves away from the health-check step
      def advancing_from_health_check?
        target_step != "health_check"
      end

      # On the health-check step, (re)starts health checks for the current
      # selection unless they are already running for exactly that selection.
      # State is updated under a row lock; jobs are enqueued after the lock
      # block has finished.
      #
      # @return [void]
      def start_health_checks_if_needed
        return unless current_step == "health_check"

        jobs_to_enqueue = []

        import_session.with_lock do
          import_session.reload
          selected = Array(import_session.selected_source_ids).map(&:to_s)

          if selected.blank?
            import_session.update_columns(health_checks_active: false, health_check_target_ids: [])
            next
          end

          if import_session.health_checks_active? && import_session.health_check_targets.sort == selected.sort
            next
          end

          import_session.update!(
            parsed_sources: reset_health_results(import_session.parsed_sources, selected),
            health_checks_active: true,
            health_check_target_ids: selected,
            health_check_started_at: now,
            health_check_completed_at: nil
          )

          jobs_to_enqueue = selected
        end

        enqueue_health_check_jobs(import_session, jobs_to_enqueue) if jobs_to_enqueue.any?
      end

      # Resets health status to "pending" (and clears the health error) for the
      # entries whose id is in +target_ids+; other entries are returned as-is.
      #
      # @param entries [Array<Hash>, nil]
      # @param target_ids [Array<String>]
      # @return [Array<Hash>]
      def reset_health_results(entries, target_ids)
        Array(entries).map do |entry|
          entry_hash = entry.to_h
          # Entries may carry string or symbol keys.
          entry_id = entry_hash["id"] || entry_hash[:id]
          next entry_hash unless target_ids.include?(entry_id.to_s)

          entry_hash.merge("health_status" => "pending", "health_error" => nil)
        end
      end

      # Enqueues one health-check job per target id, each delayed by one second.
      def enqueue_health_check_jobs(import_session, target_ids)
        target_ids.each do |target_id|
          SourceMonitor::ImportSessionHealthCheckJob.set(wait: 1.second).perform_later(import_session.id, target_id)
        end
      end

      # @param selected_ids [Array<String>]
      # @return [Array<Hash>] normalised entries that are health-check targets,
      #   each annotated with +:selected+
      def health_check_entries(selected_ids)
        targets = health_check_targets
        entries = Array(import_session.parsed_sources).map { |entry| normalize_entry(entry) }

        entries.select { |entry| targets.include?(entry[:id]) }.map do |entry|
          entry.merge(selected: selected_ids.include?(entry[:id]))
        end
      end

      # @param entries [Array<Hash>] health-check entries
      # @return [Hash] :completed, :total, :pending, :active and :done
      def health_check_progress(entries)
        total = health_check_targets.size
        completed = entries.count { |entry| health_check_complete?(entry) }

        {
          completed: completed,
          total: total,
          pending: [ total - completed, 0 ].max,
          active: import_session.health_checks_active?,
          done: total.positive? && completed >= total
        }
      end

      # @return [Boolean] whether the entry has a final health status
      def health_check_complete?(entry)
        %w[working failing].include?(entry[:health_status].to_s)
      end

      # @return [Array<String>] the session's health-check targets, falling back
      #   to the current selection when none are recorded
      def health_check_targets
        targets = import_session.health_check_targets
        targets = Array(import_session.selected_source_ids).map(&:to_s) if targets.blank?
        targets
      end

      # @param value [Object] raw page param
      # @return [Integer] page number, 1 for anything non-positive or not
      #   convertible with #to_i (e.g. an array or hash param)
      def normalize_page_param(value)
        number = value.to_i
        number = 1 if number <= 0
        number
      rescue StandardError
        1
      end

      # @return [String, nil] the filter when it is one of all/new/existing
      def permitted_filter(raw)
        value = raw.to_s.presence
        return unless value

        %w[all new existing].find { |candidate| candidate == value }
      end

      # @return [Integer] number of entries per preview page
      def preview_per_page
        25
      end

      # @return [String] the requested next step when permitted, else the
      #   current step, else the import session's default step
      def target_step
        permitted_step(import_session_params[:next_step]) || current_step || ImportSession.default_step
      end

      # @return [String, nil] the step when it appears in ImportSession::STEP_ORDER
      def permitted_step(value)
        step = value.to_s.presence
        return unless step

        ImportSession::STEP_ORDER.find { |candidate| candidate == step }
      end

      # Permitted and sanitised :import_session params, memoised per instance.
      # Plain hashes (no #permit) are taken as-is before sanitising.
      #
      # @return [ActiveSupport::HashWithIndifferentAccess]
      def import_session_params
        @import_session_params ||= begin
          raw = params[:import_session] || params["import_session"] || {}
          permitted = if raw.respond_to?(:permit)
            raw.permit(:next_step, :select_all, :select_none, selected_source_ids: [])
          else
            raw.to_h
          end

          SourceMonitor::Security::ParameterSanitizer.sanitize(permitted.to_h).with_indifferent_access
        end
      end

      def select_all_requested?
        truthy_import_session_param?(:select_all)
      end

      def select_none_requested?
        truthy_import_session_param?(:select_none)
      end

      # @return [Boolean] whether the param holds one of TRUE_PARAM_VALUES
      def truthy_import_session_param?(key)
        TRUE_PARAM_VALUES.include?(import_session_params[key])
      end
    end
  end
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "source_monitor/items/item_creator"
|
|
4
|
+
require "source_monitor/items/normalized_entry"
|
|
4
5
|
|
|
5
6
|
module SourceMonitor
|
|
6
7
|
module Items
|
|
@@ -32,17 +33,17 @@ module SourceMonitor
|
|
|
32
33
|
|
|
33
34
|
# Step 1: Pre-parse entries to extract GUIDs and fingerprints for bulk lookup.
|
|
34
35
|
entry_identifiers = @entries.map do |entry|
|
|
35
|
-
|
|
36
|
+
normalized_entry = NormalizedEntry.new(
|
|
36
37
|
source: @source,
|
|
37
38
|
entry: entry,
|
|
38
39
|
content_extractor: content_extractor
|
|
39
40
|
)
|
|
40
|
-
attrs = parser.parse
|
|
41
|
-
raw_guid = attrs[:guid]
|
|
42
|
-
normalized_guid = raw_guid.present? ? raw_guid.downcase : nil
|
|
43
|
-
guid = normalized_guid.presence || attrs[:content_fingerprint]
|
|
44
41
|
|
|
45
|
-
{
|
|
42
|
+
{
|
|
43
|
+
guid: normalized_entry.item_guid,
|
|
44
|
+
fingerprint: normalized_entry.content_fingerprint,
|
|
45
|
+
raw_guid_present: normalized_entry.raw_guid_present?
|
|
46
|
+
}
|
|
46
47
|
end
|
|
47
48
|
|
|
48
49
|
# Step 2: Batch-fetch existing items by GUID (single query)
|
|
@@ -9,6 +9,7 @@ require "source_monitor/instrumentation"
|
|
|
9
9
|
require "source_monitor/scrapers/readability"
|
|
10
10
|
require "source_monitor/items/item_creator/entry_parser"
|
|
11
11
|
require "source_monitor/items/item_creator/content_extractor"
|
|
12
|
+
require "source_monitor/items/normalized_entry"
|
|
12
13
|
|
|
13
14
|
module SourceMonitor
|
|
14
15
|
module Items
|
|
@@ -49,14 +50,10 @@ module SourceMonitor
|
|
|
49
50
|
end
|
|
50
51
|
|
|
51
52
|
def call
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# Normalize GUID to lowercase so the plain btree index on guid is used
|
|
55
|
-
# for lookups instead of LOWER(guid) which forces sequential scans.
|
|
56
|
-
normalized_guid = raw_guid.present? ? raw_guid.downcase : nil
|
|
57
|
-
attributes[:guid] = normalized_guid.presence || attributes[:content_fingerprint]
|
|
53
|
+
normalized_entry = build_normalized_entry
|
|
54
|
+
attributes = normalized_entry.item_attributes
|
|
58
55
|
|
|
59
|
-
existing_item, matched_by = existing_item_for(attributes, raw_guid_present:
|
|
56
|
+
existing_item, matched_by = existing_item_for(attributes, raw_guid_present: normalized_entry.raw_guid_present?)
|
|
60
57
|
|
|
61
58
|
if existing_item
|
|
62
59
|
apply_attributes(existing_item, attributes)
|
|
@@ -70,7 +67,7 @@ module SourceMonitor
|
|
|
70
67
|
end
|
|
71
68
|
end
|
|
72
69
|
|
|
73
|
-
create_new_item(attributes, raw_guid_present:
|
|
70
|
+
create_new_item(attributes, raw_guid_present: normalized_entry.raw_guid_present?)
|
|
74
71
|
end
|
|
75
72
|
|
|
76
73
|
private
|
|
@@ -193,12 +190,8 @@ module SourceMonitor
|
|
|
193
190
|
(record.changed - IGNORED_CHANGE_ATTRIBUTES).any?
|
|
194
191
|
end
|
|
195
192
|
|
|
196
|
-
def
|
|
197
|
-
|
|
198
|
-
end
|
|
199
|
-
|
|
200
|
-
def entry_parser
|
|
201
|
-
@entry_parser ||= EntryParser.new(source: source, entry: entry, content_extractor: content_extractor)
|
|
193
|
+
def build_normalized_entry
|
|
194
|
+
@normalized_entry ||= NormalizedEntry.new(source: source, entry: entry, content_extractor: content_extractor)
|
|
202
195
|
end
|
|
203
196
|
|
|
204
197
|
def content_extractor
|