source_monitor 0.13.0 → 0.13.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/skills/sm-event-handler/SKILL.md +1 -1
- data/.claude/skills/sm-event-handler/reference/events-api.md +1 -1
- data/CHANGELOG.md +5 -1
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/app/assets/builds/source_monitor/application.css +4 -0
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +1 -1
- data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +3 -1
- data/app/controllers/source_monitor/import_sessions_controller.rb +118 -72
- data/app/controllers/source_monitor/sources_controller.rb +4 -18
- data/app/models/source_monitor/source.rb +1 -1
- data/docs/setup.md +2 -2
- data/docs/upgrade.md +14 -0
- data/lib/source_monitor/analytics/scrape_recommendations.rb +21 -2
- data/lib/source_monitor/fetching/feed_fetcher/failure_outcome.rb +85 -0
- data/lib/source_monitor/fetching/feed_fetcher/success_outcome.rb +85 -0
- data/lib/source_monitor/fetching/feed_fetcher.rb +27 -88
- data/lib/source_monitor/fetching/fetch_runner.rb +12 -5
- data/lib/source_monitor/import_sessions/wizard.rb +612 -0
- data/lib/source_monitor/items/batch_item_creator.rb +7 -6
- data/lib/source_monitor/items/item_creator.rb +7 -14
- data/lib/source_monitor/items/normalized_entry.rb +61 -0
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +2 -0
- metadata +5 -4
- data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +0 -187
- data/app/controllers/source_monitor/import_sessions/health_check_management.rb +0 -112
- data/app/controllers/source_monitor/import_sessions/opml_parser.rb +0 -130
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/core_ext/object/blank"
|
|
4
|
+
require "source_monitor/items/item_creator/entry_parser"
|
|
5
|
+
require "source_monitor/items/item_creator/content_extractor"
|
|
6
|
+
|
|
7
|
+
module SourceMonitor
|
|
8
|
+
module Items
|
|
9
|
+
class NormalizedEntry
|
|
10
|
+
def self.call(source:, entry:, content_extractor: nil)
|
|
11
|
+
new(source: source, entry: entry, content_extractor: content_extractor).item_attributes
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def initialize(source:, entry:, content_extractor: nil)
|
|
15
|
+
@source = source
|
|
16
|
+
@entry = entry
|
|
17
|
+
@content_extractor = content_extractor || ItemCreator::ContentExtractor.new(source: source)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def attributes
|
|
21
|
+
@attributes ||= parser.parse
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def item_attributes
|
|
25
|
+
attributes.merge(guid: item_guid)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def raw_guid
|
|
29
|
+
attributes[:guid]
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def normalized_guid
|
|
33
|
+
raw_guid.present? ? raw_guid.downcase : nil
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def raw_guid_present?
|
|
37
|
+
normalized_guid.present?
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def item_guid
|
|
41
|
+
normalized_guid.presence || content_fingerprint
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def content_fingerprint
|
|
45
|
+
attributes[:content_fingerprint]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
attr_reader :source, :entry, :content_extractor
|
|
51
|
+
|
|
52
|
+
def parser
|
|
53
|
+
@parser ||= ItemCreator::EntryParser.new(
|
|
54
|
+
source: source,
|
|
55
|
+
entry: entry,
|
|
56
|
+
content_extractor: content_extractor
|
|
57
|
+
)
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
data/lib/source_monitor.rb
CHANGED
|
@@ -89,6 +89,7 @@ module SourceMonitor
|
|
|
89
89
|
|
|
90
90
|
module ImportSessions
|
|
91
91
|
autoload :EntryNormalizer, "source_monitor/import_sessions/entry_normalizer"
|
|
92
|
+
autoload :Wizard, "source_monitor/import_sessions/wizard"
|
|
92
93
|
autoload :HealthCheckBroadcaster, "source_monitor/import_sessions/health_check_broadcaster"
|
|
93
94
|
autoload :HealthCheckUpdater, "source_monitor/import_sessions/health_check_updater"
|
|
94
95
|
autoload :OPMLImporter, "source_monitor/import_sessions/opml_importer"
|
|
@@ -108,6 +109,7 @@ module SourceMonitor
|
|
|
108
109
|
|
|
109
110
|
module Items
|
|
110
111
|
autoload :ItemCreator, "source_monitor/items/item_creator"
|
|
112
|
+
autoload :NormalizedEntry, "source_monitor/items/normalized_entry"
|
|
111
113
|
autoload :RetentionPruner, "source_monitor/items/retention_pruner"
|
|
112
114
|
autoload :RetentionStrategies, "source_monitor/items/retention_strategies"
|
|
113
115
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: source_monitor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.13.0
|
|
4
|
+
version: 0.13.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- dchuk
|
|
@@ -408,9 +408,6 @@ files:
|
|
|
408
408
|
- app/controllers/source_monitor/health_controller.rb
|
|
409
409
|
- app/controllers/source_monitor/import_history_dismissals_controller.rb
|
|
410
410
|
- app/controllers/source_monitor/import_sessions/bulk_configuration.rb
|
|
411
|
-
- app/controllers/source_monitor/import_sessions/entry_annotation.rb
|
|
412
|
-
- app/controllers/source_monitor/import_sessions/health_check_management.rb
|
|
413
|
-
- app/controllers/source_monitor/import_sessions/opml_parser.rb
|
|
414
411
|
- app/controllers/source_monitor/import_sessions_controller.rb
|
|
415
412
|
- app/controllers/source_monitor/item_scrapes_controller.rb
|
|
416
413
|
- app/controllers/source_monitor/items_controller.rb
|
|
@@ -598,7 +595,9 @@ files:
|
|
|
598
595
|
- lib/source_monitor/fetching/feed_fetcher.rb
|
|
599
596
|
- lib/source_monitor/fetching/feed_fetcher/adaptive_interval.rb
|
|
600
597
|
- lib/source_monitor/fetching/feed_fetcher/entry_processor.rb
|
|
598
|
+
- lib/source_monitor/fetching/feed_fetcher/failure_outcome.rb
|
|
601
599
|
- lib/source_monitor/fetching/feed_fetcher/source_updater.rb
|
|
600
|
+
- lib/source_monitor/fetching/feed_fetcher/success_outcome.rb
|
|
602
601
|
- lib/source_monitor/fetching/fetch_error.rb
|
|
603
602
|
- lib/source_monitor/fetching/fetch_runner.rb
|
|
604
603
|
- lib/source_monitor/fetching/retry_orchestrator.rb
|
|
@@ -619,12 +618,14 @@ files:
|
|
|
619
618
|
- lib/source_monitor/import_sessions/health_check_broadcaster.rb
|
|
620
619
|
- lib/source_monitor/import_sessions/health_check_updater.rb
|
|
621
620
|
- lib/source_monitor/import_sessions/opml_importer.rb
|
|
621
|
+
- lib/source_monitor/import_sessions/wizard.rb
|
|
622
622
|
- lib/source_monitor/instrumentation.rb
|
|
623
623
|
- lib/source_monitor/items/batch_item_creator.rb
|
|
624
624
|
- lib/source_monitor/items/item_creator.rb
|
|
625
625
|
- lib/source_monitor/items/item_creator/content_extractor.rb
|
|
626
626
|
- lib/source_monitor/items/item_creator/entry_parser.rb
|
|
627
627
|
- lib/source_monitor/items/item_creator/entry_parser/media_extraction.rb
|
|
628
|
+
- lib/source_monitor/items/normalized_entry.rb
|
|
628
629
|
- lib/source_monitor/items/retention_pruner.rb
|
|
629
630
|
- lib/source_monitor/items/retention_strategies.rb
|
|
630
631
|
- lib/source_monitor/items/retention_strategies/destroy.rb
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module SourceMonitor
|
|
4
|
-
module ImportSessions
|
|
5
|
-
module EntryAnnotation
|
|
6
|
-
extend ActiveSupport::Concern
|
|
7
|
-
|
|
8
|
-
private
|
|
9
|
-
|
|
10
|
-
def annotated_entries(selected_ids)
|
|
11
|
-
selected_ids ||= []
|
|
12
|
-
entries = Array(@import_session.parsed_sources)
|
|
13
|
-
return [] if entries.blank?
|
|
14
|
-
|
|
15
|
-
normalized = entries.map { |entry| normalize_entry(entry) }
|
|
16
|
-
|
|
17
|
-
feed_urls = normalized.filter_map { |entry| entry[:feed_url]&.downcase }
|
|
18
|
-
duplicate_lookup = if feed_urls.present?
|
|
19
|
-
SourceMonitor::Source.where("LOWER(feed_url) IN (?)", feed_urls).pluck(:feed_url).map(&:downcase)
|
|
20
|
-
else
|
|
21
|
-
[]
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
normalized.map do |entry|
|
|
25
|
-
duplicate = entry[:feed_url].present? && duplicate_lookup.include?(entry[:feed_url].downcase)
|
|
26
|
-
entry.merge(
|
|
27
|
-
duplicate: duplicate,
|
|
28
|
-
selectable: entry[:status] == "valid" && !duplicate,
|
|
29
|
-
selected: selected_ids.include?(entry[:id])
|
|
30
|
-
)
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def normalize_entry(entry)
|
|
35
|
-
entry = entry.to_h
|
|
36
|
-
SourceMonitor::ImportSessions::EntryNormalizer.normalize(entry)
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def filter_entries(entries, filter)
|
|
40
|
-
case filter
|
|
41
|
-
when "new"
|
|
42
|
-
entries.select { |entry| entry[:selectable] }
|
|
43
|
-
when "existing"
|
|
44
|
-
entries.select { |entry| entry[:duplicate] }
|
|
45
|
-
else
|
|
46
|
-
entries
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def selectable_entries_from(entries)
|
|
51
|
-
entries.select { |entry| entry[:selectable] }
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
def selectable_entries
|
|
55
|
-
@selectable_entries ||= annotated_entries(@selected_source_ids).select { |entry| entry[:selectable] }
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def build_selection_from_params
|
|
59
|
-
@selected_source_ids ||= []
|
|
60
|
-
|
|
61
|
-
if params.dig(:import_session, :select_all) == "true"
|
|
62
|
-
return selectable_entries.map { |entry| entry[:id] }
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
if params.dig(:import_session, :select_none) == "true"
|
|
66
|
-
return []
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
ids = params.dig(:import_session, :selected_source_ids)
|
|
70
|
-
return [] unless ids
|
|
71
|
-
|
|
72
|
-
Array(ids).map { |id| id.to_s }.uniq
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def health_check_selection_from_params
|
|
76
|
-
if params.dig(:import_session, :select_all) == "true"
|
|
77
|
-
return health_check_targets.dup
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
return [] if params.dig(:import_session, :select_none) == "true"
|
|
81
|
-
|
|
82
|
-
ids = params.dig(:import_session, :selected_source_ids)
|
|
83
|
-
return Array(@import_session.selected_source_ids).map(&:to_s) unless ids
|
|
84
|
-
|
|
85
|
-
Array(ids).map { |id| id.to_s }.uniq & health_check_targets
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def advancing_from_health_check?
|
|
89
|
-
target_step != "health_check"
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
def advancing_from_preview?
|
|
93
|
-
target_step != "preview"
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
def normalize_page_param(value)
|
|
97
|
-
number = value.to_i
|
|
98
|
-
number = 1 if number <= 0
|
|
99
|
-
number
|
|
100
|
-
rescue StandardError
|
|
101
|
-
1
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def permitted_filter(raw)
|
|
105
|
-
value = raw.to_s.presence
|
|
106
|
-
return unless value
|
|
107
|
-
|
|
108
|
-
%w[all new existing].find { |candidate| candidate == value }
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def preview_per_page
|
|
112
|
-
25
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def state_params
|
|
116
|
-
@state_params ||= begin
|
|
117
|
-
permitted = params.fetch(:import_session, {}).permit(
|
|
118
|
-
:current_step,
|
|
119
|
-
:next_step,
|
|
120
|
-
:select_all,
|
|
121
|
-
:select_none,
|
|
122
|
-
parsed_sources: [],
|
|
123
|
-
selected_source_ids: [],
|
|
124
|
-
bulk_settings: {},
|
|
125
|
-
opml_file_metadata: {}
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
SourceMonitor::Security::ParameterSanitizer.sanitize(permitted.to_h)
|
|
129
|
-
end
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
def permitted_step(value)
|
|
133
|
-
step = value.to_s.presence
|
|
134
|
-
return unless step
|
|
135
|
-
|
|
136
|
-
ImportSession::STEP_ORDER.find { |candidate| candidate == step }
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
def target_step
|
|
140
|
-
next_step = state_params[:next_step] || state_params["next_step"]
|
|
141
|
-
permitted_step(next_step) || @current_step || ImportSession.default_step
|
|
142
|
-
end
|
|
143
|
-
|
|
144
|
-
def session_attributes
|
|
145
|
-
attrs = state_params.except(:next_step, :current_step, "next_step", "current_step")
|
|
146
|
-
attrs[:opml_file_metadata] = build_file_metadata if uploading_file?
|
|
147
|
-
attrs[:current_step] = target_step
|
|
148
|
-
attrs
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
def prepare_preview_context(skip_default: false)
|
|
152
|
-
@filter = permitted_filter(params[:filter]) || "all"
|
|
153
|
-
@page = normalize_page_param(params[:page])
|
|
154
|
-
@selected_source_ids = Array(@import_session.selected_source_ids).map(&:to_s)
|
|
155
|
-
|
|
156
|
-
@preview_entries = annotated_entries(@selected_source_ids)
|
|
157
|
-
|
|
158
|
-
if !skip_default && @selected_source_ids.blank? && @preview_entries.present?
|
|
159
|
-
defaults = selectable_entries_from(@preview_entries).map { |entry| entry[:id] }
|
|
160
|
-
@selected_source_ids = defaults
|
|
161
|
-
@import_session.update_column(:selected_source_ids, defaults)
|
|
162
|
-
@preview_entries = annotated_entries(@selected_source_ids)
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
@filtered_entries = filter_entries(@preview_entries, @filter)
|
|
166
|
-
|
|
167
|
-
paginator = SourceMonitor::Pagination::Paginator.new(
|
|
168
|
-
scope: @filtered_entries,
|
|
169
|
-
page: @page,
|
|
170
|
-
per_page: preview_per_page
|
|
171
|
-
).paginate
|
|
172
|
-
|
|
173
|
-
@paginated_entries = paginator.records
|
|
174
|
-
@has_next_page = paginator.has_next_page
|
|
175
|
-
@has_previous_page = paginator.has_previous_page
|
|
176
|
-
@page = paginator.page
|
|
177
|
-
end
|
|
178
|
-
|
|
179
|
-
def prepare_confirm_context
|
|
180
|
-
@selected_source_ids = Array(@import_session.selected_source_ids).map(&:to_s)
|
|
181
|
-
@selected_entries = annotated_entries(@selected_source_ids)
|
|
182
|
-
.select { |entry| @selected_source_ids.include?(entry[:id]) }
|
|
183
|
-
@bulk_settings = @import_session.bulk_settings || {}
|
|
184
|
-
end
|
|
185
|
-
end
|
|
186
|
-
end
|
|
187
|
-
end
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module SourceMonitor
|
|
4
|
-
module ImportSessions
|
|
5
|
-
module HealthCheckManagement
|
|
6
|
-
extend ActiveSupport::Concern
|
|
7
|
-
|
|
8
|
-
private
|
|
9
|
-
|
|
10
|
-
def start_health_checks_if_needed
|
|
11
|
-
return unless @current_step == "health_check"
|
|
12
|
-
|
|
13
|
-
jobs_to_enqueue = []
|
|
14
|
-
|
|
15
|
-
@import_session.with_lock do
|
|
16
|
-
@import_session.reload
|
|
17
|
-
selected = Array(@import_session.selected_source_ids).map(&:to_s)
|
|
18
|
-
|
|
19
|
-
if selected.blank?
|
|
20
|
-
@import_session.update_columns(health_checks_active: false, health_check_target_ids: [])
|
|
21
|
-
next
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
if @import_session.health_checks_active? && @import_session.health_check_targets.sort == selected.sort
|
|
25
|
-
@health_check_target_ids = @import_session.health_check_targets
|
|
26
|
-
next
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
updated_entries = reset_health_results(@import_session.parsed_sources, selected)
|
|
30
|
-
@import_session.update!(
|
|
31
|
-
parsed_sources: updated_entries,
|
|
32
|
-
health_checks_active: true,
|
|
33
|
-
health_check_target_ids: selected,
|
|
34
|
-
health_check_started_at: Time.current,
|
|
35
|
-
health_check_completed_at: nil
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
@health_check_target_ids = selected
|
|
39
|
-
jobs_to_enqueue = selected
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
enqueue_health_check_jobs(@import_session, jobs_to_enqueue) if jobs_to_enqueue.any?
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def reset_health_results(entries, target_ids)
|
|
46
|
-
Array(entries).map do |entry|
|
|
47
|
-
entry_hash = entry.to_h
|
|
48
|
-
entry_id = entry_hash["id"] || entry_hash[:id]
|
|
49
|
-
next entry_hash unless target_ids.include?(entry_id.to_s)
|
|
50
|
-
|
|
51
|
-
entry_hash.merge("health_status" => "pending", "health_error" => nil)
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def enqueue_health_check_jobs(import_session, target_ids)
|
|
56
|
-
target_ids.each do |target_id|
|
|
57
|
-
SourceMonitor::ImportSessionHealthCheckJob.set(wait: 1.second).perform_later(import_session.id, target_id)
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def deactivate_health_checks!
|
|
62
|
-
return unless @import_session.health_checks_active?
|
|
63
|
-
|
|
64
|
-
@import_session.update_columns(
|
|
65
|
-
health_checks_active: false,
|
|
66
|
-
health_check_completed_at: Time.current
|
|
67
|
-
)
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
def health_check_entries(selected_ids)
|
|
71
|
-
targets = health_check_targets
|
|
72
|
-
entries = Array(@import_session.parsed_sources).map { |entry| normalize_entry(entry) }
|
|
73
|
-
|
|
74
|
-
entries.select { |entry| targets.include?(entry[:id]) }.map do |entry|
|
|
75
|
-
entry.merge(selected: selected_ids.include?(entry[:id]))
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def health_check_progress(entries)
|
|
80
|
-
total = health_check_targets.size
|
|
81
|
-
completed = entries.count { |entry| health_check_complete?(entry) }
|
|
82
|
-
|
|
83
|
-
{
|
|
84
|
-
completed: completed,
|
|
85
|
-
total: total,
|
|
86
|
-
pending: [ total - completed, 0 ].max,
|
|
87
|
-
active: @import_session.health_checks_active?,
|
|
88
|
-
done: total.positive? && completed >= total
|
|
89
|
-
}
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
def health_check_complete?(entry)
|
|
93
|
-
%w[working failing].include?(entry[:health_status].to_s)
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
def health_check_targets
|
|
97
|
-
targets = @import_session.health_check_targets
|
|
98
|
-
targets = Array(@import_session.selected_source_ids).map(&:to_s) if targets.blank?
|
|
99
|
-
targets
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def prepare_health_check_context
|
|
103
|
-
start_health_checks_if_needed
|
|
104
|
-
|
|
105
|
-
@selected_source_ids = Array(@import_session.selected_source_ids).map(&:to_s)
|
|
106
|
-
@health_check_entries = health_check_entries(@selected_source_ids)
|
|
107
|
-
@health_check_target_ids = health_check_targets
|
|
108
|
-
@health_progress = health_check_progress(@health_check_entries)
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
end
|
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module SourceMonitor
|
|
4
|
-
module ImportSessions
|
|
5
|
-
module OpmlParser
|
|
6
|
-
extend ActiveSupport::Concern
|
|
7
|
-
|
|
8
|
-
ALLOWED_CONTENT_TYPES = %w[text/xml application/xml text/x-opml application/opml].freeze
|
|
9
|
-
GENERIC_CONTENT_TYPES = %w[application/octet-stream binary/octet-stream].freeze
|
|
10
|
-
|
|
11
|
-
class UploadError < StandardError; end
|
|
12
|
-
|
|
13
|
-
private
|
|
14
|
-
|
|
15
|
-
def build_file_metadata
|
|
16
|
-
return {} unless params[:opml_file].respond_to?(:original_filename)
|
|
17
|
-
|
|
18
|
-
file = params[:opml_file]
|
|
19
|
-
{
|
|
20
|
-
"filename" => file.original_filename,
|
|
21
|
-
"byte_size" => file.size,
|
|
22
|
-
"content_type" => file.content_type
|
|
23
|
-
}
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def uploading_file?
|
|
27
|
-
params[:opml_file].present?
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def validate_upload!
|
|
31
|
-
return [ "Upload an OPML file to continue." ] unless uploading_file?
|
|
32
|
-
|
|
33
|
-
file = params[:opml_file]
|
|
34
|
-
errors = []
|
|
35
|
-
|
|
36
|
-
errors << "The uploaded file is empty. Choose another OPML file." if file.size.to_i <= 0
|
|
37
|
-
|
|
38
|
-
if file.content_type.present? && !content_type_allowed?(file.content_type) && !generic_content_type?(file.content_type)
|
|
39
|
-
errors << "Upload must be an OPML or XML file."
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
errors
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def content_type_allowed?(content_type)
|
|
46
|
-
ALLOWED_CONTENT_TYPES.include?(content_type)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def generic_content_type?(content_type)
|
|
50
|
-
GENERIC_CONTENT_TYPES.include?(content_type)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def parse_opml_file(file)
|
|
54
|
-
content = file.read
|
|
55
|
-
file.rewind if file.respond_to?(:rewind)
|
|
56
|
-
|
|
57
|
-
raise UploadError, "The uploaded file appears to be empty." if content.blank?
|
|
58
|
-
|
|
59
|
-
document = Nokogiri::XML(content) { |config| config.strict.nonet }
|
|
60
|
-
raise UploadError, "The uploaded file is not valid XML or OPML." if document.root.nil?
|
|
61
|
-
|
|
62
|
-
outlines = document.xpath("//outline")
|
|
63
|
-
|
|
64
|
-
entries = []
|
|
65
|
-
|
|
66
|
-
outlines.each_with_index do |outline, index|
|
|
67
|
-
next unless outline.attribute_nodes.any? { |attr| attr.name.casecmp("xmlurl").zero? }
|
|
68
|
-
|
|
69
|
-
entries << build_entry(outline, index)
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
entries
|
|
73
|
-
rescue Nokogiri::XML::SyntaxError => error
|
|
74
|
-
raise UploadError, "We couldn't parse that OPML file: #{error.message}"
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
def build_entry(outline, index)
|
|
78
|
-
feed_url = outline_attribute(outline, "xmlUrl")
|
|
79
|
-
website_url = outline_attribute(outline, "htmlUrl")
|
|
80
|
-
title = outline_attribute(outline, "title") || outline_attribute(outline, "text")
|
|
81
|
-
|
|
82
|
-
if feed_url.blank?
|
|
83
|
-
return malformed_entry(index, feed_url, title, website_url, "Missing feed URL")
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
unless valid_feed_url?(feed_url)
|
|
87
|
-
return malformed_entry(index, feed_url, title, website_url, "Feed URL must be HTTP or HTTPS")
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
{
|
|
91
|
-
id: "outline-#{index}",
|
|
92
|
-
raw_outline_index: index,
|
|
93
|
-
feed_url: feed_url,
|
|
94
|
-
title: title,
|
|
95
|
-
website_url: website_url,
|
|
96
|
-
status: "valid",
|
|
97
|
-
error: nil,
|
|
98
|
-
health_status: nil,
|
|
99
|
-
health_error: nil
|
|
100
|
-
}
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def malformed_entry(index, feed_url, title, website_url, error)
|
|
104
|
-
{
|
|
105
|
-
id: "outline-#{index}",
|
|
106
|
-
raw_outline_index: index,
|
|
107
|
-
feed_url: feed_url.presence,
|
|
108
|
-
title: title,
|
|
109
|
-
website_url: website_url,
|
|
110
|
-
status: "malformed",
|
|
111
|
-
error: error,
|
|
112
|
-
health_status: nil,
|
|
113
|
-
health_error: nil
|
|
114
|
-
}
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
def outline_attribute(outline, name)
|
|
118
|
-
attribute = outline.attribute_nodes.find { |attr| attr.name.casecmp(name).zero? }
|
|
119
|
-
attribute&.value.to_s.presence
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
def valid_feed_url?(url)
|
|
123
|
-
parsed = URI.parse(url)
|
|
124
|
-
parsed.is_a?(URI::HTTP) && parsed.host.present?
|
|
125
|
-
rescue URI::InvalidURIError
|
|
126
|
-
false
|
|
127
|
-
end
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
end
|