source_monitor 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/commands/release.md +101 -58
- data/.claude/skills/sm-configure/SKILL.md +13 -2
- data/.claude/skills/sm-configure/reference/configuration-reference.md +33 -0
- data/.claude/skills/sm-host-setup/SKILL.md +18 -2
- data/.claude/skills/sm-host-setup/reference/setup-checklist.md +33 -0
- data/.claude/skills/sm-job/SKILL.md +1 -1
- data/.claude/skills/sm-upgrade/SKILL.md +102 -0
- data/.claude/skills/sm-upgrade/reference/upgrade-workflow.md +92 -0
- data/.claude/skills/sm-upgrade/reference/version-history.md +68 -0
- data/.vbw-planning/SHIPPED.md +35 -0
- data/.vbw-planning/config.json +24 -1
- data/.vbw-planning/discovery.json +3 -1
- data/.vbw-planning/{REQUIREMENTS.md → milestones/generator-enhancements/REQUIREMENTS.md} +22 -0
- data/.vbw-planning/milestones/generator-enhancements/ROADMAP.md +125 -0
- data/.vbw-planning/milestones/generator-enhancements/SHIPPED.md +40 -0
- data/.vbw-planning/milestones/generator-enhancements/STATE.md +43 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/01-CONTEXT.md +33 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/01-VERIFICATION.md +86 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/PLAN-01-SUMMARY.md +61 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/PLAN-01.md +380 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/02-verification/02-VERIFICATION.md +78 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/02-verification/PLAN-01-SUMMARY.md +46 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/02-verification/PLAN-01.md +500 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/03-docs-alignment/03-VERIFICATION.md +89 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/03-docs-alignment/PLAN-01-SUMMARY.md +48 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/03-docs-alignment/PLAN-01.md +456 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/04-dashboard-ux/04-VERIFICATION.md +129 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/04-dashboard-ux/PLAN-01-SUMMARY.md +70 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/04-dashboard-ux/PLAN-01.md +747 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/05-VERIFICATION.md +156 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-01-SUMMARY.md +69 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-01.md +455 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-02-SUMMARY.md +39 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-02.md +488 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/06-netflix-feed-fix/06-VERIFICATION.md +100 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/06-netflix-feed-fix/PLAN-01-SUMMARY.md +37 -0
- data/.vbw-planning/milestones/generator-enhancements/phases/06-netflix-feed-fix/PLAN-01.md +345 -0
- data/.vbw-planning/milestones/upgrade-assurance/REQUIREMENTS.md +80 -0
- data/.vbw-planning/milestones/upgrade-assurance/ROADMAP.md +75 -0
- data/.vbw-planning/milestones/upgrade-assurance/STATE.md +29 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/01-upgrade-command/01-VERIFICATION.md +144 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/01-upgrade-command/PLAN-01-SUMMARY.md +43 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/01-upgrade-command/PLAN-01.md +405 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/02-config-deprecation/PLAN-01-SUMMARY.md +27 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/02-config-deprecation/PLAN-01.md +303 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/03-upgrade-skill-docs/03-VERIFICATION.md +380 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/03-upgrade-skill-docs/PLAN-01-SUMMARY.md +36 -0
- data/.vbw-planning/milestones/upgrade-assurance/phases/03-upgrade-skill-docs/PLAN-01.md +652 -0
- data/CHANGELOG.md +48 -0
- data/CLAUDE.md +5 -3
- data/Gemfile.lock +1 -1
- data/VERSION +1 -1
- data/app/assets/builds/source_monitor/application.css +9 -0
- data/app/helpers/source_monitor/application_helper.rb +38 -0
- data/app/jobs/source_monitor/download_content_images_job.rb +72 -0
- data/app/models/source_monitor/item_content.rb +2 -0
- data/app/views/source_monitor/dashboard/_recent_activity.html.erb +9 -0
- data/app/views/source_monitor/items/_details.html.erb +2 -2
- data/app/views/source_monitor/logs/index.html.erb +9 -0
- data/app/views/source_monitor/sources/_details.html.erb +2 -2
- data/app/views/source_monitor/sources/_row.html.erb +1 -1
- data/docs/setup.md +10 -1
- data/docs/troubleshooting.md +38 -7
- data/docs/upgrade.md +140 -0
- data/lib/generators/source_monitor/install/install_generator.rb +101 -0
- data/lib/source_monitor/configuration/deprecation_registry.rb +237 -0
- data/lib/source_monitor/configuration/http_settings.rb +7 -1
- data/lib/source_monitor/configuration/images_settings.rb +37 -0
- data/lib/source_monitor/configuration.rb +11 -1
- data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +16 -7
- data/lib/source_monitor/dashboard/recent_activity.rb +1 -0
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +15 -2
- data/lib/source_monitor/fetching/feed_fetcher/entry_processor.rb +13 -0
- data/lib/source_monitor/http.rb +23 -0
- data/lib/source_monitor/images/content_rewriter.rb +81 -0
- data/lib/source_monitor/images/downloader.rb +82 -0
- data/lib/source_monitor/logs/table_presenter.rb +25 -0
- data/lib/source_monitor/setup/cli.rb +7 -0
- data/lib/source_monitor/setup/procfile_patcher.rb +31 -0
- data/lib/source_monitor/setup/queue_config_patcher.rb +84 -0
- data/lib/source_monitor/setup/skills_installer.rb +1 -0
- data/lib/source_monitor/setup/upgrade_command.rb +59 -0
- data/lib/source_monitor/setup/verification/pending_migrations_verifier.rb +92 -0
- data/lib/source_monitor/setup/verification/recurring_schedule_verifier.rb +102 -0
- data/lib/source_monitor/setup/verification/runner.rb +1 -1
- data/lib/source_monitor/setup/verification/solid_queue_verifier.rb +1 -1
- data/lib/source_monitor/setup/workflow.rb +10 -0
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +11 -0
- metadata +51 -2
|
@@ -49,7 +49,48 @@ module SourceMonitor
|
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
51
|
|
|
52
|
+
# Makes sure Procfile.dev under destination_root declares a `jobs:` process.
#
# - No Procfile.dev: creates one containing a web entry plus PROCFILE_JOBS_ENTRY.
# - Procfile.dev already has a line starting with "jobs:": leaves it untouched.
# - Otherwise: appends a blank line followed by PROCFILE_JOBS_ENTRY.
#
# Each outcome is reported through say_status.
def patch_procfile_dev
  target = File.join(destination_root, "Procfile.dev")

  unless File.exist?(target)
    File.write(target, "web: bin/rails server -p 3000\n#{PROCFILE_JOBS_ENTRY}\n")
    return say_status(:create, "Procfile.dev", :green)
  end

  if File.read(target).match?(/^jobs:/)
    say_status :skip, "Procfile.dev (jobs entry already present)", :yellow
    return
  end

  File.open(target, "a") do |io|
    io.puts("", PROCFILE_JOBS_ENTRY)
  end
  say_status :append, "Procfile.dev", :green
end
|
|
69
|
+
|
|
70
|
+
# Ensures the dispatchers in the host app's config/queue.yml carry a
# recurring_schedule entry.
#
# The file is skipped (with a say_status explaining why) when it does not
# exist, cannot be parsed as YAML, is not a mapping at the top level, or
# already has a recurring_schedule on some dispatcher. Otherwise the parsed
# document is patched and written back with YAML.dump.
#
# NOTE(review): the write-back goes through YAML.dump, so comments and
# original formatting in queue.yml are not preserved.
def configure_queue_dispatcher
  queue_path = File.join(destination_root, "config/queue.yml")

  unless File.exist?(queue_path)
    say_status :skip, "config/queue.yml (file not found — create it or run rails app:update to generate)", :yellow
    return
  end

  begin
    parsed = YAML.safe_load(File.read(queue_path), aliases: true) || {}
  rescue Psych::Exception => error
    # A broken queue.yml must not abort the whole generator run.
    say_status :skip, "config/queue.yml (could not be parsed: #{error.message})", :yellow
    return
  end

  # The helpers below walk the document with Hash methods; a scalar or list
  # document cannot hold dispatcher sections, so leave it alone.
  unless parsed.is_a?(Hash)
    say_status :skip, "config/queue.yml (expected a YAML mapping at the top level)", :yellow
    return
  end

  if queue_config_has_recurring_schedule?(parsed)
    say_status :skip, "config/queue.yml (recurring_schedule already configured)", :yellow
    return
  end

  add_recurring_schedule_to_dispatchers!(parsed)
  File.write(queue_path, YAML.dump(parsed))
  say_status :append, "config/queue.yml (added recurring_schedule to dispatchers)", :green
end
|
|
89
|
+
|
|
52
90
|
def print_next_steps
|
|
91
|
+
say_status :info,
|
|
92
|
+
"Procfile.dev configured — run bin/dev to start both web server and Solid Queue workers.",
|
|
93
|
+
:green
|
|
53
94
|
say_status :info,
|
|
54
95
|
"Recurring jobs configured in config/recurring.yml — they'll run automatically with bin/dev or bin/jobs.",
|
|
55
96
|
:green
|
|
@@ -60,6 +101,8 @@ module SourceMonitor
|
|
|
60
101
|
|
|
61
102
|
private
|
|
62
103
|
|
|
104
|
+
PROCFILE_JOBS_ENTRY = "jobs: bundle exec rake solid_queue:start"
|
|
105
|
+
|
|
63
106
|
RECURRING_ENTRIES = {
|
|
64
107
|
"source_monitor_schedule_fetches" => {
|
|
65
108
|
"class" => "SourceMonitor::ScheduleFetchesJob",
|
|
@@ -154,6 +197,64 @@ module SourceMonitor
|
|
|
154
197
|
path = (raw_path && !raw_path.strip.empty?) ? raw_path.strip : "/source_monitor"
|
|
155
198
|
path.start_with?("/") ? path : "/#{path}"
|
|
156
199
|
end
|
|
200
|
+
|
|
201
|
+
RECURRING_SCHEDULE_VALUE = "config/recurring.yml"
|
|
202
|
+
|
|
203
|
+
DEFAULT_DISPATCHER = {
|
|
204
|
+
"polling_interval" => 1,
|
|
205
|
+
"batch_size" => 500,
|
|
206
|
+
"recurring_schedule" => RECURRING_SCHEDULE_VALUE
|
|
207
|
+
}.freeze
|
|
208
|
+
|
|
209
|
+
# Reports whether any dispatcher entry in the parsed queue config already
# declares a recurring_schedule.
#
# Looks at the "dispatchers" list of the given mapping and then recurses into
# every nested mapping, so both flat and environment-nested layouts are
# covered. String and symbol keys are treated alike at every level.
#
# @param parsed [Hash, Object] parsed queue.yml content
# @return [Boolean] false for anything that is not a Hash
def queue_config_has_recurring_schedule?(parsed)
  return false unless parsed.is_a?(Hash)

  dispatchers = parsed["dispatchers"] || parsed[:dispatchers]
  if dispatchers.is_a?(Array)
    configured = dispatchers.any? do |dispatcher|
      dispatcher.is_a?(Hash) &&
        (dispatcher.key?("recurring_schedule") || dispatcher.key?(:recurring_schedule))
    end
    return true if configured
  end

  # Environment sections (and anything nested deeper) are handled by recursion.
  parsed.each_value.any? { |value| queue_config_has_recurring_schedule?(value) }
end
|
|
228
|
+
|
|
229
|
+
# Mutates the parsed queue config so dispatcher entries carry a
# "recurring_schedule" key.
#
# Every nested mapping with a "dispatchers" array is patched, followed by a
# top-level "dispatchers" array (flat config). Existing recurring_schedule
# values are kept. When no "dispatchers" array exists anywhere, a top-level
# one is created from DEFAULT_DISPATCHER.
#
# @param parsed [Hash] parsed queue.yml content (string keys)
def add_recurring_schedule_to_dispatchers!(parsed)
  # Nested sections first, then the document root, mirroring lookup order.
  sections = parsed.each_value.select { |section| section.is_a?(Hash) }
  sections << parsed

  patched_any = false
  sections.each do |section|
    list = section["dispatchers"]
    next unless list.is_a?(Array)

    list.each do |entry|
      entry["recurring_schedule"] ||= RECURRING_SCHEDULE_VALUE if entry.is_a?(Hash)
    end
    patched_any = true
  end

  # Nothing to patch anywhere: fall back to a default dispatcher section.
  parsed["dispatchers"] = [ DEFAULT_DISPATCHER.dup ] unless patched_any
end
|
|
157
258
|
end
|
|
158
259
|
end
|
|
159
260
|
end
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  # Raised by trap methods of options registered with severity :error.
  class DeprecatedOptionError < StandardError; end

  class Configuration
    # Registry for deprecated configuration options.
    #
    # Engine developers register deprecations at boot time via the DSL:
    #
    #   SourceMonitor::Configuration::DeprecationRegistry.register(
    #     "http.old_proxy_url",
    #     removed_in: "0.5.0",
    #     replacement: "http.proxy",
    #     severity: :warning,
    #     message: "Use config.http.proxy instead"
    #   )
    #
    # When a host app's initializer accesses a deprecated option, the
    # trapping method fires automatically:
    #   - :warning -- logs via Rails.logger.warn (Kernel#warn when no Rails
    #                 logger is available) and forwards to the replacement
    #   - :error   -- raises SourceMonitor::DeprecatedOptionError
    #
    class DeprecationRegistry
      # Maps settings accessor names (as used on Configuration) to their classes.
      SETTINGS_CLASSES = {
        "http" => "HTTPSettings",
        "fetching" => "FetchingSettings",
        "health" => "HealthSettings",
        "scraping" => "ScrapingSettings",
        "retention" => "RetentionSettings",
        "realtime" => "RealtimeSettings",
        "authentication" => "AuthenticationSettings",
        "images" => "ImagesSettings",
        "scrapers" => "ScraperRegistry",
        "events" => "Events",
        "models" => "Models"
      }.freeze

      # Severities accepted by .register.
      SEVERITIES = %i[warning error].freeze

      class << self
        # Register a deprecated configuration option.
        #
        # If the option still exists as a real method on the target class, no
        # trap is defined; a warning is printed and the entry is recorded with
        # skipped: true. Registering an already-trapped path again redefines
        # the trap with the new settings.
        #
        # @param path [String] dot-notation path, e.g. "http.old_proxy_url" or "old_queue_prefix"
        # @param removed_in [String] version in which the option was deprecated
        # @param replacement [String, nil] dot-notation path to the replacement option
        # @param severity [:warning, :error] :warning logs + forwards, :error raises
        # @param message [String, nil] additional migration guidance
        # @return [Hash, nil] the recorded entry, or nil when the trap was skipped
        # @raise [ArgumentError] for an unknown severity or settings accessor
        def register(path, removed_in:, replacement: nil, severity: :warning, message: nil)
          # Validate up front so a bad severity is rejected on every path,
          # including the "option still exists" path that defines no trap.
          unless SEVERITIES.include?(severity)
            raise ArgumentError, "Unknown severity: #{severity}. Must be :warning or :error."
          end

          segments = path.split(".")
          source_prefix = nil
          if segments.length == 1
            target_class = Configuration
            option_name = segments.first
          else
            source_prefix = segments.first
            option_name = segments.last
            class_name = SETTINGS_CLASSES[source_prefix]
            raise ArgumentError, "Unknown settings accessor: #{source_prefix}" unless class_name

            target_class = Configuration.const_get(class_name)
          end

          deprecation_message = build_message(path, removed_in, replacement, message)
          entry = { path: path, removed_in: removed_in, replacement: replacement,
                    severity: severity, message: deprecation_message }

          # A trap installed by an earlier registration of this same path is
          # not a live option; only a method we did not define counts.
          if !trap_installed?(path) && option_defined?(target_class, option_name)
            warn "[SourceMonitor] DeprecationRegistry: '#{path}' already exists on #{target_class.name}. " \
                 "Skipping trap definition -- the option is not yet removed/renamed."
            entries[path] = entry.merge(skipped: true)
            return
          end

          define_trap_methods(target_class, option_name, deprecation_message, severity, replacement,
                              source_prefix: source_prefix)

          entries[path] = entry.merge(skipped: false)
        end

        # Remove all registered deprecation traps and clear state.
        # Essential for test isolation.
        def clear!
          defined_methods.each do |target_class, method_name|
            target_class.remove_method(method_name) if target_class.method_defined?(method_name)
          rescue NameError
            # Method was already removed or never defined; ignore.
          end

          @entries = {}
          @defined_methods = []
        end

        # Returns the registry's entries hash, keyed by path. This is the live
        # hash that .register writes to, not a copy.
        def entries
          @entries ||= {}
        end

        # Check if a path is registered.
        def registered?(path)
          entries.key?(path)
        end

        # No-op hook for future "default changed" checks.
        # Called by Configuration#check_deprecations! after the configure block.
        def check_defaults!(_config)
          # Reserved for future use. Phases may add checks like:
          # "option X changed its default from A to B in version Y"
        end

        private

        # [target_class, method_name] pairs for every trap method defined so far.
        def defined_methods
          @defined_methods ||= []
        end

        # True when the path has an entry whose trap methods were defined.
        def trap_installed?(path)
          existing = entries[path]
          !existing.nil? && !existing[:skipped]
        end

        # True when the target class exposes a public/protected reader or writer
        # named after the option.
        def option_defined?(target_class, option_name)
          target_class.method_defined?(:"#{option_name}=") || target_class.method_defined?(option_name.to_sym)
        end

        # Builds the frozen, user-facing deprecation text.
        def build_message(path, removed_in, replacement, extra_message)
          parts = +"[SourceMonitor] DEPRECATION: '#{path}' was deprecated in v#{removed_in}"
          parts << " and replaced by '#{replacement}'" if replacement
          parts << ". #{extra_message}" if extra_message
          parts << "." unless parts.end_with?(".")
          parts.freeze
        end

        # Defines the reader and writer traps for one option and records them
        # so clear! can remove them again.
        def define_trap_methods(target_class, option_name, deprecation_message, severity, replacement, source_prefix: nil)
          writer_name = :"#{option_name}="
          reader_name = option_name.to_sym

          case severity
          when :warning
            define_warning_writer(target_class, writer_name, deprecation_message, replacement, source_prefix)
            define_warning_reader(target_class, reader_name, deprecation_message, replacement, source_prefix)
          when :error
            define_error_method(target_class, writer_name, deprecation_message)
            define_error_method(target_class, reader_name, deprecation_message)
          else
            raise ArgumentError, "Unknown severity: #{severity}. Must be :warning or :error."
          end

          # Re-registration redefines the same methods; track each pair once.
          [ [ target_class, writer_name ], [ target_class, reader_name ] ].each do |pair|
            defined_methods << pair unless defined_methods.include?(pair)
          end
        end

        # Emits a deprecation warning through the Rails logger when one is
        # available, otherwise through Kernel#warn.
        def log_deprecation(message)
          rails_logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
          if rails_logger
            rails_logger.warn(message)
          else
            warn(message)
          end
        end

        # Writer trap: warns, then forwards the value to the replacement setter
        # (when a replacement was given).
        def define_warning_writer(target_class, writer_name, deprecation_message, replacement, source_prefix)
          replacement_writer = replacement_setter_for(replacement, source_prefix)
          # Bound Method object: the trap body runs on a settings instance and
          # cannot call the registry's private helper directly.
          logger = method(:log_deprecation)

          target_class.define_method(writer_name) do |value|
            logger.call(deprecation_message)
            if replacement_writer
              resolve_replacement_target(replacement_writer[:target]).public_send(
                replacement_writer[:setter], value
              )
            end
          end
        end

        # Reader trap: warns, then returns the replacement's value (nil when no
        # replacement was given).
        def define_warning_reader(target_class, reader_name, deprecation_message, replacement, source_prefix)
          replacement_reader = replacement_getter_for(replacement, source_prefix)
          logger = method(:log_deprecation)

          target_class.define_method(reader_name) do
            logger.call(deprecation_message)
            if replacement_reader
              resolve_replacement_target(replacement_reader[:target]).public_send(
                replacement_reader[:getter]
              )
            end
          end
        end

        # Trap that raises for any call, with or without arguments.
        def define_error_method(target_class, method_name, deprecation_message)
          target_class.define_method(method_name) do |*|
            raise SourceMonitor::DeprecatedOptionError, deprecation_message
          end
        end

        # Splits a replacement path into [target_accessor, option_name].
        # The target is nil when the replacement lives on the object the trap
        # is defined on (single-segment path, or same prefix as the source).
        def replacement_target_for(replacement, source_prefix)
          segments = replacement.split(".")
          return [ nil, segments.first ] if segments.length == 1
          return [ nil, segments.last ] if source_prefix && segments.first == source_prefix

          [ segments.first.to_sym, segments.last ]
        end

        # Parse replacement path into target accessor chain and setter name.
        # When source_prefix matches the replacement prefix, the target is nil
        # (replacement is on the same settings class).
        #
        #   "http.proxy" with source_prefix "http" => { target: nil, setter: :proxy= }
        #   "queue_namespace"                      => { target: nil, setter: :queue_namespace= }
        #   "http.proxy" with source_prefix nil    => { target: :http, setter: :proxy= }
        def replacement_setter_for(replacement, source_prefix = nil)
          return nil unless replacement

          target, name = replacement_target_for(replacement, source_prefix)
          { target: target, setter: :"#{name}=" }
        end

        # Parse replacement path into target accessor chain and getter name.
        def replacement_getter_for(replacement, source_prefix = nil)
          return nil unless replacement

          target, name = replacement_target_for(replacement, source_prefix)
          { target: target, getter: name.to_sym }
        end
      end
    end
  end
end
|
|
211
|
+
|
|
212
|
+
# Add a helper method to settings classes and Configuration for resolving
|
|
213
|
+
# replacement targets. This allows "http.proxy" to resolve as self.http.proxy
|
|
214
|
+
# from within a Configuration instance, or as self.proxy from within an
|
|
215
|
+
# HTTPSettings instance.
|
|
216
|
+
module SourceMonitor
  class Configuration
    # Resolves the object a deprecation trap should forward to.
    #
    # @param accessor [Symbol, nil] name of a public accessor on this object,
    #   or nil/false when the replacement lives on this object itself
    # @return [Object] the accessor's value, or self when no accessor is given
    def resolve_replacement_target(accessor)
      return self unless accessor

      public_send(accessor)
    end
    private :resolve_replacement_target
  end
end
|
|
225
|
+
|
|
226
|
+
# Add the same helper to all settings classes so forwarding works
|
|
227
|
+
# when the deprecated method is defined on a nested settings class
|
|
228
|
+
# and the replacement is on the same class (e.g. "http.old_proxy" -> "http.proxy").
|
|
229
|
+
# Install a private resolve_replacement_target helper on every settings class
# listed in SETTINGS_CLASSES, unless the class already defines one itself.
#
# NOTE(review): with a non-nil accessor the helper calls that accessor on the
# settings object itself, so a replacement living under a different settings
# section would need the accessor to exist there — confirm whether
# cross-section replacements are meant to be supported.
SourceMonitor::Configuration::DeprecationRegistry::SETTINGS_CLASSES.each_value do |class_name|
  klass = SourceMonitor::Configuration.const_get(class_name)

  # The helper is made private below, and Module#method_defined? ignores
  # private methods, so the private check is what detects an earlier install.
  next if klass.private_method_defined?(:resolve_replacement_target, false) ||
          klass.method_defined?(:resolve_replacement_target, false)

  klass.define_method(:resolve_replacement_target) do |accessor|
    accessor ? public_send(accessor) : self
  end
  klass.send(:private, :resolve_replacement_target)
end
|
|
@@ -13,7 +13,10 @@ module SourceMonitor
|
|
|
13
13
|
:retry_interval,
|
|
14
14
|
:retry_interval_randomness,
|
|
15
15
|
:retry_backoff_factor,
|
|
16
|
-
:retry_statuses
|
|
16
|
+
:retry_statuses,
|
|
17
|
+
:ssl_ca_file,
|
|
18
|
+
:ssl_ca_path,
|
|
19
|
+
:ssl_verify
|
|
17
20
|
|
|
18
21
|
def initialize
|
|
19
22
|
reset!
|
|
@@ -31,6 +34,9 @@ module SourceMonitor
|
|
|
31
34
|
@retry_interval_randomness = 0.5
|
|
32
35
|
@retry_backoff_factor = 2
|
|
33
36
|
@retry_statuses = nil
|
|
37
|
+
@ssl_ca_file = nil
|
|
38
|
+
@ssl_ca_path = nil
|
|
39
|
+
@ssl_verify = true
|
|
34
40
|
end
|
|
35
41
|
|
|
36
42
|
private
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
  class Configuration
    # Image-related configuration values with their defaults.
    #
    # download_enabled? is the switch other code consults; the size, timeout
    # and content-type values are plain accessors here (how they are enforced
    # is not visible in this class).
    class ImagesSettings
      DEFAULT_MAX_DOWNLOAD_SIZE = 10 * 1024 * 1024 # 10 MB
      DEFAULT_DOWNLOAD_TIMEOUT = 30 # seconds
      DEFAULT_ALLOWED_CONTENT_TYPES = %w[
        image/jpeg
        image/png
        image/gif
        image/webp
        image/svg+xml
      ].freeze

      attr_accessor :download_to_active_storage,
        :max_download_size,
        :download_timeout,
        :allowed_content_types

      # Starts from the defaults.
      def initialize
        reset!
      end

      # Restores every setting to its default. The allowed content types get
      # a fresh copy so callers can modify the list without touching the
      # frozen default.
      def reset!
        self.download_to_active_storage = false
        self.max_download_size = DEFAULT_MAX_DOWNLOAD_SIZE
        self.download_timeout = DEFAULT_DOWNLOAD_TIMEOUT
        self.allowed_content_types = DEFAULT_ALLOWED_CONTENT_TYPES.dup
      end

      # @return [Boolean] true when download_to_active_storage is truthy
      def download_enabled?
        download_to_active_storage ? true : false
      end
    end
  end
end
|
|
@@ -8,11 +8,13 @@ require "source_monitor/configuration/scraping_settings"
|
|
|
8
8
|
require "source_monitor/configuration/realtime_settings"
|
|
9
9
|
require "source_monitor/configuration/retention_settings"
|
|
10
10
|
require "source_monitor/configuration/authentication_settings"
|
|
11
|
+
require "source_monitor/configuration/images_settings"
|
|
11
12
|
require "source_monitor/configuration/scraper_registry"
|
|
12
13
|
require "source_monitor/configuration/events"
|
|
13
14
|
require "source_monitor/configuration/validation_definition"
|
|
14
15
|
require "source_monitor/configuration/model_definition"
|
|
15
16
|
require "source_monitor/configuration/models"
|
|
17
|
+
require "source_monitor/configuration/deprecation_registry"
|
|
16
18
|
|
|
17
19
|
module SourceMonitor
|
|
18
20
|
class Configuration
|
|
@@ -26,7 +28,7 @@ module SourceMonitor
|
|
|
26
28
|
:mission_control_enabled,
|
|
27
29
|
:mission_control_dashboard_path
|
|
28
30
|
|
|
29
|
-
attr_reader :http, :scrapers, :retention, :events, :models, :realtime, :fetching, :health, :authentication, :scraping
|
|
31
|
+
attr_reader :http, :scrapers, :retention, :events, :models, :realtime, :fetching, :health, :authentication, :scraping, :images
|
|
30
32
|
|
|
31
33
|
DEFAULT_QUEUE_NAMESPACE = "source_monitor"
|
|
32
34
|
|
|
@@ -50,6 +52,7 @@ module SourceMonitor
|
|
|
50
52
|
@health = HealthSettings.new
|
|
51
53
|
@authentication = AuthenticationSettings.new
|
|
52
54
|
@scraping = ScrapingSettings.new
|
|
55
|
+
@images = ImagesSettings.new
|
|
53
56
|
end
|
|
54
57
|
|
|
55
58
|
def queue_name_for(role)
|
|
@@ -83,5 +86,12 @@ module SourceMonitor
|
|
|
83
86
|
raise ArgumentError, "unknown queue role #{role.inspect}"
|
|
84
87
|
end
|
|
85
88
|
end
|
|
89
|
+
|
|
90
|
+
# Post-configure hook for deprecation validation.
|
|
91
|
+
# Passes this configuration to DeprecationRegistry.check_defaults!, the
|
|
92
|
+
# hook reserved for future "default changed" checks. Currently a no-op.
|
|
93
|
+
def check_deprecations!
|
|
94
|
+
DeprecationRegistry.check_defaults!(self)
|
|
95
|
+
end
|
|
86
96
|
end
|
|
87
97
|
end
|
|
@@ -37,7 +37,8 @@ module SourceMonitor
|
|
|
37
37
|
item_title: row["item_title"],
|
|
38
38
|
item_url: row["item_url"],
|
|
39
39
|
source_name: row["source_name"],
|
|
40
|
-
source_id: row["source_id"]
|
|
40
|
+
source_id: row["source_id"],
|
|
41
|
+
source_feed_url: row["source_feed_url"]
|
|
41
42
|
)
|
|
42
43
|
end
|
|
43
44
|
|
|
@@ -57,7 +58,8 @@ module SourceMonitor
|
|
|
57
58
|
item_title,
|
|
58
59
|
item_url,
|
|
59
60
|
source_name,
|
|
60
|
-
source_id
|
|
61
|
+
source_id,
|
|
62
|
+
source_feed_url
|
|
61
63
|
FROM (
|
|
62
64
|
#{fetch_log_sql}
|
|
63
65
|
UNION ALL
|
|
@@ -83,9 +85,12 @@ module SourceMonitor
|
|
|
83
85
|
NULL AS scraper_adapter,
|
|
84
86
|
NULL AS item_title,
|
|
85
87
|
NULL AS item_url,
|
|
86
|
-
|
|
87
|
-
#{SourceMonitor::FetchLog.quoted_table_name}.source_id AS source_id
|
|
88
|
+
#{SourceMonitor::Source.quoted_table_name}.#{quoted_source_name} AS source_name,
|
|
89
|
+
#{SourceMonitor::FetchLog.quoted_table_name}.source_id AS source_id,
|
|
90
|
+
#{SourceMonitor::Source.quoted_table_name}.feed_url AS source_feed_url
|
|
88
91
|
FROM #{SourceMonitor::FetchLog.quoted_table_name}
|
|
92
|
+
LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
|
|
93
|
+
ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::FetchLog.quoted_table_name}.source_id
|
|
89
94
|
SQL
|
|
90
95
|
end
|
|
91
96
|
|
|
@@ -100,12 +105,15 @@ module SourceMonitor
|
|
|
100
105
|
NULL AS items_updated,
|
|
101
106
|
#{SourceMonitor::ScrapeLog.quoted_table_name}.scraper_adapter AS scraper_adapter,
|
|
102
107
|
NULL AS item_title,
|
|
103
|
-
|
|
108
|
+
#{SourceMonitor::Item.quoted_table_name}.url AS item_url,
|
|
104
109
|
#{SourceMonitor::Source.quoted_table_name}.#{quoted_source_name} AS source_name,
|
|
105
|
-
#{SourceMonitor::ScrapeLog.quoted_table_name}.source_id AS source_id
|
|
110
|
+
#{SourceMonitor::ScrapeLog.quoted_table_name}.source_id AS source_id,
|
|
111
|
+
NULL AS source_feed_url
|
|
106
112
|
FROM #{SourceMonitor::ScrapeLog.quoted_table_name}
|
|
107
113
|
LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
|
|
108
114
|
ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::ScrapeLog.quoted_table_name}.source_id
|
|
115
|
+
LEFT JOIN #{SourceMonitor::Item.quoted_table_name}
|
|
116
|
+
ON #{SourceMonitor::Item.quoted_table_name}.id = #{SourceMonitor::ScrapeLog.quoted_table_name}.item_id
|
|
109
117
|
SQL
|
|
110
118
|
end
|
|
111
119
|
|
|
@@ -122,7 +130,8 @@ module SourceMonitor
|
|
|
122
130
|
#{SourceMonitor::Item.quoted_table_name}.title AS item_title,
|
|
123
131
|
#{SourceMonitor::Item.quoted_table_name}.url AS item_url,
|
|
124
132
|
#{SourceMonitor::Source.quoted_table_name}.#{quoted_source_name} AS source_name,
|
|
125
|
-
#{SourceMonitor::Item.quoted_table_name}.source_id AS source_id
|
|
133
|
+
#{SourceMonitor::Item.quoted_table_name}.source_id AS source_id,
|
|
134
|
+
NULL AS source_feed_url
|
|
126
135
|
FROM #{SourceMonitor::Item.quoted_table_name}
|
|
127
136
|
LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
|
|
128
137
|
ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::Item.quoted_table_name}.source_id
|
|
@@ -30,13 +30,16 @@ module SourceMonitor
|
|
|
30
30
|
end
|
|
31
31
|
|
|
32
32
|
def fetch_event(event)
|
|
33
|
+
domain = source_domain(event.source_feed_url)
|
|
33
34
|
{
|
|
34
35
|
label: "Fetch ##{event.id}",
|
|
35
36
|
description: "#{event.items_created.to_i} created / #{event.items_updated.to_i} updated",
|
|
36
37
|
status: event.success? ? :success : :failure,
|
|
37
38
|
type: :fetch,
|
|
38
39
|
time: event.occurred_at,
|
|
39
|
-
path: url_helpers.fetch_log_path(event.id)
|
|
40
|
+
path: url_helpers.fetch_log_path(event.id),
|
|
41
|
+
url_display: domain,
|
|
42
|
+
url_href: event.source_feed_url
|
|
40
43
|
}
|
|
41
44
|
end
|
|
42
45
|
|
|
@@ -47,10 +50,20 @@ module SourceMonitor
|
|
|
47
50
|
status: event.success? ? :success : :failure,
|
|
48
51
|
type: :scrape,
|
|
49
52
|
time: event.occurred_at,
|
|
50
|
-
path: url_helpers.scrape_log_path(event.id)
|
|
53
|
+
path: url_helpers.scrape_log_path(event.id),
|
|
54
|
+
url_display: event.item_url,
|
|
55
|
+
url_href: event.item_url
|
|
51
56
|
}
|
|
52
57
|
end
|
|
53
58
|
|
|
59
|
+
# Extracts the host portion of a feed URL for display.
#
# @param feed_url [String, nil] the source's feed URL
# @return [String, nil] the host, or nil when the URL is blank, cannot be
#   parsed, or has no host component
def source_domain(feed_url)
  return if feed_url.blank?

  begin
    parsed = URI.parse(feed_url.to_s)
  rescue URI::InvalidURIError
    return nil
  end

  parsed.host
end
|
|
66
|
+
|
|
54
67
|
def item_event(event)
|
|
55
68
|
{
|
|
56
69
|
label: event.item_title.presence || "New Item",
|
|
@@ -38,6 +38,7 @@ module SourceMonitor
|
|
|
38
38
|
created += 1
|
|
39
39
|
created_items << result.item
|
|
40
40
|
SourceMonitor::Events.after_item_created(item: result.item, source:, entry:, result: result)
|
|
41
|
+
enqueue_image_download(result.item)
|
|
41
42
|
else
|
|
42
43
|
updated += 1
|
|
43
44
|
updated_items << result.item
|
|
@@ -61,6 +62,18 @@ module SourceMonitor
|
|
|
61
62
|
|
|
62
63
|
private
|
|
63
64
|
|
|
65
|
+
# Enqueues SourceMonitor::DownloadContentImagesJob for the item when image
# downloading is enabled and the item has content.
#
# Any StandardError raised along the way is swallowed (and logged when a
# Rails logger is available): image download enqueue failure must never
# break feed processing.
def enqueue_image_download(item)
  if SourceMonitor.config.images.download_enabled? && !item.content.blank?
    SourceMonitor::DownloadContentImagesJob.perform_later(item.id)
  end
rescue StandardError => enqueue_error
  logger = (Rails.logger if defined?(Rails) && Rails.respond_to?(:logger))
  if logger
    logger.error("[SourceMonitor] Failed to enqueue image download for item #{item.id}: #{enqueue_error.message}")
  end
end
|
|
76
|
+
|
|
64
77
|
def normalize_item_error(entry, error)
|
|
65
78
|
{
|
|
66
79
|
guid: safe_entry_guid(entry),
|
data/lib/source_monitor/http.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "openssl"
|
|
3
4
|
require "faraday"
|
|
4
5
|
require "faraday/retry"
|
|
5
6
|
require "faraday/follow_redirects"
|
|
@@ -57,9 +58,31 @@ module SourceMonitor
|
|
|
57
58
|
connection.headers[key] = value
|
|
58
59
|
end
|
|
59
60
|
|
|
61
|
+
configure_ssl(connection, settings)
|
|
62
|
+
|
|
60
63
|
connection.adapter Faraday.default_adapter
|
|
61
64
|
end
|
|
62
65
|
|
|
66
|
+
# Configure SSL to use a proper cert store. Without this, some systems
# fail to verify certificate chains that depend on intermediate CAs
# (e.g., Medium/Netflix on AWS). OpenSSL::X509::Store#set_default_paths
# loads all system-trusted CAs including intermediates.
#
# Verification stays on unless settings.ssl_verify is exactly false. The
# trust source is chosen in priority order: ssl_ca_file, then ssl_ca_path,
# then the default cert store.
def configure_ssl(connection, settings)
  ssl_options = connection.ssl
  ssl_options.verify = !(settings.ssl_verify == false)

  if (ca_file = settings.ssl_ca_file)
    ssl_options.ca_file = ca_file
  elsif (ca_path = settings.ssl_ca_path)
    ssl_options.ca_path = ca_path
  else
    ssl_options.cert_store = default_cert_store
  end
end
|
|
81
|
+
|
|
82
|
+
# Builds a new OpenSSL certificate store with set_default_paths applied.
#
# @return [OpenSSL::X509::Store]
def default_cert_store
  store = OpenSSL::X509::Store.new
  store.set_default_paths
  store
end
|
|
85
|
+
|
|
63
86
|
def default_headers(settings)
|
|
64
87
|
base_headers = {
|
|
65
88
|
"User-Agent" => resolve_callable(settings.user_agent).presence || DEFAULT_USER_AGENT,
|