source_monitor 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +4 -4
  2. data/.claude/commands/release.md +101 -58
  3. data/.claude/skills/sm-configure/SKILL.md +13 -2
  4. data/.claude/skills/sm-configure/reference/configuration-reference.md +33 -0
  5. data/.claude/skills/sm-host-setup/SKILL.md +18 -2
  6. data/.claude/skills/sm-host-setup/reference/setup-checklist.md +33 -0
  7. data/.claude/skills/sm-job/SKILL.md +1 -1
  8. data/.claude/skills/sm-upgrade/SKILL.md +102 -0
  9. data/.claude/skills/sm-upgrade/reference/upgrade-workflow.md +92 -0
  10. data/.claude/skills/sm-upgrade/reference/version-history.md +68 -0
  11. data/.vbw-planning/SHIPPED.md +35 -0
  12. data/.vbw-planning/config.json +24 -1
  13. data/.vbw-planning/discovery.json +3 -1
  14. data/.vbw-planning/{REQUIREMENTS.md → milestones/generator-enhancements/REQUIREMENTS.md} +22 -0
  15. data/.vbw-planning/milestones/generator-enhancements/ROADMAP.md +125 -0
  16. data/.vbw-planning/milestones/generator-enhancements/SHIPPED.md +40 -0
  17. data/.vbw-planning/milestones/generator-enhancements/STATE.md +43 -0
  18. data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/01-CONTEXT.md +33 -0
  19. data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/01-VERIFICATION.md +86 -0
  20. data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/PLAN-01-SUMMARY.md +61 -0
  21. data/.vbw-planning/milestones/generator-enhancements/phases/01-generator-steps/PLAN-01.md +380 -0
  22. data/.vbw-planning/milestones/generator-enhancements/phases/02-verification/02-VERIFICATION.md +78 -0
  23. data/.vbw-planning/milestones/generator-enhancements/phases/02-verification/PLAN-01-SUMMARY.md +46 -0
  24. data/.vbw-planning/milestones/generator-enhancements/phases/02-verification/PLAN-01.md +500 -0
  25. data/.vbw-planning/milestones/generator-enhancements/phases/03-docs-alignment/03-VERIFICATION.md +89 -0
  26. data/.vbw-planning/milestones/generator-enhancements/phases/03-docs-alignment/PLAN-01-SUMMARY.md +48 -0
  27. data/.vbw-planning/milestones/generator-enhancements/phases/03-docs-alignment/PLAN-01.md +456 -0
  28. data/.vbw-planning/milestones/generator-enhancements/phases/04-dashboard-ux/04-VERIFICATION.md +129 -0
  29. data/.vbw-planning/milestones/generator-enhancements/phases/04-dashboard-ux/PLAN-01-SUMMARY.md +70 -0
  30. data/.vbw-planning/milestones/generator-enhancements/phases/04-dashboard-ux/PLAN-01.md +747 -0
  31. data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/05-VERIFICATION.md +156 -0
  32. data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-01-SUMMARY.md +69 -0
  33. data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-01.md +455 -0
  34. data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-02-SUMMARY.md +39 -0
  35. data/.vbw-planning/milestones/generator-enhancements/phases/05-active-storage-images/PLAN-02.md +488 -0
  36. data/.vbw-planning/milestones/generator-enhancements/phases/06-netflix-feed-fix/06-VERIFICATION.md +100 -0
  37. data/.vbw-planning/milestones/generator-enhancements/phases/06-netflix-feed-fix/PLAN-01-SUMMARY.md +37 -0
  38. data/.vbw-planning/milestones/generator-enhancements/phases/06-netflix-feed-fix/PLAN-01.md +345 -0
  39. data/.vbw-planning/milestones/upgrade-assurance/REQUIREMENTS.md +80 -0
  40. data/.vbw-planning/milestones/upgrade-assurance/ROADMAP.md +75 -0
  41. data/.vbw-planning/milestones/upgrade-assurance/STATE.md +29 -0
  42. data/.vbw-planning/milestones/upgrade-assurance/phases/01-upgrade-command/01-VERIFICATION.md +144 -0
  43. data/.vbw-planning/milestones/upgrade-assurance/phases/01-upgrade-command/PLAN-01-SUMMARY.md +43 -0
  44. data/.vbw-planning/milestones/upgrade-assurance/phases/01-upgrade-command/PLAN-01.md +405 -0
  45. data/.vbw-planning/milestones/upgrade-assurance/phases/02-config-deprecation/PLAN-01-SUMMARY.md +27 -0
  46. data/.vbw-planning/milestones/upgrade-assurance/phases/02-config-deprecation/PLAN-01.md +303 -0
  47. data/.vbw-planning/milestones/upgrade-assurance/phases/03-upgrade-skill-docs/03-VERIFICATION.md +380 -0
  48. data/.vbw-planning/milestones/upgrade-assurance/phases/03-upgrade-skill-docs/PLAN-01-SUMMARY.md +36 -0
  49. data/.vbw-planning/milestones/upgrade-assurance/phases/03-upgrade-skill-docs/PLAN-01.md +652 -0
  50. data/CHANGELOG.md +48 -0
  51. data/CLAUDE.md +5 -3
  52. data/Gemfile.lock +1 -1
  53. data/VERSION +1 -1
  54. data/app/assets/builds/source_monitor/application.css +9 -0
  55. data/app/helpers/source_monitor/application_helper.rb +38 -0
  56. data/app/jobs/source_monitor/download_content_images_job.rb +72 -0
  57. data/app/models/source_monitor/item_content.rb +2 -0
  58. data/app/views/source_monitor/dashboard/_recent_activity.html.erb +9 -0
  59. data/app/views/source_monitor/items/_details.html.erb +2 -2
  60. data/app/views/source_monitor/logs/index.html.erb +9 -0
  61. data/app/views/source_monitor/sources/_details.html.erb +2 -2
  62. data/app/views/source_monitor/sources/_row.html.erb +1 -1
  63. data/docs/setup.md +10 -1
  64. data/docs/troubleshooting.md +38 -7
  65. data/docs/upgrade.md +140 -0
  66. data/lib/generators/source_monitor/install/install_generator.rb +101 -0
  67. data/lib/source_monitor/configuration/deprecation_registry.rb +237 -0
  68. data/lib/source_monitor/configuration/http_settings.rb +7 -1
  69. data/lib/source_monitor/configuration/images_settings.rb +37 -0
  70. data/lib/source_monitor/configuration.rb +11 -1
  71. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +16 -7
  72. data/lib/source_monitor/dashboard/recent_activity.rb +1 -0
  73. data/lib/source_monitor/dashboard/recent_activity_presenter.rb +15 -2
  74. data/lib/source_monitor/fetching/feed_fetcher/entry_processor.rb +13 -0
  75. data/lib/source_monitor/http.rb +23 -0
  76. data/lib/source_monitor/images/content_rewriter.rb +81 -0
  77. data/lib/source_monitor/images/downloader.rb +82 -0
  78. data/lib/source_monitor/logs/table_presenter.rb +25 -0
  79. data/lib/source_monitor/setup/cli.rb +7 -0
  80. data/lib/source_monitor/setup/procfile_patcher.rb +31 -0
  81. data/lib/source_monitor/setup/queue_config_patcher.rb +84 -0
  82. data/lib/source_monitor/setup/skills_installer.rb +1 -0
  83. data/lib/source_monitor/setup/upgrade_command.rb +59 -0
  84. data/lib/source_monitor/setup/verification/pending_migrations_verifier.rb +92 -0
  85. data/lib/source_monitor/setup/verification/recurring_schedule_verifier.rb +102 -0
  86. data/lib/source_monitor/setup/verification/runner.rb +1 -1
  87. data/lib/source_monitor/setup/verification/solid_queue_verifier.rb +1 -1
  88. data/lib/source_monitor/setup/workflow.rb +10 -0
  89. data/lib/source_monitor/version.rb +1 -1
  90. data/lib/source_monitor.rb +11 -0
  91. metadata +51 -2
@@ -49,7 +49,48 @@ module SourceMonitor
49
49
  end
50
50
  end
51
51
 
52
+ def patch_procfile_dev
53
+ procfile_path = File.join(destination_root, "Procfile.dev")
54
+
55
+ if File.exist?(procfile_path)
56
+ content = File.read(procfile_path)
57
+ if content.match?(/^jobs:/)
58
+ say_status :skip, "Procfile.dev (jobs entry already present)", :yellow
59
+ return
60
+ end
61
+
62
+ File.open(procfile_path, "a") { |f| f.puts("", PROCFILE_JOBS_ENTRY) }
63
+ say_status :append, "Procfile.dev", :green
64
+ else
65
+ File.write(procfile_path, "web: bin/rails server -p 3000\n#{PROCFILE_JOBS_ENTRY}\n")
66
+ say_status :create, "Procfile.dev", :green
67
+ end
68
+ end
69
+
70
+ def configure_queue_dispatcher
71
+ queue_path = File.join(destination_root, "config/queue.yml")
72
+
73
+ unless File.exist?(queue_path)
74
+ say_status :skip, "config/queue.yml (file not found — create it or run rails app:update to generate)", :yellow
75
+ return
76
+ end
77
+
78
+ parsed = YAML.safe_load(File.read(queue_path), aliases: true) || {}
79
+
80
+ if queue_config_has_recurring_schedule?(parsed)
81
+ say_status :skip, "config/queue.yml (recurring_schedule already configured)", :yellow
82
+ return
83
+ end
84
+
85
+ add_recurring_schedule_to_dispatchers!(parsed)
86
+ File.write(queue_path, YAML.dump(parsed))
87
+ say_status :append, "config/queue.yml (added recurring_schedule to dispatchers)", :green
88
+ end
89
+
52
90
  def print_next_steps
91
+ say_status :info,
92
+ "Procfile.dev configured — run bin/dev to start both web server and Solid Queue workers.",
93
+ :green
53
94
  say_status :info,
54
95
  "Recurring jobs configured in config/recurring.yml — they'll run automatically with bin/dev or bin/jobs.",
55
96
  :green
@@ -60,6 +101,8 @@ module SourceMonitor
60
101
 
61
102
  private
62
103
 
104
+ PROCFILE_JOBS_ENTRY = "jobs: bundle exec rake solid_queue:start"
105
+
63
106
  RECURRING_ENTRIES = {
64
107
  "source_monitor_schedule_fetches" => {
65
108
  "class" => "SourceMonitor::ScheduleFetchesJob",
@@ -154,6 +197,64 @@ module SourceMonitor
154
197
  path = (raw_path && !raw_path.strip.empty?) ? raw_path.strip : "/source_monitor"
155
198
  path.start_with?("/") ? path : "/#{path}"
156
199
  end
200
+
201
+ RECURRING_SCHEDULE_VALUE = "config/recurring.yml"
202
+
203
+ DEFAULT_DISPATCHER = {
204
+ "polling_interval" => 1,
205
+ "batch_size" => 500,
206
+ "recurring_schedule" => RECURRING_SCHEDULE_VALUE
207
+ }.freeze
208
+
209
+ def queue_config_has_recurring_schedule?(parsed)
210
+ parsed.each_value do |value|
211
+ next unless value.is_a?(Hash)
212
+
213
+ dispatchers = value["dispatchers"] || value[:dispatchers]
214
+ if dispatchers.is_a?(Array)
215
+ return true if dispatchers.any? { |d| d.is_a?(Hash) && d.key?("recurring_schedule") }
216
+ end
217
+
218
+ return true if queue_config_has_recurring_schedule?(value)
219
+ end
220
+
221
+ # Check top-level dispatchers (flat config)
222
+ if parsed.key?("dispatchers") && parsed["dispatchers"].is_a?(Array)
223
+ return true if parsed["dispatchers"].any? { |d| d.is_a?(Hash) && d.key?("recurring_schedule") }
224
+ end
225
+
226
+ false
227
+ end
228
+
229
+ def add_recurring_schedule_to_dispatchers!(parsed)
230
+ found_dispatchers = false
231
+
232
+ parsed.each_value do |value|
233
+ next unless value.is_a?(Hash)
234
+
235
+ if value.key?("dispatchers") && value["dispatchers"].is_a?(Array)
236
+ value["dispatchers"].each do |dispatcher|
237
+ next unless dispatcher.is_a?(Hash)
238
+ dispatcher["recurring_schedule"] ||= RECURRING_SCHEDULE_VALUE
239
+ end
240
+ found_dispatchers = true
241
+ end
242
+ end
243
+
244
+ # Check top-level dispatchers (flat config)
245
+ if parsed.key?("dispatchers") && parsed["dispatchers"].is_a?(Array)
246
+ parsed["dispatchers"].each do |dispatcher|
247
+ next unless dispatcher.is_a?(Hash)
248
+ dispatcher["recurring_schedule"] ||= RECURRING_SCHEDULE_VALUE
249
+ end
250
+ found_dispatchers = true
251
+ end
252
+
253
+ # No dispatchers found at all — add a default section
254
+ unless found_dispatchers
255
+ parsed["dispatchers"] = [ DEFAULT_DISPATCHER.dup ]
256
+ end
257
+ end
157
258
  end
158
259
  end
159
260
  end
@@ -0,0 +1,237 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ class DeprecatedOptionError < StandardError; end
5
+
6
+ class Configuration
7
+ # Registry for deprecated configuration options.
8
+ #
9
+ # Engine developers register deprecations at boot time via the DSL:
10
+ #
11
+ # SourceMonitor::Configuration::DeprecationRegistry.register(
12
+ # "http.old_proxy_url",
13
+ # removed_in: "0.5.0",
14
+ # replacement: "http.proxy",
15
+ # severity: :warning,
16
+ # message: "Use config.http.proxy instead"
17
+ # )
18
+ #
19
+ # When a host app's initializer accesses a deprecated option, the
20
+ # trapping method fires automatically:
21
+ # - :warning -- logs via Rails.logger.warn and forwards to the replacement, if one was given
22
+ # - :error -- raises SourceMonitor::DeprecatedOptionError
23
+ #
24
+ class DeprecationRegistry
25
+ # Maps settings accessor names (as used on Configuration) to their classes.
26
+ SETTINGS_CLASSES = {
27
+ "http" => "HTTPSettings",
28
+ "fetching" => "FetchingSettings",
29
+ "health" => "HealthSettings",
30
+ "scraping" => "ScrapingSettings",
31
+ "retention" => "RetentionSettings",
32
+ "realtime" => "RealtimeSettings",
33
+ "authentication" => "AuthenticationSettings",
34
+ "images" => "ImagesSettings",
35
+ "scrapers" => "ScraperRegistry",
36
+ "events" => "Events",
37
+ "models" => "Models"
38
+ }.freeze
39
+
40
+ class << self
41
+ # Register a deprecated configuration option.
42
+ #
43
+ # @param path [String] dot-notation path, e.g. "http.old_proxy_url" or "old_queue_prefix"
44
+ # @param removed_in [String] version in which the option was deprecated
45
+ # @param replacement [String, nil] dot-notation path to the replacement option
46
+ # @param severity [:warning, :error] :warning logs + forwards, :error raises
47
+ # @param message [String, nil] additional migration guidance
48
+ def register(path, removed_in:, replacement: nil, severity: :warning, message: nil)
49
+ segments = path.split(".")
50
+ source_prefix = nil
51
+ if segments.length == 1
52
+ target_class = Configuration
53
+ option_name = segments.first
54
+ else
55
+ source_prefix = segments.first
56
+ option_name = segments.last
57
+ class_name = SETTINGS_CLASSES[source_prefix]
58
+ raise ArgumentError, "Unknown settings accessor: #{source_prefix}" unless class_name
59
+
60
+ target_class = Configuration.const_get(class_name)
61
+ end
62
+
63
+ deprecation_message = build_message(path, removed_in, replacement, message)
64
+
65
+ if target_class.method_defined?(:"#{option_name}=") || target_class.method_defined?(option_name.to_sym)
66
+ warn "[SourceMonitor] DeprecationRegistry: '#{path}' already exists on #{target_class.name}. " \
67
+ "Skipping trap definition -- the option is not yet removed/renamed."
68
+ entries[path] = { path: path, removed_in: removed_in, replacement: replacement,
69
+ severity: severity, message: deprecation_message, skipped: true }
70
+ return
71
+ end
72
+
73
+ define_trap_methods(target_class, option_name, deprecation_message, severity, replacement,
74
+ source_prefix: source_prefix)
75
+
76
+ entries[path] = { path: path, removed_in: removed_in, replacement: replacement,
77
+ severity: severity, message: deprecation_message, skipped: false }
78
+ end
79
+
80
+ # Remove all registered deprecation traps and clear state.
81
+ # Essential for test isolation.
82
+ def clear!
83
+ defined_methods.each do |target_class, method_name|
84
+ target_class.remove_method(method_name) if target_class.method_defined?(method_name)
85
+ rescue NameError
86
+ # Method was already removed or never defined; ignore.
87
+ end
88
+
89
+ @entries = {}
90
+ @defined_methods = []
91
+ end
92
+
93
+ # Returns a duplicate of the entries hash for inspection.
94
+ def entries
95
+ @entries ||= {}
96
+ end
97
+
98
+ # Check if a path is registered.
99
+ def registered?(path)
100
+ entries.key?(path)
101
+ end
102
+
103
+ # No-op hook for future "default changed" checks.
104
+ # Called by Configuration#check_deprecations! after the configure block.
105
+ def check_defaults!(_config)
106
+ # Reserved for future use. Phases may add checks like:
107
+ # "option X changed its default from A to B in version Y"
108
+ end
109
+
110
+ private
111
+
112
+ def defined_methods
113
+ @defined_methods ||= []
114
+ end
115
+
116
+ def build_message(path, removed_in, replacement, extra_message)
117
+ parts = +"[SourceMonitor] DEPRECATION: '#{path}' was deprecated in v#{removed_in}"
118
+ parts << " and replaced by '#{replacement}'" if replacement
119
+ parts << ". #{extra_message}" if extra_message
120
+ parts << "." unless parts.end_with?(".")
121
+ parts.freeze
122
+ end
123
+
124
+ def define_trap_methods(target_class, option_name, deprecation_message, severity, replacement, source_prefix: nil)
125
+ writer_name = :"#{option_name}="
126
+ reader_name = option_name.to_sym
127
+
128
+ case severity
129
+ when :warning
130
+ define_warning_writer(target_class, writer_name, deprecation_message, replacement, source_prefix)
131
+ define_warning_reader(target_class, reader_name, deprecation_message, replacement, source_prefix)
132
+ when :error
133
+ define_error_method(target_class, writer_name, deprecation_message)
134
+ define_error_method(target_class, reader_name, deprecation_message)
135
+ else
136
+ raise ArgumentError, "Unknown severity: #{severity}. Must be :warning or :error."
137
+ end
138
+
139
+ defined_methods.push([ target_class, writer_name ], [ target_class, reader_name ])
140
+ end
141
+
142
+ def define_warning_writer(target_class, writer_name, deprecation_message, replacement, source_prefix)
143
+ replacement_writer = replacement_setter_for(replacement, source_prefix)
144
+
145
+ target_class.define_method(writer_name) do |value|
146
+ Rails.logger.warn(deprecation_message)
147
+ if replacement_writer
148
+ resolve_replacement_target(replacement_writer[:target]).public_send(
149
+ replacement_writer[:setter], value
150
+ )
151
+ end
152
+ end
153
+ end
154
+
155
+ def define_warning_reader(target_class, reader_name, deprecation_message, replacement, source_prefix)
156
+ replacement_reader = replacement_getter_for(replacement, source_prefix)
157
+
158
+ target_class.define_method(reader_name) do
159
+ Rails.logger.warn(deprecation_message)
160
+ if replacement_reader
161
+ resolve_replacement_target(replacement_reader[:target]).public_send(
162
+ replacement_reader[:getter]
163
+ )
164
+ end
165
+ end
166
+ end
167
+
168
+ def define_error_method(target_class, method_name, deprecation_message)
169
+ target_class.define_method(method_name) do |*|
170
+ raise SourceMonitor::DeprecatedOptionError, deprecation_message
171
+ end
172
+ end
173
+
174
+ # Parse replacement path into target accessor chain and setter name.
175
+ # When source_prefix matches the replacement prefix, the target is nil
176
+ # (replacement is on the same settings class).
177
+ #
178
+ # "http.proxy" with source_prefix "http" => { target: nil, setter: "proxy=" }
179
+ # "queue_namespace" => { target: nil, setter: "queue_namespace=" }
180
+ # "http.proxy" with source_prefix nil => { target: :http, setter: "proxy=" }
181
+ def replacement_setter_for(replacement, source_prefix = nil)
182
+ return nil unless replacement
183
+
184
+ segments = replacement.split(".")
185
+ if segments.length == 1
186
+ { target: nil, setter: :"#{segments.first}=" }
187
+ elsif source_prefix && segments.first == source_prefix
188
+ { target: nil, setter: :"#{segments.last}=" }
189
+ else
190
+ { target: segments.first.to_sym, setter: :"#{segments.last}=" }
191
+ end
192
+ end
193
+
194
+ # Parse replacement path into target accessor chain and getter name.
195
+ def replacement_getter_for(replacement, source_prefix = nil)
196
+ return nil unless replacement
197
+
198
+ segments = replacement.split(".")
199
+ if segments.length == 1
200
+ { target: nil, getter: segments.first.to_sym }
201
+ elsif source_prefix && segments.first == source_prefix
202
+ { target: nil, getter: segments.last.to_sym }
203
+ else
204
+ { target: segments.first.to_sym, getter: segments.last.to_sym }
205
+ end
206
+ end
207
+ end
208
+ end
209
+ end
210
+ end
211
+
212
+ # Add a helper method to settings classes and Configuration for resolving
213
+ # replacement targets. This allows "http.proxy" to resolve as self.http.proxy
214
+ # from within a Configuration instance, or as self.proxy from within an
215
+ # HTTPSettings instance.
216
+ module SourceMonitor
217
+ class Configuration
218
+ private
219
+
220
+ def resolve_replacement_target(accessor)
221
+ accessor ? public_send(accessor) : self
222
+ end
223
+ end
224
+ end
225
+
226
+ # Add the same helper to all settings classes so forwarding works
227
+ # when the deprecated method is defined on a nested settings class
228
+ # and the replacement is on the same class (e.g. "http.old_proxy" -> "http.proxy").
229
+ SourceMonitor::Configuration::DeprecationRegistry::SETTINGS_CLASSES.each_value do |class_name|
230
+ klass = SourceMonitor::Configuration.const_get(class_name)
231
+ unless klass.method_defined?(:resolve_replacement_target, false)
232
+ klass.define_method(:resolve_replacement_target) do |accessor|
233
+ accessor ? public_send(accessor) : self
234
+ end
235
+ klass.send(:private, :resolve_replacement_target)
236
+ end
237
+ end
@@ -13,7 +13,10 @@ module SourceMonitor
13
13
  :retry_interval,
14
14
  :retry_interval_randomness,
15
15
  :retry_backoff_factor,
16
- :retry_statuses
16
+ :retry_statuses,
17
+ :ssl_ca_file,
18
+ :ssl_ca_path,
19
+ :ssl_verify
17
20
 
18
21
  def initialize
19
22
  reset!
@@ -31,6 +34,9 @@ module SourceMonitor
31
34
  @retry_interval_randomness = 0.5
32
35
  @retry_backoff_factor = 2
33
36
  @retry_statuses = nil
37
+ @ssl_ca_file = nil
38
+ @ssl_ca_path = nil
39
+ @ssl_verify = true
34
40
  end
35
41
 
36
42
  private
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ class Configuration
5
+ class ImagesSettings
6
+ attr_accessor :download_to_active_storage,
7
+ :max_download_size,
8
+ :download_timeout,
9
+ :allowed_content_types
10
+
11
+ DEFAULT_MAX_DOWNLOAD_SIZE = 10 * 1024 * 1024 # 10 MB
12
+ DEFAULT_DOWNLOAD_TIMEOUT = 30 # seconds
13
+ DEFAULT_ALLOWED_CONTENT_TYPES = %w[
14
+ image/jpeg
15
+ image/png
16
+ image/gif
17
+ image/webp
18
+ image/svg+xml
19
+ ].freeze
20
+
21
+ def initialize
22
+ reset!
23
+ end
24
+
25
+ def reset!
26
+ @download_to_active_storage = false
27
+ @max_download_size = DEFAULT_MAX_DOWNLOAD_SIZE
28
+ @download_timeout = DEFAULT_DOWNLOAD_TIMEOUT
29
+ @allowed_content_types = DEFAULT_ALLOWED_CONTENT_TYPES.dup
30
+ end
31
+
32
+ def download_enabled?
33
+ !!download_to_active_storage
34
+ end
35
+ end
36
+ end
37
+ end
@@ -8,11 +8,13 @@ require "source_monitor/configuration/scraping_settings"
8
8
  require "source_monitor/configuration/realtime_settings"
9
9
  require "source_monitor/configuration/retention_settings"
10
10
  require "source_monitor/configuration/authentication_settings"
11
+ require "source_monitor/configuration/images_settings"
11
12
  require "source_monitor/configuration/scraper_registry"
12
13
  require "source_monitor/configuration/events"
13
14
  require "source_monitor/configuration/validation_definition"
14
15
  require "source_monitor/configuration/model_definition"
15
16
  require "source_monitor/configuration/models"
17
+ require "source_monitor/configuration/deprecation_registry"
16
18
 
17
19
  module SourceMonitor
18
20
  class Configuration
@@ -26,7 +28,7 @@ module SourceMonitor
26
28
  :mission_control_enabled,
27
29
  :mission_control_dashboard_path
28
30
 
29
- attr_reader :http, :scrapers, :retention, :events, :models, :realtime, :fetching, :health, :authentication, :scraping
31
+ attr_reader :http, :scrapers, :retention, :events, :models, :realtime, :fetching, :health, :authentication, :scraping, :images
30
32
 
31
33
  DEFAULT_QUEUE_NAMESPACE = "source_monitor"
32
34
 
@@ -50,6 +52,7 @@ module SourceMonitor
50
52
  @health = HealthSettings.new
51
53
  @authentication = AuthenticationSettings.new
52
54
  @scraping = ScrapingSettings.new
55
+ @images = ImagesSettings.new
53
56
  end
54
57
 
55
58
  def queue_name_for(role)
@@ -83,5 +86,12 @@ module SourceMonitor
83
86
  raise ArgumentError, "unknown queue role #{role.inspect}"
84
87
  end
85
88
  end
89
+
90
+ # Post-configure hook for deprecation validation.
91
+ # Delegates to DeprecationRegistry.check_defaults! for future
92
+ # "default changed" checks. Currently a no-op.
93
+ def check_deprecations!
94
+ DeprecationRegistry.check_defaults!(self)
95
+ end
86
96
  end
87
97
  end
@@ -37,7 +37,8 @@ module SourceMonitor
37
37
  item_title: row["item_title"],
38
38
  item_url: row["item_url"],
39
39
  source_name: row["source_name"],
40
- source_id: row["source_id"]
40
+ source_id: row["source_id"],
41
+ source_feed_url: row["source_feed_url"]
41
42
  )
42
43
  end
43
44
 
@@ -57,7 +58,8 @@ module SourceMonitor
57
58
  item_title,
58
59
  item_url,
59
60
  source_name,
60
- source_id
61
+ source_id,
62
+ source_feed_url
61
63
  FROM (
62
64
  #{fetch_log_sql}
63
65
  UNION ALL
@@ -83,9 +85,12 @@ module SourceMonitor
83
85
  NULL AS scraper_adapter,
84
86
  NULL AS item_title,
85
87
  NULL AS item_url,
86
- NULL AS source_name,
87
- #{SourceMonitor::FetchLog.quoted_table_name}.source_id AS source_id
88
+ #{SourceMonitor::Source.quoted_table_name}.#{quoted_source_name} AS source_name,
89
+ #{SourceMonitor::FetchLog.quoted_table_name}.source_id AS source_id,
90
+ #{SourceMonitor::Source.quoted_table_name}.feed_url AS source_feed_url
88
91
  FROM #{SourceMonitor::FetchLog.quoted_table_name}
92
+ LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
93
+ ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::FetchLog.quoted_table_name}.source_id
89
94
  SQL
90
95
  end
91
96
 
@@ -100,12 +105,15 @@ module SourceMonitor
100
105
  NULL AS items_updated,
101
106
  #{SourceMonitor::ScrapeLog.quoted_table_name}.scraper_adapter AS scraper_adapter,
102
107
  NULL AS item_title,
103
- NULL AS item_url,
108
+ #{SourceMonitor::Item.quoted_table_name}.url AS item_url,
104
109
  #{SourceMonitor::Source.quoted_table_name}.#{quoted_source_name} AS source_name,
105
- #{SourceMonitor::ScrapeLog.quoted_table_name}.source_id AS source_id
110
+ #{SourceMonitor::ScrapeLog.quoted_table_name}.source_id AS source_id,
111
+ NULL AS source_feed_url
106
112
  FROM #{SourceMonitor::ScrapeLog.quoted_table_name}
107
113
  LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
108
114
  ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::ScrapeLog.quoted_table_name}.source_id
115
+ LEFT JOIN #{SourceMonitor::Item.quoted_table_name}
116
+ ON #{SourceMonitor::Item.quoted_table_name}.id = #{SourceMonitor::ScrapeLog.quoted_table_name}.item_id
109
117
  SQL
110
118
  end
111
119
 
@@ -122,7 +130,8 @@ module SourceMonitor
122
130
  #{SourceMonitor::Item.quoted_table_name}.title AS item_title,
123
131
  #{SourceMonitor::Item.quoted_table_name}.url AS item_url,
124
132
  #{SourceMonitor::Source.quoted_table_name}.#{quoted_source_name} AS source_name,
125
- #{SourceMonitor::Item.quoted_table_name}.source_id AS source_id
133
+ #{SourceMonitor::Item.quoted_table_name}.source_id AS source_id,
134
+ NULL AS source_feed_url
126
135
  FROM #{SourceMonitor::Item.quoted_table_name}
127
136
  LEFT JOIN #{SourceMonitor::Source.quoted_table_name}
128
137
  ON #{SourceMonitor::Source.quoted_table_name}.id = #{SourceMonitor::Item.quoted_table_name}.source_id
@@ -15,6 +15,7 @@ module SourceMonitor
15
15
  :item_url,
16
16
  :source_name,
17
17
  :source_id,
18
+ :source_feed_url,
18
19
  keyword_init: true
19
20
  ) do
20
21
  def type
@@ -30,13 +30,16 @@ module SourceMonitor
30
30
  end
31
31
 
32
32
  def fetch_event(event)
33
+ domain = source_domain(event.source_feed_url)
33
34
  {
34
35
  label: "Fetch ##{event.id}",
35
36
  description: "#{event.items_created.to_i} created / #{event.items_updated.to_i} updated",
36
37
  status: event.success? ? :success : :failure,
37
38
  type: :fetch,
38
39
  time: event.occurred_at,
39
- path: url_helpers.fetch_log_path(event.id)
40
+ path: url_helpers.fetch_log_path(event.id),
41
+ url_display: domain,
42
+ url_href: event.source_feed_url
40
43
  }
41
44
  end
42
45
 
@@ -47,10 +50,20 @@ module SourceMonitor
47
50
  status: event.success? ? :success : :failure,
48
51
  type: :scrape,
49
52
  time: event.occurred_at,
50
- path: url_helpers.scrape_log_path(event.id)
53
+ path: url_helpers.scrape_log_path(event.id),
54
+ url_display: event.item_url,
55
+ url_href: event.item_url
51
56
  }
52
57
  end
53
58
 
59
+ def source_domain(feed_url)
60
+ return nil if feed_url.blank?
61
+
62
+ URI.parse(feed_url.to_s).host
63
+ rescue URI::InvalidURIError
64
+ nil
65
+ end
66
+
54
67
  def item_event(event)
55
68
  {
56
69
  label: event.item_title.presence || "New Item",
@@ -38,6 +38,7 @@ module SourceMonitor
38
38
  created += 1
39
39
  created_items << result.item
40
40
  SourceMonitor::Events.after_item_created(item: result.item, source:, entry:, result: result)
41
+ enqueue_image_download(result.item)
41
42
  else
42
43
  updated += 1
43
44
  updated_items << result.item
@@ -61,6 +62,18 @@ module SourceMonitor
61
62
 
62
63
  private
63
64
 
65
+ def enqueue_image_download(item)
66
+ return unless SourceMonitor.config.images.download_enabled?
67
+ return if item.content.blank?
68
+
69
+ SourceMonitor::DownloadContentImagesJob.perform_later(item.id)
70
+ rescue StandardError => error
71
+ # Image download enqueue failure must never break feed processing
72
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
73
+ Rails.logger.error("[SourceMonitor] Failed to enqueue image download for item #{item.id}: #{error.message}")
74
+ end
75
+ end
76
+
64
77
  def normalize_item_error(entry, error)
65
78
  {
66
79
  guid: safe_entry_guid(entry),
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "openssl"
3
4
  require "faraday"
4
5
  require "faraday/retry"
5
6
  require "faraday/follow_redirects"
@@ -57,9 +58,31 @@ module SourceMonitor
57
58
  connection.headers[key] = value
58
59
  end
59
60
 
61
+ configure_ssl(connection, settings)
62
+
60
63
  connection.adapter Faraday.default_adapter
61
64
  end
62
65
 
66
+ # Configure SSL to use a proper cert store. Without this, some systems
67
+ # fail to verify certificate chains that depend on intermediate CAs
68
+ # (e.g., Medium/Netflix on AWS). OpenSSL::X509::Store#set_default_paths
69
+ # loads all system-trusted CAs including intermediates.
70
+ def configure_ssl(connection, settings)
71
+ connection.ssl.verify = settings.ssl_verify != false
72
+
73
+ if settings.ssl_ca_file
74
+ connection.ssl.ca_file = settings.ssl_ca_file
75
+ elsif settings.ssl_ca_path
76
+ connection.ssl.ca_path = settings.ssl_ca_path
77
+ else
78
+ connection.ssl.cert_store = default_cert_store
79
+ end
80
+ end
81
+
82
+ def default_cert_store
83
+ OpenSSL::X509::Store.new.tap(&:set_default_paths)
84
+ end
85
+
63
86
  def default_headers(settings)
64
87
  base_headers = {
65
88
  "User-Agent" => resolve_callable(settings.user_agent).presence || DEFAULT_USER_AGENT,