source_monitor 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/agents/rails-concern.md +464 -0
  3. data/.claude/agents/rails-controller.md +424 -0
  4. data/.claude/agents/rails-hotwire.md +446 -0
  5. data/.claude/agents/rails-implement.md +374 -0
  6. data/.claude/agents/rails-job.md +334 -0
  7. data/.claude/agents/rails-lint.md +294 -0
  8. data/.claude/agents/rails-mailer.md +371 -0
  9. data/.claude/agents/rails-migration.md +449 -0
  10. data/.claude/agents/rails-model.md +420 -0
  11. data/.claude/agents/rails-policy.md +443 -0
  12. data/.claude/agents/rails-presenter.md +427 -0
  13. data/.claude/agents/rails-query.md +412 -0
  14. data/.claude/agents/rails-review.md +490 -0
  15. data/.claude/agents/rails-service.md +458 -0
  16. data/.claude/agents/rails-state-records.md +465 -0
  17. data/.claude/agents/rails-tdd.md +314 -0
  18. data/.claude/agents/rails-test.md +441 -0
  19. data/.claude/agents/rails-view-component.md +418 -0
  20. data/.claude/hooks/block-secrets.sh +52 -0
  21. data/.claude/settings.json +85 -0
  22. data/.claude/skills/action-cable-patterns/SKILL.md +296 -0
  23. data/.claude/skills/action-mailer-patterns/SKILL.md +295 -0
  24. data/.claude/skills/active-storage-setup/SKILL.md +311 -0
  25. data/.claude/skills/api-versioning/SKILL.md +294 -0
  26. data/.claude/skills/authentication-flow/SKILL.md +335 -0
  27. data/.claude/skills/authentication-flow/reference/current.md +248 -0
  28. data/.claude/skills/authentication-flow/reference/passwordless.md +253 -0
  29. data/.claude/skills/authentication-flow/reference/sessions.md +201 -0
  30. data/.claude/skills/authorization-pundit/SKILL.md +462 -0
  31. data/.claude/skills/caching-strategies/SKILL.md +350 -0
  32. data/.claude/skills/database-migrations/SKILL.md +354 -0
  33. data/.claude/skills/form-object-patterns/SKILL.md +399 -0
  34. data/.claude/skills/hotwire-patterns/SKILL.md +247 -0
  35. data/.claude/skills/hotwire-patterns/reference/stimulus.md +307 -0
  36. data/.claude/skills/hotwire-patterns/reference/tailwind-integration.md +112 -0
  37. data/.claude/skills/hotwire-patterns/reference/turbo-frames.md +158 -0
  38. data/.claude/skills/hotwire-patterns/reference/turbo-streams.md +218 -0
  39. data/.claude/skills/i18n-patterns/SKILL.md +320 -0
  40. data/.claude/skills/install/SKILL.md +367 -0
  41. data/.claude/skills/performance-optimization/SKILL.md +311 -0
  42. data/.claude/skills/rails-architecture/SKILL.md +259 -0
  43. data/.claude/skills/rails-architecture/reference/error-handling.md +333 -0
  44. data/.claude/skills/rails-architecture/reference/event-tracking.md +142 -0
  45. data/.claude/skills/rails-architecture/reference/layer-interactions.md +417 -0
  46. data/.claude/skills/rails-architecture/reference/multi-tenancy.md +152 -0
  47. data/.claude/skills/rails-architecture/reference/query-patterns.md +342 -0
  48. data/.claude/skills/rails-architecture/reference/service-patterns.md +286 -0
  49. data/.claude/skills/rails-architecture/reference/state-records.md +250 -0
  50. data/.claude/skills/rails-architecture/reference/testing-strategy.md +326 -0
  51. data/.claude/skills/rails-concern/SKILL.md +399 -0
  52. data/.claude/skills/rails-controller/SKILL.md +336 -0
  53. data/.claude/skills/rails-model-generator/SKILL.md +321 -0
  54. data/.claude/skills/rails-model-generator/reference/validations.md +298 -0
  55. data/.claude/skills/rails-presenter/SKILL.md +274 -0
  56. data/.claude/skills/rails-query-object/SKILL.md +289 -0
  57. data/.claude/skills/rails-service-object/SKILL.md +349 -0
  58. data/.claude/skills/solid-queue-setup/SKILL.md +307 -0
  59. data/.claude/skills/tdd-cycle/SKILL.md +359 -0
  60. data/.claude/skills/viewcomponent-patterns/SKILL.md +333 -0
  61. data/.gitignore +1 -0
  62. data/.rubocop.yml +2 -0
  63. data/.ruby-version +1 -1
  64. data/.vbw-planning/.notification-log.jsonl +192 -0
  65. data/.vbw-planning/.session-log.jsonl +871 -0
  66. data/.vbw-planning/PROJECT.md +51 -0
  67. data/.vbw-planning/REQUIREMENTS.md +50 -0
  68. data/.vbw-planning/SHIPPED.md +28 -0
  69. data/.vbw-planning/codebase/ARCHITECTURE.md +147 -0
  70. data/.vbw-planning/codebase/CONCERNS.md +99 -0
  71. data/.vbw-planning/codebase/CONVENTIONS.md +97 -0
  72. data/.vbw-planning/codebase/DEPENDENCIES.md +100 -0
  73. data/.vbw-planning/codebase/INDEX.md +86 -0
  74. data/.vbw-planning/codebase/META.md +42 -0
  75. data/.vbw-planning/codebase/PATTERNS.md +262 -0
  76. data/.vbw-planning/codebase/STACK.md +101 -0
  77. data/.vbw-planning/codebase/STRUCTURE.md +324 -0
  78. data/.vbw-planning/codebase/TESTING.md +154 -0
  79. data/.vbw-planning/config.json +12 -0
  80. data/.vbw-planning/discovery.json +24 -0
  81. data/.vbw-planning/milestones/default/ROADMAP.md +115 -0
  82. data/.vbw-planning/milestones/default/STATE.md +83 -0
  83. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-01-SUMMARY.md +56 -0
  84. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-01.md +187 -0
  85. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-02-SUMMARY.md +64 -0
  86. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-02.md +137 -0
  87. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-01-SUMMARY.md +67 -0
  88. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-01.md +142 -0
  89. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-02-SUMMARY.md +64 -0
  90. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-02.md +138 -0
  91. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-03-SUMMARY.md +85 -0
  92. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-03.md +147 -0
  93. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-04-SUMMARY.md +63 -0
  94. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-04.md +129 -0
  95. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-05-SUMMARY.md +74 -0
  96. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-05.md +154 -0
  97. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/03-VERIFICATION-wave1.md +303 -0
  98. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/03-VERIFICATION.md +510 -0
  99. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-01-SUMMARY.md +61 -0
  100. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-01.md +161 -0
  101. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-02-SUMMARY.md +66 -0
  102. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-02.md +132 -0
  103. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-03-SUMMARY.md +59 -0
  104. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-03.md +171 -0
  105. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-04-SUMMARY.md +56 -0
  106. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-04.md +152 -0
  107. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/04-CONTEXT.md +33 -0
  108. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-01-SUMMARY.md +42 -0
  109. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-01.md +119 -0
  110. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-02-SUMMARY.md +52 -0
  111. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-02.md +195 -0
  112. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-03-SUMMARY.md +79 -0
  113. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-03.md +130 -0
  114. data/CHANGELOG.md +28 -0
  115. data/CLAUDE.md +179 -0
  116. data/Gemfile +8 -0
  117. data/Gemfile.lock +114 -101
  118. data/Rakefile +2 -0
  119. data/app/assets/builds/source_monitor/application.css +2076 -0
  120. data/app/assets/builds/source_monitor/application.js +2758 -0
  121. data/app/assets/builds/source_monitor/application.js.map +7 -0
  122. data/app/controllers/source_monitor/application_controller.rb +2 -0
  123. data/app/controllers/source_monitor/health_controller.rb +2 -0
  124. data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +106 -0
  125. data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +187 -0
  126. data/app/controllers/source_monitor/import_sessions/health_check_management.rb +112 -0
  127. data/app/controllers/source_monitor/import_sessions/opml_parser.rb +130 -0
  128. data/app/controllers/source_monitor/import_sessions_controller.rb +6 -507
  129. data/app/controllers/source_monitor/items_controller.rb +2 -0
  130. data/app/controllers/source_monitor/sources_controller.rb +0 -14
  131. data/app/helpers/source_monitor/application_helper.rb +4 -112
  132. data/app/helpers/source_monitor/health_badge_helper.rb +69 -0
  133. data/app/helpers/source_monitor/table_sort_helper.rb +53 -0
  134. data/app/jobs/source_monitor/application_job.rb +2 -0
  135. data/app/models/source_monitor/application_record.rb +2 -0
  136. data/app/models/source_monitor/log_entry.rb +0 -2
  137. data/config/coverage_baseline.json +217 -1862
  138. data/config/routes.rb +2 -0
  139. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +2 -0
  140. data/db/migrate/20251014171659_add_performance_indexes.rb +2 -0
  141. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +2 -0
  142. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +2 -0
  143. data/db/migrate/20260210204022_add_composite_index_to_log_entries.rb +17 -0
  144. data/lib/source_monitor/assets/bundler.rb +2 -0
  145. data/lib/source_monitor/assets.rb +2 -0
  146. data/lib/source_monitor/configuration/authentication_settings.rb +62 -0
  147. data/lib/source_monitor/configuration/events.rb +60 -0
  148. data/lib/source_monitor/configuration/fetching_settings.rb +27 -0
  149. data/lib/source_monitor/configuration/health_settings.rb +27 -0
  150. data/lib/source_monitor/configuration/http_settings.rb +43 -0
  151. data/lib/source_monitor/configuration/model_definition.rb +108 -0
  152. data/lib/source_monitor/configuration/models.rb +36 -0
  153. data/lib/source_monitor/configuration/realtime_settings.rb +95 -0
  154. data/lib/source_monitor/configuration/retention_settings.rb +45 -0
  155. data/lib/source_monitor/configuration/scraper_registry.rb +67 -0
  156. data/lib/source_monitor/configuration/scraping_settings.rb +39 -0
  157. data/lib/source_monitor/configuration/validation_definition.rb +32 -0
  158. data/lib/source_monitor/configuration.rb +12 -579
  159. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +138 -0
  160. data/lib/source_monitor/dashboard/queries/stats_query.rb +71 -0
  161. data/lib/source_monitor/dashboard/queries.rb +2 -195
  162. data/lib/source_monitor/engine.rb +2 -0
  163. data/lib/source_monitor/fetching/feed_fetcher/adaptive_interval.rb +141 -0
  164. data/lib/source_monitor/fetching/feed_fetcher/entry_processor.rb +89 -0
  165. data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +200 -0
  166. data/lib/source_monitor/fetching/feed_fetcher.rb +37 -379
  167. data/lib/source_monitor/items/item_creator/content_extractor.rb +113 -0
  168. data/lib/source_monitor/items/item_creator/entry_parser/media_extraction.rb +96 -0
  169. data/lib/source_monitor/items/item_creator/entry_parser.rb +294 -0
  170. data/lib/source_monitor/items/item_creator.rb +28 -455
  171. data/lib/source_monitor/setup/bundle_installer.rb +2 -0
  172. data/lib/source_monitor/setup/cli.rb +2 -0
  173. data/lib/source_monitor/setup/dependency_checker.rb +2 -0
  174. data/lib/source_monitor/setup/detectors.rb +2 -0
  175. data/lib/source_monitor/setup/gemfile_editor.rb +2 -0
  176. data/lib/source_monitor/setup/initializer_patcher.rb +2 -0
  177. data/lib/source_monitor/setup/install_generator.rb +2 -0
  178. data/lib/source_monitor/setup/migration_installer.rb +2 -0
  179. data/lib/source_monitor/setup/node_installer.rb +2 -0
  180. data/lib/source_monitor/setup/prompter.rb +2 -0
  181. data/lib/source_monitor/setup/requirements.rb +2 -0
  182. data/lib/source_monitor/setup/shell_runner.rb +2 -0
  183. data/lib/source_monitor/setup/verification/action_cable_verifier.rb +2 -0
  184. data/lib/source_monitor/setup/verification/printer.rb +2 -0
  185. data/lib/source_monitor/setup/verification/result.rb +2 -0
  186. data/lib/source_monitor/setup/verification/runner.rb +2 -0
  187. data/lib/source_monitor/setup/verification/solid_queue_verifier.rb +2 -0
  188. data/lib/source_monitor/setup/verification/telemetry_logger.rb +2 -0
  189. data/lib/source_monitor/setup/workflow.rb +2 -0
  190. data/lib/source_monitor/version.rb +3 -1
  191. data/lib/source_monitor.rb +140 -58
  192. data/lib/tasks/source_monitor_assets.rake +2 -0
  193. data/lib/tasks/source_monitor_setup.rake +2 -0
  194. data/lib/tasks/source_monitor_tasks.rake +2 -0
  195. data/source_monitor.gemspec +3 -1
  196. metadata +144 -4
@@ -0,0 +1,200 @@
# frozen_string_literal: true

module SourceMonitor
  module Fetching
    class FeedFetcher
      # Persists the outcome of a single fetch attempt: it updates the source
      # record (status, validators, scheduling, retry/circuit state) and writes
      # the matching fetch log row.
      #
      # Scheduling is delegated to the injected +adaptive_interval+ collaborator
      # and retry decisions to SourceMonitor::Fetching::RetryPolicy.
      class SourceUpdater
        attr_reader :source, :adaptive_interval

        # @param source [Object] record being fetched; must respond to +update!+,
        #   +metadata+, +failure_count+, +fetch_logs+, +adaptive_fetching_enabled?+ and +id+
        # @param adaptive_interval [Object] collaborator responding to
        #   +apply_adaptive_interval!(attributes, content_changed:, failure:)+
        def initialize(source:, adaptive_interval:)
          @source = source
          @adaptive_interval = adaptive_interval
        end

        # Records a successful fetch that returned a parsed feed.
        #
        # @param response [#status, #headers] HTTP response wrapper
        # @param duration_ms [Integer] elapsed time of the fetch in milliseconds
        # @param feed [Object, nil] parsed feed; its class name is stored as the feed format
        # @param feed_signature [String, nil] digest of the response body
        # @return [Object] whatever +source.update!+ returns
        def update_source_for_success(response, duration_ms, feed, feed_signature)
          attributes = successful_fetch_attributes(response, duration_ms)
          attributes[:feed_format] = derive_feed_format(feed)
          apply_validator_headers!(attributes, response.headers)

          adaptive_interval.apply_adaptive_interval!(
            attributes,
            content_changed: feed_signature_changed?(feed_signature)
          )
          attributes[:metadata] = updated_metadata(feed_signature: feed_signature)
          reset_retry_state!(attributes)
          source.update!(attributes)
        end

        # Records a fetch that completed without new content (the caller's
        # "not modified" path). The feed format and stored signature are left untouched.
        #
        # @param response [#status, #headers] HTTP response wrapper
        # @param duration_ms [Integer] elapsed time of the fetch in milliseconds
        # @return [Object] whatever +source.update!+ returns
        def update_source_for_not_modified(response, duration_ms)
          attributes = successful_fetch_attributes(response, duration_ms)
          apply_validator_headers!(attributes, response.headers)

          adaptive_interval.apply_adaptive_interval!(attributes, content_changed: false)
          attributes[:metadata] = updated_metadata
          reset_retry_state!(attributes)
          source.update!(attributes)
        end

        # Records a failed fetch, bumps the failure counter and applies the retry policy.
        #
        # @param error [#http_status, #message] the fetch error
        # @param duration_ms [Integer] elapsed time of the fetch in milliseconds
        # @return [Object, nil] the retry decision, or nil when the retry policy itself failed
        def update_source_for_failure(error, duration_ms)
          now = Time.current
          attrs = {
            last_fetched_at: now,
            last_fetch_duration_ms: duration_ms,
            last_http_status: error.http_status,
            last_error: error.message,
            last_error_at: now,
            failure_count: source.failure_count.to_i + 1
          }

          adaptive_interval.apply_adaptive_interval!(attrs, content_changed: false, failure: true)
          attrs[:metadata] = updated_metadata
          # The retry strategy runs after the adaptive interval so it can
          # override next_fetch_at / backoff_until set above.
          decision = apply_retry_strategy!(attrs, error, now)
          source.update!(attrs)
          decision
        end

        # Creates a fetch log row describing one fetch attempt.
        #
        # +response+, +feed+, +error+ and +body+ may all be nil; the
        # corresponding columns are then stored as nil (headers as an empty hash).
        #
        # @return [Object] the created fetch log record
        def create_fetch_log(response:, duration_ms:, started_at:, success:, feed: nil, error: nil, body: nil,
                             feed_signature: nil, items_created: 0, items_updated: 0, items_failed: 0,
                             item_errors: [])
          source.fetch_logs.create!(
            success:,
            started_at: started_at,
            # Derived from the measured duration rather than sampled again.
            completed_at: started_at + (duration_ms / 1000.0),
            duration_ms: duration_ms,
            http_status: response&.status,
            http_response_headers: normalized_headers(response&.headers),
            feed_size_bytes: body&.bytesize,
            items_in_feed: feed&.respond_to?(:entries) ? feed.entries.size : nil,
            items_created: items_created,
            items_updated: items_updated,
            items_failed: items_failed,
            error_class: error&.class&.name,
            error_message: error&.message,
            error_backtrace: error_backtrace(error),
            metadata: feed_metadata(feed, error: error, feed_signature: feed_signature, item_errors: item_errors)
          )
        end

        # @param started_at [Time] when the fetch began
        # @return [Integer] milliseconds elapsed since +started_at+, rounded
        def elapsed_ms(started_at)
          ((Time.current - started_at) * 1000.0).round
        end

        # @param feed_signature [String, nil]
        # @return [Boolean] true when a signature is given and differs from the
        #   one stored under "last_feed_signature" in the source metadata
        def feed_signature_changed?(feed_signature)
          return false if feed_signature.blank?

          (source.metadata || {}).fetch("last_feed_signature", nil) != feed_signature
        end

        # Builds the metadata hash to persist: a copy of the current metadata
        # without "dynamic_fetch_interval_seconds", plus the new signature when given.
        #
        # @param feed_signature [String, nil]
        # @return [Hash]
        def updated_metadata(feed_signature: nil)
          metadata = (source.metadata || {}).dup
          metadata.delete("dynamic_fetch_interval_seconds")
          metadata["last_feed_signature"] = feed_signature if feed_signature.present?
          metadata
        end

        # @param value [String, nil] an HTTP-date header value
        # @return [Time, nil] the parsed time, or nil when blank or unparseable
        def parse_http_time(value)
          return if value.blank?

          Time.httpdate(value)
        rescue ArgumentError
          nil
        end

        private

        # Attributes shared by every non-failing fetch outcome: the fetch is
        # stamped and any previous error state is cleared.
        def successful_fetch_attributes(response, duration_ms)
          {
            last_fetched_at: Time.current,
            last_fetch_duration_ms: duration_ms,
            last_http_status: response.status,
            last_error: nil,
            last_error_at: nil,
            failure_count: 0
          }
        end

        # Copies the ETag / Last-Modified response headers into +attributes+
        # when present. Both the lowercase and canonical spellings are tried;
        # an unparseable Last-Modified value is ignored.
        def apply_validator_headers!(attributes, headers)
          etag = headers["etag"] || headers["ETag"]
          attributes[:etag] = etag if etag

          last_modified_header = headers["last-modified"] || headers["Last-Modified"]
          return unless last_modified_header

          parsed_time = parse_http_time(last_modified_header)
          attributes[:last_modified] = parsed_time if parsed_time
        end

        # Clears retry and circuit-breaker state after a non-failing fetch.
        def reset_retry_state!(attributes)
          attributes[:fetch_retry_attempt] = 0
          attributes[:fetch_circuit_opened_at] = nil
          attributes[:fetch_circuit_until] = nil
        end

        # Asks RetryPolicy for a decision and writes the resulting retry /
        # circuit-breaker fields into +attributes+. A failure inside the policy
        # is logged and swallowed so the source update itself still goes through.
        #
        # @return [Object, nil] the decision, or nil when the policy raised
        def apply_retry_strategy!(attributes, error, now)
          decision = SourceMonitor::Fetching::RetryPolicy.new(source:, error:, now:).decision

          if decision.open_circuit?
            attributes[:fetch_retry_attempt] = 0
            attributes[:fetch_circuit_opened_at] = now
            attributes[:fetch_circuit_until] = decision.circuit_until
            attributes[:next_fetch_at] = decision.circuit_until
            attributes[:backoff_until] = decision.circuit_until
          elsif decision.retry?
            attributes[:fetch_retry_attempt] = decision.next_attempt
            attributes[:fetch_circuit_opened_at] = nil
            attributes[:fetch_circuit_until] = nil
            # Skipped only when adaptive fetching is explicitly false; a nil
            # answer is treated as enabled.
            unless source.adaptive_fetching_enabled? == false
              retry_at = now + decision.wait
              current_next = attributes[:next_fetch_at]
              # Retry at whichever comes first: the already scheduled fetch or the retry time.
              attributes[:next_fetch_at] = [ current_next, retry_at ].compact.min
              attributes[:backoff_until] = retry_at
            end
          else
            attributes[:fetch_retry_attempt] = 0
          end

          decision
        rescue StandardError => policy_error
          if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
            Rails.logger.error(
              "[SourceMonitor] Failed to apply retry strategy for source #{source.id}: #{policy_error.class} - #{policy_error.message}"
            )
          end
          # Make sure the keys exist (so they are written as nil / 0) while
          # keeping any truthy value assigned before the policy raised.
          attributes[:fetch_retry_attempt] ||= 0
          attributes[:fetch_circuit_opened_at] ||= nil
          attributes[:fetch_circuit_until] ||= nil
          nil
        end

        # @return [String, nil] underscored, demodulized class name of the feed
        def derive_feed_format(feed)
          return unless feed

          feed.class.name.split("::").last.underscore
        end

        # Builds the metadata hash stored on a fetch log; only keys with a
        # value to record are included.
        def feed_metadata(feed, error: nil, feed_signature: nil, item_errors: [])
          metadata = {}
          metadata[:parser] = feed.class.name if feed
          metadata[:error_code] = error.code if error&.respond_to?(:code)
          metadata[:feed_signature] = feed_signature if feed_signature
          metadata[:item_errors] = item_errors if item_errors.present?
          metadata
        end

        # @return [Hash] headers with string, lowercased keys ({} when headers is nil)
        def normalized_headers(headers)
          return {} unless headers

          headers.to_h.transform_keys { |key| key.to_s.downcase }
        end

        # Returns up to the first 20 backtrace lines of the wrapped original
        # error. Errors that do not expose +original_error+ (or have none)
        # yield nil instead of raising, mirroring the +respond_to?+ guard used
        # for +error.code+ in #feed_metadata.
        #
        # @return [String, nil]
        def error_backtrace(error)
          original = error.original_error if error.respond_to?(:original_error)
          return if original.nil?

          Array(original.backtrace).first(20).join("\n")
        end
      end
    end
  end
end
@@ -6,6 +6,9 @@ require "source_monitor/http"
6
6
  require "source_monitor/fetching/fetch_error"
7
7
  require "source_monitor/fetching/retry_policy"
8
8
  require "source_monitor/items/item_creator"
9
+ require "source_monitor/fetching/feed_fetcher/adaptive_interval"
10
+ require "source_monitor/fetching/feed_fetcher/source_updater"
11
+ require "source_monitor/fetching/feed_fetcher/entry_processor"
9
12
 
10
13
  module SourceMonitor
11
14
  module Fetching
@@ -23,12 +26,12 @@ module SourceMonitor
23
26
  )
24
27
  ResponseWrapper = Struct.new(:status, :headers, :body, keyword_init: true)
25
28
 
26
- MIN_FETCH_INTERVAL = 5.minutes.to_f
27
- MAX_FETCH_INTERVAL = 24.hours.to_f
28
- INCREASE_FACTOR = 1.25
29
- DECREASE_FACTOR = 0.75
30
- FAILURE_INCREASE_FACTOR = 1.5
31
- JITTER_PERCENT = 0.1
29
+ MIN_FETCH_INTERVAL = AdaptiveInterval::MIN_FETCH_INTERVAL
30
+ MAX_FETCH_INTERVAL = AdaptiveInterval::MAX_FETCH_INTERVAL
31
+ INCREASE_FACTOR = AdaptiveInterval::INCREASE_FACTOR
32
+ DECREASE_FACTOR = AdaptiveInterval::DECREASE_FACTOR
33
+ FAILURE_INCREASE_FACTOR = AdaptiveInterval::FAILURE_INCREASE_FACTOR
34
+ JITTER_PERCENT = AdaptiveInterval::JITTER_PERCENT
32
35
 
33
36
  attr_reader :source, :client, :jitter_proc
34
37
 
@@ -115,14 +118,14 @@ module SourceMonitor
115
118
  end
116
119
 
117
120
  def handle_success(response, started_at, instrumentation_payload)
118
- duration_ms = elapsed_ms(started_at)
121
+ duration_ms = source_updater.elapsed_ms(started_at)
119
122
  body = response.body
120
123
  feed = parse_feed(body, response)
121
- processing = process_feed_entries(feed)
124
+ processing = entry_processor.process_feed_entries(feed)
122
125
 
123
126
  feed_body_signature = body_digest(body)
124
- update_source_for_success(response, duration_ms, feed, feed_body_signature)
125
- create_fetch_log(
127
+ source_updater.update_source_for_success(response, duration_ms, feed, feed_body_signature)
128
+ source_updater.create_fetch_log(
126
129
  response: response,
127
130
  duration_ms: duration_ms,
128
131
  started_at: started_at,
@@ -149,10 +152,10 @@ module SourceMonitor
149
152
  end
150
153
 
151
154
  def handle_not_modified(response, started_at, instrumentation_payload)
152
- duration_ms = elapsed_ms(started_at)
155
+ duration_ms = source_updater.elapsed_ms(started_at)
153
156
 
154
- update_source_for_not_modified(response, duration_ms)
155
- create_fetch_log(
157
+ source_updater.update_source_for_not_modified(response, duration_ms)
158
+ source_updater.create_fetch_log(
156
159
  response: response,
157
160
  duration_ms: duration_ms,
158
161
  started_at: started_at,
@@ -189,182 +192,13 @@ module SourceMonitor
189
192
  raise ParsingError.new(error.message, response: response, original_error: error)
190
193
  end
191
194
 
192
- def update_source_for_success(response, duration_ms, feed, feed_signature)
193
- attributes = {
194
- last_fetched_at: Time.current,
195
- last_fetch_duration_ms: duration_ms,
196
- last_http_status: response.status,
197
- last_error: nil,
198
- last_error_at: nil,
199
- failure_count: 0,
200
- feed_format: derive_feed_format(feed)
201
- }
202
-
203
- if (etag = response.headers["etag"] || response.headers["ETag"])
204
- attributes[:etag] = etag
205
- end
206
-
207
- if (last_modified_header = response.headers["last-modified"] || response.headers["Last-Modified"])
208
- parsed_time = parse_http_time(last_modified_header)
209
- attributes[:last_modified] = parsed_time if parsed_time
210
- end
211
-
212
- apply_adaptive_interval!(attributes, content_changed: feed_signature_changed?(feed_signature))
213
- attributes[:metadata] = updated_metadata(feed_signature: feed_signature)
214
- reset_retry_state!(attributes)
215
- source.update!(attributes)
216
- end
217
-
218
- def update_source_for_not_modified(response, duration_ms)
219
- attributes = {
220
- last_fetched_at: Time.current,
221
- last_fetch_duration_ms: duration_ms,
222
- last_http_status: response.status,
223
- last_error: nil,
224
- last_error_at: nil,
225
- failure_count: 0
226
- }
227
-
228
- if (etag = response.headers["etag"] || response.headers["ETag"])
229
- attributes[:etag] = etag
230
- end
231
-
232
- if (last_modified_header = response.headers["last-modified"] || response.headers["Last-Modified"])
233
- parsed_time = parse_http_time(last_modified_header)
234
- attributes[:last_modified] = parsed_time if parsed_time
235
- end
236
-
237
- apply_adaptive_interval!(attributes, content_changed: false)
238
- attributes[:metadata] = updated_metadata
239
- reset_retry_state!(attributes)
240
- source.update!(attributes)
241
- end
242
-
243
- def update_source_for_failure(error, duration_ms)
244
- now = Time.current
245
- attrs = {
246
- last_fetched_at: now,
247
- last_fetch_duration_ms: duration_ms,
248
- last_http_status: error.http_status,
249
- last_error: error.message,
250
- last_error_at: now,
251
- failure_count: source.failure_count.to_i + 1
252
- }
253
-
254
- apply_adaptive_interval!(attrs, content_changed: false, failure: true)
255
- attrs[:metadata] = updated_metadata
256
- decision = apply_retry_strategy!(attrs, error, now)
257
- source.update!(attrs)
258
- decision
259
- end
260
-
261
- def reset_retry_state!(attributes)
262
- attributes[:fetch_retry_attempt] = 0
263
- attributes[:fetch_circuit_opened_at] = nil
264
- attributes[:fetch_circuit_until] = nil
265
- end
266
-
267
- def apply_retry_strategy!(attributes, error, now)
268
- decision = SourceMonitor::Fetching::RetryPolicy.new(source:, error:, now:).decision
269
-
270
- if decision.open_circuit?
271
- attributes[:fetch_retry_attempt] = 0
272
- attributes[:fetch_circuit_opened_at] = now
273
- attributes[:fetch_circuit_until] = decision.circuit_until
274
- attributes[:next_fetch_at] = decision.circuit_until
275
- attributes[:backoff_until] = decision.circuit_until
276
- elsif decision.retry?
277
- attributes[:fetch_retry_attempt] = decision.next_attempt
278
- attributes[:fetch_circuit_opened_at] = nil
279
- attributes[:fetch_circuit_until] = nil
280
- unless source.adaptive_fetching_enabled? == false
281
- retry_at = now + decision.wait
282
- current_next = attributes[:next_fetch_at]
283
- attributes[:next_fetch_at] = [ current_next, retry_at ].compact.min
284
- attributes[:backoff_until] = retry_at
285
- end
286
- else
287
- attributes[:fetch_retry_attempt] = 0
288
- end
289
-
290
- decision
291
- rescue StandardError => policy_error
292
- Rails.logger.error(
293
- "[SourceMonitor] Failed to apply retry strategy for source #{source.id}: #{policy_error.class} - #{policy_error.message}"
294
- ) if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
295
- attributes[:fetch_retry_attempt] ||= 0
296
- attributes[:fetch_circuit_opened_at] ||= nil
297
- attributes[:fetch_circuit_until] ||= nil
298
- nil
299
- end
300
-
301
- def create_fetch_log(response:, duration_ms:, started_at:, success:, feed: nil, error: nil, body: nil, feed_signature: nil,
302
- items_created: 0, items_updated: 0, items_failed: 0, item_errors: [])
303
- source.fetch_logs.create!(
304
- success:,
305
- started_at: started_at,
306
- completed_at: started_at + (duration_ms / 1000.0),
307
- duration_ms: duration_ms,
308
- http_status: response&.status,
309
- http_response_headers: normalized_headers(response&.headers),
310
- feed_size_bytes: body&.bytesize,
311
- items_in_feed: feed&.respond_to?(:entries) ? feed.entries.size : nil,
312
- items_created: items_created,
313
- items_updated: items_updated,
314
- items_failed: items_failed,
315
- error_class: error&.class&.name,
316
- error_message: error&.message,
317
- error_backtrace: error_backtrace(error),
318
- metadata: feed_metadata(feed, error: error, feed_signature: feed_signature, item_errors: item_errors)
319
- )
320
- end
321
-
322
- def derive_feed_format(feed)
323
- return unless feed
324
-
325
- feed.class.name.split("::").last.underscore
326
- end
327
-
328
- def feed_metadata(feed, error: nil, feed_signature: nil, item_errors: [])
329
- metadata = {}
330
- metadata[:parser] = feed.class.name if feed
331
- metadata[:error_code] = error.code if error&.respond_to?(:code)
332
- metadata[:feed_signature] = feed_signature if feed_signature
333
- metadata[:item_errors] = item_errors if item_errors.present?
334
- metadata
335
- end
336
-
337
- def normalized_headers(headers)
338
- return {} unless headers
339
-
340
- headers.to_h.transform_keys { |key| key.to_s.downcase }
341
- end
342
-
343
- def error_backtrace(error)
344
- return if error.nil? || error.original_error.nil?
345
-
346
- Array(error.original_error.backtrace).first(20).join("\n")
347
- end
348
-
349
- def parse_http_time(value)
350
- return if value.blank?
351
-
352
- Time.httpdate(value)
353
- rescue ArgumentError
354
- nil
355
- end
356
-
357
- def elapsed_ms(started_at)
358
- ((Time.current - started_at) * 1000.0).round
359
- end
360
-
361
195
  def handle_failure(error, started_at:, instrumentation_payload:)
362
196
  response = error.response
363
197
  body = response&.body
364
- duration_ms = elapsed_ms(started_at)
198
+ duration_ms = source_updater.elapsed_ms(started_at)
365
199
 
366
- retry_decision = update_source_for_failure(error, duration_ms)
367
- create_fetch_log(
200
+ retry_decision = source_updater.update_source_for_failure(error, duration_ms)
201
+ source_updater.create_fetch_log(
368
202
  response: response,
369
203
  duration_ms: duration_ms,
370
204
  started_at: started_at,
@@ -416,212 +250,36 @@ module SourceMonitor
416
250
  end
417
251
  end
418
252
 
419
- def feed_signature_changed?(feed_signature)
420
- return false if feed_signature.blank?
421
-
422
- (source.metadata || {}).fetch("last_feed_signature", nil) != feed_signature
423
- end
424
-
425
- def apply_adaptive_interval!(attributes, content_changed:, failure: false)
426
- if source.adaptive_fetching_enabled?
427
- interval_seconds = compute_next_interval_seconds(content_changed:, failure:)
428
- scheduled_time = Time.current + adjusted_interval_with_jitter(interval_seconds)
429
- scheduled_time = [ scheduled_time, source.backoff_until ].compact.max if source.backoff_until.present?
430
-
431
- attributes[:fetch_interval_minutes] = interval_minutes_for(interval_seconds)
432
- attributes[:next_fetch_at] = scheduled_time
433
- attributes[:backoff_until] = failure ? scheduled_time : nil
434
- else
435
- fixed_minutes = [ source.fetch_interval_minutes.to_i, 1 ].max
436
- attributes[:next_fetch_at] = Time.current + fixed_minutes.minutes
437
- attributes[:backoff_until] = nil
438
- end
439
- end
440
-
441
- def compute_next_interval_seconds(content_changed:, failure:)
442
- current = [ current_interval_seconds, min_fetch_interval_seconds ].max
443
-
444
- next_interval = if failure
445
- current * failure_increase_factor_value
446
- elsif content_changed
447
- current * decrease_factor_value
448
- else
449
- current * increase_factor_value
450
- end
451
-
452
- next_interval = min_fetch_interval_seconds if next_interval < min_fetch_interval_seconds
453
- next_interval = max_fetch_interval_seconds if next_interval > max_fetch_interval_seconds
454
- next_interval.to_f
455
- end
456
-
457
- def current_interval_seconds
458
- source.fetch_interval_minutes.to_f * 60.0
459
- end
460
-
461
- def interval_minutes_for(interval_seconds)
462
- minutes = (interval_seconds / 60.0).round
463
- [ minutes, 1 ].max
464
- end
465
-
466
- def min_fetch_interval_seconds
467
- configured_seconds(fetching_config&.min_interval_minutes, MIN_FETCH_INTERVAL)
468
- end
469
-
470
- def max_fetch_interval_seconds
471
- configured_seconds(fetching_config&.max_interval_minutes, MAX_FETCH_INTERVAL)
472
- end
473
-
474
- def increase_factor_value
475
- configured_positive(fetching_config&.increase_factor, INCREASE_FACTOR)
476
- end
477
-
478
- def decrease_factor_value
479
- configured_positive(fetching_config&.decrease_factor, DECREASE_FACTOR)
480
- end
481
-
482
- def failure_increase_factor_value
483
- configured_positive(fetching_config&.failure_increase_factor, FAILURE_INCREASE_FACTOR)
484
- end
485
-
486
- def jitter_percent_value
487
- configured_non_negative(fetching_config&.jitter_percent, JITTER_PERCENT)
488
- end
489
-
490
- def updated_metadata(feed_signature: nil)
491
- metadata = (source.metadata || {}).dup
492
- metadata.delete("dynamic_fetch_interval_seconds")
493
- metadata["last_feed_signature"] = feed_signature if feed_signature.present?
494
- metadata
495
- end
496
-
497
- def adjusted_interval_with_jitter(interval_seconds)
498
- jitter = jitter_offset(interval_seconds)
499
- adjusted = interval_seconds + jitter
500
- adjusted = min_fetch_interval_seconds if adjusted < min_fetch_interval_seconds
501
- adjusted
502
- end
503
-
504
- def jitter_offset(interval_seconds)
505
- return 0 if interval_seconds <= 0
506
- return jitter_proc.call(interval_seconds) if jitter_proc.respond_to?(:call)
507
-
508
- jitter_range = interval_seconds * jitter_percent_value
509
- return 0 if jitter_range <= 0
510
-
511
- ((rand * 2) - 1) * jitter_range
512
- end
513
-
514
253
  def body_digest(body)
515
254
  return if body.blank?
516
255
 
517
256
  Digest::SHA256.hexdigest(body)
518
257
  end
519
258
 
520
- def process_feed_entries(feed)
521
- return EntryProcessingResult.new(
522
- created: 0,
523
- updated: 0,
524
- failed: 0,
525
- items: [],
526
- errors: [],
527
- created_items: [],
528
- updated_items: []
529
- ) unless feed.respond_to?(:entries)
530
-
531
- created = 0
532
- updated = 0
533
- failed = 0
534
- items = []
535
- created_items = []
536
- updated_items = []
537
- errors = []
538
-
539
- Array(feed.entries).each do |entry|
540
- begin
541
- result = SourceMonitor::Items::ItemCreator.call(source:, entry:)
542
- SourceMonitor::Events.run_item_processors(source:, entry:, result: result)
543
- items << result.item
544
- if result.created?
545
- created += 1
546
- created_items << result.item
547
- SourceMonitor::Events.after_item_created(item: result.item, source:, entry:, result: result)
548
- else
549
- updated += 1
550
- updated_items << result.item
551
- end
552
- rescue StandardError => error
553
- failed += 1
554
- errors << normalize_item_error(entry, error)
555
- end
556
- end
557
-
558
- EntryProcessingResult.new(
559
- created:,
560
- updated:,
561
- failed:,
562
- items:,
563
- errors: errors.compact,
564
- created_items:,
565
- updated_items:
566
- )
567
- end
568
-
569
- def configured_seconds(minutes_value, default)
570
- minutes = extract_numeric(minutes_value)
571
- return default unless minutes && minutes.positive?
572
-
573
- minutes * 60.0
574
- end
575
-
576
- def configured_positive(value, default)
577
- number = extract_numeric(value)
578
- return default unless number && number.positive?
579
-
580
- number
259
+ def adaptive_interval
260
+ @adaptive_interval ||= AdaptiveInterval.new(source: source, jitter_proc: jitter_proc)
581
261
  end
582
262
 
583
- def configured_non_negative(value, default)
584
- number = extract_numeric(value)
585
- return default if number.nil?
586
-
587
- number.negative? ? 0.0 : number
263
+ def source_updater
264
+ @source_updater ||= SourceUpdater.new(source: source, adaptive_interval: adaptive_interval)
588
265
  end
589
266
 
590
- def extract_numeric(value)
591
- return value if value.is_a?(Numeric)
592
- return value.to_f if value.respond_to?(:to_f)
593
-
594
- nil
595
- rescue StandardError
596
- nil
267
+ def entry_processor
268
+ @entry_processor ||= EntryProcessor.new(source: source)
597
269
  end
598
270
 
599
- def fetching_config
600
- SourceMonitor.config.fetching
601
- end
602
-
603
- def normalize_item_error(entry, error)
604
- {
605
- guid: safe_entry_guid(entry),
606
- title: safe_entry_title(entry),
607
- error_class: error.class.name,
608
- error_message: error.message
609
- }
610
- rescue StandardError
611
- { error_class: error.class.name, error_message: error.message }
612
- end
613
-
614
- def safe_entry_guid(entry)
615
- if entry.respond_to?(:entry_id)
616
- entry.entry_id
617
- elsif entry.respond_to?(:id)
618
- entry.id
619
- end
620
- end
621
-
622
- def safe_entry_title(entry)
623
- entry.title if entry.respond_to?(:title)
624
- end
271
+ # Forwarding methods for backward compatibility with tests
272
+ def process_feed_entries(feed) = entry_processor.process_feed_entries(feed)
273
+ def jitter_offset(interval_seconds) = adaptive_interval.jitter_offset(interval_seconds)
274
+ def adjusted_interval_with_jitter(interval_seconds) = adaptive_interval.adjusted_interval_with_jitter(interval_seconds)
275
+ def updated_metadata(feed_signature: nil) = source_updater.updated_metadata(feed_signature: feed_signature)
276
+ def feed_signature_changed?(feed_signature) = source_updater.feed_signature_changed?(feed_signature)
277
+ def configured_seconds(minutes_value, default) = adaptive_interval.configured_seconds(minutes_value, default)
278
+ def configured_positive(value, default) = adaptive_interval.configured_positive(value, default)
279
+ def configured_non_negative(value, default) = adaptive_interval.configured_non_negative(value, default)
280
+ def interval_minutes_for(interval_seconds) = adaptive_interval.interval_minutes_for(interval_seconds)
281
+ def parse_http_time(value) = source_updater.parse_http_time(value)
282
+ def extract_numeric(value) = adaptive_interval.extract_numeric(value)
625
283
  end
626
284
  end
627
285
  end