source_monitor 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (196)
  1. checksums.yaml +4 -4
  2. data/.claude/agents/rails-concern.md +464 -0
  3. data/.claude/agents/rails-controller.md +424 -0
  4. data/.claude/agents/rails-hotwire.md +446 -0
  5. data/.claude/agents/rails-implement.md +374 -0
  6. data/.claude/agents/rails-job.md +334 -0
  7. data/.claude/agents/rails-lint.md +294 -0
  8. data/.claude/agents/rails-mailer.md +371 -0
  9. data/.claude/agents/rails-migration.md +449 -0
  10. data/.claude/agents/rails-model.md +420 -0
  11. data/.claude/agents/rails-policy.md +443 -0
  12. data/.claude/agents/rails-presenter.md +427 -0
  13. data/.claude/agents/rails-query.md +412 -0
  14. data/.claude/agents/rails-review.md +490 -0
  15. data/.claude/agents/rails-service.md +458 -0
  16. data/.claude/agents/rails-state-records.md +465 -0
  17. data/.claude/agents/rails-tdd.md +314 -0
  18. data/.claude/agents/rails-test.md +441 -0
  19. data/.claude/agents/rails-view-component.md +418 -0
  20. data/.claude/hooks/block-secrets.sh +52 -0
  21. data/.claude/settings.json +85 -0
  22. data/.claude/skills/action-cable-patterns/SKILL.md +296 -0
  23. data/.claude/skills/action-mailer-patterns/SKILL.md +295 -0
  24. data/.claude/skills/active-storage-setup/SKILL.md +311 -0
  25. data/.claude/skills/api-versioning/SKILL.md +294 -0
  26. data/.claude/skills/authentication-flow/SKILL.md +335 -0
  27. data/.claude/skills/authentication-flow/reference/current.md +248 -0
  28. data/.claude/skills/authentication-flow/reference/passwordless.md +253 -0
  29. data/.claude/skills/authentication-flow/reference/sessions.md +201 -0
  30. data/.claude/skills/authorization-pundit/SKILL.md +462 -0
  31. data/.claude/skills/caching-strategies/SKILL.md +350 -0
  32. data/.claude/skills/database-migrations/SKILL.md +354 -0
  33. data/.claude/skills/form-object-patterns/SKILL.md +399 -0
  34. data/.claude/skills/hotwire-patterns/SKILL.md +247 -0
  35. data/.claude/skills/hotwire-patterns/reference/stimulus.md +307 -0
  36. data/.claude/skills/hotwire-patterns/reference/tailwind-integration.md +112 -0
  37. data/.claude/skills/hotwire-patterns/reference/turbo-frames.md +158 -0
  38. data/.claude/skills/hotwire-patterns/reference/turbo-streams.md +218 -0
  39. data/.claude/skills/i18n-patterns/SKILL.md +320 -0
  40. data/.claude/skills/install/SKILL.md +367 -0
  41. data/.claude/skills/performance-optimization/SKILL.md +311 -0
  42. data/.claude/skills/rails-architecture/SKILL.md +259 -0
  43. data/.claude/skills/rails-architecture/reference/error-handling.md +333 -0
  44. data/.claude/skills/rails-architecture/reference/event-tracking.md +142 -0
  45. data/.claude/skills/rails-architecture/reference/layer-interactions.md +417 -0
  46. data/.claude/skills/rails-architecture/reference/multi-tenancy.md +152 -0
  47. data/.claude/skills/rails-architecture/reference/query-patterns.md +342 -0
  48. data/.claude/skills/rails-architecture/reference/service-patterns.md +286 -0
  49. data/.claude/skills/rails-architecture/reference/state-records.md +250 -0
  50. data/.claude/skills/rails-architecture/reference/testing-strategy.md +326 -0
  51. data/.claude/skills/rails-concern/SKILL.md +399 -0
  52. data/.claude/skills/rails-controller/SKILL.md +336 -0
  53. data/.claude/skills/rails-model-generator/SKILL.md +321 -0
  54. data/.claude/skills/rails-model-generator/reference/validations.md +298 -0
  55. data/.claude/skills/rails-presenter/SKILL.md +274 -0
  56. data/.claude/skills/rails-query-object/SKILL.md +289 -0
  57. data/.claude/skills/rails-service-object/SKILL.md +349 -0
  58. data/.claude/skills/solid-queue-setup/SKILL.md +307 -0
  59. data/.claude/skills/tdd-cycle/SKILL.md +359 -0
  60. data/.claude/skills/viewcomponent-patterns/SKILL.md +333 -0
  61. data/.gitignore +1 -0
  62. data/.rubocop.yml +2 -0
  63. data/.ruby-version +1 -1
  64. data/.vbw-planning/.notification-log.jsonl +192 -0
  65. data/.vbw-planning/.session-log.jsonl +871 -0
  66. data/.vbw-planning/PROJECT.md +51 -0
  67. data/.vbw-planning/REQUIREMENTS.md +50 -0
  68. data/.vbw-planning/SHIPPED.md +28 -0
  69. data/.vbw-planning/codebase/ARCHITECTURE.md +147 -0
  70. data/.vbw-planning/codebase/CONCERNS.md +99 -0
  71. data/.vbw-planning/codebase/CONVENTIONS.md +97 -0
  72. data/.vbw-planning/codebase/DEPENDENCIES.md +100 -0
  73. data/.vbw-planning/codebase/INDEX.md +86 -0
  74. data/.vbw-planning/codebase/META.md +42 -0
  75. data/.vbw-planning/codebase/PATTERNS.md +262 -0
  76. data/.vbw-planning/codebase/STACK.md +101 -0
  77. data/.vbw-planning/codebase/STRUCTURE.md +324 -0
  78. data/.vbw-planning/codebase/TESTING.md +154 -0
  79. data/.vbw-planning/config.json +12 -0
  80. data/.vbw-planning/discovery.json +24 -0
  81. data/.vbw-planning/milestones/default/ROADMAP.md +115 -0
  82. data/.vbw-planning/milestones/default/STATE.md +83 -0
  83. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-01-SUMMARY.md +56 -0
  84. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-01.md +187 -0
  85. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-02-SUMMARY.md +64 -0
  86. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-02.md +137 -0
  87. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-01-SUMMARY.md +67 -0
  88. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-01.md +142 -0
  89. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-02-SUMMARY.md +64 -0
  90. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-02.md +138 -0
  91. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-03-SUMMARY.md +85 -0
  92. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-03.md +147 -0
  93. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-04-SUMMARY.md +63 -0
  94. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-04.md +129 -0
  95. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-05-SUMMARY.md +74 -0
  96. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-05.md +154 -0
  97. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/03-VERIFICATION-wave1.md +303 -0
  98. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/03-VERIFICATION.md +510 -0
  99. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-01-SUMMARY.md +61 -0
  100. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-01.md +161 -0
  101. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-02-SUMMARY.md +66 -0
  102. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-02.md +132 -0
  103. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-03-SUMMARY.md +59 -0
  104. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-03.md +171 -0
  105. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-04-SUMMARY.md +56 -0
  106. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-04.md +152 -0
  107. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/04-CONTEXT.md +33 -0
  108. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-01-SUMMARY.md +42 -0
  109. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-01.md +119 -0
  110. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-02-SUMMARY.md +52 -0
  111. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-02.md +195 -0
  112. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-03-SUMMARY.md +79 -0
  113. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-03.md +130 -0
  114. data/CHANGELOG.md +28 -0
  115. data/CLAUDE.md +179 -0
  116. data/Gemfile +8 -0
  117. data/Gemfile.lock +114 -101
  118. data/Rakefile +2 -0
  119. data/app/assets/builds/source_monitor/application.css +2076 -0
  120. data/app/assets/builds/source_monitor/application.js +2758 -0
  121. data/app/assets/builds/source_monitor/application.js.map +7 -0
  122. data/app/controllers/source_monitor/application_controller.rb +2 -0
  123. data/app/controllers/source_monitor/health_controller.rb +2 -0
  124. data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +106 -0
  125. data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +187 -0
  126. data/app/controllers/source_monitor/import_sessions/health_check_management.rb +112 -0
  127. data/app/controllers/source_monitor/import_sessions/opml_parser.rb +130 -0
  128. data/app/controllers/source_monitor/import_sessions_controller.rb +6 -507
  129. data/app/controllers/source_monitor/items_controller.rb +2 -0
  130. data/app/controllers/source_monitor/sources_controller.rb +0 -14
  131. data/app/helpers/source_monitor/application_helper.rb +4 -112
  132. data/app/helpers/source_monitor/health_badge_helper.rb +69 -0
  133. data/app/helpers/source_monitor/table_sort_helper.rb +53 -0
  134. data/app/jobs/source_monitor/application_job.rb +2 -0
  135. data/app/models/source_monitor/application_record.rb +2 -0
  136. data/app/models/source_monitor/log_entry.rb +0 -2
  137. data/config/coverage_baseline.json +217 -1862
  138. data/config/routes.rb +2 -0
  139. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +2 -0
  140. data/db/migrate/20251014171659_add_performance_indexes.rb +2 -0
  141. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +2 -0
  142. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +2 -0
  143. data/db/migrate/20260210204022_add_composite_index_to_log_entries.rb +17 -0
  144. data/lib/source_monitor/assets/bundler.rb +2 -0
  145. data/lib/source_monitor/assets.rb +2 -0
  146. data/lib/source_monitor/configuration/authentication_settings.rb +62 -0
  147. data/lib/source_monitor/configuration/events.rb +60 -0
  148. data/lib/source_monitor/configuration/fetching_settings.rb +27 -0
  149. data/lib/source_monitor/configuration/health_settings.rb +27 -0
  150. data/lib/source_monitor/configuration/http_settings.rb +43 -0
  151. data/lib/source_monitor/configuration/model_definition.rb +108 -0
  152. data/lib/source_monitor/configuration/models.rb +36 -0
  153. data/lib/source_monitor/configuration/realtime_settings.rb +95 -0
  154. data/lib/source_monitor/configuration/retention_settings.rb +45 -0
  155. data/lib/source_monitor/configuration/scraper_registry.rb +67 -0
  156. data/lib/source_monitor/configuration/scraping_settings.rb +39 -0
  157. data/lib/source_monitor/configuration/validation_definition.rb +32 -0
  158. data/lib/source_monitor/configuration.rb +12 -579
  159. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +138 -0
  160. data/lib/source_monitor/dashboard/queries/stats_query.rb +71 -0
  161. data/lib/source_monitor/dashboard/queries.rb +2 -195
  162. data/lib/source_monitor/engine.rb +2 -0
  163. data/lib/source_monitor/fetching/feed_fetcher/adaptive_interval.rb +141 -0
  164. data/lib/source_monitor/fetching/feed_fetcher/entry_processor.rb +89 -0
  165. data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +200 -0
  166. data/lib/source_monitor/fetching/feed_fetcher.rb +37 -379
  167. data/lib/source_monitor/items/item_creator/content_extractor.rb +113 -0
  168. data/lib/source_monitor/items/item_creator/entry_parser/media_extraction.rb +96 -0
  169. data/lib/source_monitor/items/item_creator/entry_parser.rb +294 -0
  170. data/lib/source_monitor/items/item_creator.rb +28 -455
  171. data/lib/source_monitor/setup/bundle_installer.rb +2 -0
  172. data/lib/source_monitor/setup/cli.rb +2 -0
  173. data/lib/source_monitor/setup/dependency_checker.rb +2 -0
  174. data/lib/source_monitor/setup/detectors.rb +2 -0
  175. data/lib/source_monitor/setup/gemfile_editor.rb +2 -0
  176. data/lib/source_monitor/setup/initializer_patcher.rb +2 -0
  177. data/lib/source_monitor/setup/install_generator.rb +2 -0
  178. data/lib/source_monitor/setup/migration_installer.rb +2 -0
  179. data/lib/source_monitor/setup/node_installer.rb +2 -0
  180. data/lib/source_monitor/setup/prompter.rb +2 -0
  181. data/lib/source_monitor/setup/requirements.rb +2 -0
  182. data/lib/source_monitor/setup/shell_runner.rb +2 -0
  183. data/lib/source_monitor/setup/verification/action_cable_verifier.rb +2 -0
  184. data/lib/source_monitor/setup/verification/printer.rb +2 -0
  185. data/lib/source_monitor/setup/verification/result.rb +2 -0
  186. data/lib/source_monitor/setup/verification/runner.rb +2 -0
  187. data/lib/source_monitor/setup/verification/solid_queue_verifier.rb +2 -0
  188. data/lib/source_monitor/setup/verification/telemetry_logger.rb +2 -0
  189. data/lib/source_monitor/setup/workflow.rb +2 -0
  190. data/lib/source_monitor/version.rb +3 -1
  191. data/lib/source_monitor.rb +140 -58
  192. data/lib/tasks/source_monitor_assets.rake +2 -0
  193. data/lib/tasks/source_monitor_setup.rake +2 -0
  194. data/lib/tasks/source_monitor_tasks.rake +2 -0
  195. data/source_monitor.gemspec +3 -1
  196. metadata +144 -4
@@ -7,6 +7,8 @@ require "active_support/core_ext/object/blank"
7
7
  require "active_support/core_ext/time"
8
8
  require "source_monitor/instrumentation"
9
9
  require "source_monitor/scrapers/readability"
10
+ require "source_monitor/items/item_creator/entry_parser"
11
+ require "source_monitor/items/item_creator/content_extractor"
10
12
 
11
13
  module SourceMonitor
12
14
  module Items
@@ -20,11 +22,13 @@ module SourceMonitor
20
22
  status == :updated
21
23
  end
22
24
  end
25
+
23
26
  FINGERPRINT_SEPARATOR = "\u0000".freeze
24
27
  CONTENT_METHODS = %i[content content_encoded summary].freeze
25
28
  TIMESTAMP_METHODS = %i[published updated].freeze
26
29
  KEYWORD_SEPARATORS = /[,;]+/.freeze
27
30
  METADATA_ROOT_KEY = "feedjira_entry".freeze
31
+
28
32
  def self.call(source:, entry:)
29
33
  new(source:, entry:).call
30
34
  end
@@ -134,468 +138,37 @@ module SourceMonitor
134
138
  record.metadata = metadata if metadata
135
139
  end
136
140
 
137
- def process_feed_content(raw_content, title:)
138
- return [ raw_content, nil ] unless should_process_feed_content?(raw_content)
139
-
140
- parser = feed_content_parser_class.new
141
- html = wrap_content_for_readability(raw_content, title: title)
142
- result = parser.parse(html: html, readability: default_feed_readability_options)
143
-
144
- processed_content = result.content.presence || raw_content
145
- metadata = build_feed_content_metadata(result: result, raw_content: raw_content, processed_content: processed_content)
146
-
147
- [ processed_content, metadata.presence ]
148
- rescue StandardError => error
149
- metadata = {
150
- "status" => "failed",
151
- "strategy" => "readability",
152
- "applied" => false,
153
- "changed" => false,
154
- "error_class" => error.class.name,
155
- "error_message" => error.message
156
- }
157
- [ raw_content, metadata ]
158
- end
159
-
160
- def should_process_feed_content?(raw_content)
161
- source.respond_to?(:feed_content_readability_enabled?) &&
162
- source.feed_content_readability_enabled? &&
163
- raw_content.present? &&
164
- html_fragment?(raw_content)
165
- end
166
-
167
- def feed_content_parser_class
168
- SourceMonitor::Scrapers::Parsers::ReadabilityParser
169
- end
170
-
171
- def wrap_content_for_readability(content, title:)
172
- safe_title = title.present? ? CGI.escapeHTML(title) : "Feed Entry"
173
- <<~HTML
174
- <!DOCTYPE html>
175
- <html>
176
- <head>
177
- <meta charset="utf-8">
178
- <title>#{safe_title}</title>
179
- </head>
180
- <body>
181
- #{content}
182
- </body>
183
- </html>
184
- HTML
185
- end
186
-
187
- def default_feed_readability_options
188
- default = SourceMonitor::Scrapers::Readability.default_settings[:readability]
189
- return {} unless default
190
-
191
- deep_copy(default)
192
- end
193
-
194
- def build_feed_content_metadata(result:, raw_content:, processed_content:)
195
- metadata = {
196
- "strategy" => result.strategy&.to_s,
197
- "status" => result.status&.to_s,
198
- "applied" => result.content.present?,
199
- "changed" => processed_content != raw_content
200
- }
201
-
202
- if result.metadata && result.metadata[:readability_text_length]
203
- metadata["readability_text_length"] = result.metadata[:readability_text_length]
204
- end
205
-
206
- metadata["title"] = result.title if result.title.present?
207
- metadata.compact
208
- end
209
-
210
- def html_fragment?(value)
211
- value.to_s.match?(/<\s*\w+/)
212
- end
213
-
214
- def deep_copy(value)
215
- if value.respond_to?(:deep_dup)
216
- return value.deep_dup
217
- end
218
-
219
- case value
220
- when Hash
221
- value.each_with_object(value.class.new) do |(key, nested), copy|
222
- copy[key] = deep_copy(nested)
223
- end
224
- when Array
225
- value.map { |element| deep_copy(element) }
226
- else
227
- value.dup
228
- end
229
- rescue TypeError
230
- value
231
- end
232
-
233
141
  def build_attributes
234
- url = extract_url
235
- title = string_or_nil(entry.title) if entry.respond_to?(:title)
236
- raw_content = extract_content
237
- content, content_processing_metadata = process_feed_content(raw_content, title: title)
238
- fingerprint = generate_fingerprint(title, url, content)
239
- published_at = extract_timestamp
240
- updated_at_source = extract_updated_timestamp
241
-
242
- metadata = extract_metadata
243
- if content_processing_metadata.present?
244
- metadata = metadata.merge("feed_content_processing" => content_processing_metadata)
245
- end
246
-
247
- {
248
- guid: extract_guid,
249
- title: title,
250
- url: url,
251
- canonical_url: url,
252
- author: extract_author,
253
- authors: extract_authors,
254
- summary: extract_summary,
255
- content: content,
256
- published_at: published_at,
257
- updated_at_source: updated_at_source,
258
- categories: extract_categories,
259
- tags: extract_tags,
260
- keywords: extract_keywords,
261
- enclosures: extract_enclosures,
262
- media_thumbnail_url: extract_media_thumbnail_url,
263
- media_content: extract_media_content,
264
- language: extract_language,
265
- copyright: extract_copyright,
266
- comments_url: extract_comments_url,
267
- comments_count: extract_comments_count,
268
- metadata: metadata,
269
- content_fingerprint: fingerprint
270
- }.compact
142
+ entry_parser.parse
271
143
  end
272
144
 
273
- def extract_guid
274
- entry_guid = entry.respond_to?(:entry_id) ? string_or_nil(entry.entry_id) : nil
275
- return entry_guid if entry_guid.present?
276
-
277
- return unless entry.respond_to?(:id)
278
-
279
- entry_id = string_or_nil(entry.id)
280
- return if entry_id.blank?
281
-
282
- url = extract_url
283
- return entry_id if url.blank? || entry_id != url
284
-
285
- nil
145
+ def entry_parser
146
+ @entry_parser ||= EntryParser.new(source: source, entry: entry, content_extractor: content_extractor)
286
147
  end
287
148
 
288
- def extract_url
289
- if entry.respond_to?(:url)
290
- primary_url = string_or_nil(entry.url)
291
- return primary_url if primary_url.present?
292
- end
293
-
294
- if entry.respond_to?(:link_nodes)
295
- alternate = Array(entry.link_nodes).find do |node|
296
- rel = string_or_nil(node&.rel)&.downcase
297
- rel.nil? || rel == "alternate"
298
- end
299
- alternate ||= Array(entry.link_nodes).first
300
- href = string_or_nil(alternate&.href)
301
- return href if href.present?
302
- end
303
-
304
- if entry.respond_to?(:links)
305
- href = Array(entry.links).map { |link| string_or_nil(link) }.find(&:present?)
306
- return href if href.present?
307
- end
308
-
309
- nil
310
- end
311
-
312
- def extract_summary
313
- return unless entry.respond_to?(:summary)
314
-
315
- string_or_nil(entry.summary)
316
- end
317
-
318
- def extract_content
319
- CONTENT_METHODS.each do |method|
320
- next unless entry.respond_to?(method)
321
-
322
- value = string_or_nil(entry.public_send(method))
323
- return value if value.present?
324
- end
325
- nil
326
- end
327
-
328
- def extract_timestamp
329
- TIMESTAMP_METHODS.each do |method|
330
- next unless entry.respond_to?(method)
331
-
332
- value = entry.public_send(method)
333
- return value if value.present?
334
- end
335
- nil
336
- end
337
-
338
- def extract_updated_timestamp
339
- return entry.updated if entry.respond_to?(:updated) && entry.updated.present?
340
-
341
- nil
342
- end
343
-
344
- def extract_author
345
- string_or_nil(entry.author) if entry.respond_to?(:author)
346
- end
347
-
348
- def extract_authors
349
- values = []
350
-
351
- if entry.respond_to?(:rss_authors)
352
- values.concat(Array(entry.rss_authors).map { |value| string_or_nil(value) })
353
- end
354
-
355
- if entry.respond_to?(:dc_creators)
356
- values.concat(Array(entry.dc_creators).map { |value| string_or_nil(value) })
357
- elsif entry.respond_to?(:dc_creator)
358
- values << string_or_nil(entry.dc_creator)
359
- end
360
-
361
- if entry.respond_to?(:author_nodes)
362
- values.concat(
363
- Array(entry.author_nodes).map do |node|
364
- next unless node.respond_to?(:name) || node.respond_to?(:email) || node.respond_to?(:uri)
365
-
366
- string_or_nil(node.name) || string_or_nil(node.email) || string_or_nil(node.uri)
367
- end
368
- )
369
- end
370
-
371
- if json_entry?
372
- if entry.respond_to?(:json) && entry.json
373
- json_authors = Array(entry.json["authors"]).map { |author| string_or_nil(author["name"]) }
374
- values.concat(json_authors)
375
- values << string_or_nil(entry.json.dig("author", "name"))
376
- end
377
- end
378
-
379
- primary_author = extract_author
380
- values << primary_author if primary_author.present?
381
-
382
- values.compact.uniq
383
- end
384
-
385
- def extract_categories
386
- list = []
387
- list.concat(Array(entry.categories)) if entry.respond_to?(:categories)
388
- list.concat(Array(entry.tags)) if entry.respond_to?(:tags)
389
- if json_entry? && entry.respond_to?(:json) && entry.json
390
- list.concat(Array(entry.json["tags"]))
391
- end
392
- sanitize_string_array(list)
149
+ def content_extractor
150
+ @content_extractor ||= ContentExtractor.new(source: source)
393
151
  end
394
152
 
395
- def extract_tags
396
- tags = []
397
-
398
- tags.concat(Array(entry.tags)) if entry.respond_to?(:tags)
399
-
400
- if json_entry? && entry.respond_to?(:json) && entry.json
401
- tags.concat(Array(entry.json["tags"]))
402
- end
403
-
404
- tags = extract_categories if tags.empty? && entry.respond_to?(:categories)
405
-
406
- sanitize_string_array(tags)
407
- end
408
-
409
- def extract_keywords
410
- keywords = []
411
- keywords.concat(split_keywords(entry.media_keywords_raw)) if entry.respond_to?(:media_keywords_raw)
412
- keywords.concat(split_keywords(entry.itunes_keywords_raw)) if entry.respond_to?(:itunes_keywords_raw)
413
- sanitize_string_array(keywords)
414
- end
415
-
416
- def extract_enclosures
417
- enclosures = []
418
-
419
- if entry.respond_to?(:enclosure_nodes)
420
- Array(entry.enclosure_nodes).each do |node|
421
- url = string_or_nil(node&.url)
422
- next if url.blank?
423
-
424
- enclosures << {
425
- "url" => url,
426
- "type" => string_or_nil(node&.type),
427
- "length" => safe_integer(node&.length),
428
- "source" => "rss_enclosure"
429
- }.compact
430
- end
431
- end
432
-
433
- if atom_entry? && entry.respond_to?(:link_nodes)
434
- Array(entry.link_nodes).each do |link|
435
- next unless string_or_nil(link&.rel)&.downcase == "enclosure"
436
-
437
- url = string_or_nil(link&.href)
438
- next if url.blank?
439
-
440
- enclosures << {
441
- "url" => url,
442
- "type" => string_or_nil(link&.type),
443
- "length" => safe_integer(link&.length),
444
- "source" => "atom_link"
445
- }.compact
446
- end
447
- end
448
-
449
- if json_entry? && entry.respond_to?(:json) && entry.json
450
- Array(entry.json["attachments"]).each do |attachment|
451
- url = string_or_nil(attachment["url"])
452
- next if url.blank?
453
-
454
- enclosures << {
455
- "url" => url,
456
- "type" => string_or_nil(attachment["mime_type"]),
457
- "length" => safe_integer(attachment["size_in_bytes"]),
458
- "duration" => safe_integer(attachment["duration_in_seconds"]),
459
- "title" => string_or_nil(attachment["title"]),
460
- "source" => "json_feed_attachment"
461
- }.compact
462
- end
463
- end
464
-
465
- enclosures.uniq
466
- end
467
-
468
- def extract_media_thumbnail_url
469
- if entry.respond_to?(:media_thumbnail_nodes)
470
- thumbnail = Array(entry.media_thumbnail_nodes).find { |node| string_or_nil(node&.url).present? }
471
- return string_or_nil(thumbnail&.url) if thumbnail
472
- end
473
-
474
- string_or_nil(entry.image) if entry.respond_to?(:image)
475
- end
476
-
477
- def extract_media_content
478
- contents = []
479
-
480
- if entry.respond_to?(:media_content_nodes)
481
- Array(entry.media_content_nodes).each do |node|
482
- url = string_or_nil(node&.url)
483
- next if url.blank?
484
-
485
- contents << {
486
- "url" => url,
487
- "type" => string_or_nil(node&.type),
488
- "medium" => string_or_nil(node&.medium),
489
- "height" => safe_integer(node&.height),
490
- "width" => safe_integer(node&.width),
491
- "file_size" => safe_integer(node&.file_size),
492
- "duration" => safe_integer(node&.duration),
493
- "expression" => string_or_nil(node&.expression)
494
- }.compact
495
- end
496
- end
497
-
498
- contents.uniq
499
- end
500
-
501
- def extract_language
502
- if entry.respond_to?(:language)
503
- return string_or_nil(entry.language)
504
- end
505
-
506
- if json_entry? && entry.respond_to?(:json) && entry.json
507
- return string_or_nil(entry.json["language"])
508
- end
509
-
510
- nil
511
- end
512
-
513
- def extract_copyright
514
- if entry.respond_to?(:copyright)
515
- return string_or_nil(entry.copyright)
516
- end
517
-
518
- if json_entry? && entry.respond_to?(:json) && entry.json
519
- return string_or_nil(entry.json["copyright"])
520
- end
521
-
522
- nil
523
- end
524
-
525
- def extract_comments_url
526
- string_or_nil(entry.comments) if entry.respond_to?(:comments)
527
- end
528
-
529
- def extract_comments_count
530
- raw = nil
531
- raw ||= entry.slash_comments_raw if entry.respond_to?(:slash_comments_raw)
532
- raw ||= entry.comments_count if entry.respond_to?(:comments_count)
533
- safe_integer(raw)
534
- end
535
-
536
- def extract_metadata
537
- return {} unless entry.respond_to?(:to_h)
538
-
539
- normalized = normalize_metadata(entry.to_h)
540
- return {} if normalized.blank?
541
-
542
- { METADATA_ROOT_KEY => normalized }
543
- end
544
-
545
- def generate_fingerprint(title, url, content)
546
- Digest::SHA256.hexdigest(
547
- [
548
- title.to_s,
549
- url.to_s,
550
- content.to_s
551
- ].join(FINGERPRINT_SEPARATOR)
552
- )
553
- end
554
-
555
- def string_or_nil(value)
556
- return value unless value.is_a?(String)
557
-
558
- value.strip.presence
559
- end
560
-
561
- def sanitize_string_array(values)
562
- Array(values).map { |value| string_or_nil(value) }.compact.uniq
563
- end
564
-
565
- def split_keywords(value)
566
- return [] if value.nil?
567
-
568
- string = string_or_nil(value)
569
- return [] if string.blank?
570
-
571
- string.split(KEYWORD_SEPARATORS).map { |keyword| keyword.strip.presence }.compact
572
- end
573
-
574
- def safe_integer(value)
575
- return if value.nil?
576
- return value if value.is_a?(Integer)
577
-
578
- string = value.to_s.strip
579
- return if string.blank?
580
-
581
- Integer(string, 10)
582
- rescue ArgumentError
583
- nil
584
- end
585
-
586
- def json_entry?
587
- defined?(Feedjira::Parser::JSONFeedItem) && entry.is_a?(Feedjira::Parser::JSONFeedItem)
588
- end
589
-
590
- def atom_entry?
591
- defined?(Feedjira::Parser::AtomEntry) && entry.is_a?(Feedjira::Parser::AtomEntry)
592
- end
593
-
594
- def normalize_metadata(value)
595
- JSON.parse(JSON.generate(value))
596
- rescue JSON::GeneratorError, JSON::ParserError, TypeError
597
- {}
598
- end
153
+ # Forwarding methods for backward compatibility with tests
154
+ def process_feed_content(raw_content, title:) = content_extractor.process_feed_content(raw_content, title: title)
155
+ def should_process_feed_content?(raw_content) = content_extractor.should_process_feed_content?(raw_content)
156
+ def feed_content_parser_class = content_extractor.feed_content_parser_class
157
+ def wrap_content_for_readability(content, title:) = content_extractor.wrap_content_for_readability(content, title: title)
158
+ def default_feed_readability_options = content_extractor.default_feed_readability_options
159
+ def build_feed_content_metadata(result:, raw_content:, processed_content:)
160
+ content_extractor.build_feed_content_metadata(result: result, raw_content: raw_content, processed_content: processed_content)
161
+ end
162
+ def html_fragment?(value) = content_extractor.html_fragment?(value)
163
+ def deep_copy(value) = content_extractor.deep_copy(value)
164
+ def string_or_nil(value) = entry_parser.string_or_nil(value)
165
+ def sanitize_string_array(values) = entry_parser.sanitize_string_array(values)
166
+ def split_keywords(value) = entry_parser.split_keywords(value)
167
+ def safe_integer(value) = entry_parser.safe_integer(value)
168
+ def json_entry? = entry_parser.json_entry?
169
+ def atom_entry? = entry_parser.atom_entry?
170
+ def normalize_metadata(value) = entry_parser.normalize_metadata(value)
171
+ def generate_fingerprint(title, url, content) = entry_parser.generate_fingerprint(title, url, content)
599
172
  end
600
173
  end
601
174
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  class BundleInstaller
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "thor"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  class DependencyChecker
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  module Detectors
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "pathname"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "pathname"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  class InstallGenerator
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "fileutils"
2
4
  require "pathname"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "pathname"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "thor"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  module Requirements
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "open3"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  module Verification
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  module Verification
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "json"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  module Verification
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
4
  module Setup
3
5
  module Verification
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "fileutils"
2
4
  require "pathname"
3
5
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "pathname"
2
4
 
3
5
  module SourceMonitor
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module SourceMonitor
2
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
3
5
  end