source_monitor 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. checksums.yaml +4 -4
  2. data/.claude/agents/rails-concern.md +464 -0
  3. data/.claude/agents/rails-controller.md +424 -0
  4. data/.claude/agents/rails-hotwire.md +446 -0
  5. data/.claude/agents/rails-implement.md +374 -0
  6. data/.claude/agents/rails-job.md +334 -0
  7. data/.claude/agents/rails-lint.md +294 -0
  8. data/.claude/agents/rails-mailer.md +371 -0
  9. data/.claude/agents/rails-migration.md +449 -0
  10. data/.claude/agents/rails-model.md +420 -0
  11. data/.claude/agents/rails-policy.md +443 -0
  12. data/.claude/agents/rails-presenter.md +427 -0
  13. data/.claude/agents/rails-query.md +412 -0
  14. data/.claude/agents/rails-review.md +490 -0
  15. data/.claude/agents/rails-service.md +458 -0
  16. data/.claude/agents/rails-state-records.md +465 -0
  17. data/.claude/agents/rails-tdd.md +314 -0
  18. data/.claude/agents/rails-test.md +441 -0
  19. data/.claude/agents/rails-view-component.md +418 -0
  20. data/.claude/hooks/block-secrets.sh +52 -0
  21. data/.claude/settings.json +85 -0
  22. data/.claude/skills/action-cable-patterns/SKILL.md +296 -0
  23. data/.claude/skills/action-mailer-patterns/SKILL.md +295 -0
  24. data/.claude/skills/active-storage-setup/SKILL.md +311 -0
  25. data/.claude/skills/api-versioning/SKILL.md +294 -0
  26. data/.claude/skills/authentication-flow/SKILL.md +335 -0
  27. data/.claude/skills/authentication-flow/reference/current.md +248 -0
  28. data/.claude/skills/authentication-flow/reference/passwordless.md +253 -0
  29. data/.claude/skills/authentication-flow/reference/sessions.md +201 -0
  30. data/.claude/skills/authorization-pundit/SKILL.md +462 -0
  31. data/.claude/skills/caching-strategies/SKILL.md +350 -0
  32. data/.claude/skills/database-migrations/SKILL.md +354 -0
  33. data/.claude/skills/form-object-patterns/SKILL.md +399 -0
  34. data/.claude/skills/hotwire-patterns/SKILL.md +247 -0
  35. data/.claude/skills/hotwire-patterns/reference/stimulus.md +307 -0
  36. data/.claude/skills/hotwire-patterns/reference/tailwind-integration.md +112 -0
  37. data/.claude/skills/hotwire-patterns/reference/turbo-frames.md +158 -0
  38. data/.claude/skills/hotwire-patterns/reference/turbo-streams.md +218 -0
  39. data/.claude/skills/i18n-patterns/SKILL.md +320 -0
  40. data/.claude/skills/install/SKILL.md +367 -0
  41. data/.claude/skills/performance-optimization/SKILL.md +311 -0
  42. data/.claude/skills/rails-architecture/SKILL.md +259 -0
  43. data/.claude/skills/rails-architecture/reference/error-handling.md +333 -0
  44. data/.claude/skills/rails-architecture/reference/event-tracking.md +142 -0
  45. data/.claude/skills/rails-architecture/reference/layer-interactions.md +417 -0
  46. data/.claude/skills/rails-architecture/reference/multi-tenancy.md +152 -0
  47. data/.claude/skills/rails-architecture/reference/query-patterns.md +342 -0
  48. data/.claude/skills/rails-architecture/reference/service-patterns.md +286 -0
  49. data/.claude/skills/rails-architecture/reference/state-records.md +250 -0
  50. data/.claude/skills/rails-architecture/reference/testing-strategy.md +326 -0
  51. data/.claude/skills/rails-concern/SKILL.md +399 -0
  52. data/.claude/skills/rails-controller/SKILL.md +336 -0
  53. data/.claude/skills/rails-model-generator/SKILL.md +321 -0
  54. data/.claude/skills/rails-model-generator/reference/validations.md +298 -0
  55. data/.claude/skills/rails-presenter/SKILL.md +274 -0
  56. data/.claude/skills/rails-query-object/SKILL.md +289 -0
  57. data/.claude/skills/rails-service-object/SKILL.md +349 -0
  58. data/.claude/skills/solid-queue-setup/SKILL.md +307 -0
  59. data/.claude/skills/tdd-cycle/SKILL.md +359 -0
  60. data/.claude/skills/viewcomponent-patterns/SKILL.md +333 -0
  61. data/.gitignore +1 -0
  62. data/.rubocop.yml +2 -0
  63. data/.ruby-version +1 -1
  64. data/.vbw-planning/.notification-log.jsonl +192 -0
  65. data/.vbw-planning/.session-log.jsonl +871 -0
  66. data/.vbw-planning/PROJECT.md +51 -0
  67. data/.vbw-planning/REQUIREMENTS.md +50 -0
  68. data/.vbw-planning/SHIPPED.md +28 -0
  69. data/.vbw-planning/codebase/ARCHITECTURE.md +147 -0
  70. data/.vbw-planning/codebase/CONCERNS.md +99 -0
  71. data/.vbw-planning/codebase/CONVENTIONS.md +97 -0
  72. data/.vbw-planning/codebase/DEPENDENCIES.md +100 -0
  73. data/.vbw-planning/codebase/INDEX.md +86 -0
  74. data/.vbw-planning/codebase/META.md +42 -0
  75. data/.vbw-planning/codebase/PATTERNS.md +262 -0
  76. data/.vbw-planning/codebase/STACK.md +101 -0
  77. data/.vbw-planning/codebase/STRUCTURE.md +324 -0
  78. data/.vbw-planning/codebase/TESTING.md +154 -0
  79. data/.vbw-planning/config.json +12 -0
  80. data/.vbw-planning/discovery.json +24 -0
  81. data/.vbw-planning/milestones/default/ROADMAP.md +115 -0
  82. data/.vbw-planning/milestones/default/STATE.md +83 -0
  83. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-01-SUMMARY.md +56 -0
  84. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-01.md +187 -0
  85. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-02-SUMMARY.md +64 -0
  86. data/.vbw-planning/milestones/default/phases/01-coverage-analysis-quick-wins/PLAN-02.md +137 -0
  87. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-01-SUMMARY.md +67 -0
  88. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-01.md +142 -0
  89. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-02-SUMMARY.md +64 -0
  90. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-02.md +138 -0
  91. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-03-SUMMARY.md +85 -0
  92. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-03.md +147 -0
  93. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-04-SUMMARY.md +63 -0
  94. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-04.md +129 -0
  95. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-05-SUMMARY.md +74 -0
  96. data/.vbw-planning/milestones/default/phases/02-critical-path-test-coverage/PLAN-05.md +154 -0
  97. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/03-VERIFICATION-wave1.md +303 -0
  98. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/03-VERIFICATION.md +510 -0
  99. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-01-SUMMARY.md +61 -0
  100. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-01.md +161 -0
  101. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-02-SUMMARY.md +66 -0
  102. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-02.md +132 -0
  103. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-03-SUMMARY.md +59 -0
  104. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-03.md +171 -0
  105. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-04-SUMMARY.md +56 -0
  106. data/.vbw-planning/milestones/default/phases/03-large-file-refactoring/PLAN-04.md +152 -0
  107. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/04-CONTEXT.md +33 -0
  108. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-01-SUMMARY.md +42 -0
  109. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-01.md +119 -0
  110. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-02-SUMMARY.md +52 -0
  111. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-02.md +195 -0
  112. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-03-SUMMARY.md +79 -0
  113. data/.vbw-planning/milestones/default/phases/04-code-quality-conventions-cleanup/PLAN-03.md +130 -0
  114. data/CHANGELOG.md +28 -0
  115. data/CLAUDE.md +179 -0
  116. data/Gemfile +8 -0
  117. data/Gemfile.lock +114 -101
  118. data/Rakefile +2 -0
  119. data/app/assets/builds/source_monitor/application.css +2076 -0
  120. data/app/assets/builds/source_monitor/application.js +2758 -0
  121. data/app/assets/builds/source_monitor/application.js.map +7 -0
  122. data/app/controllers/source_monitor/application_controller.rb +2 -0
  123. data/app/controllers/source_monitor/health_controller.rb +2 -0
  124. data/app/controllers/source_monitor/import_sessions/bulk_configuration.rb +106 -0
  125. data/app/controllers/source_monitor/import_sessions/entry_annotation.rb +187 -0
  126. data/app/controllers/source_monitor/import_sessions/health_check_management.rb +112 -0
  127. data/app/controllers/source_monitor/import_sessions/opml_parser.rb +130 -0
  128. data/app/controllers/source_monitor/import_sessions_controller.rb +6 -507
  129. data/app/controllers/source_monitor/items_controller.rb +2 -0
  130. data/app/controllers/source_monitor/sources_controller.rb +0 -14
  131. data/app/helpers/source_monitor/application_helper.rb +4 -112
  132. data/app/helpers/source_monitor/health_badge_helper.rb +69 -0
  133. data/app/helpers/source_monitor/table_sort_helper.rb +53 -0
  134. data/app/jobs/source_monitor/application_job.rb +2 -0
  135. data/app/models/source_monitor/application_record.rb +2 -0
  136. data/app/models/source_monitor/log_entry.rb +0 -2
  137. data/config/coverage_baseline.json +217 -1862
  138. data/config/routes.rb +2 -0
  139. data/db/migrate/20251009103000_add_feed_content_readability_to_sources.rb +2 -0
  140. data/db/migrate/20251014171659_add_performance_indexes.rb +2 -0
  141. data/db/migrate/20251014172525_add_fetch_status_check_constraint.rb +2 -0
  142. data/db/migrate/20251108120116_refresh_fetch_status_constraint.rb +2 -0
  143. data/db/migrate/20260210204022_add_composite_index_to_log_entries.rb +17 -0
  144. data/lib/source_monitor/assets/bundler.rb +2 -0
  145. data/lib/source_monitor/assets.rb +2 -0
  146. data/lib/source_monitor/configuration/authentication_settings.rb +62 -0
  147. data/lib/source_monitor/configuration/events.rb +60 -0
  148. data/lib/source_monitor/configuration/fetching_settings.rb +27 -0
  149. data/lib/source_monitor/configuration/health_settings.rb +27 -0
  150. data/lib/source_monitor/configuration/http_settings.rb +43 -0
  151. data/lib/source_monitor/configuration/model_definition.rb +108 -0
  152. data/lib/source_monitor/configuration/models.rb +36 -0
  153. data/lib/source_monitor/configuration/realtime_settings.rb +95 -0
  154. data/lib/source_monitor/configuration/retention_settings.rb +45 -0
  155. data/lib/source_monitor/configuration/scraper_registry.rb +67 -0
  156. data/lib/source_monitor/configuration/scraping_settings.rb +39 -0
  157. data/lib/source_monitor/configuration/validation_definition.rb +32 -0
  158. data/lib/source_monitor/configuration.rb +12 -579
  159. data/lib/source_monitor/dashboard/queries/recent_activity_query.rb +138 -0
  160. data/lib/source_monitor/dashboard/queries/stats_query.rb +71 -0
  161. data/lib/source_monitor/dashboard/queries.rb +2 -195
  162. data/lib/source_monitor/engine.rb +2 -0
  163. data/lib/source_monitor/fetching/feed_fetcher/adaptive_interval.rb +141 -0
  164. data/lib/source_monitor/fetching/feed_fetcher/entry_processor.rb +89 -0
  165. data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +200 -0
  166. data/lib/source_monitor/fetching/feed_fetcher.rb +37 -379
  167. data/lib/source_monitor/items/item_creator/content_extractor.rb +113 -0
  168. data/lib/source_monitor/items/item_creator/entry_parser/media_extraction.rb +96 -0
  169. data/lib/source_monitor/items/item_creator/entry_parser.rb +294 -0
  170. data/lib/source_monitor/items/item_creator.rb +28 -455
  171. data/lib/source_monitor/setup/bundle_installer.rb +2 -0
  172. data/lib/source_monitor/setup/cli.rb +2 -0
  173. data/lib/source_monitor/setup/dependency_checker.rb +2 -0
  174. data/lib/source_monitor/setup/detectors.rb +2 -0
  175. data/lib/source_monitor/setup/gemfile_editor.rb +2 -0
  176. data/lib/source_monitor/setup/initializer_patcher.rb +2 -0
  177. data/lib/source_monitor/setup/install_generator.rb +2 -0
  178. data/lib/source_monitor/setup/migration_installer.rb +2 -0
  179. data/lib/source_monitor/setup/node_installer.rb +2 -0
  180. data/lib/source_monitor/setup/prompter.rb +2 -0
  181. data/lib/source_monitor/setup/requirements.rb +2 -0
  182. data/lib/source_monitor/setup/shell_runner.rb +2 -0
  183. data/lib/source_monitor/setup/verification/action_cable_verifier.rb +2 -0
  184. data/lib/source_monitor/setup/verification/printer.rb +2 -0
  185. data/lib/source_monitor/setup/verification/result.rb +2 -0
  186. data/lib/source_monitor/setup/verification/runner.rb +2 -0
  187. data/lib/source_monitor/setup/verification/solid_queue_verifier.rb +2 -0
  188. data/lib/source_monitor/setup/verification/telemetry_logger.rb +2 -0
  189. data/lib/source_monitor/setup/workflow.rb +2 -0
  190. data/lib/source_monitor/version.rb +3 -1
  191. data/lib/source_monitor.rb +140 -58
  192. data/lib/tasks/source_monitor_assets.rake +2 -0
  193. data/lib/tasks/source_monitor_setup.rake +2 -0
  194. data/lib/tasks/source_monitor_tasks.rake +2 -0
  195. data/source_monitor.gemspec +3 -1
  196. metadata +144 -4
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+
5
+ module SourceMonitor
6
+ module Items
7
+ class ItemCreator
8
# Optionally runs HTML feed content through the readability parser and
# reports the outcome as a metadata hash alongside the resulting content.
class ContentExtractor
  # Matches an opening HTML tag such as `<p>`, `<br/>` or `<a href="x">`.
  # The tag name has to start with a letter immediately after `<`, so prose
  # like "I <3 Ruby" or "a < b" is not mistaken for markup.
  HTML_TAG_PATTERN = /<[a-z][a-z0-9:-]*(?:\s[^<>]*)?\/?>/i.freeze

  attr_reader :source

  # @param source [Object] consulted for `feed_content_readability_enabled?`
  #   when it responds to that method
  def initialize(source:)
    @source = source
  end

  # Processes raw feed content when processing is enabled for the source and
  # the content looks like HTML.
  #
  # @param raw_content [String, nil] content taken from the feed entry
  # @param title [Object] entry title used for the wrapper document
  # @return [Array] `[content, metadata]`; metadata is nil when processing was
  #   skipped, and describes the failure when any StandardError was raised
  #   (the raw content is returned unchanged in that case)
  def process_feed_content(raw_content, title:)
    return [ raw_content, nil ] unless should_process_feed_content?(raw_content)

    parser = feed_content_parser_class.new
    html = wrap_content_for_readability(raw_content, title: title)
    result = parser.parse(html: html, readability: default_feed_readability_options)

    # Fall back to the raw content when the parser produced nothing usable.
    processed_content = result.content.presence || raw_content
    metadata = build_feed_content_metadata(
      result: result,
      raw_content: raw_content,
      processed_content: processed_content
    )

    [ processed_content, metadata.presence ]
  rescue StandardError => error
    # Best effort: never let content processing break item creation.
    failure = {
      "status" => "failed",
      "strategy" => "readability",
      "applied" => false,
      "changed" => false,
      "error_class" => error.class.name,
      "error_message" => error.message
    }
    [ raw_content, failure ]
  end

  # @return [Boolean] true when the source opts in, and the content is
  #   present and contains something that looks like an HTML tag
  def should_process_feed_content?(raw_content)
    return false unless source.respond_to?(:feed_content_readability_enabled?)
    return false unless source.feed_content_readability_enabled?

    raw_content.present? && html_fragment?(raw_content)
  end

  # @return [Class] parser class instantiated for each processed entry
  def feed_content_parser_class
    SourceMonitor::Scrapers::Parsers::ReadabilityParser
  end

  # Wraps a content fragment in a minimal HTML document.
  #
  # The title is coerced with `to_s` before escaping so that a non-String
  # title does not make `CGI.escapeHTML` raise.
  #
  # @return [String] full HTML document containing +content+ in its body
  def wrap_content_for_readability(content, title:)
    safe_title = title.present? ? CGI.escapeHTML(title.to_s) : "Feed Entry"
    <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
      <meta charset="utf-8">
      <title>#{safe_title}</title>
      </head>
      <body>
      #{content}
      </body>
      </html>
    HTML
  end

  # @return [Hash] independent copy of the default readability options, or an
  #   empty hash when no defaults are configured
  def default_feed_readability_options
    default = SourceMonitor::Scrapers::Readability.default_settings[:readability]
    return {} unless default

    deep_copy(default)
  end

  # Summarises a parser result as a string-keyed hash with nil values removed.
  #
  # @return [Hash] keys: "strategy", "status", "applied", "changed", and, when
  #   available, "readability_text_length" and "title"
  def build_feed_content_metadata(result:, raw_content:, processed_content:)
    metadata = {
      "strategy" => result.strategy&.to_s,
      "status" => result.status&.to_s,
      "applied" => result.content.present?,
      "changed" => processed_content != raw_content
    }

    text_length = result.metadata && result.metadata[:readability_text_length]
    metadata["readability_text_length"] = text_length if text_length
    metadata["title"] = result.title if result.title.present?

    metadata.compact
  end

  # @return [Boolean] true when +value+ contains an opening HTML tag
  def html_fragment?(value)
    value.to_s.match?(HTML_TAG_PATTERN)
  end

  # Recursively duplicates hashes and arrays. Uses `deep_dup` when the value
  # provides it; values that cannot be duplicated are returned as-is.
  def deep_copy(value)
    return value.deep_dup if value.respond_to?(:deep_dup)

    case value
    when Hash
      value.each_with_object(value.class.new) do |(key, nested), copy|
        copy[key] = deep_copy(nested)
      end
    when Array
      value.map { |element| deep_copy(element) }
    else
      value.dup
    end
  rescue TypeError
    value
  end
end
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SourceMonitor
4
+ module Items
5
+ class ItemCreator
6
+ class EntryParser
7
# Media-related extraction for feed entries.
#
# The including object must provide `entry`, `string_or_nil`,
# `safe_integer`, `atom_entry?` and `json_entry?`.
module MediaExtraction
  # Collects enclosures from RSS enclosure nodes, Atom `rel="enclosure"`
  # links and JSON Feed attachments, in that order.
  #
  # @return [Array<Hash>] unique string-keyed hashes; each has "url" and
  #   "source", plus whichever optional attributes were available
  def extract_enclosures
    enclosures = []
    enclosures.concat(rss_enclosures)
    enclosures.concat(atom_link_enclosures)
    enclosures.concat(json_feed_attachments)
    enclosures.uniq
  end

  # @return [String, nil] URL of the first media thumbnail node that has one,
  #   otherwise the entry's `image` when the entry exposes it
  def extract_media_thumbnail_url
    if entry.respond_to?(:media_thumbnail_nodes)
      thumbnail = Array(entry.media_thumbnail_nodes).find { |node| string_or_nil(node&.url).present? }
      return string_or_nil(thumbnail.url) if thumbnail
    end

    string_or_nil(entry.image) if entry.respond_to?(:image)
  end

  # @return [Array<Hash>] unique string-keyed hashes describing the entry's
  #   media content nodes; nodes without a URL are skipped
  def extract_media_content
    return [] unless entry.respond_to?(:media_content_nodes)

    contents = Array(entry.media_content_nodes).each_with_object([]) do |node, found|
      url = string_or_nil(node&.url)
      next if url.blank?

      found << {
        "url" => url,
        "type" => string_or_nil(node&.type),
        "medium" => string_or_nil(node&.medium),
        "height" => safe_integer(node&.height),
        "width" => safe_integer(node&.width),
        "file_size" => safe_integer(node&.file_size),
        "duration" => safe_integer(node&.duration),
        "expression" => string_or_nil(node&.expression)
      }.compact
    end

    contents.uniq
  end

  private

  # Enclosures read from the entry's `enclosure_nodes`, when it has them.
  def rss_enclosures
    return [] unless entry.respond_to?(:enclosure_nodes)

    Array(entry.enclosure_nodes).each_with_object([]) do |node, found|
      url = string_or_nil(node&.url)
      next if url.blank?

      found << {
        "url" => url,
        "type" => string_or_nil(node&.type),
        "length" => safe_integer(node&.length),
        "source" => "rss_enclosure"
      }.compact
    end
  end

  # Enclosures read from Atom link nodes whose rel is "enclosure".
  def atom_link_enclosures
    return [] unless atom_entry? && entry.respond_to?(:link_nodes)

    Array(entry.link_nodes).each_with_object([]) do |link, found|
      next unless string_or_nil(link&.rel)&.downcase == "enclosure"

      url = string_or_nil(link&.href)
      next if url.blank?

      found << {
        "url" => url,
        "type" => string_or_nil(link&.type),
        "length" => safe_integer(link&.length),
        "source" => "atom_link"
      }.compact
    end
  end

  # Enclosures read from the "attachments" list of a JSON feed entry.
  # Anything that is not a Hash is ignored so malformed feeds (nil, scalar
  # or string elements, or a non-array "attachments" value) neither raise
  # nor produce bogus enclosures.
  def json_feed_attachments
    return [] unless json_entry? && entry.respond_to?(:json)

    payload = entry.json
    return [] unless payload.is_a?(Hash)

    Array(payload["attachments"]).each_with_object([]) do |attachment, found|
      next unless attachment.is_a?(Hash)

      url = string_or_nil(attachment["url"])
      next if url.blank?

      found << {
        "url" => url,
        "type" => string_or_nil(attachment["mime_type"]),
        "length" => safe_integer(attachment["size_in_bytes"]),
        "duration" => safe_integer(attachment["duration_in_seconds"]),
        "title" => string_or_nil(attachment["title"]),
        "source" => "json_feed_attachment"
      }.compact
    end
  end
end
93
+ end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,294 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "source_monitor/items/item_creator/entry_parser/media_extraction"
4
+
5
+ module SourceMonitor
6
+ module Items
7
+ class ItemCreator
8
# Builds a hash of item attributes from a single feed entry.
#
# The entry is duck-typed: every reader is probed with `respond_to?` before
# it is called, so entries that expose only some accessors are handled.
class EntryParser
  include MediaExtraction

  # Entry readers tried, in order, to find the body content.
  CONTENT_METHODS = %i[content content_encoded summary].freeze
  # Entry readers tried, in order, to find the publication timestamp.
  TIMESTAMP_METHODS = %i[published updated].freeze
  # Keyword lists are split on runs of commas and/or semicolons.
  KEYWORD_SEPARATORS = /[,;]+/.freeze
  # Key under which the serialised entry is stored in the metadata hash.
  METADATA_ROOT_KEY = "feedjira_entry".freeze
  # NUL separator between the fields that make up the content fingerprint.
  FINGERPRINT_SEPARATOR = "\u0000".freeze
  # Author node readers tried, in order, to label an author.
  AUTHOR_NODE_READERS = %i[name email uri].freeze

  attr_reader :source, :entry

  # @param source [Object] feed source the entry belongs to
  # @param entry [Object] parsed feed entry
  # @param content_extractor [#process_feed_content] called as
  #   `process_feed_content(raw_content, title:)`; must return
  #   `[content, metadata]`
  def initialize(source:, entry:, content_extractor:)
    @source = source
    @entry = entry
    @content_extractor = content_extractor
  end

  # Extracts every supported attribute from the entry.
  #
  # @return [Hash] symbol-keyed attributes with nil values removed; the
  #   fingerprint is computed from the title, URL and processed content
  def parse
    url = extract_url
    title = string_or_nil(entry.title) if entry.respond_to?(:title)
    raw_content = extract_content
    content, content_processing_metadata = @content_extractor.process_feed_content(raw_content, title: title)
    fingerprint = generate_fingerprint(title, url, content)
    published_at = extract_timestamp
    updated_at_source = extract_updated_timestamp

    metadata = extract_metadata
    if content_processing_metadata.present?
      metadata = metadata.merge("feed_content_processing" => content_processing_metadata)
    end

    {
      guid: extract_guid,
      title: title,
      url: url,
      canonical_url: url,
      author: extract_author,
      authors: extract_authors,
      summary: extract_summary,
      content: content,
      published_at: published_at,
      updated_at_source: updated_at_source,
      categories: extract_categories,
      tags: extract_tags,
      keywords: extract_keywords,
      enclosures: extract_enclosures,
      media_thumbnail_url: extract_media_thumbnail_url,
      media_content: extract_media_content,
      language: extract_language,
      copyright: extract_copyright,
      comments_url: extract_comments_url,
      comments_count: extract_comments_count,
      metadata: metadata,
      content_fingerprint: fingerprint
    }.compact
  end

  # Prefers `entry_id`. Otherwise falls back to `id`, but only when there is
  # no URL or the id differs from it; an id that merely repeats the URL is
  # not used as a guid.
  #
  # @return [Object, nil]
  def extract_guid
    entry_guid = entry.respond_to?(:entry_id) ? string_or_nil(entry.entry_id) : nil
    return entry_guid if entry_guid.present?
    return unless entry.respond_to?(:id)

    entry_id = string_or_nil(entry.id)
    return if entry_id.blank?

    url = extract_url
    entry_id if url.blank? || entry_id != url
  end

  # Resolves the entry URL from, in order: `url`; the first link node with no
  # rel or rel "alternate" (else simply the first link node); the first
  # present value in `links`.
  #
  # @return [Object, nil]
  def extract_url
    if entry.respond_to?(:url)
      primary_url = string_or_nil(entry.url)
      return primary_url if primary_url.present?
    end

    if entry.respond_to?(:link_nodes)
      nodes = Array(entry.link_nodes)
      alternate = nodes.find do |node|
        rel = string_or_nil(node&.rel)&.downcase
        rel.nil? || rel == "alternate"
      end
      alternate ||= nodes.first
      href = string_or_nil(alternate&.href)
      return href if href.present?
    end

    if entry.respond_to?(:links)
      href = Array(entry.links).map { |link| string_or_nil(link) }.find(&:present?)
      return href if href.present?
    end

    nil
  end

  # @return [Object, nil] stripped summary, when the entry exposes one
  def extract_summary
    string_or_nil(entry.summary) if entry.respond_to?(:summary)
  end

  # @return [Object, nil] first present value among CONTENT_METHODS
  def extract_content
    CONTENT_METHODS.each do |method|
      next unless entry.respond_to?(method)

      value = string_or_nil(entry.public_send(method))
      return value if value.present?
    end
    nil
  end

  # @return [Object, nil] first present value among TIMESTAMP_METHODS
  def extract_timestamp
    TIMESTAMP_METHODS.each do |method|
      next unless entry.respond_to?(method)

      value = entry.public_send(method)
      return value if value.present?
    end
    nil
  end

  # @return [Object, nil] the entry's `updated` value when present
  def extract_updated_timestamp
    entry.updated if entry.respond_to?(:updated) && entry.updated.present?
  end

  # @return [Object, nil] stripped primary author, when the entry exposes one
  def extract_author
    string_or_nil(entry.author) if entry.respond_to?(:author)
  end

  # Gathers author names from every place the entry may expose them: RSS
  # authors, Dublin Core creators, author nodes, the JSON payload and finally
  # the primary author.
  #
  # @return [Array] unique, non-nil author values in discovery order
  def extract_authors
    values = []

    if entry.respond_to?(:rss_authors)
      values.concat(Array(entry.rss_authors).map { |value| string_or_nil(value) })
    end

    if entry.respond_to?(:dc_creators)
      values.concat(Array(entry.dc_creators).map { |value| string_or_nil(value) })
    elsif entry.respond_to?(:dc_creator)
      values << string_or_nil(entry.dc_creator)
    end

    if entry.respond_to?(:author_nodes)
      values.concat(Array(entry.author_nodes).map { |node| author_node_label(node) })
    end

    values.concat(json_author_names)

    primary_author = extract_author
    values << primary_author if primary_author.present?

    values.compact.uniq
  end

  # @return [Array] unique, stripped values from `categories`, `tags` and the
  #   JSON payload's "tags"
  def extract_categories
    list = []
    list.concat(Array(entry.categories)) if entry.respond_to?(:categories)
    list.concat(Array(entry.tags)) if entry.respond_to?(:tags)

    payload = json_payload
    list.concat(Array(payload["tags"])) if payload

    sanitize_string_array(list)
  end

  # @return [Array] unique, stripped values from `tags` and the JSON
  #   payload's "tags"; falls back to the categories when none were found
  def extract_tags
    tags = []
    tags.concat(Array(entry.tags)) if entry.respond_to?(:tags)

    payload = json_payload
    tags.concat(Array(payload["tags"])) if payload

    tags = extract_categories if tags.empty? && entry.respond_to?(:categories)

    sanitize_string_array(tags)
  end

  # @return [Array] unique keywords split out of the raw media and iTunes
  #   keyword values
  def extract_keywords
    keywords = []
    keywords.concat(split_keywords(entry.media_keywords_raw)) if entry.respond_to?(:media_keywords_raw)
    keywords.concat(split_keywords(entry.itunes_keywords_raw)) if entry.respond_to?(:itunes_keywords_raw)
    sanitize_string_array(keywords)
  end

  # Reads the entry's `language`; when that is missing or blank, falls back
  # to the JSON payload's "language".
  #
  # @return [Object, nil]
  def extract_language
    language = string_or_nil(entry.language) if entry.respond_to?(:language)
    return language unless language.nil?

    payload = json_payload
    string_or_nil(payload["language"]) if payload
  end

  # Reads the entry's `copyright`; when that is missing or blank, falls back
  # to the JSON payload's "copyright".
  #
  # @return [Object, nil]
  def extract_copyright
    copyright = string_or_nil(entry.copyright) if entry.respond_to?(:copyright)
    return copyright unless copyright.nil?

    payload = json_payload
    string_or_nil(payload["copyright"]) if payload
  end

  # @return [Object, nil] stripped `comments` value, when exposed
  def extract_comments_url
    string_or_nil(entry.comments) if entry.respond_to?(:comments)
  end

  # @return [Integer, nil] comment count parsed from `slash_comments_raw`,
  #   or from `comments_count` when the former is absent
  def extract_comments_count
    raw = entry.slash_comments_raw if entry.respond_to?(:slash_comments_raw)
    raw ||= entry.comments_count if entry.respond_to?(:comments_count)
    safe_integer(raw)
  end

  # @return [Hash] the entry's `to_h`, round-tripped through JSON and nested
  #   under METADATA_ROOT_KEY; empty when nothing could be serialised
  def extract_metadata
    return {} unless entry.respond_to?(:to_h)

    normalized = normalize_metadata(entry.to_h)
    return {} if normalized.blank?

    { METADATA_ROOT_KEY => normalized }
  end

  # @return [String] SHA-256 hex digest of title, URL and content joined by
  #   FINGERPRINT_SEPARATOR (nil parts count as empty strings)
  def generate_fingerprint(title, url, content)
    parts = [ title.to_s, url.to_s, content.to_s ]
    Digest::SHA256.hexdigest(parts.join(FINGERPRINT_SEPARATOR))
  end

  # Strips strings and turns blank ones into nil. Non-String values are
  # returned untouched.
  def string_or_nil(value)
    return value unless value.is_a?(String)

    value.strip.presence
  end

  # @return [Array] values passed through `string_or_nil`, without nils or
  #   duplicates
  def sanitize_string_array(values)
    Array(values).map { |value| string_or_nil(value) }.compact.uniq
  end

  # @return [Array<String>] non-blank keywords split on KEYWORD_SEPARATORS
  def split_keywords(value)
    return [] if value.nil?

    string = string_or_nil(value)
    return [] if string.blank?

    string.split(KEYWORD_SEPARATORS).map { |keyword| keyword.strip.presence }.compact
  end

  # @return [Integer, nil] +value+ as a base-10 integer, or nil when it is
  #   nil, blank or not a valid integer literal
  def safe_integer(value)
    return if value.nil?
    return value if value.is_a?(Integer)

    string = value.to_s.strip
    return if string.blank?

    Integer(string, 10)
  rescue ArgumentError
    nil
  end

  # @return [Boolean, nil] truthy when the entry is a Feedjira JSON feed item
  def json_entry?
    defined?(Feedjira::Parser::JSONFeedItem) && entry.is_a?(Feedjira::Parser::JSONFeedItem)
  end

  # @return [Boolean, nil] truthy when the entry is a Feedjira Atom entry
  def atom_entry?
    defined?(Feedjira::Parser::AtomEntry) && entry.is_a?(Feedjira::Parser::AtomEntry)
  end

  # Round-trips +value+ through JSON so only JSON-representable data remains.
  #
  # @return [Object] the normalised structure, or {} when it cannot be
  #   generated or parsed
  def normalize_metadata(value)
    JSON.parse(JSON.generate(value))
  rescue JSON::GeneratorError, JSON::ParserError, TypeError
    {}
  end

  private

  # The raw JSON payload of a JSON feed entry, or nil when the entry is not
  # a JSON entry, exposes no payload, or the payload is not a Hash.
  def json_payload
    return unless json_entry? && entry.respond_to?(:json)

    payload = entry.json
    payload if payload.is_a?(Hash)
  end

  # First non-nil value among the node's name, email and uri. Each reader is
  # probed individually, so nodes exposing only some of them do not raise.
  def author_node_label(node)
    AUTHOR_NODE_READERS.each do |reader|
      next unless node.respond_to?(reader)

      value = string_or_nil(node.public_send(reader))
      return value if value
    end
    nil
  end

  # Author names from the JSON payload's "authors" list and single "author"
  # object. Non-Hash elements are ignored instead of being indexed.
  def json_author_names
    payload = json_payload
    return [] unless payload

    # `+` builds a new array so the payload's own list is never mutated.
    candidates = Array(payload["authors"]) + [ payload["author"] ]
    candidates.map { |author| string_or_nil(author["name"]) if author.is_a?(Hash) }
  end
end
292
+ end
293
+ end
294
+ end