html2rss 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +323 -270
  3. data/exe/html2rss +6 -0
  4. data/html2rss.gemspec +18 -23
  5. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  6. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  7. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  8. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  9. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  10. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  11. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  12. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  13. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  14. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  15. data/lib/html2rss/attribute_post_processors.rb +28 -5
  16. data/lib/html2rss/cli.rb +29 -0
  17. data/lib/html2rss/config/channel.rb +117 -0
  18. data/lib/html2rss/config/selectors.rb +91 -0
  19. data/lib/html2rss/config.rb +71 -82
  20. data/lib/html2rss/item.rb +122 -46
  21. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  22. data/lib/html2rss/item_extractors/href.rb +20 -4
  23. data/lib/html2rss/item_extractors/html.rb +18 -6
  24. data/lib/html2rss/item_extractors/static.rb +18 -7
  25. data/lib/html2rss/item_extractors/text.rb +17 -5
  26. data/lib/html2rss/item_extractors.rb +75 -10
  27. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  28. data/lib/html2rss/rss_builder/channel.rb +21 -0
  29. data/lib/html2rss/rss_builder/item.rb +83 -0
  30. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  31. data/lib/html2rss/rss_builder.rb +96 -0
  32. data/lib/html2rss/utils.rb +94 -19
  33. data/lib/html2rss/version.rb +5 -1
  34. data/lib/html2rss.rb +57 -20
  35. metadata +53 -165
  36. data/.gitignore +0 -12
  37. data/.rspec +0 -4
  38. data/.rubocop.yml +0 -164
  39. data/.travis.yml +0 -25
  40. data/.yardopts +0 -6
  41. data/CHANGELOG.md +0 -221
  42. data/Gemfile +0 -8
  43. data/Gemfile.lock +0 -139
  44. data/bin/console +0 -15
  45. data/bin/setup +0 -8
  46. data/lib/html2rss/feed_builder.rb +0 -81
  47. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  48. data/support/logo.png +0 -0
data/exe/html2rss ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'html2rss/cli'
5
+
6
+ Html2rss::CLI.start(ARGV)
data/html2rss.gemspec CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  lib = File.expand_path('lib', __dir__)
2
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
5
  require 'html2rss/version'
@@ -8,46 +10,39 @@ Gem::Specification.new do |spec|
8
10
  spec.authors = ['Gil Desmarais']
9
11
  spec.email = ['html2rss@desmarais.de']
10
12
 
11
- spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
12
- spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
- spec.homepage = 'https://github.com/gildesmarais/html2rss'
13
+ spec.summary = 'Generates RSS feeds from websites by scraping a URL and using CSS selectors to extract item.'
14
+ spec.description = 'Supports JSON content, custom HTTP headers, and post-processing of extracted content.'
15
+ spec.homepage = 'https://github.com/html2rss/html2rss'
14
16
  spec.license = 'MIT'
15
- spec.required_ruby_version = '>= 2.5.0'
17
+ spec.required_ruby_version = '>= 3.1'
16
18
 
17
19
  if spec.respond_to?(:metadata)
18
20
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
19
- spec.metadata['changelog_uri'] = 'https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md'
21
+ spec.metadata['changelog_uri'] = "#{spec.homepage}/releases/tag/v#{spec.version}"
22
+ spec.metadata['rubygems_mfa_required'] = 'true'
20
23
  else
21
24
  raise 'RubyGems 2.0 or newer is required to protect against ' \
22
- 'public gem pushes.'
25
+ 'public gem pushes.'
23
26
  end
24
27
 
25
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
26
- f.match(%r{^(test|spec|features)/})
28
+ spec.files = `git ls-files -z`.split("\x0").select do |f|
29
+ f.match(%r{^(lib/|exe/|README.md|LICENSE|html2rss.gemspec)})
27
30
  end
28
31
  spec.bindir = 'exe'
29
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
33
  spec.require_paths = ['lib']
31
34
 
32
- spec.add_dependency 'activesupport', '>= 5', '< 7'
33
35
  spec.add_dependency 'addressable', '~> 2.7'
34
- spec.add_dependency 'builder'
35
- spec.add_dependency 'faraday', '~> 1.0'
36
- spec.add_dependency 'faraday_middleware'
36
+ spec.add_dependency 'faraday', '> 2.0.1', '< 3.0'
37
+ spec.add_dependency 'faraday-follow_redirects'
37
38
  spec.add_dependency 'kramdown'
38
39
  spec.add_dependency 'mime-types', '> 3.0'
39
40
  spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
41
+ spec.add_dependency 'regexp_parser'
40
42
  spec.add_dependency 'reverse_markdown', '~> 2.0'
41
- spec.add_dependency 'sanitize', '~> 5.0'
42
- spec.add_dependency 'to_regexp'
43
+ spec.add_dependency 'rss'
44
+ spec.add_dependency 'sanitize', '~> 6.0'
45
+ spec.add_dependency 'thor'
46
+ spec.add_dependency 'tzinfo'
43
47
  spec.add_dependency 'zeitwerk'
44
- spec.add_development_dependency 'bundler'
45
- spec.add_development_dependency 'byebug'
46
- spec.add_development_dependency 'rspec', '~> 3.0'
47
- spec.add_development_dependency 'rubocop'
48
- spec.add_development_dependency 'rubocop-performance'
49
- spec.add_development_dependency 'rubocop-rspec'
50
- spec.add_development_dependency 'simplecov'
51
- spec.add_development_dependency 'vcr'
52
- spec.add_development_dependency 'yard'
53
48
  end
@@ -1,9 +1,8 @@
1
- require 'to_regexp'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Html2rss
4
4
  module AttributePostProcessors
5
5
  ##
6
- #
7
6
  # Imagine this HTML:
8
7
  # <h1>Foo bar and boo</h1>
9
8
  #
@@ -19,23 +18,46 @@ module Html2rss
19
18
  # Would return:
20
19
  # 'Foo bar and baz'
21
20
  #
22
- # `pattern` can be a Regexp or a String.
21
+ # `pattern` can be a Regexp or a String. If it is a String, one pair of
22
+ # surrounding slashes ('/') is removed to keep backwards compatibility,
23
+ # and the remainder is parsed to build a Regexp.
23
24
  #
24
25
  # `replacement` can be a String or a Hash.
25
26
  #
26
27
  # See the doc on [String#gsub](https://ruby-doc.org/core/String.html#method-i-gsub) for more info.
27
28
  class Gsub
28
- def initialize(value, env)
29
+ ##
30
+ # @param value [String]
31
+ # @param context [Item::Context]
32
+ def initialize(value, context)
29
33
  @value = value
30
- options = env[:options]
31
- @pattern = options[:pattern].to_regexp || options[:pattern]
32
- @replacement = options[:replacement]
34
+ @options = context[:options]
33
35
  end
34
36
 
35
37
  ##
36
38
  # @return [String]
37
39
  def get
38
- @value.to_s.gsub(@pattern, @replacement)
40
+ @value.to_s.gsub(pattern, replacement)
41
+ end
42
+
43
+ private
44
+
45
+ ##
46
+ # @return [Regexp]
47
+ def pattern
48
+ pattern = @options[:pattern]
49
+ raise ArgumentError, 'The `pattern` option is missing' unless pattern
50
+
51
+ pattern.is_a?(String) ? Utils.build_regexp_from_string(pattern) : pattern
52
+ end
53
+
54
+ ##
55
+ # @return [Hash, String]
56
+ def replacement
57
+ replacement = @options[:replacement]
58
+ return replacement if replacement.is_a?(String) || replacement.is_a?(Hash)
59
+
60
+ raise ArgumentError, 'The `replacement` option must be a String or Hash'
39
61
  end
40
62
  end
41
63
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'reverse_markdown'
2
4
 
3
5
  module Html2rss
@@ -25,14 +27,17 @@ module Html2rss
25
27
  # Would return:
26
28
  # 'Lorem **ipsum** dolor'
27
29
  class HtmlToMarkdown
30
+ ##
31
+ # @param value [String]
32
+ # @param env [Item::Context]
28
33
  def initialize(value, env)
29
- @value = SanitizeHtml.new(value, env).get
34
+ @sanitized_value = SanitizeHtml.new(value, env).get
30
35
  end
31
36
 
32
37
  ##
33
38
  # @return [String] formatted in Markdown
34
39
  def get
35
- ReverseMarkdown.convert @value
40
+ ReverseMarkdown.convert(@sanitized_value)
36
41
  end
37
42
  end
38
43
  end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module AttributePostProcessors
5
+ module HtmlTransformers
6
+ ##
7
+ # Transformer that converts relative URLs to absolute URLs within specified HTML elements.
8
+ class TransformUrlsToAbsoluteOnes
9
+ URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
10
+
11
+ def initialize(channel_url)
12
+ @channel_url = channel_url
13
+ end
14
+
15
+ ##
16
+ # Transforms URLs to absolute ones.
17
+ def call(node_name:, node:, **_env)
18
+ return unless URL_ELEMENTS_WITH_URL_ATTRIBUTE.key?(node_name)
19
+
20
+ url_attribute = URL_ELEMENTS_WITH_URL_ATTRIBUTE[node_name]
21
+ url = node[url_attribute]
22
+ node[url_attribute] = Html2rss::Utils.build_absolute_url_from_relative(url, @channel_url)
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module AttributePostProcessors
5
+ module HtmlTransformers
6
+ ##
7
+ # Transformer that wraps <img> tags into <a> tags linking to `img.src`.
8
+ class WrapImgInA
9
+ ##
10
+ # Wraps <img> tags into <a> tags that link to `img.src`.
11
+ #
12
+ # @param node_name [String]
13
+ # @param node [Nokogiri::XML::Node]
14
+ # @return [nil]
15
+ def call(node_name:, node:, **_env)
16
+ return unless already_wrapped?(node_name, node)
17
+
18
+ wrap_image_in_anchor(node)
19
+ end
20
+
21
+ def already_wrapped?(node_name, node)
22
+ node_name == 'img' && node.parent.name != 'a'
23
+ end
24
+
25
+ private
26
+
27
+ ##
28
+ # Wraps the <img> node in an <a> tag.
29
+ #
30
+ # @param node [Nokogiri::XML::Node]
31
+ # @return [nil]
32
+ def wrap_image_in_anchor(node)
33
+ anchor = Nokogiri::XML::Node.new('a', node.document)
34
+ anchor['href'] = node['src']
35
+ node.add_next_sibling(anchor)
36
+ anchor.add_child(node.remove)
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -1,4 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'kramdown'
4
+ require_relative 'sanitize_html'
2
5
 
3
6
  module Html2rss
4
7
  module AttributePostProcessors
@@ -30,15 +33,21 @@ module Html2rss
30
33
  #
31
34
  # <p>Price: 12.34</p>
32
35
  class MarkdownToHtml
36
+ ##
37
+ # @param value [String] Markdown content to convert to HTML
38
+ # @param env [Item::Context] Context object providing additional environment details
33
39
  def initialize(value, env)
34
40
  @value = value
35
41
  @env = env
36
42
  end
37
43
 
38
44
  ##
39
- # @return [String] formatted in Markdown
45
+ # Converts Markdown to sanitized HTML.
46
+ #
47
+ # @return [String] Sanitized HTML content
40
48
  def get
41
- SanitizeHtml.new(Kramdown::Document.new(@value).to_html, @env).get
49
+ html_content = Kramdown::Document.new(@value).to_html
50
+ SanitizeHtml.new(html_content, @env).get
42
51
  end
43
52
  end
44
53
  end
@@ -1,5 +1,7 @@
1
- require 'active_support'
2
- require 'active_support/core_ext/time'
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+ require_relative '../utils'
3
5
 
4
6
  module Html2rss
5
7
  module AttributePostProcessors
@@ -24,15 +26,20 @@ module Html2rss
24
26
  #
25
27
  # It uses {https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse Time.parse}.
26
28
  class ParseTime
29
+ ##
30
+ # @param value [String] the time to parse
31
+ # @param env [Item::Context] Context object providing additional environment details
27
32
  def initialize(value, env)
28
33
  @value = value.to_s
29
34
  @time_zone = env[:config].time_zone
30
35
  end
31
36
 
32
37
  ##
33
- # @return [String] rfc822 formatted time
38
+ # Converts the provided time string to RFC822 format, taking into account the configured time zone.
39
+ #
40
+ # @return [String] RFC822 formatted time
34
41
  def get
35
- Time.use_zone(@time_zone) { Time.zone.parse(@value).rfc822 }
42
+ Utils.use_zone(@time_zone) { Time.parse(@value).rfc822 }
36
43
  end
37
44
  end
38
45
  end
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module AttributePostProcessors
3
5
  ##
4
6
  # Returns the URI as String.
7
+ # If the URL is relative, it builds an absolute one with the channel's URL as base.
5
8
  #
6
9
  # Imagine this HTML structure:
7
10
  #
@@ -19,14 +22,21 @@ module Html2rss
19
22
  # Would return:
20
23
  # 'http://why-not-use-a-link.uh'
21
24
  class ParseUri
22
- def initialize(value, _env)
25
+ ##
26
+ # @param value [String]
27
+ # @param context [Item::Context]
28
+ def initialize(value, context)
23
29
  @value = value
30
+ @config_url = context.config.url
24
31
  end
25
32
 
26
33
  ##
27
34
  # @return [String]
28
35
  def get
29
- URI(Html2rss::Utils.sanitize_url(@value)).to_s
36
+ Html2rss::Utils.build_absolute_url_from_relative(
37
+ Html2rss::Utils.sanitize_url(@value),
38
+ @config_url
39
+ ).to_s
30
40
  end
31
41
  end
32
42
  end
@@ -1,17 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sanitize'
4
+ require_relative 'html_transformers/transform_urls_to_absolute_ones'
5
+ require_relative 'html_transformers/wrap_img_in_a'
2
6
 
3
7
  module Html2rss
4
8
  module AttributePostProcessors
5
9
  ##
6
10
  # Returns sanitized HTML code as String.
7
11
  #
8
- # It adds:
12
+ # It sanitizes by using the [sanitize gem](https://github.com/rgrove/sanitize) with
13
+ # [Sanitize::Config::RELAXED](https://github.com/rgrove/sanitize#sanitizeconfigrelaxed).
14
+ #
15
+ # Furthermore, it adds:
9
16
  #
10
17
  # - `rel="nofollow noopener noreferrer"` to <a> tags
11
18
  # - `referrer-policy='no-referrer'` to <img> tags
12
- #
13
- # It also:
14
- #
15
19
  # - an <a> around every <img> tag whose direct parent is not an <a>,
16
20
  # linking to the <img>'s `src`.
17
21
  #
@@ -35,68 +39,60 @@ module Html2rss
35
39
  # Would return:
36
40
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
37
41
  class SanitizeHtml
38
- URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
39
- private_constant :URL_ELEMENTS_WITH_URL_ATTRIBUTE
40
-
42
+ ##
43
+ # @param value [String]
44
+ # @param env [Item::Context]
41
45
  def initialize(value, env)
42
46
  @value = value
43
47
  @channel_url = env[:config].url
44
48
  end
45
49
 
46
50
  ##
47
- # - uses the {https://github.com/rgrove/sanitize sanitize gem}
48
- # - uses the config {https://github.com/rgrove/sanitize#sanitizeconfigrelaxed Sanitize::Config::RELAXED}
49
- # - adds rel="nofollow noopener noreferrer" to a elements
50
- # - adds target="_blank" to a elements
51
51
  # @return [String]
52
52
  def get
53
- Sanitize.fragment(@value, sanitize_config).to_s.split.join(' ')
53
+ sanitized_html = Sanitize.fragment(@value, sanitize_config)
54
+ sanitized_html.to_s.gsub(/\s+/, ' ').strip
54
55
  end
55
56
 
56
57
  private
57
58
 
59
+ ##
60
+ # @return [Sanitize::Config]
58
61
  def sanitize_config
59
62
  Sanitize::Config.merge(
60
63
  Sanitize::Config::RELAXED,
61
64
  attributes: { all: %w[dir lang alt title translate] },
62
- add_attributes: {
63
- 'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
64
- 'img' => { 'referrer-policy' => 'no-referrer' }
65
- },
66
- transformers: [transform_urls_to_absolute_ones, wrap_img_in_a]
65
+ add_attributes:,
66
+ transformers: [
67
+ method(:transform_urls_to_absolute_ones),
68
+ method(:wrap_img_in_a)
69
+ ]
67
70
  )
68
71
  end
69
72
 
70
- def transform_urls_to_absolute_ones
71
- lambda do |env|
72
- return unless URL_ELEMENTS_WITH_URL_ATTRIBUTE.key?(env[:node_name])
73
-
74
- url_attribute = URL_ELEMENTS_WITH_URL_ATTRIBUTE[env[:node_name]]
75
- url = env[:node][url_attribute]
76
-
77
- return if URI(url).absolute?
78
-
79
- absolute_url = Html2rss::Utils.build_absolute_url_from_relative(url, @channel_url)
80
-
81
- env[:node][url_attribute] = absolute_url
82
- end
73
+ def add_attributes
74
+ {
75
+ 'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
76
+ 'img' => { 'referrer-policy' => 'no-referrer' }
77
+ }
83
78
  end
84
79
 
85
- def wrap_img_in_a
86
- lambda do |env|
87
- return if env[:node_name] != 'img'
88
-
89
- img = env[:node]
90
-
91
- return if img.parent.name == 'a'
92
-
93
- anchor = Nokogiri::XML::Node.new('a', img)
94
- anchor[:href] = img[:src]
95
-
96
- anchor.add_child img.dup
80
+ ##
81
+ # Wrapper for transform_urls_to_absolute_ones to pass the channel_url.
82
+ #
83
+ # @param env [Hash]
84
+ # @return [nil]
85
+ def transform_urls_to_absolute_ones(env)
86
+ HtmlTransformers::TransformUrlsToAbsoluteOnes.new(@channel_url).call(**env)
87
+ end
97
88
 
98
- img.replace(anchor)
99
- end
89
+ ##
90
+ # Wrapper for wrap_img_in_a.
91
+ #
92
+ # @param env [Hash]
93
+ # @return [nil]
94
+ def wrap_img_in_a(env)
95
+ HtmlTransformers::WrapImgInA.new.call(**env)
100
96
  end
101
97
  end
102
98
  end
@@ -1,6 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module AttributePostProcessors
3
- ## Returns a defined part of a String.
5
+ ##
6
+ # Returns a defined part of a String.
4
7
  #
5
8
  # Both parameters must be an Integer and they can be negative.
6
9
  # The +end+ parameter can be omitted, in that case it will not cut the
@@ -26,16 +29,23 @@ module Html2rss
26
29
  # Would return:
27
30
  # 'bar'
28
31
  class Substring
32
+ ##
33
+ # @param value [String] The original string to extract a substring from.
34
+ # @param env [Item::Context] Context object providing additional environment details.
29
35
  def initialize(value, env)
30
36
  @value = value
31
37
  @options = env[:options]
32
38
  end
33
39
 
34
40
  ##
35
- # @return [String]
41
+ # Extracts the substring from the original string based on the provided start and end indices.
42
+ #
43
+ # @return [String] The extracted substring.
36
44
  def get
37
- ending = @options.fetch(:end, @value.length).to_i
38
- @value[@options[:start].to_i..ending]
45
+ start_index = @options[:start].to_i
46
+ end_index = @options[:end]&.to_i || @value.length
47
+
48
+ @value[start_index..end_index]
39
49
  end
40
50
  end
41
51
  end
@@ -1,25 +1,28 @@
1
- require 'sanitize'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Html2rss
4
4
  module AttributePostProcessors
5
- ## Returns a formatted String according to the string pattern.
5
+ ##
6
+ # Returns a formatted String according to the string pattern.
6
7
  #
7
8
  # If +self+ is used, the selector's extracted value will be used.
8
9
  # It uses [Kernel#format](https://ruby-doc.org/core/Kernel.html#method-i-format)
9
10
  #
10
11
  # Imagine this HTML:
12
+ #
11
13
  # <li>
12
14
  # <h1>Product</h1>
13
15
  # <span class="price">23,42€</span>
14
16
  # </li>
15
17
  #
18
+ #
16
19
  # YAML usage example:
17
20
  #
18
21
  # selectors:
19
22
  # items:
20
23
  # selector: 'li'
21
24
  # price:
22
- # selector: '.price'
25
+ # selector: '.price'
23
26
  # title:
24
27
  # selector: h1
25
28
  # post_process:
@@ -29,6 +32,9 @@ module Html2rss
29
32
  # Would return:
30
33
  # 'Product (23,42€)'
31
34
  class Template
35
+ ##
36
+ # @param value [String]
37
+ # @param env [Item::Context]
32
38
  def initialize(value, env)
33
39
  @value = value
34
40
  @options = env[:options]
@@ -39,28 +45,46 @@ module Html2rss
39
45
  ##
40
46
  # @return [String]
41
47
  def get
42
- return format_string_with_methods if @options[:methods]
43
-
44
- names = string.scan(/%[<|{](\w*)[>|}]/)
45
- names.flatten!
46
- names.compact!
47
- names.map!(&:to_sym)
48
-
49
- format(string, names.map { |name| [name, item_value(name)] }.to_h)
48
+ @options[:methods] ? format_string_with_methods : format_string_with_dynamic_params
50
49
  end
51
50
 
52
51
  private
53
52
 
53
+ ##
54
+ # @return [String] the string containing the template
54
55
  attr_reader :string
55
56
 
57
+ ##
58
+ # @return [Array<String>]
56
59
  def methods
57
- @methods ||= @options[:methods].map(&method(:item_value))
60
+ @methods ||= @options[:methods].map { |method_name| item_value(method_name) }
58
61
  end
59
62
 
63
+ ##
64
+ # Formats a string using methods.
65
+ #
66
+ # @return [String]
67
+ # @deprecated Use %<id>s formatting instead. Will be removed in version 1.0.0. See README / Dynamic parameters.
60
68
  def format_string_with_methods
69
+ warn '[DEPRECATION] This method of using params is deprecated and \
70
+ support for it will be removed in version 1.0.0.\
71
+ Please use dynamic parameters (i.e. %<id>s, see README.md) instead.'
72
+
61
73
  string % methods
62
74
  end
63
75
 
76
+ ##
77
+ # @return [String]
78
+ def format_string_with_dynamic_params
79
+ param_names = string.scan(/%[<|{](\w*)[>|}]/)
80
+ param_names.flatten!
81
+
82
+ format(string, param_names.to_h { |name| [name.to_sym, item_value(name)] })
83
+ end
84
+
85
+ ##
86
+ # @param method_name [String, Symbol]
87
+ # @return [String]
64
88
  def item_value(method_name)
65
89
  method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
66
90
  end
@@ -1,13 +1,36 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  ##
3
5
  # Provides a namespace for attribute post processors.
4
6
  module AttributePostProcessors
5
- def self.get_processor(name)
6
- @get_processor ||= Hash.new do |processors, key|
7
- processors[key] = Utils.get_class_from_name(key, 'AttributePostProcessors')
8
- end
7
+ ##
8
+ # Error raised when an unknown post processor name is requested.
9
+ class UnknownPostProcessorName < Html2rss::Error; end
9
10
 
10
- @get_processor[name]
11
+ ##
12
+ # Maps the post processor name to the class implementing the post processor.
13
+ #
14
+ # The key is the name to use in the feed config.
15
+ NAME_TO_CLASS = {
16
+ gsub: Gsub,
17
+ html_to_markdown: HtmlToMarkdown,
18
+ markdown_to_html: MarkdownToHtml,
19
+ parse_time: ParseTime,
20
+ parse_uri: ParseUri,
21
+ sanitize_html: SanitizeHtml,
22
+ substring: Substring,
23
+ template: Template
24
+ }.freeze
25
+
26
+ ##
27
+ # Retrieves the attribute post processor class based on the given name.
28
+ #
29
+ # @param name [Symbol] The name of the post processor.
30
+ # @return [Class] The attribute post processor class.
31
+ # @raise [UnknownPostProcessorName] If the requested name is not found in NAME_TO_CLASS.
32
+ def self.get_processor(name)
33
+ NAME_TO_CLASS[name.to_sym] || raise(UnknownPostProcessorName, "Can't find a post processor named '#{name}'")
11
34
  end
12
35
  end
13
36
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../html2rss'
4
+ require 'thor'
5
+
6
+ module Html2rss
7
+ ##
8
+ # The Html2rss command line interface.
9
+ class CLI < Thor
10
+ def self.exit_on_failure?
11
+ true
12
+ end
13
+
14
+ desc 'feed YAML_FILE [FEED_NAME] [param=value ...]', 'Print RSS built from the YAML_FILE file to stdout'
15
+ ##
16
+ # Prints the feed to STDOUT.
17
+ #
18
+ # @param yaml_file [String] Path to the YAML configuration file.
19
+ # @param options [Array<String>] Additional options including feed name and parameters.
20
+ # @return [nil]
21
+ def feed(yaml_file, *options)
22
+ raise "File '#{yaml_file}' does not exist" unless File.exist?(yaml_file)
23
+
24
+ feed_name = options.shift
25
+ params = options.to_h { |opt| opt.split('=', 2) }
26
+ puts Html2rss.feed_from_yaml_config(yaml_file, feed_name, params:)
27
+ end
28
+ end
29
+ end