html2rss 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +11 -145
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +111 -97
  7. data/README.md +323 -270
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +15 -20
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -82
  26. data/lib/html2rss/item.rb +118 -42
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -10
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +5 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +51 -154
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -221
  45. data/lib/html2rss/feed_builder.rb +0 -81
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
data/bin/console CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'bundler/setup'
4
5
  require 'html2rss'
data/exe/html2rss ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'html2rss/cli'
5
+
6
+ Html2rss::CLI.start(ARGV)
data/html2rss.gemspec CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  lib = File.expand_path('lib', __dir__)
2
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
5
  require 'html2rss/version'
@@ -10,44 +12,37 @@ Gem::Specification.new do |spec|
10
12
 
11
13
  spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
12
14
  spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
- spec.homepage = 'https://github.com/gildesmarais/html2rss'
15
+ spec.homepage = 'https://github.com/html2rss/html2rss'
14
16
  spec.license = 'MIT'
15
- spec.required_ruby_version = '>= 2.5.0'
17
+ spec.required_ruby_version = '>= 3.1'
16
18
 
17
19
  if spec.respond_to?(:metadata)
18
20
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
19
- spec.metadata['changelog_uri'] = 'https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md'
21
+ spec.metadata['changelog_uri'] = 'https://github.com/html2rss/html2rss/releases'
22
+ spec.metadata['rubygems_mfa_required'] = 'true'
20
23
  else
21
24
  raise 'RubyGems 2.0 or newer is required to protect against ' \
22
- 'public gem pushes.'
25
+ 'public gem pushes.'
23
26
  end
24
27
 
25
28
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
26
- f.match(%r{^(test|spec|features)/})
29
+ f.match(%r{^(test|spec|features|support|docs|.github|.yardoc)/})
27
30
  end
28
31
  spec.bindir = 'exe'
29
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
33
  spec.require_paths = ['lib']
31
34
 
32
- spec.add_dependency 'activesupport', '>= 5', '< 7'
33
35
  spec.add_dependency 'addressable', '~> 2.7'
34
- spec.add_dependency 'builder'
35
- spec.add_dependency 'faraday', '~> 1.0'
36
- spec.add_dependency 'faraday_middleware'
36
+ spec.add_dependency 'faraday', '> 2.0.1', '< 3.0'
37
+ spec.add_dependency 'faraday-follow_redirects'
37
38
  spec.add_dependency 'kramdown'
38
39
  spec.add_dependency 'mime-types', '> 3.0'
39
40
  spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
41
+ spec.add_dependency 'regexp_parser'
40
42
  spec.add_dependency 'reverse_markdown', '~> 2.0'
41
- spec.add_dependency 'sanitize', '~> 5.0'
42
- spec.add_dependency 'to_regexp'
43
+ spec.add_dependency 'rss'
44
+ spec.add_dependency 'sanitize', '~> 6.0'
45
+ spec.add_dependency 'thor'
46
+ spec.add_dependency 'tzinfo'
43
47
  spec.add_dependency 'zeitwerk'
44
- spec.add_development_dependency 'bundler'
45
- spec.add_development_dependency 'byebug'
46
- spec.add_development_dependency 'rspec', '~> 3.0'
47
- spec.add_development_dependency 'rubocop'
48
- spec.add_development_dependency 'rubocop-performance'
49
- spec.add_development_dependency 'rubocop-rspec'
50
- spec.add_development_dependency 'simplecov'
51
- spec.add_development_dependency 'vcr'
52
- spec.add_development_dependency 'yard'
53
48
  end
@@ -1,9 +1,8 @@
1
- require 'to_regexp'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Html2rss
4
4
  module AttributePostProcessors
5
5
  ##
6
- #
7
6
  # Imagine this HTML:
8
7
  # <h1>Foo bar and boo</h1>
9
8
  #
@@ -19,23 +18,46 @@ module Html2rss
19
18
  # Would return:
20
19
  # 'Foo bar and baz'
21
20
  #
22
- # `pattern` can be a Regexp or a String.
21
+ # `pattern` can be a Regexp or a String. If it is a String, one pair of
22
+ # surrounding slashes ('/') is removed to keep backwards compatibility
23
+ # and the remainder is parsed to build a Regexp.
23
24
  #
24
25
  # `replacement` can be a String or a Hash.
25
26
  #
26
27
  # See the doc on [String#gsub](https://ruby-doc.org/core/String.html#method-i-gsub) for more info.
27
28
  class Gsub
28
- def initialize(value, env)
29
+ ##
30
+ # @param value [String]
31
+ # @param context [Item::Context]
32
+ def initialize(value, context)
29
33
  @value = value
30
- options = env[:options]
31
- @pattern = options[:pattern].to_regexp || options[:pattern]
32
- @replacement = options[:replacement]
34
+ @options = context[:options]
33
35
  end
34
36
 
35
37
  ##
36
38
  # @return [String]
37
39
  def get
38
- @value.to_s.gsub(@pattern, @replacement)
40
+ @value.to_s.gsub(pattern, replacement)
41
+ end
42
+
43
+ private
44
+
45
+ ##
46
+ # @return [Regexp]
47
+ def pattern
48
+ pattern = @options[:pattern]
49
+ raise ArgumentError, 'The `pattern` option is missing' unless pattern
50
+
51
+ pattern.is_a?(String) ? Utils.build_regexp_from_string(pattern) : pattern
52
+ end
53
+
54
+ ##
55
+ # @return [Hash, String]
56
+ def replacement
57
+ replacement = @options[:replacement]
58
+ return replacement if replacement.is_a?(String) || replacement.is_a?(Hash)
59
+
60
+ raise ArgumentError, 'The `replacement` option must be a String or Hash'
39
61
  end
40
62
  end
41
63
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'reverse_markdown'
2
4
 
3
5
  module Html2rss
@@ -25,14 +27,17 @@ module Html2rss
25
27
  # Would return:
26
28
  # 'Lorem **ipsum** dolor'
27
29
  class HtmlToMarkdown
30
+ ##
31
+ # @param value [String]
32
+ # @param env [Item::Context]
28
33
  def initialize(value, env)
29
- @value = SanitizeHtml.new(value, env).get
34
+ @sanitized_value = SanitizeHtml.new(value, env).get
30
35
  end
31
36
 
32
37
  ##
33
38
  # @return [String] formatted in Markdown
34
39
  def get
35
- ReverseMarkdown.convert @value
40
+ ReverseMarkdown.convert(@sanitized_value)
36
41
  end
37
42
  end
38
43
  end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module AttributePostProcessors
5
+ module HtmlTransformers
6
+ ##
7
+ # Transformer that converts relative URLs to absolute URLs within specified HTML elements.
8
+ class TransformUrlsToAbsoluteOnes
9
+ URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
10
+
11
+ def initialize(channel_url)
12
+ @channel_url = channel_url
13
+ end
14
+
15
+ ##
16
+ # Transforms URLs to absolute ones.
17
+ def call(node_name:, node:, **_env)
18
+ return unless URL_ELEMENTS_WITH_URL_ATTRIBUTE.key?(node_name)
19
+
20
+ url_attribute = URL_ELEMENTS_WITH_URL_ATTRIBUTE[node_name]
21
+ url = node[url_attribute]
22
+ node[url_attribute] = Html2rss::Utils.build_absolute_url_from_relative(url, @channel_url)
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ module AttributePostProcessors
5
+ module HtmlTransformers
6
+ ##
7
+ # Transformer that wraps <img> tags into <a> tags linking to `img.src`.
8
+ class WrapImgInA
9
+ ##
10
+ # Wraps <img> tags into <a> tags that link to `img.src`.
11
+ #
12
+ # @param node_name [String]
13
+ # @param node [Nokogiri::XML::Node]
14
+ # @return [nil]
15
+ def call(node_name:, node:, **_env)
16
+ return unless already_wrapped?(node_name, node)
17
+
18
+ wrap_image_in_anchor(node)
19
+ end
20
+
21
+ def already_wrapped?(node_name, node)
22
+ node_name == 'img' && node.parent.name != 'a'
23
+ end
24
+
25
+ private
26
+
27
+ ##
28
+ # Wraps the <img> node in an <a> tag.
29
+ #
30
+ # @param node [Nokogiri::XML::Node]
31
+ # @return [nil]
32
+ def wrap_image_in_anchor(node)
33
+ anchor = Nokogiri::XML::Node.new('a', node.document)
34
+ anchor['href'] = node['src']
35
+ node.add_next_sibling(anchor)
36
+ anchor.add_child(node.remove)
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -1,4 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'kramdown'
4
+ require_relative 'sanitize_html'
2
5
 
3
6
  module Html2rss
4
7
  module AttributePostProcessors
@@ -30,15 +33,21 @@ module Html2rss
30
33
  #
31
34
  # <p>Price: 12.34</p>
32
35
  class MarkdownToHtml
36
+ ##
37
+ # @param value [String] Markdown content to convert to HTML
38
+ # @param env [Item::Context] Context object providing additional environment details
33
39
  def initialize(value, env)
34
40
  @value = value
35
41
  @env = env
36
42
  end
37
43
 
38
44
  ##
39
- # @return [String] formatted in Markdown
45
+ # Converts Markdown to sanitized HTML.
46
+ #
47
+ # @return [String] Sanitized HTML content
40
48
  def get
41
- SanitizeHtml.new(Kramdown::Document.new(@value).to_html, @env).get
49
+ html_content = Kramdown::Document.new(@value).to_html
50
+ SanitizeHtml.new(html_content, @env).get
42
51
  end
43
52
  end
44
53
  end
@@ -1,5 +1,7 @@
1
- require 'active_support'
2
- require 'active_support/core_ext/time'
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+ require_relative '../utils'
3
5
 
4
6
  module Html2rss
5
7
  module AttributePostProcessors
@@ -24,15 +26,20 @@ module Html2rss
24
26
  #
25
27
  # It uses {https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse Time.parse}.
26
28
  class ParseTime
29
+ ##
30
+ # @param value [String] the time to parse
31
+ # @param env [Item::Context] Context object providing additional environment details
27
32
  def initialize(value, env)
28
33
  @value = value.to_s
29
34
  @time_zone = env[:config].time_zone
30
35
  end
31
36
 
32
37
  ##
33
- # @return [String] rfc822 formatted time
38
+ # Converts the provided time string to RFC822 format, taking into account the configured time zone.
39
+ #
40
+ # @return [String] RFC822 formatted time
34
41
  def get
35
- Time.use_zone(@time_zone) { Time.zone.parse(@value).rfc822 }
42
+ Utils.use_zone(@time_zone) { Time.parse(@value).rfc822 }
36
43
  end
37
44
  end
38
45
  end
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module AttributePostProcessors
3
5
  ##
4
6
  # Returns the URI as String.
7
+ # If the URL is relative, it builds an absolute one with the channel's URL as base.
5
8
  #
6
9
  # Imagine this HTML structure:
7
10
  #
@@ -19,14 +22,21 @@ module Html2rss
19
22
  # Would return:
20
23
  # 'http://why-not-use-a-link.uh'
21
24
  class ParseUri
22
- def initialize(value, _env)
25
+ ##
26
+ # @param value [String]
27
+ # @param context [Item::Context]
28
+ def initialize(value, context)
23
29
  @value = value
30
+ @config_url = context.config.url
24
31
  end
25
32
 
26
33
  ##
27
34
  # @return [String]
28
35
  def get
29
- URI(Html2rss::Utils.sanitize_url(@value)).to_s
36
+ Html2rss::Utils.build_absolute_url_from_relative(
37
+ Html2rss::Utils.sanitize_url(@value),
38
+ @config_url
39
+ ).to_s
30
40
  end
31
41
  end
32
42
  end
@@ -1,17 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sanitize'
4
+ require_relative 'html_transformers/transform_urls_to_absolute_ones'
5
+ require_relative 'html_transformers/wrap_img_in_a'
2
6
 
3
7
  module Html2rss
4
8
  module AttributePostProcessors
5
9
  ##
6
10
  # Returns sanitized HTML code as String.
7
11
  #
8
- # It adds:
12
+ # It sanitizes by using the [sanitize gem](https://github.com/rgrove/sanitize) with
13
+ # [Sanitize::Config::RELAXED](https://github.com/rgrove/sanitize#sanitizeconfigrelaxed).
14
+ #
15
+ # Furthermore, it adds:
9
16
  #
10
17
  # - `rel="nofollow noopener noreferrer"` to <a> tags
11
18
  # - `referrer-policy='no-referrer'` to <img> tags
12
- #
13
- # It also:
14
- #
15
19
  # - wraps all <img> tags, whose direct parent is not an <a>, into an <a>
16
20
  # linking to the <img>'s `src`.
17
21
  #
@@ -35,68 +39,60 @@ module Html2rss
35
39
  # Would return:
36
40
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
37
41
  class SanitizeHtml
38
- URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
39
- private_constant :URL_ELEMENTS_WITH_URL_ATTRIBUTE
40
-
42
+ ##
43
+ # @param value [String]
44
+ # @param env [Item::Context]
41
45
  def initialize(value, env)
42
46
  @value = value
43
47
  @channel_url = env[:config].url
44
48
  end
45
49
 
46
50
  ##
47
- # - uses the {https://github.com/rgrove/sanitize sanitize gem}
48
- # - uses the config {https://github.com/rgrove/sanitize#sanitizeconfigrelaxed Sanitize::Config::RELAXED}
49
- # - adds rel="nofollow noopener noreferrer" to a elements
50
- # - adds target="_blank" to a elements
51
51
  # @return [String]
52
52
  def get
53
- Sanitize.fragment(@value, sanitize_config).to_s.split.join(' ')
53
+ sanitized_html = Sanitize.fragment(@value, sanitize_config)
54
+ sanitized_html.to_s.gsub(/\s+/, ' ').strip
54
55
  end
55
56
 
56
57
  private
57
58
 
59
+ ##
60
+ # @return [Sanitize::Config]
58
61
  def sanitize_config
59
62
  Sanitize::Config.merge(
60
63
  Sanitize::Config::RELAXED,
61
64
  attributes: { all: %w[dir lang alt title translate] },
62
- add_attributes: {
63
- 'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
64
- 'img' => { 'referrer-policy' => 'no-referrer' }
65
- },
66
- transformers: [transform_urls_to_absolute_ones, wrap_img_in_a]
65
+ add_attributes:,
66
+ transformers: [
67
+ method(:transform_urls_to_absolute_ones),
68
+ method(:wrap_img_in_a)
69
+ ]
67
70
  )
68
71
  end
69
72
 
70
- def transform_urls_to_absolute_ones
71
- lambda do |env|
72
- return unless URL_ELEMENTS_WITH_URL_ATTRIBUTE.key?(env[:node_name])
73
-
74
- url_attribute = URL_ELEMENTS_WITH_URL_ATTRIBUTE[env[:node_name]]
75
- url = env[:node][url_attribute]
76
-
77
- return if URI(url).absolute?
78
-
79
- absolute_url = Html2rss::Utils.build_absolute_url_from_relative(url, @channel_url)
80
-
81
- env[:node][url_attribute] = absolute_url
82
- end
73
+ def add_attributes
74
+ {
75
+ 'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
76
+ 'img' => { 'referrer-policy' => 'no-referrer' }
77
+ }
83
78
  end
84
79
 
85
- def wrap_img_in_a
86
- lambda do |env|
87
- return if env[:node_name] != 'img'
88
-
89
- img = env[:node]
90
-
91
- return if img.parent.name == 'a'
92
-
93
- anchor = Nokogiri::XML::Node.new('a', img)
94
- anchor[:href] = img[:src]
95
-
96
- anchor.add_child img.dup
80
+ ##
81
+ # Wrapper for transform_urls_to_absolute_ones to pass the channel_url.
82
+ #
83
+ # @param env [Hash]
84
+ # @return [nil]
85
+ def transform_urls_to_absolute_ones(env)
86
+ HtmlTransformers::TransformUrlsToAbsoluteOnes.new(@channel_url).call(**env)
87
+ end
97
88
 
98
- img.replace(anchor)
99
- end
89
+ ##
90
+ # Wrapper for wrap_img_in_a.
91
+ #
92
+ # @param env [Hash]
93
+ # @return [nil]
94
+ def wrap_img_in_a(env)
95
+ HtmlTransformers::WrapImgInA.new.call(**env)
100
96
  end
101
97
  end
102
98
  end
@@ -1,6 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  module AttributePostProcessors
3
- ## Returns a defined part of a String.
5
+ ##
6
+ # Returns a defined part of a String.
4
7
  #
5
8
  # Both parameters must be Integers; either can be negative.
6
9
  # The +end+ parameter can be omitted, in that case it will not cut the
@@ -26,16 +29,23 @@ module Html2rss
26
29
  # Would return:
27
30
  # 'bar'
28
31
  class Substring
32
+ ##
33
+ # @param value [String] The original string to extract a substring from.
34
+ # @param env [Item::Context] Context object providing additional environment details.
29
35
  def initialize(value, env)
30
36
  @value = value
31
37
  @options = env[:options]
32
38
  end
33
39
 
34
40
  ##
35
- # @return [String]
41
+ # Extracts the substring from the original string based on the provided start and end indices.
42
+ #
43
+ # @return [String] The extracted substring.
36
44
  def get
37
- ending = @options.fetch(:end, @value.length).to_i
38
- @value[@options[:start].to_i..ending]
45
+ start_index = @options[:start].to_i
46
+ end_index = @options[:end]&.to_i || @value.length
47
+
48
+ @value[start_index..end_index]
39
49
  end
40
50
  end
41
51
  end
@@ -1,25 +1,28 @@
1
- require 'sanitize'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Html2rss
4
4
  module AttributePostProcessors
5
- ## Returns a formatted String according to the string pattern.
5
+ ##
6
+ # Returns a formatted String according to the string pattern.
6
7
  #
7
8
  # If +self+ is used, the selector's extracted value will be used.
8
9
  # It uses [Kernel#format](https://ruby-doc.org/core/Kernel.html#method-i-format)
9
10
  #
10
11
  # Imagine this HTML:
12
+ #
11
13
  # <li>
12
14
  # <h1>Product</h1>
13
15
  # <span class="price">23,42€</span>
14
16
  # </li>
15
17
  #
18
+ #
16
19
  # YAML usage example:
17
20
  #
18
21
  # selectors:
19
22
  # items:
20
23
  # selector: 'li'
21
24
  # price:
22
- # selector: '.price'
25
+ # selector: '.price'
23
26
  # title:
24
27
  # selector: h1
25
28
  # post_process:
@@ -29,6 +32,9 @@ module Html2rss
29
32
  # Would return:
30
33
  # 'Product (23,42€)'
31
34
  class Template
35
+ ##
36
+ # @param value [String]
37
+ # @param env [Item::Context]
32
38
  def initialize(value, env)
33
39
  @value = value
34
40
  @options = env[:options]
@@ -39,28 +45,46 @@ module Html2rss
39
45
  ##
40
46
  # @return [String]
41
47
  def get
42
- return format_string_with_methods if @options[:methods]
43
-
44
- names = string.scan(/%[<|{](\w*)[>|}]/)
45
- names.flatten!
46
- names.compact!
47
- names.map!(&:to_sym)
48
-
49
- format(string, names.map { |name| [name, item_value(name)] }.to_h)
48
+ @options[:methods] ? format_string_with_methods : format_string_with_dynamic_params
50
49
  end
51
50
 
52
51
  private
53
52
 
53
+ ##
54
+ # @return [String] the string containing the template
54
55
  attr_reader :string
55
56
 
57
+ ##
58
+ # @return [Array<String>]
56
59
  def methods
57
- @methods ||= @options[:methods].map(&method(:item_value))
60
+ @methods ||= @options[:methods].map { |method_name| item_value(method_name) }
58
61
  end
59
62
 
63
+ ##
64
+ # Formats a string using methods.
65
+ #
66
+ # @return [String]
67
+ # @deprecated Use %<id>s formatting instead. Will be removed in version 1.0.0. See README / Dynamic parameters.
60
68
  def format_string_with_methods
69
+ warn '[DEPRECATION] This method of using params is deprecated and \
70
+ support for it will be removed in version 1.0.0.\
71
+ Please use dynamic parameters (i.e. %<id>s, see README.md) instead.'
72
+
61
73
  string % methods
62
74
  end
63
75
 
76
+ ##
77
+ # @return [String]
78
+ def format_string_with_dynamic_params
79
+ param_names = string.scan(/%[<|{](\w*)[>|}]/)
80
+ param_names.flatten!
81
+
82
+ format(string, param_names.to_h { |name| [name.to_sym, item_value(name)] })
83
+ end
84
+
85
+ ##
86
+ # @param method_name [String, Symbol]
87
+ # @return [String]
64
88
  def item_value(method_name)
65
89
  method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
66
90
  end
@@ -1,13 +1,36 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Html2rss
2
4
  ##
3
5
  # Provides a namespace for attribute post processors.
4
6
  module AttributePostProcessors
5
- def self.get_processor(name)
6
- @get_processor ||= Hash.new do |processors, key|
7
- processors[key] = Utils.get_class_from_name(key, 'AttributePostProcessors')
8
- end
7
+ ##
8
+ # Error raised when an unknown post processor name is requested.
9
+ class UnknownPostProcessorName < StandardError; end
9
10
 
10
- @get_processor[name]
11
+ ##
12
+ # Maps the post processor name to the class implementing the post processor.
13
+ #
14
+ # The key is the name to use in the feed config.
15
+ NAME_TO_CLASS = {
16
+ gsub: Gsub,
17
+ html_to_markdown: HtmlToMarkdown,
18
+ markdown_to_html: MarkdownToHtml,
19
+ parse_time: ParseTime,
20
+ parse_uri: ParseUri,
21
+ sanitize_html: SanitizeHtml,
22
+ substring: Substring,
23
+ template: Template
24
+ }.freeze
25
+
26
+ ##
27
+ # Retrieves the attribute post processor class based on the given name.
28
+ #
29
+ # @param name [Symbol] The name of the post processor.
30
+ # @return [Class] The attribute post processor class.
31
+ # @raise [UnknownPostProcessorName] If the requested name is not found in NAME_TO_CLASS.
32
+ def self.get_processor(name)
33
+ NAME_TO_CLASS[name.to_sym] || raise(UnknownPostProcessorName, "Can't find a post processor named '#{name}'")
11
34
  end
12
35
  end
13
36
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../html2rss'
4
+ require 'thor'
5
+
6
+ module Html2rss
7
+ ##
8
+ # The Html2rss command line interface.
9
+ class CLI < Thor
10
+ def self.exit_on_failure?
11
+ true
12
+ end
13
+
14
+ desc 'feed YAML_FILE [FEED_NAME] [param=value ...]', 'Print RSS built from the YAML_FILE file to stdout'
15
+ ##
16
+ # Prints the feed to STDOUT.
17
+ #
18
+ # @param yaml_file [String] Path to the YAML configuration file.
19
+ # @param options [Array<String>] Additional options including feed name and parameters.
20
+ # @return [nil]
21
+ def feed(yaml_file, *options)
22
+ raise "File '#{yaml_file}' does not exist" unless File.exist?(yaml_file)
23
+
24
+ feed_name = options.shift
25
+ params = options.to_h { |opt| opt.split('=', 2) }
26
+ puts Html2rss.feed_from_yaml_config(yaml_file, feed_name, params:)
27
+ end
28
+ end
29
+ end