html2rss 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.gitignore +1 -1
 - data/.mergify.yml +15 -0
 - data/.rubocop.yml +11 -145
 - data/Gemfile +19 -2
 - data/Gemfile.lock +111 -97
 - data/README.md +323 -270
 - data/bin/console +1 -0
 - data/exe/html2rss +6 -0
 - data/html2rss.gemspec +15 -20
 - data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
 - data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
 - data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
 - data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
 - data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
 - data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
 - data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
 - data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
 - data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
 - data/lib/html2rss/attribute_post_processors/template.rb +36 -12
 - data/lib/html2rss/attribute_post_processors.rb +28 -5
 - data/lib/html2rss/cli.rb +29 -0
 - data/lib/html2rss/config/channel.rb +117 -0
 - data/lib/html2rss/config/selectors.rb +91 -0
 - data/lib/html2rss/config.rb +71 -82
 - data/lib/html2rss/item.rb +118 -42
 - data/lib/html2rss/item_extractors/attribute.rb +20 -7
 - data/lib/html2rss/item_extractors/href.rb +20 -4
 - data/lib/html2rss/item_extractors/html.rb +18 -6
 - data/lib/html2rss/item_extractors/static.rb +18 -7
 - data/lib/html2rss/item_extractors/text.rb +17 -5
 - data/lib/html2rss/item_extractors.rb +75 -10
 - data/lib/html2rss/object_to_xml_converter.rb +56 -0
 - data/lib/html2rss/rss_builder/channel.rb +21 -0
 - data/lib/html2rss/rss_builder/item.rb +83 -0
 - data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
 - data/lib/html2rss/rss_builder.rb +96 -0
 - data/lib/html2rss/utils.rb +94 -19
 - data/lib/html2rss/version.rb +5 -1
 - data/lib/html2rss.rb +51 -20
 - data/rakefile.rb +16 -0
 - metadata +51 -154
 - data/.travis.yml +0 -25
 - data/CHANGELOG.md +0 -221
 - data/lib/html2rss/feed_builder.rb +0 -81
 - data/lib/html2rss/item_extractors/current_time.rb +0 -21
 - data/support/logo.png +0 -0
 
    
        data/bin/console
    CHANGED
    
    
    
        data/exe/html2rss
    ADDED
    
    
    
        data/html2rss.gemspec
    CHANGED
    
    | 
         @@ -1,3 +1,5 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            lib = File.expand_path('lib', __dir__)
         
     | 
| 
       2 
4 
     | 
    
         
             
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         
     | 
| 
       3 
5 
     | 
    
         
             
            require 'html2rss/version'
         
     | 
| 
         @@ -10,44 +12,37 @@ Gem::Specification.new do |spec| 
     | 
|
| 
       10 
12 
     | 
    
         | 
| 
       11 
13 
     | 
    
         
             
              spec.summary       = 'Returns an RSS::Rss object by scraping a URL.'
         
     | 
| 
       12 
14 
     | 
    
         
             
              spec.description   = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
         
     | 
| 
       13 
     | 
    
         
            -
              spec.homepage      = 'https://github.com/ 
     | 
| 
      
 15 
     | 
    
         
            +
              spec.homepage      = 'https://github.com/html2rss/html2rss'
         
     | 
| 
       14 
16 
     | 
    
         
             
              spec.license       = 'MIT'
         
     | 
| 
       15 
     | 
    
         
            -
              spec.required_ruby_version = '>=  
     | 
| 
      
 17 
     | 
    
         
            +
              spec.required_ruby_version = '>= 3.1'
         
     | 
| 
       16 
18 
     | 
    
         | 
| 
       17 
19 
     | 
    
         
             
              if spec.respond_to?(:metadata)
         
     | 
| 
       18 
20 
     | 
    
         
             
                spec.metadata['allowed_push_host'] = 'https://rubygems.org'
         
     | 
| 
       19 
     | 
    
         
            -
                spec.metadata['changelog_uri'] = 'https://github.com/ 
     | 
| 
      
 21 
     | 
    
         
            +
                spec.metadata['changelog_uri'] = 'https://github.com/html2rss/html2rss/releases'
         
     | 
| 
      
 22 
     | 
    
         
            +
                spec.metadata['rubygems_mfa_required'] = 'true'
         
     | 
| 
       20 
23 
     | 
    
         
             
              else
         
     | 
| 
       21 
24 
     | 
    
         
             
                raise 'RubyGems 2.0 or newer is required to protect against ' \
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
      
 25 
     | 
    
         
            +
                      'public gem pushes.'
         
     | 
| 
       23 
26 
     | 
    
         
             
              end
         
     | 
| 
       24 
27 
     | 
    
         | 
| 
       25 
28 
     | 
    
         
             
              spec.files = `git ls-files -z`.split("\x0").reject do |f|
         
     | 
| 
       26 
     | 
    
         
            -
                f.match(%r{^(test|spec|features)/})
         
     | 
| 
      
 29 
     | 
    
         
            +
                f.match(%r{^(test|spec|features|support|docs|.github|.yardoc)/})
         
     | 
| 
       27 
30 
     | 
    
         
             
              end
         
     | 
| 
       28 
31 
     | 
    
         
             
              spec.bindir        = 'exe'
         
     | 
| 
       29 
32 
     | 
    
         
             
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
         
     | 
| 
       30 
33 
     | 
    
         
             
              spec.require_paths = ['lib']
         
     | 
| 
       31 
34 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
              spec.add_dependency 'activesupport', '>= 5', '< 7'
         
     | 
| 
       33 
35 
     | 
    
         
             
              spec.add_dependency 'addressable', '~> 2.7'
         
     | 
| 
       34 
     | 
    
         
            -
              spec.add_dependency ' 
     | 
| 
       35 
     | 
    
         
            -
              spec.add_dependency 'faraday' 
     | 
| 
       36 
     | 
    
         
            -
              spec.add_dependency 'faraday_middleware'
         
     | 
| 
      
 36 
     | 
    
         
            +
              spec.add_dependency 'faraday', '> 2.0.1', '< 3.0'
         
     | 
| 
      
 37 
     | 
    
         
            +
              spec.add_dependency 'faraday-follow_redirects'
         
     | 
| 
       37 
38 
     | 
    
         
             
              spec.add_dependency 'kramdown'
         
     | 
| 
       38 
39 
     | 
    
         
             
              spec.add_dependency 'mime-types', '> 3.0'
         
     | 
| 
       39 
40 
     | 
    
         
             
              spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
         
     | 
| 
      
 41 
     | 
    
         
            +
              spec.add_dependency 'regexp_parser'
         
     | 
| 
       40 
42 
     | 
    
         
             
              spec.add_dependency 'reverse_markdown', '~> 2.0'
         
     | 
| 
       41 
     | 
    
         
            -
              spec.add_dependency ' 
     | 
| 
       42 
     | 
    
         
            -
              spec.add_dependency ' 
     | 
| 
      
 43 
     | 
    
         
            +
              spec.add_dependency 'rss'
         
     | 
| 
      
 44 
     | 
    
         
            +
              spec.add_dependency 'sanitize', '~> 6.0'
         
     | 
| 
      
 45 
     | 
    
         
            +
              spec.add_dependency 'thor'
         
     | 
| 
      
 46 
     | 
    
         
            +
              spec.add_dependency 'tzinfo'
         
     | 
| 
       43 
47 
     | 
    
         
             
              spec.add_dependency 'zeitwerk'
         
     | 
| 
       44 
     | 
    
         
            -
              spec.add_development_dependency 'bundler'
         
     | 
| 
       45 
     | 
    
         
            -
              spec.add_development_dependency 'byebug'
         
     | 
| 
       46 
     | 
    
         
            -
              spec.add_development_dependency 'rspec', '~> 3.0'
         
     | 
| 
       47 
     | 
    
         
            -
              spec.add_development_dependency 'rubocop'
         
     | 
| 
       48 
     | 
    
         
            -
              spec.add_development_dependency 'rubocop-performance'
         
     | 
| 
       49 
     | 
    
         
            -
              spec.add_development_dependency 'rubocop-rspec'
         
     | 
| 
       50 
     | 
    
         
            -
              spec.add_development_dependency 'simplecov'
         
     | 
| 
       51 
     | 
    
         
            -
              spec.add_development_dependency 'vcr'
         
     | 
| 
       52 
     | 
    
         
            -
              spec.add_development_dependency 'yard'
         
     | 
| 
       53 
48 
     | 
    
         
             
            end
         
     | 
| 
         @@ -1,9 +1,8 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       4 
4 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
       5 
5 
     | 
    
         
             
                ##
         
     | 
| 
       6 
     | 
    
         
            -
                #
         
     | 
| 
       7 
6 
     | 
    
         
             
                # Imagine this HTML:
         
     | 
| 
       8 
7 
     | 
    
         
             
                #    <h1>Foo bar and boo</h1>
         
     | 
| 
       9 
8 
     | 
    
         
             
                #
         
     | 
| 
         @@ -19,23 +18,46 @@ module Html2rss 
     | 
|
| 
       19 
18 
     | 
    
         
             
                # Would return:
         
     | 
| 
       20 
19 
     | 
    
         
             
                #    'Foo bar and baz'
         
     | 
| 
       21 
20 
     | 
    
         
             
                #
         
     | 
| 
       22 
     | 
    
         
            -
                # `pattern` can be a Regexp or a String.
         
     | 
| 
      
 21 
     | 
    
         
            +
                # `pattern` can be a Regexp or a String. If it is a String, it will remove
         
     | 
| 
      
 22 
     | 
    
         
            +
                # one pair of surrounding slashes ('/') to keep backwards compatibility
         
     | 
| 
      
 23 
     | 
    
         
            +
                # and then parse it to build a Regexp.
         
     | 
| 
       23 
24 
     | 
    
         
             
                #
         
     | 
| 
       24 
25 
     | 
    
         
             
                # `replacement` can be a String or a Hash.
         
     | 
| 
       25 
26 
     | 
    
         
             
                #
         
     | 
| 
       26 
27 
     | 
    
         
             
                # See the doc on [String#gsub](https://ruby-doc.org/core/String.html#method-i-gsub) for more info.
         
     | 
| 
       27 
28 
     | 
    
         
             
                class Gsub
         
     | 
| 
       28 
     | 
    
         
            -
                   
     | 
| 
      
 29 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 30 
     | 
    
         
            +
                  # @param value [String]
         
     | 
| 
      
 31 
     | 
    
         
            +
                  # @param context [Item::Context]
         
     | 
| 
      
 32 
     | 
    
         
            +
                  def initialize(value, context)
         
     | 
| 
       29 
33 
     | 
    
         
             
                    @value = value
         
     | 
| 
       30 
     | 
    
         
            -
                    options =  
     | 
| 
       31 
     | 
    
         
            -
                    @pattern = options[:pattern].to_regexp || options[:pattern]
         
     | 
| 
       32 
     | 
    
         
            -
                    @replacement = options[:replacement]
         
     | 
| 
      
 34 
     | 
    
         
            +
                    @options = context[:options]
         
     | 
| 
       33 
35 
     | 
    
         
             
                  end
         
     | 
| 
       34 
36 
     | 
    
         | 
| 
       35 
37 
     | 
    
         
             
                  ##
         
     | 
| 
       36 
38 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       37 
39 
     | 
    
         
             
                  def get
         
     | 
| 
       38 
     | 
    
         
            -
                    @value.to_s.gsub( 
     | 
| 
      
 40 
     | 
    
         
            +
                    @value.to_s.gsub(pattern, replacement)
         
     | 
| 
      
 41 
     | 
    
         
            +
                  end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
                  private
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 46 
     | 
    
         
            +
                  # @return [Regexp]
         
     | 
| 
      
 47 
     | 
    
         
            +
                  def pattern
         
     | 
| 
      
 48 
     | 
    
         
            +
                    pattern = @options[:pattern]
         
     | 
| 
      
 49 
     | 
    
         
            +
                    raise ArgumentError, 'The `pattern` option is missing' unless pattern
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
                    pattern.is_a?(String) ? Utils.build_regexp_from_string(pattern) : pattern
         
     | 
| 
      
 52 
     | 
    
         
            +
                  end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 55 
     | 
    
         
            +
                  # @return [Hash, String]
         
     | 
| 
      
 56 
     | 
    
         
            +
                  def replacement
         
     | 
| 
      
 57 
     | 
    
         
            +
                    replacement = @options[:replacement]
         
     | 
| 
      
 58 
     | 
    
         
            +
                    return replacement if replacement.is_a?(String) || replacement.is_a?(Hash)
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
                    raise ArgumentError, 'The `replacement` option must be a String or Hash'
         
     | 
| 
       39 
61 
     | 
    
         
             
                  end
         
     | 
| 
       40 
62 
     | 
    
         
             
                end
         
     | 
| 
       41 
63 
     | 
    
         
             
              end
         
     | 
| 
         @@ -1,3 +1,5 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'reverse_markdown'
         
     | 
| 
       2 
4 
     | 
    
         | 
| 
       3 
5 
     | 
    
         
             
            module Html2rss
         
     | 
| 
         @@ -25,14 +27,17 @@ module Html2rss 
     | 
|
| 
       25 
27 
     | 
    
         
             
                # Would return:
         
     | 
| 
       26 
28 
     | 
    
         
             
                #    'Lorem **ipsum** dolor'
         
     | 
| 
       27 
29 
     | 
    
         
             
                class HtmlToMarkdown
         
     | 
| 
      
 30 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 31 
     | 
    
         
            +
                  # @param value [String]
         
     | 
| 
      
 32 
     | 
    
         
            +
                  # @param env [Item::Context]
         
     | 
| 
       28 
33 
     | 
    
         
             
                  def initialize(value, env)
         
     | 
| 
       29 
     | 
    
         
            -
                    @ 
     | 
| 
      
 34 
     | 
    
         
            +
                    @sanitized_value = SanitizeHtml.new(value, env).get
         
     | 
| 
       30 
35 
     | 
    
         
             
                  end
         
     | 
| 
       31 
36 
     | 
    
         | 
| 
       32 
37 
     | 
    
         
             
                  ##
         
     | 
| 
       33 
38 
     | 
    
         
             
                  # @return [String] formatted in Markdown
         
     | 
| 
       34 
39 
     | 
    
         
             
                  def get
         
     | 
| 
       35 
     | 
    
         
            -
                    ReverseMarkdown.convert 
     | 
| 
      
 40 
     | 
    
         
            +
                    ReverseMarkdown.convert(@sanitized_value)
         
     | 
| 
       36 
41 
     | 
    
         
             
                  end
         
     | 
| 
       37 
42 
     | 
    
         
             
                end
         
     | 
| 
       38 
43 
     | 
    
         
             
              end
         
     | 
    
        data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
    ADDED
    
    | 
         @@ -0,0 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Html2rss
         
     | 
| 
      
 4 
     | 
    
         
            +
              module AttributePostProcessors
         
     | 
| 
      
 5 
     | 
    
         
            +
                module HtmlTransformers
         
     | 
| 
      
 6 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # Transformer that converts relative URLs to absolute URLs within specified HTML elements.
         
     | 
| 
      
 8 
     | 
    
         
            +
                  class TransformUrlsToAbsoluteOnes
         
     | 
| 
      
 9 
     | 
    
         
            +
                    URL_ELEMENTS_WITH_URL_ATTRIBUTE = { 'a' => :href, 'img' => :src }.freeze
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                    def initialize(channel_url)
         
     | 
| 
      
 12 
     | 
    
         
            +
                      @channel_url = channel_url
         
     | 
| 
      
 13 
     | 
    
         
            +
                    end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                    ##
         
     | 
| 
      
 16 
     | 
    
         
            +
                    # Transforms URLs to absolute ones.
         
     | 
| 
      
 17 
     | 
    
         
            +
                    def call(node_name:, node:, **_env)
         
     | 
| 
      
 18 
     | 
    
         
            +
                      return unless URL_ELEMENTS_WITH_URL_ATTRIBUTE.key?(node_name)
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                      url_attribute = URL_ELEMENTS_WITH_URL_ATTRIBUTE[node_name]
         
     | 
| 
      
 21 
     | 
    
         
            +
                      url = node[url_attribute]
         
     | 
| 
      
 22 
     | 
    
         
            +
                      node[url_attribute] = Html2rss::Utils.build_absolute_url_from_relative(url, @channel_url)
         
     | 
| 
      
 23 
     | 
    
         
            +
                    end
         
     | 
| 
      
 24 
     | 
    
         
            +
                  end
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
              end
         
     | 
| 
      
 27 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,41 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Html2rss
         
     | 
| 
      
 4 
     | 
    
         
            +
              module AttributePostProcessors
         
     | 
| 
      
 5 
     | 
    
         
            +
                module HtmlTransformers
         
     | 
| 
      
 6 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # Transformer that wraps <img> tags into <a> tags linking to `img.src`.
         
     | 
| 
      
 8 
     | 
    
         
            +
                  class WrapImgInA
         
     | 
| 
      
 9 
     | 
    
         
            +
                    ##
         
     | 
| 
      
 10 
     | 
    
         
            +
                    # Wraps <img> tags into <a> tags that link to `img.src`.
         
     | 
| 
      
 11 
     | 
    
         
            +
                    #
         
     | 
| 
      
 12 
     | 
    
         
            +
                    # @param node_name [String]
         
     | 
| 
      
 13 
     | 
    
         
            +
                    # @param node [Nokogiri::XML::Node]
         
     | 
| 
      
 14 
     | 
    
         
            +
                    # @return [nil]
         
     | 
| 
      
 15 
     | 
    
         
            +
                    def call(node_name:, node:, **_env)
         
     | 
| 
      
 16 
     | 
    
         
            +
                      return unless already_wrapped?(node_name, node)
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                      wrap_image_in_anchor(node)
         
     | 
| 
      
 19 
     | 
    
         
            +
                    end
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                    def already_wrapped?(node_name, node)
         
     | 
| 
      
 22 
     | 
    
         
            +
                      node_name == 'img' && node.parent.name != 'a'
         
     | 
| 
      
 23 
     | 
    
         
            +
                    end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                    private
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                    ##
         
     | 
| 
      
 28 
     | 
    
         
            +
                    # Wraps the <img> node in an <a> tag.
         
     | 
| 
      
 29 
     | 
    
         
            +
                    #
         
     | 
| 
      
 30 
     | 
    
         
            +
                    # @param node [Nokogiri::XML::Node]
         
     | 
| 
      
 31 
     | 
    
         
            +
                    # @return [nil]
         
     | 
| 
      
 32 
     | 
    
         
            +
                    def wrap_image_in_anchor(node)
         
     | 
| 
      
 33 
     | 
    
         
            +
                      anchor = Nokogiri::XML::Node.new('a', node.document)
         
     | 
| 
      
 34 
     | 
    
         
            +
                      anchor['href'] = node['src']
         
     | 
| 
      
 35 
     | 
    
         
            +
                      node.add_next_sibling(anchor)
         
     | 
| 
      
 36 
     | 
    
         
            +
                      anchor.add_child(node.remove)
         
     | 
| 
      
 37 
     | 
    
         
            +
                    end
         
     | 
| 
      
 38 
     | 
    
         
            +
                  end
         
     | 
| 
      
 39 
     | 
    
         
            +
                end
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,4 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'kramdown'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative 'sanitize_html'
         
     | 
| 
       2 
5 
     | 
    
         | 
| 
       3 
6 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       4 
7 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
         @@ -30,15 +33,21 @@ module Html2rss 
     | 
|
| 
       30 
33 
     | 
    
         
             
                #
         
     | 
| 
       31 
34 
     | 
    
         
             
                #    <p>Price: 12.34</p>
         
     | 
| 
       32 
35 
     | 
    
         
             
                class MarkdownToHtml
         
     | 
| 
      
 36 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 37 
     | 
    
         
            +
                  # @param value [String] Markdown content to convert to HTML
         
     | 
| 
      
 38 
     | 
    
         
            +
                  # @param env [Item::Context] Context object providing additional environment details
         
     | 
| 
       33 
39 
     | 
    
         
             
                  def initialize(value, env)
         
     | 
| 
       34 
40 
     | 
    
         
             
                    @value = value
         
     | 
| 
       35 
41 
     | 
    
         
             
                    @env = env
         
     | 
| 
       36 
42 
     | 
    
         
             
                  end
         
     | 
| 
       37 
43 
     | 
    
         | 
| 
       38 
44 
     | 
    
         
             
                  ##
         
     | 
| 
       39 
     | 
    
         
            -
                  #  
     | 
| 
      
 45 
     | 
    
         
            +
                  # Converts Markdown to sanitized HTML.
         
     | 
| 
      
 46 
     | 
    
         
            +
                  #
         
     | 
| 
      
 47 
     | 
    
         
            +
                  # @return [String] Sanitized HTML content
         
     | 
| 
       40 
48 
     | 
    
         
             
                  def get
         
     | 
| 
       41 
     | 
    
         
            -
                     
     | 
| 
      
 49 
     | 
    
         
            +
                    html_content = Kramdown::Document.new(@value).to_html
         
     | 
| 
      
 50 
     | 
    
         
            +
                    SanitizeHtml.new(html_content, @env).get
         
     | 
| 
       42 
51 
     | 
    
         
             
                  end
         
     | 
| 
       43 
52 
     | 
    
         
             
                end
         
     | 
| 
       44 
53 
     | 
    
         
             
              end
         
     | 
| 
         @@ -1,5 +1,7 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'time'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative '../utils'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
       4 
6 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       5 
7 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
         @@ -24,15 +26,20 @@ module Html2rss 
     | 
|
| 
       24 
26 
     | 
    
         
             
                #
         
     | 
| 
       25 
27 
     | 
    
         
             
                # It uses {https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse Time.parse}.
         
     | 
| 
       26 
28 
     | 
    
         
             
                class ParseTime
         
     | 
| 
      
 29 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 30 
     | 
    
         
            +
                  # @param value [String] the time to parse
         
     | 
| 
      
 31 
     | 
    
         
            +
                  # @param env [Item::Context] Context object providing additional environment details
         
     | 
| 
       27 
32 
     | 
    
         
             
                  def initialize(value, env)
         
     | 
| 
       28 
33 
     | 
    
         
             
                    @value = value.to_s
         
     | 
| 
       29 
34 
     | 
    
         
             
                    @time_zone = env[:config].time_zone
         
     | 
| 
       30 
35 
     | 
    
         
             
                  end
         
     | 
| 
       31 
36 
     | 
    
         | 
| 
       32 
37 
     | 
    
         
             
                  ##
         
     | 
| 
       33 
     | 
    
         
            -
                  #  
     | 
| 
      
 38 
     | 
    
         
            +
                  # Converts the provided time string to RFC822 format, taking into account the configured time zone.
         
     | 
| 
      
 39 
     | 
    
         
            +
                  #
         
     | 
| 
      
 40 
     | 
    
         
            +
                  # @return [String] RFC822 formatted time
         
     | 
| 
       34 
41 
     | 
    
         
             
                  def get
         
     | 
| 
       35 
     | 
    
         
            -
                     
     | 
| 
      
 42 
     | 
    
         
            +
                    Utils.use_zone(@time_zone) { Time.parse(@value).rfc822 }
         
     | 
| 
       36 
43 
     | 
    
         
             
                  end
         
     | 
| 
       37 
44 
     | 
    
         
             
                end
         
     | 
| 
       38 
45 
     | 
    
         
             
              end
         
     | 
| 
         @@ -1,7 +1,10 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       2 
4 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
       3 
5 
     | 
    
         
             
                ##
         
     | 
| 
       4 
6 
     | 
    
         
             
                # Returns the URI as String.
         
     | 
| 
      
 7 
     | 
    
         
            +
                # If the URL is relative, it builds an absolute one with the channel's URL as base.
         
     | 
| 
       5 
8 
     | 
    
         
             
                #
         
     | 
| 
       6 
9 
     | 
    
         
             
                # Imagine this HTML structure:
         
     | 
| 
       7 
10 
     | 
    
         
             
                #
         
     | 
| 
         @@ -19,14 +22,21 @@ module Html2rss 
     | 
|
| 
       19 
22 
     | 
    
         
             
                # Would return:
         
     | 
| 
       20 
23 
     | 
    
         
             
                #    'http://why-not-use-a-link.uh'
         
     | 
| 
       21 
24 
     | 
    
         
             
                class ParseUri
         
     | 
| 
       22 
     | 
    
         
            -
                   
     | 
| 
      
 25 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 26 
     | 
    
         
            +
                  # @param value [String]
         
     | 
| 
      
 27 
     | 
    
         
            +
                  # @param context [Item::Context]
         
     | 
| 
      
 28 
     | 
    
         
            +
                  def initialize(value, context)
         
     | 
| 
       23 
29 
     | 
    
         
             
                    @value = value
         
     | 
| 
      
 30 
     | 
    
         
            +
                    @config_url = context.config.url
         
     | 
| 
       24 
31 
     | 
    
         
             
                  end
         
     | 
| 
       25 
32 
     | 
    
         | 
| 
       26 
33 
     | 
    
         
             
                  ##
         
     | 
| 
       27 
34 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       28 
35 
     | 
    
         
             
                  def get
         
     | 
| 
       29 
     | 
    
         
            -
                     
     | 
| 
      
 36 
     | 
    
         
            +
                    Html2rss::Utils.build_absolute_url_from_relative(
         
     | 
| 
      
 37 
     | 
    
         
            +
                      Html2rss::Utils.sanitize_url(@value),
         
     | 
| 
      
 38 
     | 
    
         
            +
                      @config_url
         
     | 
| 
      
 39 
     | 
    
         
            +
                    ).to_s
         
     | 
| 
       30 
40 
     | 
    
         
             
                  end
         
     | 
| 
       31 
41 
     | 
    
         
             
                end
         
     | 
| 
       32 
42 
     | 
    
         
             
              end
         
     | 
| 
         @@ -1,17 +1,21 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'sanitize'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative 'html_transformers/transform_urls_to_absolute_ones'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_relative 'html_transformers/wrap_img_in_a'
         
     | 
| 
       2 
6 
     | 
    
         | 
| 
       3 
7 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       4 
8 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
       5 
9 
     | 
    
         
             
                ##
         
     | 
| 
       6 
10 
     | 
    
         
             
                # Returns sanitized HTML code as String.
         
     | 
| 
       7 
11 
     | 
    
         
             
                #
         
     | 
| 
       8 
     | 
    
         
            -
                # It  
     | 
| 
      
 12 
     | 
    
         
            +
                # It sanitizes by using the [sanitize gem](https://github.com/rgrove/sanitize) with
         
     | 
| 
      
 13 
     | 
    
         
            +
                # [Sanitize::Config::RELAXED](https://github.com/rgrove/sanitize#sanitizeconfigrelaxed).
         
     | 
| 
      
 14 
     | 
    
         
            +
                #
         
     | 
| 
      
 15 
     | 
    
         
            +
                # Furthermore, it adds:
         
     | 
| 
       9 
16 
     | 
    
         
             
                #
         
     | 
| 
       10 
17 
     | 
    
         
             
                # - `rel="nofollow noopener noreferrer"` to <a> tags
         
     | 
| 
       11 
18 
     | 
    
         
             
                # - `referrer-policy='no-referrer'` to <img> tags
         
     | 
| 
       12 
     | 
    
         
            -
                #
         
     | 
| 
       13 
     | 
    
         
            -
                # It also:
         
     | 
| 
       14 
     | 
    
         
            -
                #
         
     | 
| 
       15 
19 
     | 
    
         
             
                # - wraps all <img> tags, whose direct parent is not an <a>, into an <a>
         
     | 
| 
       16 
20 
     | 
    
         
             
                #   linking to the <img>'s `src`.
         
     | 
| 
       17 
21 
     | 
    
         
             
                #
         
     | 
| 
         @@ -35,68 +39,60 @@ module Html2rss 
     | 
|
| 
       35 
39 
     | 
    
         
             
                # Would return:
         
     | 
| 
       36 
40 
     | 
    
         
             
                #    '<p>Lorem <b>ipsum</b> dolor ...</p>'
         
     | 
| 
       37 
41 
     | 
    
         
             
                class SanitizeHtml
         
     | 
| 
       38 
     | 
    
         
            -
                   
     | 
| 
       39 
     | 
    
         
            -
                   
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
      
 42 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 43 
     | 
    
         
            +
                  # @param value [String]
         
     | 
| 
      
 44 
     | 
    
         
            +
                  # @param env [Item::Context]
         
     | 
| 
       41 
45 
     | 
    
         
             
                  def initialize(value, env)
         
     | 
| 
       42 
46 
     | 
    
         
             
                    @value = value
         
     | 
| 
       43 
47 
     | 
    
         
             
                    @channel_url = env[:config].url
         
     | 
| 
       44 
48 
     | 
    
         
             
                  end
         
     | 
| 
       45 
49 
     | 
    
         | 
| 
       46 
50 
     | 
    
         
             
                  ##
         
     | 
| 
       47 
     | 
    
         
            -
                  # - uses the {https://github.com/rgrove/sanitize sanitize gem}
         
     | 
| 
       48 
     | 
    
         
            -
                  # - uses the config {https://github.com/rgrove/sanitize#sanitizeconfigrelaxed Sanitize::Config::RELAXED}
         
     | 
| 
       49 
     | 
    
         
            -
                  # - adds rel="nofollow noopener noreferrer" to a elements
         
     | 
| 
       50 
     | 
    
         
            -
                  # - adds target="_blank" to a elements
         
     | 
| 
       51 
51 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       52 
52 
     | 
    
         
             
                  def get
         
     | 
| 
       53 
     | 
    
         
            -
                    Sanitize.fragment(@value, sanitize_config) 
     | 
| 
      
 53 
     | 
    
         
            +
                    sanitized_html = Sanitize.fragment(@value, sanitize_config)
         
     | 
| 
      
 54 
     | 
    
         
            +
                    sanitized_html.to_s.gsub(/\s+/, ' ').strip
         
     | 
| 
       54 
55 
     | 
    
         
             
                  end
         
     | 
| 
       55 
56 
     | 
    
         | 
| 
       56 
57 
     | 
    
         
             
                  private
         
     | 
| 
       57 
58 
     | 
    
         | 
| 
      
 59 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 60 
     | 
    
         
            +
                  # @return [Sanitize::Config]
         
     | 
| 
       58 
61 
     | 
    
         
             
                  def sanitize_config
         
     | 
| 
       59 
62 
     | 
    
         
             
                    Sanitize::Config.merge(
         
     | 
| 
       60 
63 
     | 
    
         
             
                      Sanitize::Config::RELAXED,
         
     | 
| 
       61 
64 
     | 
    
         
             
                      attributes: { all: %w[dir lang alt title translate] },
         
     | 
| 
       62 
     | 
    
         
            -
                      add_attributes 
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
                         
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
                       
     | 
| 
      
 65 
     | 
    
         
            +
                      add_attributes:,
         
     | 
| 
      
 66 
     | 
    
         
            +
                      transformers: [
         
     | 
| 
      
 67 
     | 
    
         
            +
                        method(:transform_urls_to_absolute_ones),
         
     | 
| 
      
 68 
     | 
    
         
            +
                        method(:wrap_img_in_a)
         
     | 
| 
      
 69 
     | 
    
         
            +
                      ]
         
     | 
| 
       67 
70 
     | 
    
         
             
                    )
         
     | 
| 
       68 
71 
     | 
    
         
             
                  end
         
     | 
| 
       69 
72 
     | 
    
         | 
| 
       70 
     | 
    
         
            -
                  def  
     | 
| 
       71 
     | 
    
         
            -
                     
     | 
| 
       72 
     | 
    
         
            -
                       
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
                      url = env[:node][url_attribute]
         
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
                      return if URI(url).absolute?
         
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
                      absolute_url = Html2rss::Utils.build_absolute_url_from_relative(url, @channel_url)
         
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
                      env[:node][url_attribute] = absolute_url
         
     | 
| 
       82 
     | 
    
         
            -
                    end
         
     | 
| 
      
 73 
     | 
    
         
            +
                  def add_attributes
         
     | 
| 
      
 74 
     | 
    
         
            +
                    {
         
     | 
| 
      
 75 
     | 
    
         
            +
                      'a' => { 'rel' => 'nofollow noopener noreferrer', 'target' => '_blank' },
         
     | 
| 
      
 76 
     | 
    
         
            +
                      'img' => { 'referrer-policy' => 'no-referrer' }
         
     | 
| 
      
 77 
     | 
    
         
            +
                    }
         
     | 
| 
       83 
78 
     | 
    
         
             
                  end
         
     | 
| 
       84 
79 
     | 
    
         | 
| 
       85 
     | 
    
         
            -
                   
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
                      anchor = Nokogiri::XML::Node.new('a', img)
         
     | 
| 
       94 
     | 
    
         
            -
                      anchor[:href] = img[:src]
         
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
                      anchor.add_child img.dup
         
     | 
| 
      
 80 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 81 
     | 
    
         
            +
                  # Wrapper for transform_urls_to_absolute_ones to pass the channel_url.
         
     | 
| 
      
 82 
     | 
    
         
            +
                  #
         
     | 
| 
      
 83 
     | 
    
         
            +
                  # @param env [Hash]
         
     | 
| 
      
 84 
     | 
    
         
            +
                  # @return [nil]
         
     | 
| 
      
 85 
     | 
    
         
            +
                  def transform_urls_to_absolute_ones(env)
         
     | 
| 
      
 86 
     | 
    
         
            +
                    HtmlTransformers::TransformUrlsToAbsoluteOnes.new(@channel_url).call(**env)
         
     | 
| 
      
 87 
     | 
    
         
            +
                  end
         
     | 
| 
       97 
88 
     | 
    
         | 
| 
       98 
     | 
    
         
            -
             
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
      
 89 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 90 
     | 
    
         
            +
                  # Wrapper for wrap_img_in_a.
         
     | 
| 
      
 91 
     | 
    
         
            +
                  #
         
     | 
| 
      
 92 
     | 
    
         
            +
                  # @param env [Hash]
         
     | 
| 
      
 93 
     | 
    
         
            +
                  # @return [nil]
         
     | 
| 
      
 94 
     | 
    
         
            +
                  def wrap_img_in_a(env)
         
     | 
| 
      
 95 
     | 
    
         
            +
                    HtmlTransformers::WrapImgInA.new.call(**env)
         
     | 
| 
       100 
96 
     | 
    
         
             
                  end
         
     | 
| 
       101 
97 
     | 
    
         
             
                end
         
     | 
| 
       102 
98 
     | 
    
         
             
              end
         
     | 
| 
         @@ -1,6 +1,9 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       2 
4 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
       3 
     | 
    
         
            -
                ## 
     | 
| 
      
 5 
     | 
    
         
            +
                ##
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Returns a defined part of a String.
         
     | 
| 
       4 
7 
     | 
    
         
             
                #
         
     | 
| 
       5 
8 
     | 
    
         
             
                # Both parameters must be an Integer and they can be negative.
         
     | 
| 
       6 
9 
     | 
    
         
             
                # The +end+ parameter can be omitted, in that case it will not cut the
         
     | 
| 
         @@ -26,16 +29,23 @@ module Html2rss 
     | 
|
| 
       26 
29 
     | 
    
         
             
                # Would return:
         
     | 
| 
       27 
30 
     | 
    
         
             
                #    'bar'
         
     | 
| 
       28 
31 
     | 
    
         
             
                class Substring
         
     | 
| 
      
 32 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 33 
     | 
    
         
            +
                  # @param value [String] The original string to extract a substring from.
         
     | 
| 
      
 34 
     | 
    
         
            +
                  # @param env [Item::Context] Context object providing additional environment details.
         
     | 
| 
       29 
35 
     | 
    
         
             
                  def initialize(value, env)
         
     | 
| 
       30 
36 
     | 
    
         
             
                    @value = value
         
     | 
| 
       31 
37 
     | 
    
         
             
                    @options = env[:options]
         
     | 
| 
       32 
38 
     | 
    
         
             
                  end
         
     | 
| 
       33 
39 
     | 
    
         | 
| 
       34 
40 
     | 
    
         
             
                  ##
         
     | 
| 
       35 
     | 
    
         
            -
                  #  
     | 
| 
      
 41 
     | 
    
         
            +
                  # Extracts the substring from the original string based on the provided start and end indices.
         
     | 
| 
      
 42 
     | 
    
         
            +
                  #
         
     | 
| 
      
 43 
     | 
    
         
            +
                  # @return [String] The extracted substring.
         
     | 
| 
       36 
44 
     | 
    
         
             
                  def get
         
     | 
| 
       37 
     | 
    
         
            -
                     
     | 
| 
       38 
     | 
    
         
            -
                    @ 
     | 
| 
      
 45 
     | 
    
         
            +
                    start_index = @options[:start].to_i
         
     | 
| 
      
 46 
     | 
    
         
            +
                    end_index = @options[:end]&.to_i || @value.length
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                    @value[start_index..end_index]
         
     | 
| 
       39 
49 
     | 
    
         
             
                  end
         
     | 
| 
       40 
50 
     | 
    
         
             
                end
         
     | 
| 
       41 
51 
     | 
    
         
             
              end
         
     | 
| 
         @@ -1,25 +1,28 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       4 
4 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
       5 
     | 
    
         
            -
                ## 
     | 
| 
      
 5 
     | 
    
         
            +
                ##
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Returns a formatted String according to the string pattern.
         
     | 
| 
       6 
7 
     | 
    
         
             
                #
         
     | 
| 
       7 
8 
     | 
    
         
             
                # If +self+ is used, the selectors extracted value will be used.
         
     | 
| 
       8 
9 
     | 
    
         
             
                # It uses [Kernel#format](https://ruby-doc.org/core/Kernel.html#method-i-format)
         
     | 
| 
       9 
10 
     | 
    
         
             
                #
         
     | 
| 
       10 
11 
     | 
    
         
             
                # Imagine this HTML:
         
     | 
| 
      
 12 
     | 
    
         
            +
                #
         
     | 
| 
       11 
13 
     | 
    
         
             
                #    <li>
         
     | 
| 
       12 
14 
     | 
    
         
             
                #      <h1>Product</h1>
         
     | 
| 
       13 
15 
     | 
    
         
             
                #      <span class="price">23,42€</span>
         
     | 
| 
       14 
16 
     | 
    
         
             
                #    </li>
         
     | 
| 
       15 
17 
     | 
    
         
             
                #
         
     | 
| 
      
 18 
     | 
    
         
            +
                #
         
     | 
| 
       16 
19 
     | 
    
         
             
                # YAML usage example:
         
     | 
| 
       17 
20 
     | 
    
         
             
                #
         
     | 
| 
       18 
21 
     | 
    
         
             
                #    selectors:
         
     | 
| 
       19 
22 
     | 
    
         
             
                #      items:
         
     | 
| 
       20 
23 
     | 
    
         
             
                #        selector: 'li'
         
     | 
| 
       21 
24 
     | 
    
         
             
                #      price:
         
     | 
| 
       22 
     | 
    
         
            -
                # 
     | 
| 
      
 25 
     | 
    
         
            +
                #        selector: '.price'
         
     | 
| 
       23 
26 
     | 
    
         
             
                #      title:
         
     | 
| 
       24 
27 
     | 
    
         
             
                #        selector: h1
         
     | 
| 
       25 
28 
     | 
    
         
             
                #        post_process:
         
     | 
| 
         @@ -29,6 +32,9 @@ module Html2rss 
     | 
|
| 
       29 
32 
     | 
    
         
             
                # Would return:
         
     | 
| 
       30 
33 
     | 
    
         
             
                #    'Product (23,42€)'
         
     | 
| 
       31 
34 
     | 
    
         
             
                class Template
         
     | 
| 
      
 35 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 36 
     | 
    
         
            +
                  # @param value [String]
         
     | 
| 
      
 37 
     | 
    
         
            +
                  # @param env [Item::Context]
         
     | 
| 
       32 
38 
     | 
    
         
             
                  def initialize(value, env)
         
     | 
| 
       33 
39 
     | 
    
         
             
                    @value = value
         
     | 
| 
       34 
40 
     | 
    
         
             
                    @options = env[:options]
         
     | 
| 
         @@ -39,28 +45,46 @@ module Html2rss 
     | 
|
| 
       39 
45 
     | 
    
         
             
                  ##
         
     | 
| 
       40 
46 
     | 
    
         
             
                  # @return [String]
         
     | 
| 
       41 
47 
     | 
    
         
             
                  def get
         
     | 
| 
       42 
     | 
    
         
            -
                     
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
                    names = string.scan(/%[<|{](\w*)[>|}]/)
         
     | 
| 
       45 
     | 
    
         
            -
                    names.flatten!
         
     | 
| 
       46 
     | 
    
         
            -
                    names.compact!
         
     | 
| 
       47 
     | 
    
         
            -
                    names.map!(&:to_sym)
         
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                    format(string, names.map { |name| [name, item_value(name)] }.to_h)
         
     | 
| 
      
 48 
     | 
    
         
            +
                    @options[:methods] ? format_string_with_methods : format_string_with_dynamic_params
         
     | 
| 
       50 
49 
     | 
    
         
             
                  end
         
     | 
| 
       51 
50 
     | 
    
         | 
| 
       52 
51 
     | 
    
         
             
                  private
         
     | 
| 
       53 
52 
     | 
    
         | 
| 
      
 53 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 54 
     | 
    
         
            +
                  # @return [String] the string containing the template
         
     | 
| 
       54 
55 
     | 
    
         
             
                  attr_reader :string
         
     | 
| 
       55 
56 
     | 
    
         | 
| 
      
 57 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 58 
     | 
    
         
            +
                  # @return [Array<String>]
         
     | 
| 
       56 
59 
     | 
    
         
             
                  def methods
         
     | 
| 
       57 
     | 
    
         
            -
                    @methods ||= @options[:methods].map( 
     | 
| 
      
 60 
     | 
    
         
            +
                    @methods ||= @options[:methods].map { |method_name| item_value(method_name) }
         
     | 
| 
       58 
61 
     | 
    
         
             
                  end
         
     | 
| 
       59 
62 
     | 
    
         | 
| 
      
 63 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 64 
     | 
    
         
            +
                  # Formats a string using methods.
         
     | 
| 
      
 65 
     | 
    
         
            +
                  #
         
     | 
| 
      
 66 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
      
 67 
     | 
    
         
            +
                  # @deprecated Use %<id>s formatting instead. Will be removed in version 1.0.0. See README / Dynamic parameters.
         
     | 
| 
       60 
68 
     | 
    
         
             
                  def format_string_with_methods
         
     | 
| 
      
 69 
     | 
    
         
            +
                    warn '[DEPRECATION] This method of using params is deprecated and \
         
     | 
| 
      
 70 
     | 
    
         
            +
                          support for it will be removed in version 1.0.0.\
         
     | 
| 
      
 71 
     | 
    
         
            +
                          Please use dynamic parameters (i.e. %<id>s, see README.md) instead.'
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
       61 
73 
     | 
    
         
             
                    string % methods
         
     | 
| 
       62 
74 
     | 
    
         
             
                  end
         
     | 
| 
       63 
75 
     | 
    
         | 
| 
      
 76 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 77 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
      
 78 
     | 
    
         
            +
                  def format_string_with_dynamic_params
         
     | 
| 
      
 79 
     | 
    
         
            +
                    param_names = string.scan(/%[<|{](\w*)[>|}]/)
         
     | 
| 
      
 80 
     | 
    
         
            +
                    param_names.flatten!
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
                    format(string, param_names.to_h { |name| [name.to_sym, item_value(name)] })
         
     | 
| 
      
 83 
     | 
    
         
            +
                  end
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                  ##
         
     | 
| 
      
 86 
     | 
    
         
            +
                  # @param method_name [String, Symbol]
         
     | 
| 
      
 87 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
       64 
88 
     | 
    
         
             
                  def item_value(method_name)
         
     | 
| 
       65 
89 
     | 
    
         
             
                    method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
         
     | 
| 
       66 
90 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -1,13 +1,36 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            module Html2rss
         
     | 
| 
       2 
4 
     | 
    
         
             
              ##
         
     | 
| 
       3 
5 
     | 
    
         
             
              # Provides a namespace for attribute post processors.
         
     | 
| 
       4 
6 
     | 
    
         
             
              module AttributePostProcessors
         
     | 
| 
       5 
     | 
    
         
            -
                 
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
                  end
         
     | 
| 
      
 7 
     | 
    
         
            +
                ##
         
     | 
| 
      
 8 
     | 
    
         
            +
                # Error raised when an unknown post processor name is requested.
         
     | 
| 
      
 9 
     | 
    
         
            +
                class UnknownPostProcessorName < StandardError; end
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
                ##
         
     | 
| 
      
 12 
     | 
    
         
            +
                # Maps the post processor name to the class implementing the post processor.
         
     | 
| 
      
 13 
     | 
    
         
            +
                #
         
     | 
| 
      
 14 
     | 
    
         
            +
                # The key is the name to use in the feed config.
         
     | 
| 
      
 15 
     | 
    
         
            +
                NAME_TO_CLASS = {
         
     | 
| 
      
 16 
     | 
    
         
            +
                  gsub: Gsub,
         
     | 
| 
      
 17 
     | 
    
         
            +
                  html_to_markdown: HtmlToMarkdown,
         
     | 
| 
      
 18 
     | 
    
         
            +
                  markdown_to_html: MarkdownToHtml,
         
     | 
| 
      
 19 
     | 
    
         
            +
                  parse_time: ParseTime,
         
     | 
| 
      
 20 
     | 
    
         
            +
                  parse_uri: ParseUri,
         
     | 
| 
      
 21 
     | 
    
         
            +
                  sanitize_html: SanitizeHtml,
         
     | 
| 
      
 22 
     | 
    
         
            +
                  substring: Substring,
         
     | 
| 
      
 23 
     | 
    
         
            +
                  template: Template
         
     | 
| 
      
 24 
     | 
    
         
            +
                }.freeze
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                ##
         
     | 
| 
      
 27 
     | 
    
         
            +
                # Retrieves the attribute post processor class based on the given name.
         
     | 
| 
      
 28 
     | 
    
         
            +
                #
         
     | 
| 
      
 29 
     | 
    
         
            +
                # @param name [Symbol] The name of the post processor.
         
     | 
| 
      
 30 
     | 
    
         
            +
                # @return [Class] The attribute post processor class.
         
     | 
| 
      
 31 
     | 
    
         
            +
                # @raise [UnknownPostProcessorName] If the requested name is not found in NAME_TO_CLASS.
         
     | 
| 
      
 32 
     | 
    
         
            +
                def self.get_processor(name)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  NAME_TO_CLASS[name.to_sym] || raise(UnknownPostProcessorName, "Can't find a post processor named '#{name}'")
         
     | 
| 
       11 
34 
     | 
    
         
             
                end
         
     | 
| 
       12 
35 
     | 
    
         
             
              end
         
     | 
| 
       13 
36 
     | 
    
         
             
            end
         
     | 
    
        data/lib/html2rss/cli.rb
    ADDED
    
    | 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative '../html2rss'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'thor'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module Html2rss
         
     | 
| 
      
 7 
     | 
    
         
            +
              ##
         
     | 
| 
      
 8 
     | 
    
         
            +
              # The Html2rss command line interface.
         
     | 
| 
      
 9 
     | 
    
         
            +
              class CLI < Thor
         
     | 
| 
      
 10 
     | 
    
         
            +
                def self.exit_on_failure?
         
     | 
| 
      
 11 
     | 
    
         
            +
                  true
         
     | 
| 
      
 12 
     | 
    
         
            +
                end
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
                desc 'feed YAML_FILE [FEED_NAME] [param=value ...]', 'Print RSS built from the YAML_FILE file to stdout'
         
     | 
| 
      
 15 
     | 
    
         
            +
                ##
         
     | 
| 
      
 16 
     | 
    
         
            +
                # Prints the feed to STDOUT.
         
     | 
| 
      
 17 
     | 
    
         
            +
                #
         
     | 
| 
      
 18 
     | 
    
         
            +
                # @param yaml_file [String] Path to the YAML configuration file.
         
     | 
| 
      
 19 
     | 
    
         
            +
                # @param options [Array<String>] Additional options including feed name and parameters.
         
     | 
| 
      
 20 
     | 
    
         
            +
                # @return [nil]
         
     | 
| 
      
 21 
     | 
    
         
            +
                def feed(yaml_file, *options)
         
     | 
| 
      
 22 
     | 
    
         
            +
                  raise "File '#{yaml_file}' does not exist" unless File.exist?(yaml_file)
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                  feed_name = options.shift
         
     | 
| 
      
 25 
     | 
    
         
            +
                  params = options.to_h { |opt| opt.split('=', 2) }
         
     | 
| 
      
 26 
     | 
    
         
            +
                  puts Html2rss.feed_from_yaml_config(yaml_file, feed_name, params:)
         
     | 
| 
      
 27 
     | 
    
         
            +
                end
         
     | 
| 
      
 28 
     | 
    
         
            +
              end
         
     | 
| 
      
 29 
     | 
    
         
            +
            end
         
     |