html2rss 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c8df87caea7cc1dd9855993e9dd4fcb4285b37a5c1f87fb48a3d85e75ce0a51
4
- data.tar.gz: 8ab432f13d76f45e30ea65316669c324bcba46b5234a082a6f0670cc445a6958
3
+ metadata.gz: ebe536d8051a64c6e2adf9fa8e1d9d1f9fa3743541c44ca85022d0603f9032b2
4
+ data.tar.gz: 7b3aaa213aaf6a37fb6e94fa72c9936ffd2391322297553b253b097edea300cc
5
5
  SHA512:
6
- metadata.gz: 7115ee36e47ac65d19b13bb0f2ef0b19ebdb387d2e6fa53128e804b8d22d6a834517b88642f5c3e31e8850e344a2f94b6cb0de6619504603cfeacae52e47f3e2
7
- data.tar.gz: b2c9f342cff237d9cafd4817f2570d18803fa9ba844ab306681ac02d12fb7cf147f3b40117ab21c78937325d80623ad27e25c6df4c8f8150fe81be2a07d12817
6
+ metadata.gz: '03985002d050b996c1dc315cbe8e3fc79b6619447a048ad3d2dca86f792eab5c2356716cf6198a24efc61de7e7ddceba2780da49c3e68a3c9efe895eb7cf0cf1'
7
+ data.tar.gz: 8315473528f46a5ba28297af296b879a66ac00f86ba9eb117b4e6c9ec61c285e4090cfd999ff712368f5b988b1cbda460e268aa3ea8928912bcdb1960ae25a4a
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ ##
5
+ # Provides a namespace for attribute post processors.
6
+ module AttributePostProcessors
7
+ ##
8
+ # All post processors must inherit from this base class and implement `self.validate_args!` and `#get`.
9
+ class Base
10
+ # Validates the presence of required options in the context
11
+ #
12
+ # @param keys [Array<Symbol>] the keys to check for presence
13
+ # @param context [Hash] the context containing options
14
+ # @raise [MissingOption] if any key is missing
15
+ def self.expect_options(keys, context)
16
+ keys.each do |key|
17
+ unless (options = context[:options]).key?(key)
18
+ raise MissingOption, "The `#{key}` option is missing in: #{options.inspect}", [],
19
+ cause: nil
20
+ end
21
+ end
22
+ end
23
+
24
+ # Asserts that the value is of the expected type(s)
25
+ #
26
+ # @param value [Object] the value to check
27
+ # @param types [Array<Class>, Class] the expected type(s)
28
+ # @param name [String] the name of the option being checked
29
+ # @raise [InvalidType] if the value is not of the expected type(s)
30
+ def self.assert_type(value, types = [], name)
31
+ types = [types] unless types.is_a?(Array)
32
+
33
+ return if types.any? { |type| value.is_a?(type) }
34
+
35
+ error_message_template = 'The type of `%s` must be %s, but is: %s'
36
+ raise InvalidType, format(error_message_template, name, types.join(' or '), value.class), [], cause: nil
37
+ end
38
+
39
+ # private_class_method :expect_options, :assert_type
40
+
41
+ ##
42
+ # This method validates the arguments passed to the post processor. Must be implemented by subclasses.
43
+ def self.validate_args!(_value, _context)
44
+ raise NotImplementedError, 'You must implement the `validate_args!` method in the post processor'
45
+ end
46
+
47
+ # Initializes the post processor
48
+ #
49
+ # @param value [Object] the value to be processed
50
+ # @param context [Item::Context] the context
51
+ def initialize(value, context)
52
+ klass = self.class
53
+ # TODO: get rid of Hash
54
+ klass.assert_type(context, [Item::Context, Hash], 'context')
55
+ klass.validate_args!(value, context)
56
+
57
+ @value = value
58
+ @context = context
59
+ end
60
+
61
+ attr_reader :value, :context
62
+
63
+ # Abstract method to be implemented by subclasses
64
+ #
65
+ # @raise [NotImplementedError] if not implemented in subclass
66
+ def get
67
+ raise NotImplementedError, 'You must implement the `get` method in the post processor'
68
+ end
69
+ end
70
+ end
71
+ end
@@ -25,39 +25,39 @@ module Html2rss
25
25
  # `replacement` can be a String or a Hash.
26
26
  #
27
27
  # See the doc on [String#gsub](https://ruby-doc.org/core/String.html#method-i-gsub) for more info.
28
- class Gsub
28
+ class Gsub < Base
29
+ def self.validate_args!(value, context)
30
+ assert_type value, String, :value
31
+ expect_options(%i[replacement pattern], context)
32
+ assert_type context.dig(:options, :replacement), [String, Hash], :replacement
33
+ end
34
+
29
35
  ##
30
36
  # @param value [String]
31
37
  # @param context [Item::Context]
32
38
  def initialize(value, context)
33
- @value = value
34
- @options = context[:options]
39
+ super
40
+
41
+ options = context[:options]
42
+
43
+ @replacement = options[:replacement]
44
+ @pattern = options[:pattern]
35
45
  end
36
46
 
37
47
  ##
38
48
  # @return [String]
39
49
  def get
40
- @value.to_s.gsub(pattern, replacement)
50
+ value.to_s.gsub(pattern, replacement)
41
51
  end
42
52
 
43
53
  private
44
54
 
55
+ attr_accessor :replacement
56
+
45
57
  ##
46
58
  # @return [Regexp]
47
59
  def pattern
48
- pattern = @options[:pattern]
49
- raise ArgumentError, 'The `pattern` option is missing' unless pattern
50
-
51
- pattern.is_a?(String) ? Utils.build_regexp_from_string(pattern) : pattern
52
- end
53
-
54
- ##
55
- # @return [Hash, String]
56
- def replacement
57
- replacement = @options[:replacement]
58
- return replacement if replacement.is_a?(String) || replacement.is_a?(Hash)
59
-
60
- raise ArgumentError, 'The `replacement` option must be a String or Hash'
60
+ @pattern.is_a?(String) ? Utils.build_regexp_from_string(@pattern) : @pattern
61
61
  end
62
62
  end
63
63
  end
@@ -26,18 +26,17 @@ module Html2rss
26
26
  #
27
27
  # Would return:
28
28
  # 'Lorem **ipsum** dolor'
29
- class HtmlToMarkdown
30
- ##
31
- # @param value [String]
32
- # @param env [Item::Context]
33
- def initialize(value, env)
34
- @sanitized_value = SanitizeHtml.new(value, env).get
29
+ class HtmlToMarkdown < Base
30
+ def self.validate_args!(value, _context)
31
+ assert_type value, String, :value
35
32
  end
36
33
 
37
34
  ##
38
35
  # @return [String] formatted in Markdown
39
36
  def get
40
- ReverseMarkdown.convert(@sanitized_value)
37
+ sanitized_value = SanitizeHtml.new(value, context).get
38
+
39
+ ReverseMarkdown.convert(sanitized_value)
41
40
  end
42
41
  end
43
42
  end
@@ -32,13 +32,9 @@ module Html2rss
32
32
  # <h1>Section</h1>
33
33
  #
34
34
  # <p>Price: 12.34</p>
35
- class MarkdownToHtml
36
- ##
37
- # @param value [String] Markdown content to convert to HTML
38
- # @param env [Item::Context] Context object providing additional environment details
39
- def initialize(value, env)
40
- @value = value
41
- @env = env
35
+ class MarkdownToHtml < Base
36
+ def self.validate_args!(value, _context)
37
+ assert_type value, String, :value
42
38
  end
43
39
 
44
40
  ##
@@ -46,8 +42,8 @@ module Html2rss
46
42
  #
47
43
  # @return [String] Sanitized HTML content
48
44
  def get
49
- html_content = Kramdown::Document.new(@value).to_html
50
- SanitizeHtml.new(html_content, @env).get
45
+ html_content = Kramdown::Document.new(value).to_html
46
+ SanitizeHtml.new(html_content, context).get
51
47
  end
52
48
  end
53
49
  end
@@ -24,22 +24,22 @@ module Html2rss
24
24
  # Would return:
25
25
  # "Tue, 02 Jul 2019 00:00:00 +0200"
26
26
  #
27
- # It uses {https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse Time.parse}.
28
- class ParseTime
29
- ##
30
- # @param value [String] the time to parse
31
- # @param env [Item::Context] Context object providing additional environment details
32
- def initialize(value, env)
33
- @value = value.to_s
34
- @time_zone = env[:config].time_zone
27
+ # It uses `Time.parse`.
28
+ class ParseTime < Base
29
+ def self.validate_args!(value, context)
30
+ assert_type value, String, :value
31
+ assert_type context[:config].time_zone, String, :time_zone
35
32
  end
36
33
 
37
34
  ##
38
- # Converts the provided time string to RFC822 format, taking into account the configured time zone.
35
+ # Converts the provided time string to RFC822 format, taking into account the time_zone.
39
36
  #
40
37
  # @return [String] RFC822 formatted time
38
+ # @raise [TZInfo::InvalidTimezoneIdentifier] if the configured time zone is invalid
41
39
  def get
42
- Utils.use_zone(@time_zone) { Time.parse(@value).rfc822 }
40
+ time_zone = context[:config].time_zone
41
+
42
+ Utils.use_zone(time_zone) { Time.parse(value).rfc822 }
43
43
  end
44
44
  end
45
45
  end
@@ -21,21 +21,24 @@ module Html2rss
21
21
  #
22
22
  # Would return:
23
23
  # 'http://why-not-use-a-link.uh'
24
- class ParseUri
25
- ##
26
- # @param value [String]
27
- # @param context [Item::Context]
28
- def initialize(value, context)
29
- @value = value
30
- @config_url = context.config.url
24
+ class ParseUri < Base
25
+ def self.validate_args!(value, context)
26
+ url_types = [String, URI::HTTP, Addressable::URI].freeze
27
+
28
+ assert_type(value, url_types, :value)
29
+ assert_type(context.config.url, url_types, :url)
30
+
31
+ raise ArgumentError, 'The `value` option is missing or empty.' if value.to_s.empty?
31
32
  end
32
33
 
33
34
  ##
34
35
  # @return [String]
35
36
  def get
37
+ config_url = context.config.url
38
+
36
39
  Html2rss::Utils.build_absolute_url_from_relative(
37
- Html2rss::Utils.sanitize_url(@value),
38
- @config_url
40
+ Html2rss::Utils.sanitize_url(value),
41
+ config_url
39
42
  ).to_s
40
43
  end
41
44
  end
@@ -38,19 +38,15 @@ module Html2rss
38
38
  #
39
39
  # Would return:
40
40
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
41
- class SanitizeHtml
42
- ##
43
- # @param value [String]
44
- # @param env [Item::Context]
45
- def initialize(value, env)
46
- @value = value
47
- @channel_url = env[:config].url
41
+ class SanitizeHtml < Base
42
+ def self.validate_args!(value, _context)
43
+ assert_type value, String, :value
48
44
  end
49
45
 
50
46
  ##
51
47
  # @return [String]
52
48
  def get
53
- sanitized_html = Sanitize.fragment(@value, sanitize_config)
49
+ sanitized_html = Sanitize.fragment(value, sanitize_config)
54
50
  sanitized_html.to_s.gsub(/\s+/, ' ').strip
55
51
  end
56
52
 
@@ -77,13 +73,15 @@ module Html2rss
77
73
  }
78
74
  end
79
75
 
76
+ def channel_url = context[:config].url
77
+
80
78
  ##
81
79
  # Wrapper for transform_urls_to_absolute_ones to pass the channel_url.
82
80
  #
83
81
  # @param env [Hash]
84
82
  # @return [nil]
85
83
  def transform_urls_to_absolute_ones(env)
86
- HtmlTransformers::TransformUrlsToAbsoluteOnes.new(@channel_url).call(**env)
84
+ HtmlTransformers::TransformUrlsToAbsoluteOnes.new(channel_url).call(**env)
87
85
  end
88
86
 
89
87
  ##
@@ -28,13 +28,15 @@ module Html2rss
28
28
  #
29
29
  # Would return:
30
30
  # 'bar'
31
- class Substring
32
- ##
33
- # @param value [String] The original string to extract a substring from.
34
- # @param env [Item::Context] Context object providing additional environment details.
35
- def initialize(value, env)
36
- @value = value
37
- @options = env[:options]
31
+ class Substring < Base
32
+ def self.validate_args!(value, context)
33
+ assert_type value, String, :value
34
+
35
+ options = context[:options]
36
+ assert_type options[:start], Integer, :start
37
+
38
+ end_index = options[:end]
39
+ assert_type end_index, Integer, :end if end_index
38
40
  end
39
41
 
40
42
  ##
@@ -42,11 +44,29 @@ module Html2rss
42
44
  #
43
45
  # @return [String] The extracted substring.
44
46
  def get
45
- start_index = @options[:start].to_i
46
- end_index = @options[:end]&.to_i || @value.length
47
+ value[range]
48
+ end
49
+
50
+ ##
51
+ # Determines the range for the substring extraction based on the provided start and end indices.
52
+ #
53
+ # @return [Range] The range object representing the start and end/Infinity (integers).
54
+ def range
55
+ return (start_index..) unless end_index?
56
+
57
+ if start_index == end_index
58
+ raise ArgumentError,
59
+ 'The `start` value must be unequal to the `end` value.'
60
+ end
47
61
 
48
- @value[start_index..end_index]
62
+ (start_index..end_index)
49
63
  end
64
+
65
+ private
66
+
67
+ def end_index? = !context[:options][:end].to_s.empty?
68
+ def end_index = context[:options][:end].to_i
69
+ def start_index = context[:options][:start].to_i
50
70
  end
51
71
  end
52
72
  end
@@ -31,15 +31,23 @@ module Html2rss
31
31
  #
32
32
  # Would return:
33
33
  # 'Product (23,42€)'
34
- class Template
34
+ class Template < Base
35
+ def self.validate_args!(value, context)
36
+ assert_type value, String, :value
37
+
38
+ string = context[:options]&.dig(:string).to_s
39
+ raise InvalidType, 'The `string` template is absent.' if string.empty?
40
+ end
41
+
35
42
  ##
36
43
  # @param value [String]
37
- # @param env [Item::Context]
38
- def initialize(value, env)
39
- @value = value
40
- @options = env[:options]
41
- @item = env[:item]
42
- @string = @options[:string]
44
+ # @param context [Item::Context]
45
+ def initialize(value, context)
46
+ super
47
+
48
+ @options = context[:options] || {}
49
+ @item = context[:item]
50
+ @string = @options[:string].to_s
43
51
  end
44
52
 
45
53
  ##
@@ -86,7 +94,7 @@ module Html2rss
86
94
  # @param method_name [String, Symbol]
87
95
  # @return [String]
88
96
  def item_value(method_name)
89
- method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
97
+ method_name.to_sym == :self ? value : @item.public_send(method_name).to_s
90
98
  end
91
99
  end
92
100
  end
@@ -8,6 +8,14 @@ module Html2rss
8
8
  # Error raised when an unknown post processor name is requested.
9
9
  class UnknownPostProcessorName < Html2rss::Error; end
10
10
 
11
+ ##
12
+ # Error raised when a required option is missing.
13
+ class MissingOption < Html2rss::Error; end
14
+
15
+ ##
16
+ # Error raised when an invalid type is provided.
17
+ class InvalidType < Html2rss::Error; end
18
+
11
19
  ##
12
20
  # Maps the post processor name to the class implementing the post processor.
13
21
  #
@@ -10,6 +10,9 @@ module Html2rss
10
10
  # Struct to represent a selector with associated attributes for extraction and processing.
11
11
  Selector = Struct.new(:selector, :attribute, :extractor, :post_process, :order, :static, keyword_init: true)
12
12
 
13
+ # raised when an invalid selector name is used
14
+ class InvalidSelectorName < Html2rss::Error; end
15
+
13
16
  ##
14
17
  # @param config [Hash<Symbol, Object>]
15
18
  def initialize(config)
@@ -28,9 +31,15 @@ module Html2rss
28
31
  # @param name [Symbol]
29
32
  # @return [Selector]
30
33
  def selector(name)
31
- raise ArgumentError, "invalid item's selector name: #{name}" unless selector?(name)
34
+ raise InvalidSelectorName, "invalid selector name: #{name}" unless selector?(name)
35
+
36
+ keywords = config[name].slice(*available_keys)
32
37
 
33
- Selector.new(config[name])
38
+ if (additional_keys = available_keys - keywords.keys).any?
39
+ warn "additional keys (#{additional_keys.join(', ')}) present in selector #{name}"
40
+ end
41
+
42
+ Selector.new(keywords)
34
43
  end
35
44
 
36
45
  ##
@@ -86,6 +95,8 @@ module Html2rss
86
95
  array.map!(&:to_sym)
87
96
  end.to_set
88
97
  end
98
+
99
+ def available_keys = @available_keys ||= Selector.members
89
100
  end
90
101
  end
91
102
  end
@@ -3,6 +3,6 @@
3
3
  ##
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
- VERSION = '0.11.0'
6
+ VERSION = '0.12.0'
7
7
  public_constant :VERSION
8
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-09 00:00:00.000000000 Z
11
+ date: 2024-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -219,6 +219,7 @@ files:
219
219
  - html2rss.gemspec
220
220
  - lib/html2rss.rb
221
221
  - lib/html2rss/attribute_post_processors.rb
222
+ - lib/html2rss/attribute_post_processors/base.rb
222
223
  - lib/html2rss/attribute_post_processors/gsub.rb
223
224
  - lib/html2rss/attribute_post_processors/html_to_markdown.rb
224
225
  - lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
@@ -252,7 +253,7 @@ licenses:
252
253
  - MIT
253
254
  metadata:
254
255
  allowed_push_host: https://rubygems.org
255
- changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.11.0
256
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.12.0
256
257
  rubygems_mfa_required: 'true'
257
258
  post_install_message:
258
259
  rdoc_options: []