html2rss 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c8df87caea7cc1dd9855993e9dd4fcb4285b37a5c1f87fb48a3d85e75ce0a51
4
- data.tar.gz: 8ab432f13d76f45e30ea65316669c324bcba46b5234a082a6f0670cc445a6958
3
+ metadata.gz: ebe536d8051a64c6e2adf9fa8e1d9d1f9fa3743541c44ca85022d0603f9032b2
4
+ data.tar.gz: 7b3aaa213aaf6a37fb6e94fa72c9936ffd2391322297553b253b097edea300cc
5
5
  SHA512:
6
- metadata.gz: 7115ee36e47ac65d19b13bb0f2ef0b19ebdb387d2e6fa53128e804b8d22d6a834517b88642f5c3e31e8850e344a2f94b6cb0de6619504603cfeacae52e47f3e2
7
- data.tar.gz: b2c9f342cff237d9cafd4817f2570d18803fa9ba844ab306681ac02d12fb7cf147f3b40117ab21c78937325d80623ad27e25c6df4c8f8150fe81be2a07d12817
6
+ metadata.gz: '03985002d050b996c1dc315cbe8e3fc79b6619447a048ad3d2dca86f792eab5c2356716cf6198a24efc61de7e7ddceba2780da49c3e68a3c9efe895eb7cf0cf1'
7
+ data.tar.gz: 8315473528f46a5ba28297af296b879a66ac00f86ba9eb117b4e6c9ec61c285e4090cfd999ff712368f5b988b1cbda460e268aa3ea8928912bcdb1960ae25a4a
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ ##
5
+ # Provides a namespace for attribute post processors.
6
+ module AttributePostProcessors
7
+ ##
8
+ # All post processors must inherit from this base class and implement `self.validate_args!` and `#get`.
9
+ class Base
10
+ # Validates the presence of required options in the context
11
+ #
12
+ # @param keys [Array<Symbol>] the keys to check for presence
13
+ # @param context [Hash] the context containing options
14
+ # @raise [MissingOption] if any key is missing
15
+ def self.expect_options(keys, context)
16
+ keys.each do |key|
17
+ unless (options = context[:options]).key?(key)
18
+ raise MissingOption, "The `#{key}` option is missing in: #{options.inspect}", [],
19
+ cause: nil
20
+ end
21
+ end
22
+ end
23
+
24
+ # Asserts that the value is of the expected type(s)
25
+ #
26
+ # @param value [Object] the value to check
27
+ # @param types [Array<Class>, Class] the expected type(s)
28
+ # @param name [String, Symbol] the name of the option being checked (used in the error message)
29
+ # @raise [InvalidType] if the value is not of the expected type(s)
30
+ def self.assert_type(value, types = [], name)
31
+ types = [types] unless types.is_a?(Array)
32
+
33
+ return if types.any? { |type| value.is_a?(type) }
34
+
35
+ error_message_template = 'The type of `%s` must be %s, but is: %s'
36
+ raise InvalidType, format(error_message_template, name, types.join(' or '), value.class), [], cause: nil
37
+ end
38
+
39
+ # private_class_method :expect_options, :assert_type
40
+
41
+ ##
42
+ # This method validates the arguments passed to the post processor. Must be implemented by subclasses.
43
+ def self.validate_args!(_value, _context)
44
+ raise NotImplementedError, 'You must implement the `validate_args!` method in the post processor'
45
+ end
46
+
47
+ # Initializes the post processor
48
+ #
49
+ # @param value [Object] the value to be processed
50
+ # @param context [Item::Context, Hash] the context (a Hash is still accepted for now)
51
+ def initialize(value, context)
52
+ klass = self.class
53
+ # TODO: get rid of Hash
54
+ klass.assert_type(context, [Item::Context, Hash], 'context')
55
+ klass.validate_args!(value, context)
56
+
57
+ @value = value
58
+ @context = context
59
+ end
60
+
61
+ attr_reader :value, :context
62
+
63
+ # Abstract method to be implemented by subclasses
64
+ #
65
+ # @raise [NotImplementedError] if not implemented in subclass
66
+ def get
67
+ raise NotImplementedError, 'You must implement the `get` method in the post processor'
68
+ end
69
+ end
70
+ end
71
+ end
@@ -25,39 +25,39 @@ module Html2rss
25
25
  # `replacement` can be a String or a Hash.
26
26
  #
27
27
  # See the doc on [String#gsub](https://ruby-doc.org/core/String.html#method-i-gsub) for more info.
28
- class Gsub
28
+ class Gsub < Base
29
+ def self.validate_args!(value, context)
30
+ assert_type value, String, :value
31
+ expect_options(%i[replacement pattern], context)
32
+ assert_type context.dig(:options, :replacement), [String, Hash], :replacement
33
+ end
34
+
29
35
  ##
30
36
  # @param value [String]
31
37
  # @param context [Item::Context]
32
38
  def initialize(value, context)
33
- @value = value
34
- @options = context[:options]
39
+ super
40
+
41
+ options = context[:options]
42
+
43
+ @replacement = options[:replacement]
44
+ @pattern = options[:pattern]
35
45
  end
36
46
 
37
47
  ##
38
48
  # @return [String]
39
49
  def get
40
- @value.to_s.gsub(pattern, replacement)
50
+ value.to_s.gsub(pattern, replacement)
41
51
  end
42
52
 
43
53
  private
44
54
 
55
+ attr_accessor :replacement
56
+
45
57
  ##
46
58
  # @return [Regexp]
47
59
  def pattern
48
- pattern = @options[:pattern]
49
- raise ArgumentError, 'The `pattern` option is missing' unless pattern
50
-
51
- pattern.is_a?(String) ? Utils.build_regexp_from_string(pattern) : pattern
52
- end
53
-
54
- ##
55
- # @return [Hash, String]
56
- def replacement
57
- replacement = @options[:replacement]
58
- return replacement if replacement.is_a?(String) || replacement.is_a?(Hash)
59
-
60
- raise ArgumentError, 'The `replacement` option must be a String or Hash'
60
+ @pattern.is_a?(String) ? Utils.build_regexp_from_string(@pattern) : @pattern
61
61
  end
62
62
  end
63
63
  end
@@ -26,18 +26,17 @@ module Html2rss
26
26
  #
27
27
  # Would return:
28
28
  # 'Lorem **ipsum** dolor'
29
- class HtmlToMarkdown
30
- ##
31
- # @param value [String]
32
- # @param env [Item::Context]
33
- def initialize(value, env)
34
- @sanitized_value = SanitizeHtml.new(value, env).get
29
+ class HtmlToMarkdown < Base
30
+ def self.validate_args!(value, _context)
31
+ assert_type value, String, :value
35
32
  end
36
33
 
37
34
  ##
38
35
  # @return [String] formatted in Markdown
39
36
  def get
40
- ReverseMarkdown.convert(@sanitized_value)
37
+ sanitized_value = SanitizeHtml.new(value, context).get
38
+
39
+ ReverseMarkdown.convert(sanitized_value)
41
40
  end
42
41
  end
43
42
  end
@@ -32,13 +32,9 @@ module Html2rss
32
32
  # <h1>Section</h1>
33
33
  #
34
34
  # <p>Price: 12.34</p>
35
- class MarkdownToHtml
36
- ##
37
- # @param value [String] Markdown content to convert to HTML
38
- # @param env [Item::Context] Context object providing additional environment details
39
- def initialize(value, env)
40
- @value = value
41
- @env = env
35
+ class MarkdownToHtml < Base
36
+ def self.validate_args!(value, _context)
37
+ assert_type value, String, :value
42
38
  end
43
39
 
44
40
  ##
@@ -46,8 +42,8 @@ module Html2rss
46
42
  #
47
43
  # @return [String] Sanitized HTML content
48
44
  def get
49
- html_content = Kramdown::Document.new(@value).to_html
50
- SanitizeHtml.new(html_content, @env).get
45
+ html_content = Kramdown::Document.new(value).to_html
46
+ SanitizeHtml.new(html_content, context).get
51
47
  end
52
48
  end
53
49
  end
@@ -24,22 +24,22 @@ module Html2rss
24
24
  # Would return:
25
25
  # "Tue, 02 Jul 2019 00:00:00 +0200"
26
26
  #
27
- # It uses {https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse Time.parse}.
28
- class ParseTime
29
- ##
30
- # @param value [String] the time to parse
31
- # @param env [Item::Context] Context object providing additional environment details
32
- def initialize(value, env)
33
- @value = value.to_s
34
- @time_zone = env[:config].time_zone
27
+ # It uses `Time.parse`.
28
+ class ParseTime < Base
29
+ def self.validate_args!(value, context)
30
+ assert_type value, String, :value
31
+ assert_type context[:config].time_zone, String, :time_zone
35
32
  end
36
33
 
37
34
  ##
38
- # Converts the provided time string to RFC822 format, taking into account the configured time zone.
35
+ # Converts the provided time string to RFC822 format, taking into account the time_zone.
39
36
  #
40
37
  # @return [String] RFC822 formatted time
38
+ # @raise [TZInfo::InvalidTimezoneIdentifier] if the configured time zone is invalid
41
39
  def get
42
- Utils.use_zone(@time_zone) { Time.parse(@value).rfc822 }
40
+ time_zone = context[:config].time_zone
41
+
42
+ Utils.use_zone(time_zone) { Time.parse(value).rfc822 }
43
43
  end
44
44
  end
45
45
  end
@@ -21,21 +21,24 @@ module Html2rss
21
21
  #
22
22
  # Would return:
23
23
  # 'http://why-not-use-a-link.uh'
24
- class ParseUri
25
- ##
26
- # @param value [String]
27
- # @param context [Item::Context]
28
- def initialize(value, context)
29
- @value = value
30
- @config_url = context.config.url
24
+ class ParseUri < Base
25
+ def self.validate_args!(value, context)
26
+ url_types = [String, URI::HTTP, Addressable::URI].freeze
27
+
28
+ assert_type(value, url_types, :value)
29
+ assert_type(context.config.url, url_types, :url)
30
+
31
+ raise ArgumentError, 'The `value` option is missing or empty.' if value.to_s.empty?
31
32
  end
32
33
 
33
34
  ##
34
35
  # @return [String]
35
36
  def get
37
+ config_url = context.config.url
38
+
36
39
  Html2rss::Utils.build_absolute_url_from_relative(
37
- Html2rss::Utils.sanitize_url(@value),
38
- @config_url
40
+ Html2rss::Utils.sanitize_url(value),
41
+ config_url
39
42
  ).to_s
40
43
  end
41
44
  end
@@ -38,19 +38,15 @@ module Html2rss
38
38
  #
39
39
  # Would return:
40
40
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
41
- class SanitizeHtml
42
- ##
43
- # @param value [String]
44
- # @param env [Item::Context]
45
- def initialize(value, env)
46
- @value = value
47
- @channel_url = env[:config].url
41
+ class SanitizeHtml < Base
42
+ def self.validate_args!(value, _context)
43
+ assert_type value, String, :value
48
44
  end
49
45
 
50
46
  ##
51
47
  # @return [String]
52
48
  def get
53
- sanitized_html = Sanitize.fragment(@value, sanitize_config)
49
+ sanitized_html = Sanitize.fragment(value, sanitize_config)
54
50
  sanitized_html.to_s.gsub(/\s+/, ' ').strip
55
51
  end
56
52
 
@@ -77,13 +73,15 @@ module Html2rss
77
73
  }
78
74
  end
79
75
 
76
+ def channel_url = context[:config].url
77
+
80
78
  ##
81
79
  # Wrapper for transform_urls_to_absolute_ones to pass the channel_url.
82
80
  #
83
81
  # @param env [Hash]
84
82
  # @return [nil]
85
83
  def transform_urls_to_absolute_ones(env)
86
- HtmlTransformers::TransformUrlsToAbsoluteOnes.new(@channel_url).call(**env)
84
+ HtmlTransformers::TransformUrlsToAbsoluteOnes.new(channel_url).call(**env)
87
85
  end
88
86
 
89
87
  ##
@@ -28,13 +28,15 @@ module Html2rss
28
28
  #
29
29
  # Would return:
30
30
  # 'bar'
31
- class Substring
32
- ##
33
- # @param value [String] The original string to extract a substring from.
34
- # @param env [Item::Context] Context object providing additional environment details.
35
- def initialize(value, env)
36
- @value = value
37
- @options = env[:options]
31
+ class Substring < Base
32
+ def self.validate_args!(value, context)
33
+ assert_type value, String, :value
34
+
35
+ options = context[:options]
36
+ assert_type options[:start], Integer, :start
37
+
38
+ end_index = options[:end]
39
+ assert_type end_index, Integer, :end if end_index
38
40
  end
39
41
 
40
42
  ##
@@ -42,11 +44,29 @@ module Html2rss
42
44
  #
43
45
  # @return [String] The extracted substring.
44
46
  def get
45
- start_index = @options[:start].to_i
46
- end_index = @options[:end]&.to_i || @value.length
47
+ value[range]
48
+ end
49
+
50
+ ##
51
+ # Determines the range for the substring extraction based on the provided start and end indices.
52
+ #
53
+ # @return [Range] the range from the start index to the end index, or an endless range beginning at the start index when no end is given
54
+ def range
55
+ return (start_index..) unless end_index?
56
+
57
+ if start_index == end_index
58
+ raise ArgumentError,
59
+ 'The `start` value must be unequal to the `end` value.'
60
+ end
47
61
 
48
- @value[start_index..end_index]
62
+ (start_index..end_index)
49
63
  end
64
+
65
+ private
66
+
67
+ def end_index? = !context[:options][:end].to_s.empty?
68
+ def end_index = context[:options][:end].to_i
69
+ def start_index = context[:options][:start].to_i
50
70
  end
51
71
  end
52
72
  end
@@ -31,15 +31,23 @@ module Html2rss
31
31
  #
32
32
  # Would return:
33
33
  # 'Product (23,42€)'
34
- class Template
34
+ class Template < Base
35
+ def self.validate_args!(value, context)
36
+ assert_type value, String, :value
37
+
38
+ string = context[:options]&.dig(:string).to_s
39
+ raise InvalidType, 'The `string` template is absent.' if string.empty?
40
+ end
41
+
35
42
  ##
36
43
  # @param value [String]
37
- # @param env [Item::Context]
38
- def initialize(value, env)
39
- @value = value
40
- @options = env[:options]
41
- @item = env[:item]
42
- @string = @options[:string]
44
+ # @param context [Item::Context]
45
+ def initialize(value, context)
46
+ super
47
+
48
+ @options = context[:options] || {}
49
+ @item = context[:item]
50
+ @string = @options[:string].to_s
43
51
  end
44
52
 
45
53
  ##
@@ -86,7 +94,7 @@ module Html2rss
86
94
  # @param method_name [String, Symbol]
87
95
  # @return [String]
88
96
  def item_value(method_name)
89
- method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
97
+ method_name.to_sym == :self ? value : @item.public_send(method_name).to_s
90
98
  end
91
99
  end
92
100
  end
@@ -8,6 +8,14 @@ module Html2rss
8
8
  # Error raised when an unknown post processor name is requested.
9
9
  class UnknownPostProcessorName < Html2rss::Error; end
10
10
 
11
+ ##
12
+ # Error raised when a required option is missing.
13
+ class MissingOption < Html2rss::Error; end
14
+
15
+ ##
16
+ # Error raised when an invalid type is provided.
17
+ class InvalidType < Html2rss::Error; end
18
+
11
19
  ##
12
20
  # Maps the post processor name to the class implementing the post processor.
13
21
  #
@@ -10,6 +10,9 @@ module Html2rss
10
10
  # Struct to represent a selector with associated attributes for extraction and processing.
11
11
  Selector = Struct.new(:selector, :attribute, :extractor, :post_process, :order, :static, keyword_init: true)
12
12
 
13
+ # Error raised when an invalid selector name is used.
14
+ class InvalidSelectorName < Html2rss::Error; end
15
+
13
16
  ##
14
17
  # @param config [Hash<Symbol, Object>]
15
18
  def initialize(config)
@@ -28,9 +31,15 @@ module Html2rss
28
31
  # @param name [Symbol]
29
32
  # @return [Selector]
30
33
  def selector(name)
31
- raise ArgumentError, "invalid item's selector name: #{name}" unless selector?(name)
34
+ raise InvalidSelectorName, "invalid selector name: #{name}" unless selector?(name)
35
+
36
+ keywords = config[name].slice(*available_keys)
32
37
 
33
- Selector.new(config[name])
38
+ if (additional_keys = available_keys - keywords.keys).any?
39
+ warn "additional keys (#{additional_keys.join(', ')}) present in selector #{name}"
40
+ end
41
+
42
+ Selector.new(keywords)
34
43
  end
35
44
 
36
45
  ##
@@ -86,6 +95,8 @@ module Html2rss
86
95
  array.map!(&:to_sym)
87
96
  end.to_set
88
97
  end
98
+
99
+ def available_keys = @available_keys ||= Selector.members
89
100
  end
90
101
  end
91
102
  end
@@ -3,6 +3,6 @@
3
3
  ##
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
- VERSION = '0.11.0'
6
+ VERSION = '0.12.0'
7
7
  public_constant :VERSION
8
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-09 00:00:00.000000000 Z
11
+ date: 2024-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -219,6 +219,7 @@ files:
219
219
  - html2rss.gemspec
220
220
  - lib/html2rss.rb
221
221
  - lib/html2rss/attribute_post_processors.rb
222
+ - lib/html2rss/attribute_post_processors/base.rb
222
223
  - lib/html2rss/attribute_post_processors/gsub.rb
223
224
  - lib/html2rss/attribute_post_processors/html_to_markdown.rb
224
225
  - lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
@@ -252,7 +253,7 @@ licenses:
252
253
  - MIT
253
254
  metadata:
254
255
  allowed_push_host: https://rubygems.org
255
- changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.11.0
256
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.12.0
256
257
  rubygems_mfa_required: 'true'
257
258
  post_install_message:
258
259
  rdoc_options: []