html2rss 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8d51b095228dec5d98a1ff66ee7329171d2297bad67ca27a7930afe296b589e8
4
- data.tar.gz: 32bf56a05ed45a1bb8c3341f228f41d1441e65243c6a5d55908e90d002f8880b
3
+ metadata.gz: ebe536d8051a64c6e2adf9fa8e1d9d1f9fa3743541c44ca85022d0603f9032b2
4
+ data.tar.gz: 7b3aaa213aaf6a37fb6e94fa72c9936ffd2391322297553b253b097edea300cc
5
5
  SHA512:
6
- metadata.gz: 0cbe761521ec3cd43bd25565f6425e6ced445814c30706d50d52c008ce6e3905a6f95b9f6d9dfa2faf90baf4bc2fbeb43741af5bc185e1488d31b55b9ffa3c75
7
- data.tar.gz: 353e1586ddde92858ea95a2eecda6d752975585038acc29e6d09ff97363e5429d15966a005e1f9c4a41e9ad83b972973cfadd9c840160849d530166e7dd84298
6
+ metadata.gz: '03985002d050b996c1dc315cbe8e3fc79b6619447a048ad3d2dca86f792eab5c2356716cf6198a24efc61de7e7ddceba2780da49c3e68a3c9efe895eb7cf0cf1'
7
+ data.tar.gz: 8315473528f46a5ba28297af296b879a66ac00f86ba9eb117b4e6c9ec61c285e4090cfd999ff712368f5b988b1cbda460e268aa3ea8928912bcdb1960ae25a4a
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ![html2rss logo](https://github.com/html2rss/html2rss/raw/master/support/logo.png)
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png) [![](http://img.shields.io/liberapay/goal/gildesmarais.svg?logo=liberapa)](https://liberapay.com/gildesmarais/donate)
3
+ [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png)
4
4
 
5
5
  `html2rss` is a Ruby gem that generates RSS 2.0 feeds from a _feed config_.
6
6
 
data/html2rss.gemspec CHANGED
@@ -10,23 +10,23 @@ Gem::Specification.new do |spec|
10
10
  spec.authors = ['Gil Desmarais']
11
11
  spec.email = ['html2rss@desmarais.de']
12
12
 
13
- spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
14
- spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
+ spec.summary = 'Generates RSS feeds from websites by scraping a URL and using CSS selectors to extract item.'
14
+ spec.description = 'Supports JSON content, custom HTTP headers, and post-processing of extracted content.'
15
15
  spec.homepage = 'https://github.com/html2rss/html2rss'
16
16
  spec.license = 'MIT'
17
17
  spec.required_ruby_version = '>= 3.1'
18
18
 
19
19
  if spec.respond_to?(:metadata)
20
20
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
21
- spec.metadata['changelog_uri'] = 'https://github.com/html2rss/html2rss/releases'
21
+ spec.metadata['changelog_uri'] = "#{spec.homepage}/releases/tag/v#{spec.version}"
22
22
  spec.metadata['rubygems_mfa_required'] = 'true'
23
23
  else
24
24
  raise 'RubyGems 2.0 or newer is required to protect against ' \
25
25
  'public gem pushes.'
26
26
  end
27
27
 
28
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
29
- f.match(%r{^(test|spec|features|support|docs|.github|.yardoc)/})
28
+ spec.files = `git ls-files -z`.split("\x0").select do |f|
29
+ f.match(%r{^(lib/|exe/|README.md|LICENSE|html2rss.gemspec)})
30
30
  end
31
31
  spec.bindir = 'exe'
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Html2rss
4
+ ##
5
+ # Provides a namespace for attribute post processors.
6
+ module AttributePostProcessors
7
+ ##
8
+ # All post processors must inherit from this base class and implement `self.validate_args!` and `#get`.
9
+ class Base
10
+ # Validates the presence of required options in the context
11
+ #
12
+ # @param keys [Array<Symbol>] the keys to check for presence
13
+ # @param context [Hash] the context containing options
14
+ # @raise [MissingOption] if any key is missing
15
+ def self.expect_options(keys, context)
16
+ keys.each do |key|
17
+ unless (options = context[:options]).key?(key)
18
+ raise MissingOption, "The `#{key}` option is missing in: #{options.inspect}", [],
19
+ cause: nil
20
+ end
21
+ end
22
+ end
23
+
24
+ # Asserts that the value is of the expected type(s)
25
+ #
26
+ # @param value [Object] the value to check
27
+ # @param types [Array<Class>, Class] the expected type(s)
28
+ # @param name [String] the name of the option being checked
29
+ # @raise [InvalidType] if the value is not of the expected type(s)
30
+ def self.assert_type(value, types = [], name)
31
+ types = [types] unless types.is_a?(Array)
32
+
33
+ return if types.any? { |type| value.is_a?(type) }
34
+
35
+ error_message_template = 'The type of `%s` must be %s, but is: %s'
36
+ raise InvalidType, format(error_message_template, name, types.join(' or '), value.class), [], cause: nil
37
+ end
38
+
39
+ # private_class_method :expect_options, :assert_type
40
+
41
+ ##
42
+ # This method validates the arguments passed to the post processor. Must be implemented by subclasses.
43
+ def self.validate_args!(_value, _context)
44
+ raise NotImplementedError, 'You must implement the `validate_args!` method in the post processor'
45
+ end
46
+
47
+ # Initializes the post processor
48
+ #
49
+ # @param value [Object] the value to be processed
50
+ # @param context [Item::Context] the context
51
+ def initialize(value, context)
52
+ klass = self.class
53
+ # TODO: get rid of Hash
54
+ klass.assert_type(context, [Item::Context, Hash], 'context')
55
+ klass.validate_args!(value, context)
56
+
57
+ @value = value
58
+ @context = context
59
+ end
60
+
61
+ attr_reader :value, :context
62
+
63
+ # Abstract method to be implemented by subclasses
64
+ #
65
+ # @raise [NotImplementedError] if not implemented in subclass
66
+ def get
67
+ raise NotImplementedError, 'You must implement the `get` method in the post processor'
68
+ end
69
+ end
70
+ end
71
+ end
@@ -25,39 +25,39 @@ module Html2rss
25
25
  # `replacement` can be a String or a Hash.
26
26
  #
27
27
  # See the doc on [String#gsub](https://ruby-doc.org/core/String.html#method-i-gsub) for more info.
28
- class Gsub
28
+ class Gsub < Base
29
+ def self.validate_args!(value, context)
30
+ assert_type value, String, :value
31
+ expect_options(%i[replacement pattern], context)
32
+ assert_type context.dig(:options, :replacement), [String, Hash], :replacement
33
+ end
34
+
29
35
  ##
30
36
  # @param value [String]
31
37
  # @param context [Item::Context]
32
38
  def initialize(value, context)
33
- @value = value
34
- @options = context[:options]
39
+ super
40
+
41
+ options = context[:options]
42
+
43
+ @replacement = options[:replacement]
44
+ @pattern = options[:pattern]
35
45
  end
36
46
 
37
47
  ##
38
48
  # @return [String]
39
49
  def get
40
- @value.to_s.gsub(pattern, replacement)
50
+ value.to_s.gsub(pattern, replacement)
41
51
  end
42
52
 
43
53
  private
44
54
 
55
+ attr_accessor :replacement
56
+
45
57
  ##
46
58
  # @return [Regexp]
47
59
  def pattern
48
- pattern = @options[:pattern]
49
- raise ArgumentError, 'The `pattern` option is missing' unless pattern
50
-
51
- pattern.is_a?(String) ? Utils.build_regexp_from_string(pattern) : pattern
52
- end
53
-
54
- ##
55
- # @return [Hash, String]
56
- def replacement
57
- replacement = @options[:replacement]
58
- return replacement if replacement.is_a?(String) || replacement.is_a?(Hash)
59
-
60
- raise ArgumentError, 'The `replacement` option must be a String or Hash'
60
+ @pattern.is_a?(String) ? Utils.build_regexp_from_string(@pattern) : @pattern
61
61
  end
62
62
  end
63
63
  end
@@ -26,18 +26,17 @@ module Html2rss
26
26
  #
27
27
  # Would return:
28
28
  # 'Lorem **ipsum** dolor'
29
- class HtmlToMarkdown
30
- ##
31
- # @param value [String]
32
- # @param env [Item::Context]
33
- def initialize(value, env)
34
- @sanitized_value = SanitizeHtml.new(value, env).get
29
+ class HtmlToMarkdown < Base
30
+ def self.validate_args!(value, _context)
31
+ assert_type value, String, :value
35
32
  end
36
33
 
37
34
  ##
38
35
  # @return [String] formatted in Markdown
39
36
  def get
40
- ReverseMarkdown.convert(@sanitized_value)
37
+ sanitized_value = SanitizeHtml.new(value, context).get
38
+
39
+ ReverseMarkdown.convert(sanitized_value)
41
40
  end
42
41
  end
43
42
  end
@@ -32,13 +32,9 @@ module Html2rss
32
32
  # <h1>Section</h1>
33
33
  #
34
34
  # <p>Price: 12.34</p>
35
- class MarkdownToHtml
36
- ##
37
- # @param value [String] Markdown content to convert to HTML
38
- # @param env [Item::Context] Context object providing additional environment details
39
- def initialize(value, env)
40
- @value = value
41
- @env = env
35
+ class MarkdownToHtml < Base
36
+ def self.validate_args!(value, _context)
37
+ assert_type value, String, :value
42
38
  end
43
39
 
44
40
  ##
@@ -46,8 +42,8 @@ module Html2rss
46
42
  #
47
43
  # @return [String] Sanitized HTML content
48
44
  def get
49
- html_content = Kramdown::Document.new(@value).to_html
50
- SanitizeHtml.new(html_content, @env).get
45
+ html_content = Kramdown::Document.new(value).to_html
46
+ SanitizeHtml.new(html_content, context).get
51
47
  end
52
48
  end
53
49
  end
@@ -24,22 +24,22 @@ module Html2rss
24
24
  # Would return:
25
25
  # "Tue, 02 Jul 2019 00:00:00 +0200"
26
26
  #
27
- # It uses {https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse Time.parse}.
28
- class ParseTime
29
- ##
30
- # @param value [String] the time to parse
31
- # @param env [Item::Context] Context object providing additional environment details
32
- def initialize(value, env)
33
- @value = value.to_s
34
- @time_zone = env[:config].time_zone
27
+ # It uses `Time.parse`.
28
+ class ParseTime < Base
29
+ def self.validate_args!(value, context)
30
+ assert_type value, String, :value
31
+ assert_type context[:config].time_zone, String, :time_zone
35
32
  end
36
33
 
37
34
  ##
38
- # Converts the provided time string to RFC822 format, taking into account the configured time zone.
35
+ # Converts the provided time string to RFC822 format, taking into account the time_zone.
39
36
  #
40
37
  # @return [String] RFC822 formatted time
38
+ # @raise [TZInfo::InvalidTimezoneIdentifier] if the configured time zone is invalid
41
39
  def get
42
- Utils.use_zone(@time_zone) { Time.parse(@value).rfc822 }
40
+ time_zone = context[:config].time_zone
41
+
42
+ Utils.use_zone(time_zone) { Time.parse(value).rfc822 }
43
43
  end
44
44
  end
45
45
  end
@@ -21,21 +21,24 @@ module Html2rss
21
21
  #
22
22
  # Would return:
23
23
  # 'http://why-not-use-a-link.uh'
24
- class ParseUri
25
- ##
26
- # @param value [String]
27
- # @param context [Item::Context]
28
- def initialize(value, context)
29
- @value = value
30
- @config_url = context.config.url
24
+ class ParseUri < Base
25
+ def self.validate_args!(value, context)
26
+ url_types = [String, URI::HTTP, Addressable::URI].freeze
27
+
28
+ assert_type(value, url_types, :value)
29
+ assert_type(context.config.url, url_types, :url)
30
+
31
+ raise ArgumentError, 'The `value` option is missing or empty.' if value.to_s.empty?
31
32
  end
32
33
 
33
34
  ##
34
35
  # @return [String]
35
36
  def get
37
+ config_url = context.config.url
38
+
36
39
  Html2rss::Utils.build_absolute_url_from_relative(
37
- Html2rss::Utils.sanitize_url(@value),
38
- @config_url
40
+ Html2rss::Utils.sanitize_url(value),
41
+ config_url
39
42
  ).to_s
40
43
  end
41
44
  end
@@ -38,19 +38,15 @@ module Html2rss
38
38
  #
39
39
  # Would return:
40
40
  # '<p>Lorem <b>ipsum</b> dolor ...</p>'
41
- class SanitizeHtml
42
- ##
43
- # @param value [String]
44
- # @param env [Item::Context]
45
- def initialize(value, env)
46
- @value = value
47
- @channel_url = env[:config].url
41
+ class SanitizeHtml < Base
42
+ def self.validate_args!(value, _context)
43
+ assert_type value, String, :value
48
44
  end
49
45
 
50
46
  ##
51
47
  # @return [String]
52
48
  def get
53
- sanitized_html = Sanitize.fragment(@value, sanitize_config)
49
+ sanitized_html = Sanitize.fragment(value, sanitize_config)
54
50
  sanitized_html.to_s.gsub(/\s+/, ' ').strip
55
51
  end
56
52
 
@@ -77,13 +73,15 @@ module Html2rss
77
73
  }
78
74
  end
79
75
 
76
+ def channel_url = context[:config].url
77
+
80
78
  ##
81
79
  # Wrapper for transform_urls_to_absolute_ones to pass the channel_url.
82
80
  #
83
81
  # @param env [Hash]
84
82
  # @return [nil]
85
83
  def transform_urls_to_absolute_ones(env)
86
- HtmlTransformers::TransformUrlsToAbsoluteOnes.new(@channel_url).call(**env)
84
+ HtmlTransformers::TransformUrlsToAbsoluteOnes.new(channel_url).call(**env)
87
85
  end
88
86
 
89
87
  ##
@@ -28,13 +28,15 @@ module Html2rss
28
28
  #
29
29
  # Would return:
30
30
  # 'bar'
31
- class Substring
32
- ##
33
- # @param value [String] The original string to extract a substring from.
34
- # @param env [Item::Context] Context object providing additional environment details.
35
- def initialize(value, env)
36
- @value = value
37
- @options = env[:options]
31
+ class Substring < Base
32
+ def self.validate_args!(value, context)
33
+ assert_type value, String, :value
34
+
35
+ options = context[:options]
36
+ assert_type options[:start], Integer, :start
37
+
38
+ end_index = options[:end]
39
+ assert_type end_index, Integer, :end if end_index
38
40
  end
39
41
 
40
42
  ##
@@ -42,11 +44,29 @@ module Html2rss
42
44
  #
43
45
  # @return [String] The extracted substring.
44
46
  def get
45
- start_index = @options[:start].to_i
46
- end_index = @options[:end]&.to_i || @value.length
47
+ value[range]
48
+ end
49
+
50
+ ##
51
+ # Determines the range for the substring extraction based on the provided start and end indices.
52
+ #
53
+ # @return [Range] The range object representing the start and end/Infinity (integers).
54
+ def range
55
+ return (start_index..) unless end_index?
56
+
57
+ if start_index == end_index
58
+ raise ArgumentError,
59
+ 'The `start` value must be unequal to the `end` value.'
60
+ end
47
61
 
48
- @value[start_index..end_index]
62
+ (start_index..end_index)
49
63
  end
64
+
65
+ private
66
+
67
+ def end_index? = !context[:options][:end].to_s.empty?
68
+ def end_index = context[:options][:end].to_i
69
+ def start_index = context[:options][:start].to_i
50
70
  end
51
71
  end
52
72
  end
@@ -31,15 +31,23 @@ module Html2rss
31
31
  #
32
32
  # Would return:
33
33
  # 'Product (23,42€)'
34
- class Template
34
+ class Template < Base
35
+ def self.validate_args!(value, context)
36
+ assert_type value, String, :value
37
+
38
+ string = context[:options]&.dig(:string).to_s
39
+ raise InvalidType, 'The `string` template is absent.' if string.empty?
40
+ end
41
+
35
42
  ##
36
43
  # @param value [String]
37
- # @param env [Item::Context]
38
- def initialize(value, env)
39
- @value = value
40
- @options = env[:options]
41
- @item = env[:item]
42
- @string = @options[:string]
44
+ # @param context [Item::Context]
45
+ def initialize(value, context)
46
+ super
47
+
48
+ @options = context[:options] || {}
49
+ @item = context[:item]
50
+ @string = @options[:string].to_s
43
51
  end
44
52
 
45
53
  ##
@@ -86,7 +94,7 @@ module Html2rss
86
94
  # @param method_name [String, Symbol]
87
95
  # @return [String]
88
96
  def item_value(method_name)
89
- method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
97
+ method_name.to_sym == :self ? value : @item.public_send(method_name).to_s
90
98
  end
91
99
  end
92
100
  end
@@ -6,7 +6,15 @@ module Html2rss
6
6
  module AttributePostProcessors
7
7
  ##
8
8
  # Error raised when an unknown post processor name is requested.
9
- class UnknownPostProcessorName < StandardError; end
9
+ class UnknownPostProcessorName < Html2rss::Error; end
10
+
11
+ ##
12
+ # Error raised when a required option is missing.
13
+ class MissingOption < Html2rss::Error; end
14
+
15
+ ##
16
+ # Error raised when an invalid type is provided.
17
+ class InvalidType < Html2rss::Error; end
10
18
 
11
19
  ##
12
20
  # Maps the post processor name to the class implementing the post processor.
@@ -11,6 +11,15 @@ module Html2rss
11
11
  # 1. the RSS channel attributes
12
12
  # 2. html2rss options like json or custom HTTP-headers for the request
13
13
  class Channel
14
+ ##
15
+ # @param config [Hash<Symbol, Object>]
16
+ # @return [Set<String>] the required parameter names
17
+ def self.required_params_for_config(config)
18
+ config.each_with_object(Set.new) do |(_, value), required_params|
19
+ required_params.merge(value.scan(/%<([\w_\d]+)>/).flatten) if value.is_a?(String)
20
+ end
21
+ end
22
+
14
23
  ##
15
24
  # @param channel [Hash<Symbol, Object>]
16
25
  # @param params [Hash]
@@ -77,15 +86,6 @@ module Html2rss
77
86
  config.fetch(:json, false)
78
87
  end
79
88
 
80
- ##
81
- # @param config [Hash<Symbol, Object>]
82
- # @return [Set<String>] the required parameter names
83
- def self.required_params_for_config(config)
84
- config.each_with_object(Set.new) do |(_, value), required_params|
85
- required_params.merge(value.scan(/%<([\w_\d]+)>/).flatten) if value.is_a?(String)
86
- end
87
- end
88
-
89
89
  private
90
90
 
91
91
  # @return [Hash<Symbol, Object>]
@@ -10,6 +10,9 @@ module Html2rss
10
10
  # Struct to represent a selector with associated attributes for extraction and processing.
11
11
  Selector = Struct.new(:selector, :attribute, :extractor, :post_process, :order, :static, keyword_init: true)
12
12
 
13
+ # raised when an invalid selector name is used
14
+ class InvalidSelectorName < Html2rss::Error; end
15
+
13
16
  ##
14
17
  # @param config [Hash<Symbol, Object>]
15
18
  def initialize(config)
@@ -28,9 +31,15 @@ module Html2rss
28
31
  # @param name [Symbol]
29
32
  # @return [Selector]
30
33
  def selector(name)
31
- raise ArgumentError, "invalid item's selector name: #{name}" unless selector?(name)
34
+ raise InvalidSelectorName, "invalid selector name: #{name}" unless selector?(name)
35
+
36
+ keywords = config[name].slice(*available_keys)
32
37
 
33
- Selector.new(config[name])
38
+ if (additional_keys = available_keys - keywords.keys).any?
39
+ warn "additional keys (#{additional_keys.join(', ')}) present in selector #{name}"
40
+ end
41
+
42
+ Selector.new(keywords)
34
43
  end
35
44
 
36
45
  ##
@@ -86,6 +95,8 @@ module Html2rss
86
95
  array.map!(&:to_sym)
87
96
  end.to_set
88
97
  end
98
+
99
+ def available_keys = @available_keys ||= Selector.members
89
100
  end
90
101
  end
91
102
  end
@@ -12,11 +12,11 @@ module Html2rss
12
12
  ##
13
13
  # The Error class to be thrown when a feed config requires params, but none
14
14
  # were passed to Config.
15
- class ParamsMissing < StandardError; end
15
+ class ParamsMissing < Html2rss::Error; end
16
16
 
17
17
  ##
18
18
  # Thrown when the feed config does not contain a value at `:channel`.
19
- class ChannelMissing < StandardError; end
19
+ class ChannelMissing < Html2rss::Error; end
20
20
 
21
21
  # Struct to store XML Stylesheet attributes
22
22
  Stylesheet = Struct.new(:href, :type, :media, keyword_init: true)
data/lib/html2rss/item.rb CHANGED
@@ -16,6 +16,21 @@ module Html2rss
16
16
  # Class to keep an Item's <enclosure>.
17
17
  Enclosure = Struct.new('Enclosure', :type, :bits_length, :url, keyword_init: true)
18
18
 
19
+ ##
20
+ # Fetches items from a given URL using configuration settings.
21
+ #
22
+ # @param url [String] URL to fetch items from.
23
+ # @param config [Html2rss::Config] Configuration object.
24
+ # @return [Array<Html2rss::Item>] list of items fetched.
25
+ def self.from_url(url, config)
26
+ body = Utils.request_body_from_url(url, convert_json_to_xml: config.json?, headers: config.headers)
27
+
28
+ Nokogiri.HTML(body)
29
+ .css(config.selector_string(Config::Selectors::ITEMS_SELECTOR_NAME))
30
+ .map { |xml| new(xml, config) }
31
+ .select(&:valid?)
32
+ end
33
+
19
34
  ##
20
35
  # @param xml [Nokogiri::XML::Element]
21
36
  # @param config [Html2rss::Config]
@@ -122,21 +137,6 @@ module Html2rss
122
137
  )
123
138
  end
124
139
 
125
- ##
126
- # Fetches items from a given URL using configuration settings.
127
- #
128
- # @param url [String] URL to fetch items from.
129
- # @param config [Html2rss::Config] Configuration object.
130
- # @return [Array<Html2rss::Item>] list of items fetched.
131
- def self.from_url(url, config)
132
- body = Utils.request_body_from_url(url, convert_json_to_xml: config.json?, headers: config.headers)
133
-
134
- Nokogiri.HTML(body)
135
- .css(config.selector_string(Config::Selectors::ITEMS_SELECTOR_NAME))
136
- .map { |xml| new(xml, config) }
137
- .select(&:valid?)
138
- end
139
-
140
140
  private
141
141
 
142
142
  # @return [Nokogiri::XML::Element] XML element representing the item.
@@ -6,7 +6,7 @@ module Html2rss
6
6
  module ItemExtractors
7
7
  ##
8
8
  # The Error class to be thrown when an unknown extractor name is requested.
9
- class UnknownExtractorName < StandardError; end
9
+ class UnknownExtractorName < Html2rss::Error; end
10
10
 
11
11
  ##
12
12
  # Maps the extractor name to the class implementing the extractor.
@@ -3,6 +3,6 @@
3
3
  ##
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
- VERSION = '0.10.0'
6
+ VERSION = '0.12.0'
7
7
  public_constant :VERSION
8
8
  end
data/lib/html2rss.rb CHANGED
@@ -10,6 +10,12 @@ require 'yaml'
10
10
  ##
11
11
  # The Html2rss namespace.
12
12
  module Html2rss
13
+ ##
14
+ # The Html2rss::Error base class.
15
+ class Error < StandardError; end
16
+
17
+ ##
18
+ # Key for the feeds configuration in the YAML file.
13
19
  CONFIG_KEY_FEEDS = :feeds
14
20
 
15
21
  ##
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-30 00:00:00.000000000 Z
11
+ date: 2024-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -204,8 +204,8 @@ dependencies:
204
204
  - - ">="
205
205
  - !ruby/object:Gem::Version
206
206
  version: '0'
207
- description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
208
- in return.
207
+ description: Supports JSON content, custom HTTP headers, and post-processing of extracted
208
+ content.
209
209
  email:
210
210
  - html2rss@desmarais.de
211
211
  executables:
@@ -213,21 +213,13 @@ executables:
213
213
  extensions: []
214
214
  extra_rdoc_files: []
215
215
  files:
216
- - ".gitignore"
217
- - ".mergify.yml"
218
- - ".rspec"
219
- - ".rubocop.yml"
220
- - ".yardopts"
221
- - Gemfile
222
- - Gemfile.lock
223
216
  - LICENSE
224
217
  - README.md
225
- - bin/console
226
- - bin/setup
227
218
  - exe/html2rss
228
219
  - html2rss.gemspec
229
220
  - lib/html2rss.rb
230
221
  - lib/html2rss/attribute_post_processors.rb
222
+ - lib/html2rss/attribute_post_processors/base.rb
231
223
  - lib/html2rss/attribute_post_processors/gsub.rb
232
224
  - lib/html2rss/attribute_post_processors/html_to_markdown.rb
233
225
  - lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
@@ -256,13 +248,12 @@ files:
256
248
  - lib/html2rss/rss_builder/stylesheet.rb
257
249
  - lib/html2rss/utils.rb
258
250
  - lib/html2rss/version.rb
259
- - rakefile.rb
260
251
  homepage: https://github.com/html2rss/html2rss
261
252
  licenses:
262
253
  - MIT
263
254
  metadata:
264
255
  allowed_push_host: https://rubygems.org
265
- changelog_uri: https://github.com/html2rss/html2rss/releases
256
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.12.0
266
257
  rubygems_mfa_required: 'true'
267
258
  post_install_message:
268
259
  rdoc_options: []
@@ -282,5 +273,6 @@ requirements: []
282
273
  rubygems_version: 3.5.11
283
274
  signing_key:
284
275
  specification_version: 4
285
- summary: Returns an RSS::Rss object by scraping a URL.
276
+ summary: Generates RSS feeds from websites by scraping a URL and using CSS selectors
277
+ to extract item.
286
278
  test_files: []
data/.gitignore DELETED
@@ -1,12 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /_yardoc/
4
- /coverage/
5
- /doc/
6
- /pkg/
7
- /spec/reports/
8
- /tmp/
9
-
10
- # rspec failure tracking
11
- .rspec_status
12
- docs/
data/.mergify.yml DELETED
@@ -1,15 +0,0 @@
1
- queue_rules:
2
- - name: dependabot
3
- conditions:
4
- - author=dependabot[bot]
5
- - status-success=test
6
- - base=master
7
-
8
- pull_request_rules:
9
- - name: automatic merge for Dependabot pull requests
10
- conditions:
11
- - author=dependabot[bot]
12
- actions:
13
- queue:
14
- method: squash
15
- name: dependabot
data/.rspec DELETED
@@ -1,4 +0,0 @@
1
- --format documentation
2
- --color
3
- --order random
4
- --require spec_helper
data/.rubocop.yml DELETED
@@ -1,30 +0,0 @@
1
- require:
2
- - rubocop-performance
3
- - rubocop-rspec
4
- - rubocop-md
5
- - rubocop-rake
6
-
7
- AllCops:
8
- DisplayCopNames: true
9
- NewCops: enable
10
- Exclude:
11
- - vendor/**/*
12
-
13
- Metrics/BlockLength:
14
- Exclude:
15
- - "spec/**/*_spec.rb"
16
- - html2rss.gemspec
17
-
18
- RSpec/NestedGroups:
19
- Exclude:
20
- - spec/html2rss_spec.rb
21
-
22
- RSpec/DescribeClass:
23
- Exclude:
24
- - spec/exe/**/*_spec.rb
25
-
26
- RSpec/NamedSubject:
27
- Enabled: false
28
-
29
- Naming/RescuedExceptionsVariableName:
30
- PreferredName: error
data/.yardopts DELETED
@@ -1,6 +0,0 @@
1
- --readme README.md
2
- --charset utf-8
3
- --no-private
4
- --exclude /coverage
5
- --exclude /support
6
- --output-dir docs/
data/Gemfile DELETED
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source 'https://rubygems.org'
4
-
5
- git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
-
7
- # Specify your gem's dependencies in html2rss.gemspec
8
- gemspec
9
-
10
- group :development, :test do
11
- gem 'byebug'
12
- gem 'rake'
13
- gem 'rspec', '~> 3.0'
14
- gem 'rubocop'
15
- gem 'rubocop-md'
16
- gem 'rubocop-performance'
17
- gem 'rubocop-rake'
18
- gem 'rubocop-rspec'
19
- gem 'vcr'
20
- gem 'yard'
21
- end
22
-
23
- group :test do
24
- gem 'simplecov', require: false
25
- end
data/Gemfile.lock DELETED
@@ -1,153 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- html2rss (0.10.0)
5
- addressable (~> 2.7)
6
- faraday (> 2.0.1, < 3.0)
7
- faraday-follow_redirects
8
- kramdown
9
- mime-types (> 3.0)
10
- nokogiri (>= 1.10, < 2.0)
11
- regexp_parser
12
- reverse_markdown (~> 2.0)
13
- rss
14
- sanitize (~> 6.0)
15
- thor
16
- tzinfo
17
- zeitwerk
18
-
19
- GEM
20
- remote: https://rubygems.org/
21
- specs:
22
- addressable (2.8.6)
23
- public_suffix (>= 2.0.2, < 6.0)
24
- ast (2.4.2)
25
- byebug (11.1.3)
26
- concurrent-ruby (1.2.3)
27
- crass (1.0.6)
28
- diff-lcs (1.5.1)
29
- docile (1.4.0)
30
- faraday (2.9.0)
31
- faraday-net_http (>= 2.0, < 3.2)
32
- faraday-follow_redirects (0.3.0)
33
- faraday (>= 1, < 3)
34
- faraday-net_http (3.1.0)
35
- net-http
36
- json (2.7.2)
37
- kramdown (2.4.0)
38
- rexml
39
- language_server-protocol (3.17.0.3)
40
- mime-types (3.5.2)
41
- mime-types-data (~> 3.2015)
42
- mime-types-data (3.2024.0305)
43
- mini_portile2 (2.8.6)
44
- net-http (0.4.1)
45
- uri
46
- nokogiri (1.16.5)
47
- mini_portile2 (~> 2.8.2)
48
- racc (~> 1.4)
49
- nokogiri (1.16.5-x86_64-darwin)
50
- racc (~> 1.4)
51
- nokogiri (1.16.5-x86_64-linux)
52
- racc (~> 1.4)
53
- parallel (1.24.0)
54
- parser (3.3.1.0)
55
- ast (~> 2.4.1)
56
- racc
57
- public_suffix (5.0.5)
58
- racc (1.7.3)
59
- rainbow (3.1.1)
60
- rake (13.2.1)
61
- regexp_parser (2.9.0)
62
- reverse_markdown (2.1.1)
63
- nokogiri
64
- rexml (3.3.2)
65
- strscan
66
- rspec (3.13.0)
67
- rspec-core (~> 3.13.0)
68
- rspec-expectations (~> 3.13.0)
69
- rspec-mocks (~> 3.13.0)
70
- rspec-core (3.13.0)
71
- rspec-support (~> 3.13.0)
72
- rspec-expectations (3.13.0)
73
- diff-lcs (>= 1.2.0, < 2.0)
74
- rspec-support (~> 3.13.0)
75
- rspec-mocks (3.13.0)
76
- diff-lcs (>= 1.2.0, < 2.0)
77
- rspec-support (~> 3.13.0)
78
- rspec-support (3.13.1)
79
- rss (0.3.0)
80
- rexml
81
- rubocop (1.63.4)
82
- json (~> 2.3)
83
- language_server-protocol (>= 3.17.0)
84
- parallel (~> 1.10)
85
- parser (>= 3.3.0.2)
86
- rainbow (>= 2.2.2, < 4.0)
87
- regexp_parser (>= 1.8, < 3.0)
88
- rexml (>= 3.2.5, < 4.0)
89
- rubocop-ast (>= 1.31.1, < 2.0)
90
- ruby-progressbar (~> 1.7)
91
- unicode-display_width (>= 2.4.0, < 3.0)
92
- rubocop-ast (1.31.3)
93
- parser (>= 3.3.1.0)
94
- rubocop-capybara (2.20.0)
95
- rubocop (~> 1.41)
96
- rubocop-factory_bot (2.25.1)
97
- rubocop (~> 1.41)
98
- rubocop-md (1.2.2)
99
- rubocop (>= 1.0)
100
- rubocop-performance (1.21.0)
101
- rubocop (>= 1.48.1, < 2.0)
102
- rubocop-ast (>= 1.31.1, < 2.0)
103
- rubocop-rake (0.6.0)
104
- rubocop (~> 1.0)
105
- rubocop-rspec (2.29.1)
106
- rubocop (~> 1.40)
107
- rubocop-capybara (~> 2.17)
108
- rubocop-factory_bot (~> 2.22)
109
- rubocop-rspec_rails (~> 2.28)
110
- rubocop-rspec_rails (2.28.3)
111
- rubocop (~> 1.40)
112
- ruby-progressbar (1.13.0)
113
- sanitize (6.1.0)
114
- crass (~> 1.0.2)
115
- nokogiri (>= 1.12.0)
116
- simplecov (0.22.0)
117
- docile (~> 1.1)
118
- simplecov-html (~> 0.11)
119
- simplecov_json_formatter (~> 0.1)
120
- simplecov-html (0.12.3)
121
- simplecov_json_formatter (0.1.4)
122
- strscan (3.1.0)
123
- thor (1.3.1)
124
- tzinfo (2.0.6)
125
- concurrent-ruby (~> 1.0)
126
- unicode-display_width (2.5.0)
127
- uri (0.13.0)
128
- vcr (6.2.0)
129
- yard (0.9.36)
130
- zeitwerk (2.6.13)
131
-
132
- PLATFORMS
133
- ruby
134
- x86_64-darwin
135
- x86_64-darwin-20
136
- x86_64-linux
137
-
138
- DEPENDENCIES
139
- byebug
140
- html2rss!
141
- rake
142
- rspec (~> 3.0)
143
- rubocop
144
- rubocop-md
145
- rubocop-performance
146
- rubocop-rake
147
- rubocop-rspec
148
- simplecov
149
- vcr
150
- yard
151
-
152
- BUNDLED WITH
153
- 2.4.1
data/bin/console DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'bundler/setup'
5
- require 'html2rss'
6
- require 'byebug'
7
-
8
- # You can add fixtures and/or initialization code here to make experimenting
9
- # with your gem easier. You can also use a different console, if you like.
10
-
11
- # (If you use this, don't forget to add pry to your Gemfile!)
12
- # require "pry"
13
- # Pry.start
14
-
15
- require 'irb'
16
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
data/rakefile.rb DELETED
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'bundler'
4
- require 'rake'
5
- require 'rspec'
6
- require 'rspec/core/rake_task'
7
-
8
- Bundler.setup
9
- Bundler::GemHelper.install_tasks
10
-
11
- task default: [:spec]
12
-
13
- desc 'Run all examples'
14
- RSpec::Core::RakeTask.new(:spec) do |t|
15
- t.ruby_opts = %w[-w]
16
- end