html2rss 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7a2bf557dd65533533e07b4581e195f2d2b32ff906831526a4d7aed27a558d71
4
- data.tar.gz: f42e5f03649a08219d310a2545413c371f851530c4d323fd68ef783b4b3b5e13
3
+ metadata.gz: d89191b35f643372cc18b880dab7535d18a10d9fd123897460ee16c5e990a5d9
4
+ data.tar.gz: 71cb356f5261b2e6a3d2152afcb68f658e78d5fec5ff15bc67ed0d5bd153fc00
5
5
  SHA512:
6
- metadata.gz: 724a1fa8ab15ae140278eb9b055f22e7aad12e94627795f7a2f13c78f5421607e39d6ba040821b4c47b69f963cc0180bf8e964ff0b896403cb6305ed1d67dbb5
7
- data.tar.gz: a06c2e16b0b51c6b6d2184430efc2a4e8b2812fee413163aa2991567e7608141f1c18189fdded58c8c3383940c4790478cd631abc6a1470ad648b2030fdefaab
6
+ metadata.gz: 46f048feae342844df1af51c741d681677192c1dc84452fae1002f5cca5b406c0698a426ec6e532572c4fb4f6fb896a966862d8d2599b8dd742a174707289aed
7
+ data.tar.gz: 98d0316c64bb5a160d26d5efa59b25901b3a64e572795bbd840539fe69d84a4ea3c797bb16721edb73277d1b9bfb9238f9d40ea2b9bb4ebeffc81e8790a02062
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png)
4
4
 
5
- `html2rss` is a Ruby gem that generates RSS 2.0 feeds from a _feed config_.
5
+ `html2rss` is a Ruby gem that generates RSS 2.0 feeds from websites automatically and, as a fallback, from a _feed config_.
6
6
 
7
7
  With the _feed config_, you provide a URL to scrape and CSS selectors for extracting information (like title, URL, etc.). The gem builds the RSS feed accordingly. [Extractors](#using-extractors) and chainable [post processors](#using-post-processors) make information extraction, processing, and sanitizing a breeze. The gem also supports [scraping JSON](#scraping-and-handling-json-responses) responses and [setting HTTP request headers](#set-any-http-header-in-the-request).
8
8
 
@@ -26,9 +26,9 @@ You can also install it as a dependency in your Ruby project:
26
26
 
27
27
  ## Generating a feed on the CLI
28
28
 
29
- ### using automatic scraping
29
+ ### using automatic generation
30
30
 
31
- html2rss offers an automatic scrapting feature. Try it with:
31
+ html2rss offers an automatic RSS generation feature. Try it with:
32
32
 
33
33
  `html2rss auto https://unmatchedstyle.com/`
34
34
 
@@ -10,21 +10,27 @@ module Html2rss
10
10
  ##
11
11
  #
12
12
  # @param parsed_body [Nokogiri::HTML::Document] The parsed HTML document.
13
- # @param response [Faraday::Response] The URL of the HTML document.
14
- def initialize(parsed_body, url:, response:, articles: [])
13
+ # @param url [Addressable::URI] The URL of the channel.
14
+ # @param headers [Hash<String, String>] The HTTP response headers.
15
+ # @param articles [Array<Html2rss::AutoSource::Article>] The articles.
16
+ def initialize(parsed_body, url:, headers:, articles: [], stylesheets: [])
15
17
  @parsed_body = parsed_body
16
18
  @url = url
17
- @response = response
19
+ @headers = headers
18
20
  @articles = articles
21
+ @stylesheets = stylesheets
19
22
  end
20
23
 
24
+ attr_writer :articles
25
+ attr_reader :stylesheets
26
+
21
27
  def url = extract_url
22
28
  def title = extract_title
23
29
  def language = extract_language
24
30
  def description = extract_description
25
31
  def image = extract_image
26
32
  def ttl = extract_ttl
27
- def last_build_date = response.headers['last-modified']
33
+ def last_build_date = headers['last-modified']
28
34
 
29
35
  def generator
30
36
  "html2rss V. #{::Html2rss::VERSION} (using auto_source scrapers: #{scraper_counts})"
@@ -32,7 +38,7 @@ module Html2rss
32
38
 
33
39
  private
34
40
 
35
- attr_reader :parsed_body, :response
41
+ attr_reader :parsed_body, :headers
36
42
 
37
43
  def extract_url
38
44
  @url.normalize.to_s
@@ -58,7 +64,7 @@ module Html2rss
58
64
  end
59
65
 
60
66
  def extract_ttl
61
- ttl = response.headers['cache-control']&.match(/max-age=(\d+)/)&.[](1)
67
+ ttl = headers['cache-control']&.match(/max-age=(\d+)/)&.[](1)
62
68
  return unless ttl
63
69
 
64
70
  ttl.to_i.fdiv(60).ceil
@@ -31,6 +31,8 @@ module Html2rss
31
31
 
32
32
  def call
33
33
  RSS::Maker.make('2.0') do |maker|
34
+ Html2rss::RssBuilder::Stylesheet.add(maker, channel.stylesheets)
35
+
34
36
  make_channel(maker.channel)
35
37
  make_items(maker)
36
38
  end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'set'
5
+
6
+ module Html2rss
7
+ class AutoSource
8
+ module Scraper
9
+ ##
10
+ # Scrapes articles from HTML pages by
11
+ # finding similar structures around anchor tags in the parsed_body.
12
+ class Html
13
+ include Enumerable
14
+
15
+ def self.articles?(parsed_body)
16
+ new(parsed_body, url: '').any?
17
+ end
18
+
19
+ def self.parent_until_condition(node, condition)
20
+ return nil if !node || node.parent.name == 'html'
21
+ return node if condition.call(node)
22
+
23
+ parent_until_condition(node.parent, condition)
24
+ end
25
+
26
+ ##
27
+ # Simplify an XPath selector by removing the index notation.
28
+ def self.simplify_xpath(xpath)
29
+ xpath.gsub(/\[\d+\]/, '')
30
+ end
31
+
32
+ def initialize(parsed_body, url:)
33
+ @parsed_body = parsed_body
34
+ @url = url
35
+ @css_selectors = Hash.new(0)
36
+ end
37
+
38
+ attr_reader :parsed_body
39
+
40
+ ##
41
+ # @yieldparam article_hash [Hash] The scraped article hash
42
+ # @return [Enumerator] Enumerator for the scraped articles
43
+ def each
44
+ return enum_for(:each) unless block_given?
45
+
46
+ return if frequent_selectors.empty?
47
+
48
+ frequent_selectors.each do |selector|
49
+ parsed_body.xpath(selector).each do |selected_tag|
50
+ article_tag = self.class.parent_until_condition(selected_tag, method(:article_condition))
51
+ article_hash = SemanticHtml::Extractor.new(article_tag, url: @url).call
52
+
53
+ yield article_hash if article_hash
54
+ end
55
+ end
56
+ end
57
+
58
+ ##
59
+ # Counts the anchors below root by their simplified XPath and keeps the paths that occur frequently.
60
+ # @param root [Nokogiri::XML::Node] The root node to search for anchors
61
+ # @return [Set<String>] The set of simplified XPath selectors which occur at least min_frequency times
62
+ def frequent_selectors(root = @parsed_body.at_css('body'), min_frequency: 2)
63
+ @frequent_selectors ||= begin
64
+ root.traverse do |node|
65
+ next if !node.element? || node.name != 'a'
66
+
67
+ @css_selectors[self.class.simplify_xpath(node.path)] += 1
68
+ end
69
+
70
+ @css_selectors.keys
71
+ .select { |selector| (@css_selectors[selector]).to_i >= min_frequency }
72
+ .to_set
73
+ end
74
+ end
75
+
76
+ private
77
+
78
+ def article_condition(node)
79
+ return true if %w[body html].include?(node.name)
80
+ return true if node.parent.css('a').size > 1
81
+
82
+ false
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -1,5 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
4
+ require 'nokogiri'
5
+ require 'set'
6
+
3
7
  module Html2rss
4
8
  class AutoSource
5
9
  module Scraper
@@ -99,6 +103,8 @@ module Html2rss
99
103
  # @yield [Hash] Each scraped article_hash
100
104
  # @return [Array<Hash>] the scraped article_hashes
101
105
  def each(&)
106
+ return enum_for(:each) unless block_given?
107
+
102
108
  schema_objects.filter_map do |schema_object|
103
109
  next unless (klass = self.class.scraper_for_schema_object(schema_object))
104
110
  next unless (article_hash = klass.new(schema_object, url:).call)
@@ -35,13 +35,13 @@ module Html2rss
35
35
  def initialize(article_tag, url:)
36
36
  @article_tag = article_tag
37
37
  @url = url
38
- @heading = find_heading
39
- @extract_url = find_url
40
38
  end
41
39
 
42
40
  # @return [Hash, nil] The scraped article or nil.
43
41
  def call
44
- return unless heading
42
+ @heading = find_heading || closest_anchor || return
43
+
44
+ @extract_url = find_url
45
45
 
46
46
  {
47
47
  title: extract_title,
@@ -71,14 +71,20 @@ module Html2rss
71
71
  times.min
72
72
  end
73
73
 
74
+ ##
75
+ # Find the heading of the article.
76
+ # @return [Nokogiri::XML::Node, nil]
74
77
  def find_heading
75
78
  heading_tags = article_tag.css(HEADING_TAGS.join(',')).group_by(&:name)
79
+
80
+ return if heading_tags.empty?
81
+
76
82
  smallest_heading = heading_tags.keys.min
77
- heading_tags[smallest_heading]&.max_by { |tag| tag.text.size }
83
+ heading_tags[smallest_heading]&.max_by { |tag| visible_text_from_tag(tag)&.size }
78
84
  end
79
85
 
80
86
  def extract_title
81
- @extract_title ||= if heading.children.empty? && heading.text
87
+ @extract_title ||= if heading && (heading.children.empty? || heading.text)
82
88
  visible_text_from_tag(heading)
83
89
  else
84
90
  visible_text_from_tag(
@@ -101,9 +107,12 @@ module Html2rss
101
107
  description.empty? ? nil : description
102
108
  end
103
109
 
110
+ def closest_anchor
111
+ SemanticHtml.find_closest_selector(heading || article_tag,
112
+ selector: 'a[href]:not([href=""])')
113
+ end
114
+
104
115
  def find_url
105
- closest_anchor = SemanticHtml.find_closest_selector(heading || article_tag,
106
- selector: 'a[href]:not([href=""])')
107
116
  href = closest_anchor&.[]('href')&.split('#')&.first&.strip
108
117
  Utils.build_absolute_url_from_relative(href, url) unless href.to_s.empty?
109
118
  end
@@ -113,8 +122,12 @@ module Html2rss
113
122
  end
114
123
 
115
124
  def generate_id
116
- [article_tag['id'], article_tag.at_css('[id]')&.attr('id'),
117
- extract_url&.path].compact.reject(&:empty?).first
125
+ [
126
+ article_tag['id'],
127
+ article_tag.at_css('[id]')&.attr('id'),
128
+ extract_url&.path,
129
+ extract_url&.query
130
+ ].compact.reject(&:empty?).first
118
131
  end
119
132
  end
120
133
  end
@@ -10,6 +10,7 @@ module Html2rss
10
10
  #
11
11
  module Scraper
12
12
  SCRAPERS = [
13
+ Html,
13
14
  Schema,
14
15
  SemanticHtml
15
16
  ].freeze
@@ -16,16 +16,18 @@ module Html2rss
16
16
 
17
17
  SUPPORTED_URL_SCHEMES = %w[http https].to_set.freeze
18
18
 
19
- def initialize(url)
20
- unless url.is_a?(String) || url.is_a?(Addressable::URI)
21
- raise ArgumentError,
22
- 'URL must be a String or Addressable::URI'
23
- end
24
-
25
- @url = Addressable::URI.parse(url)
26
-
27
- raise ArgumentError, 'URL must be absolute' unless @url.absolute?
28
- raise UnsupportedUrlScheme, "#{@url.scheme} not supported" unless SUPPORTED_URL_SCHEMES.include?(@url.scheme)
19
+ ##
20
+ # @param url [Addressable::URI] The URL to extract articles from.
21
+ # @param body [String] The body of the response.
22
+ # @param headers [Hash] The headers of the response.
23
+ def initialize(url, body:, headers: {})
24
+ raise ArgumentError, 'URL must be a Addressable::URI' unless url.is_a?(Addressable::URI)
25
+ raise ArgumentError, 'URL must be absolute' unless url.absolute?
26
+ raise UnsupportedUrlScheme, "#{url.scheme} not supported" unless SUPPORTED_URL_SCHEMES.include?(url.scheme)
27
+
28
+ @url = url
29
+ @body = body
30
+ @headers = headers
29
31
  end
30
32
 
31
33
  def build
@@ -34,6 +36,8 @@ module Html2rss
34
36
  Reducer.call(articles, url:)
35
37
  Cleanup.call(articles, url:, keep_different_domain: true)
36
38
 
39
+ channel.articles = articles
40
+
37
41
  Html2rss::AutoSource::RssBuilder.new(
38
42
  channel:,
39
43
  articles:
@@ -57,21 +61,20 @@ module Html2rss
57
61
  end
58
62
 
59
63
  def channel
60
- Channel.new(parsed_body, response:, url:, articles:)
64
+ @channel ||= Channel.new(parsed_body, headers: @headers, url:)
61
65
  end
62
66
 
63
67
  private
64
68
 
65
69
  attr_reader :url
66
70
 
67
- def response
68
- @response ||= Html2rss::Utils.request_url(url)
69
- end
70
-
71
- # Parses the HTML body of the response using Nokogiri.
72
71
  # @return [Nokogiri::HTML::Document]
73
72
  def parsed_body
74
- @parsed_body ||= Nokogiri.HTML(response.body).freeze
73
+ @parsed_body ||= Nokogiri.HTML(@body)
74
+ .tap do |doc|
75
+ # Remove comments from the document
76
+ doc.xpath('//comment()').each(&:remove)
77
+ end.freeze
75
78
  end
76
79
  end
77
80
  end
@@ -18,9 +18,6 @@ module Html2rss
18
18
  # Thrown when the feed config does not contain a value at `:channel`.
19
19
  class ChannelMissing < Html2rss::Error; end
20
20
 
21
- # Struct to store XML Stylesheet attributes
22
- Stylesheet = Struct.new(:href, :type, :media, keyword_init: true)
23
-
24
21
  def_delegator :@channel, :author
25
22
  def_delegator :@channel, :ttl
26
23
  def_delegator :@channel, :title
@@ -75,7 +72,7 @@ module Html2rss
75
72
  #
76
73
  # @return [Array<Html2rss::RssBuilder::Stylesheet>] Array of Stylesheet objects.
77
74
  def stylesheets
78
- @global.fetch(:stylesheets, []).map { |attributes| Stylesheet.new(attributes) }
75
+ @global.fetch(:stylesheets, []).map { |attributes| Html2rss::RssBuilder::Stylesheet.new(**attributes) }
79
76
  end
80
77
 
81
78
  # Provides read-only access to the channel object.
data/lib/html2rss/item.rb CHANGED
@@ -19,7 +19,7 @@ module Html2rss
19
19
  ##
20
20
  # Fetches items from a given URL using configuration settings.
21
21
  #
22
- # @param url [String] URL to fetch items from.
22
+ # @param url [Addressable::URI] URL to fetch items from.
23
23
  # @param config [Html2rss::Config] Configuration object.
24
24
  # @return [Array<Html2rss::Item>] list of items fetched.
25
25
  def self.from_url(url, config)
@@ -3,35 +3,50 @@
3
3
  module Html2rss
4
4
  module RssBuilder
5
5
  ##
6
- # Adds XML stylesheet tags (with the provided maker).
6
+ # Represents a stylesheet.
7
7
  class Stylesheet
8
- ##
9
- # Adds the stylesheet XML tags to the RSS.
10
- #
11
- # @param maker [RSS::Maker::RSS20] RSS maker object.
12
- # @param stylesheets [Array<Html2rss::Config::Stylesheet>] Array of stylesheet configurations.
13
- # @return [nil]
14
- def self.add(maker, stylesheets)
15
- stylesheets.each do |stylesheet|
16
- add_stylesheet(maker, stylesheet)
8
+ class << self
9
+ ##
10
+ # Adds the stylesheet XML tags to the RSS.
11
+ #
12
+ # @param maker [RSS::Maker::RSS20] RSS maker object.
13
+ # @param stylesheets [Array<Html2rss::RssBuilder::Stylesheet>] Array of stylesheets to add.
14
+ # @return [nil]
15
+ def add(maker, stylesheets)
16
+ stylesheets.each do |stylesheet|
17
+ add_stylesheet(maker, stylesheet)
18
+ end
17
19
  end
18
- end
19
20
 
20
- ##
21
- # Adds a single Stylesheet to the RSS.
22
- #
23
- # @param maker [RSS::Maker::RSS20] RSS maker object.
24
- # @param stylesheet [Html2rss::Config::Stylesheet] Stylesheet configuration.
25
- # @return [nil]
26
- def self.add_stylesheet(maker, stylesheet)
27
- maker.xml_stylesheets.new_xml_stylesheet do |xss|
28
- xss.href = stylesheet.href
29
- xss.type = stylesheet.type
30
- xss.media = stylesheet.media
21
+ private
22
+
23
+ ##
24
+ # Adds a single Stylesheet to the RSS.
25
+ #
26
+ # @param maker [RSS::Maker::RSS20] RSS maker object.
27
+ # @param stylesheet [Html2rss::RssBuilder::Stylesheet] The stylesheet to add.
28
+ # @return [nil]
29
+ def add_stylesheet(maker, stylesheet)
30
+ maker.xml_stylesheets.new_xml_stylesheet do |xss|
31
+ xss.href = stylesheet.href
32
+ xss.type = stylesheet.type
33
+ xss.media = stylesheet.media
34
+ end
31
35
  end
32
36
  end
33
37
 
34
- private_class_method :add_stylesheet
38
+ TYPES = ['text/css', 'text/xsl'].freeze
39
+
40
+ def initialize(href:, type:, media: 'all')
41
+ raise ArgumentError, 'stylesheet.href must be a String' unless href.is_a?(String)
42
+ raise ArgumentError, 'stylesheet.type invalid' unless TYPES.include?(type)
43
+ raise ArgumentError, 'stylesheet.media must be a String' unless media.is_a?(String)
44
+
45
+ @href = href
46
+ @type = type
47
+ @media = media
48
+ end
49
+ attr_reader :href, :type, :media
35
50
  end
36
51
  end
37
52
  end
@@ -44,6 +44,7 @@ module Html2rss
44
44
  #
45
45
  # @param time_zone [String]
46
46
  # @param default_time_zone [String]
47
+ # @yield block to execute with the given time zone
47
48
  # @return [Object] whatever the given block returns
48
49
  def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
49
50
  raise ArgumentError, 'a block is required' unless block_given?
@@ -74,6 +75,11 @@ module Html2rss
74
75
  # @param headers [Hash] additional HTTP request headers to use for the request
75
76
  # @return [Faraday::Response] body of the HTTP response
76
77
  def self.request_url(url, headers: {})
78
+ url = Addressable::URI.parse(url.to_s) unless url.is_a?(Addressable::URI)
79
+
80
+ raise ArgumentError, 'URL must be absolute' unless url.absolute?
81
+ raise ArgumentError, 'URL must not contain an @ characater' if url.to_s.include?('@')
82
+
77
83
  Faraday.new(url:, headers:) do |faraday|
78
84
  faraday.use Faraday::FollowRedirects::Middleware
79
85
  faraday.adapter Faraday.default_adapter
@@ -3,6 +3,6 @@
3
3
  ##
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
- VERSION = '0.13.0'
6
+ VERSION = '0.15.0'
7
7
  public_constant :VERSION
8
8
  end
data/lib/html2rss.rb CHANGED
@@ -5,8 +5,9 @@ require 'zeitwerk'
5
5
  loader = Zeitwerk::Loader.for_gem
6
6
  loader.setup
7
7
 
8
- require 'yaml'
8
+ require 'addressable'
9
9
  require 'logger'
10
+ require 'yaml'
10
11
 
11
12
  ##
12
13
  # The Html2rss namespace.
@@ -43,7 +44,7 @@ module Html2rss
43
44
  # @param params [Hash] Dynamic parameters for the feed configuration.
44
45
  # @return [RSS::Rss] RSS object generated from the configuration.
45
46
  def self.feed_from_yaml_config(file, name = nil, global_config: {}, params: {})
46
- yaml = load_yaml(file)
47
+ yaml = YAML.safe_load_file(file, symbolize_names: true)
47
48
  feeds = yaml[CONFIG_KEY_FEEDS] || {}
48
49
 
49
50
  feed_config = find_feed_config(yaml, feeds, name, global_config)
@@ -73,15 +74,6 @@ module Html2rss
73
74
  RssBuilder.build(config)
74
75
  end
75
76
 
76
- ##
77
- # Loads and parses the YAML file.
78
- #
79
- # @param file [String] Path to the YAML file.
80
- # @return [Hash] Parsed YAML content.
81
- def self.load_yaml(file)
82
- YAML.safe_load_file(file, symbolize_names: true)
83
- end
84
-
85
77
  ##
86
78
  # Builds the feed configuration based on the provided parameters.
87
79
  #
@@ -109,8 +101,12 @@ module Html2rss
109
101
  # @param url [String] the URL to automatically source the feed from
110
102
  # @return [RSS::Rss]
111
103
  def self.auto_source(url)
112
- Html2rss::AutoSource.new(url).build
104
+ url = Addressable::URI.parse(url)
105
+
106
+ response = Html2rss::Utils.request_url(url)
107
+
108
+ Html2rss::AutoSource.new(url, body: response.body, headers: response.headers).build
113
109
  end
114
110
 
115
- private_class_method :load_yaml, :find_feed_config
111
+ private_class_method :find_feed_config
116
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-16 00:00:00.000000000 Z
11
+ date: 2024-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -251,6 +251,7 @@ files:
251
251
  - lib/html2rss/auto_source/reducer.rb
252
252
  - lib/html2rss/auto_source/rss_builder.rb
253
253
  - lib/html2rss/auto_source/scraper.rb
254
+ - lib/html2rss/auto_source/scraper/html.rb
254
255
  - lib/html2rss/auto_source/scraper/schema.rb
255
256
  - lib/html2rss/auto_source/scraper/schema/base.rb
256
257
  - lib/html2rss/auto_source/scraper/semantic_html.rb
@@ -279,7 +280,7 @@ licenses:
279
280
  - MIT
280
281
  metadata:
281
282
  allowed_push_host: https://rubygems.org
282
- changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.13.0
283
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.15.0
283
284
  rubygems_mfa_required: 'true'
284
285
  post_install_message:
285
286
  rdoc_options: []
@@ -296,7 +297,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
296
297
  - !ruby/object:Gem::Version
297
298
  version: '0'
298
299
  requirements: []
299
- rubygems_version: 3.5.11
300
+ rubygems_version: 3.5.16
300
301
  signing_key:
301
302
  specification_version: 4
302
303
  summary: Generates RSS feeds from websites by scraping a URL and using CSS selectors