html2rss 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/html2rss/auto_source/channel.rb +12 -6
- data/lib/html2rss/auto_source/rss_builder.rb +2 -0
- data/lib/html2rss/auto_source/scraper/html.rb +87 -0
- data/lib/html2rss/auto_source/scraper/schema.rb +6 -0
- data/lib/html2rss/auto_source/scraper/semantic_html/extractor.rb +22 -9
- data/lib/html2rss/auto_source/scraper.rb +1 -0
- data/lib/html2rss/auto_source.rb +20 -17
- data/lib/html2rss/config.rb +1 -4
- data/lib/html2rss/item.rb +1 -1
- data/lib/html2rss/rss_builder/stylesheet.rb +38 -23
- data/lib/html2rss/utils.rb +6 -0
- data/lib/html2rss/version.rb +1 -1
- data/lib/html2rss.rb +9 -13
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d89191b35f643372cc18b880dab7535d18a10d9fd123897460ee16c5e990a5d9
|
4
|
+
data.tar.gz: 71cb356f5261b2e6a3d2152afcb68f658e78d5fec5ff15bc67ed0d5bd153fc00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 46f048feae342844df1af51c741d681677192c1dc84452fae1002f5cca5b406c0698a426ec6e532572c4fb4f6fb896a966862d8d2599b8dd742a174707289aed
|
7
|
+
data.tar.gz: 98d0316c64bb5a160d26d5efa59b25901b3a64e572795bbd840539fe69d84a4ea3c797bb16721edb73277d1b9bfb9238f9d40ea2b9bb4ebeffc81e8790a02062
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[](http://rubygems.org/gems/html2rss/) [](https://www.rubydoc.info/gems/html2rss) 
|
4
4
|
|
5
|
-
`html2rss` is a Ruby gem that generates RSS 2.0 feeds from a _feed config_.
|
5
|
+
`html2rss` is a Ruby gem that generates RSS 2.0 feeds from websites automatically, and as a fallback via _feed config_.
|
6
6
|
|
7
7
|
With the _feed config_, you provide a URL to scrape and CSS selectors for extracting information (like title, URL, etc.). The gem builds the RSS feed accordingly. [Extractors](#using-extractors) and chainable [post processors](#using-post-processors) make information extraction, processing, and sanitizing a breeze. The gem also supports [scraping JSON](#scraping-and-handling-json-responses) responses and [setting HTTP request headers](#set-any-http-header-in-the-request).
|
8
8
|
|
@@ -26,9 +26,9 @@ You can also install it as a dependency in your Ruby project:
|
|
26
26
|
|
27
27
|
## Generating a feed on the CLI
|
28
28
|
|
29
|
-
### using automatic
|
29
|
+
### using automatic generation
|
30
30
|
|
31
|
-
html2rss offers an automatic
|
31
|
+
html2rss offers an automatic RSS generation feature. Try it with:
|
32
32
|
|
33
33
|
`html2rss auto https://unmatchedstyle.com/`
|
34
34
|
|
@@ -10,21 +10,27 @@ module Html2rss
|
|
10
10
|
##
|
11
11
|
#
|
12
12
|
# @param parsed_body [Nokogiri::HTML::Document] The parsed HTML document.
|
13
|
-
# @param
|
14
|
-
|
13
|
+
# @param url [Addressable::URI] The URL of the channel.
|
14
|
+
# @param headers [Hash<String, String>] The HTTP response headers.
|
15
|
+
# @param articles [Array<Html2rss::AutoSource::Article>] The articles.
|
16
|
+
def initialize(parsed_body, url:, headers:, articles: [], stylesheets: [])
|
15
17
|
@parsed_body = parsed_body
|
16
18
|
@url = url
|
17
|
-
@
|
19
|
+
@headers = headers
|
18
20
|
@articles = articles
|
21
|
+
@stylesheets = stylesheets
|
19
22
|
end
|
20
23
|
|
24
|
+
attr_writer :articles
|
25
|
+
attr_reader :stylesheets
|
26
|
+
|
21
27
|
def url = extract_url
|
22
28
|
def title = extract_title
|
23
29
|
def language = extract_language
|
24
30
|
def description = extract_description
|
25
31
|
def image = extract_image
|
26
32
|
def ttl = extract_ttl
|
27
|
-
def last_build_date =
|
33
|
+
def last_build_date = headers['last-modified']
|
28
34
|
|
29
35
|
def generator
|
30
36
|
"html2rss V. #{::Html2rss::VERSION} (using auto_source scrapers: #{scraper_counts})"
|
@@ -32,7 +38,7 @@ module Html2rss
|
|
32
38
|
|
33
39
|
private
|
34
40
|
|
35
|
-
attr_reader :parsed_body, :
|
41
|
+
attr_reader :parsed_body, :headers
|
36
42
|
|
37
43
|
def extract_url
|
38
44
|
@url.normalize.to_s
|
@@ -58,7 +64,7 @@ module Html2rss
|
|
58
64
|
end
|
59
65
|
|
60
66
|
def extract_ttl
|
61
|
-
ttl =
|
67
|
+
ttl = headers['cache-control']&.match(/max-age=(\d+)/)&.[](1)
|
62
68
|
return unless ttl
|
63
69
|
|
64
70
|
ttl.to_i.fdiv(60).ceil
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'set'
|
5
|
+
|
6
|
+
module Html2rss
|
7
|
+
class AutoSource
|
8
|
+
module Scraper
|
9
|
+
##
|
10
|
+
# Scrapes articles from HTML pages by
|
11
|
+
# finding similar structures around anchor tags in the parsed_body.
|
12
|
+
class Html
|
13
|
+
include Enumerable
|
14
|
+
|
15
|
+
def self.articles?(parsed_body)
|
16
|
+
new(parsed_body, url: '').any?
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.parent_until_condition(node, condition)
|
20
|
+
return nil if !node || node.parent.name == 'html'
|
21
|
+
return node if condition.call(node)
|
22
|
+
|
23
|
+
parent_until_condition(node.parent, condition)
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Simplify an XPath selector by removing the index notation.
|
28
|
+
def self.simplify_xpath(xpath)
|
29
|
+
xpath.gsub(/\[\d+\]/, '')
|
30
|
+
end
|
31
|
+
|
32
|
+
def initialize(parsed_body, url:)
|
33
|
+
@parsed_body = parsed_body
|
34
|
+
@url = url
|
35
|
+
@css_selectors = Hash.new(0)
|
36
|
+
end
|
37
|
+
|
38
|
+
attr_reader :parsed_body
|
39
|
+
|
40
|
+
##
|
41
|
+
# @yieldparam [Hash] The scraped article hash
|
42
|
+
# @return [Enumerator] Enumerator for the scraped articles
|
43
|
+
def each
|
44
|
+
return enum_for(:each) unless block_given?
|
45
|
+
|
46
|
+
return if frequent_selectors.empty?
|
47
|
+
|
48
|
+
frequent_selectors.each do |selector|
|
49
|
+
parsed_body.xpath(selector).each do |selected_tag|
|
50
|
+
article_tag = self.class.parent_until_condition(selected_tag, method(:article_condition))
|
51
|
+
article_hash = SemanticHtml::Extractor.new(article_tag, url: @url).call
|
52
|
+
|
53
|
+
yield article_hash if article_hash
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
##
|
59
|
+
# Counts the anchors below root by their simplified XPath and keeps the frequent ones.
|
60
|
+
# @param root [Nokogiri::XML::Node] The root node to search for anchors
|
61
|
+
# @return [Set<String>] The set of XPath selectors which exist at least min_frequency times
|
62
|
+
def frequent_selectors(root = @parsed_body.at_css('body'), min_frequency: 2)
|
63
|
+
@frequent_selectors ||= begin
|
64
|
+
root.traverse do |node|
|
65
|
+
next if !node.element? || node.name != 'a'
|
66
|
+
|
67
|
+
@css_selectors[self.class.simplify_xpath(node.path)] += 1
|
68
|
+
end
|
69
|
+
|
70
|
+
@css_selectors.keys
|
71
|
+
.select { |selector| (@css_selectors[selector]).to_i >= min_frequency }
|
72
|
+
.to_set
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
def article_condition(node)
|
79
|
+
return true if %w[body html].include?(node.name)
|
80
|
+
return true if node.parent.css('a').size > 1
|
81
|
+
|
82
|
+
false
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
@@ -1,5 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'json'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'set'
|
6
|
+
|
3
7
|
module Html2rss
|
4
8
|
class AutoSource
|
5
9
|
module Scraper
|
@@ -99,6 +103,8 @@ module Html2rss
|
|
99
103
|
# @yield [Hash] Each scraped article_hash
|
100
104
|
# @return [Array<Hash>] the scraped article_hashes
|
101
105
|
def each(&)
|
106
|
+
return enum_for(:each) unless block_given?
|
107
|
+
|
102
108
|
schema_objects.filter_map do |schema_object|
|
103
109
|
next unless (klass = self.class.scraper_for_schema_object(schema_object))
|
104
110
|
next unless (article_hash = klass.new(schema_object, url:).call)
|
@@ -35,13 +35,13 @@ module Html2rss
|
|
35
35
|
def initialize(article_tag, url:)
|
36
36
|
@article_tag = article_tag
|
37
37
|
@url = url
|
38
|
-
@heading = find_heading
|
39
|
-
@extract_url = find_url
|
40
38
|
end
|
41
39
|
|
42
40
|
# @return [Hash, nil] The scraped article or nil.
|
43
41
|
def call
|
44
|
-
|
42
|
+
@heading = find_heading || closest_anchor || return
|
43
|
+
|
44
|
+
@extract_url = find_url
|
45
45
|
|
46
46
|
{
|
47
47
|
title: extract_title,
|
@@ -71,14 +71,20 @@ module Html2rss
|
|
71
71
|
times.min
|
72
72
|
end
|
73
73
|
|
74
|
+
##
|
75
|
+
# Find the heading of the article.
|
76
|
+
# @return [Nokogiri::XML::Node, nil]
|
74
77
|
def find_heading
|
75
78
|
heading_tags = article_tag.css(HEADING_TAGS.join(',')).group_by(&:name)
|
79
|
+
|
80
|
+
return if heading_tags.empty?
|
81
|
+
|
76
82
|
smallest_heading = heading_tags.keys.min
|
77
|
-
heading_tags[smallest_heading]&.max_by { |tag| tag
|
83
|
+
heading_tags[smallest_heading]&.max_by { |tag| visible_text_from_tag(tag)&.size }
|
78
84
|
end
|
79
85
|
|
80
86
|
def extract_title
|
81
|
-
@extract_title ||= if heading.children.empty?
|
87
|
+
@extract_title ||= if heading && (heading.children.empty? || heading.text)
|
82
88
|
visible_text_from_tag(heading)
|
83
89
|
else
|
84
90
|
visible_text_from_tag(
|
@@ -101,9 +107,12 @@ module Html2rss
|
|
101
107
|
description.empty? ? nil : description
|
102
108
|
end
|
103
109
|
|
110
|
+
def closest_anchor
|
111
|
+
SemanticHtml.find_closest_selector(heading || article_tag,
|
112
|
+
selector: 'a[href]:not([href=""])')
|
113
|
+
end
|
114
|
+
|
104
115
|
def find_url
|
105
|
-
closest_anchor = SemanticHtml.find_closest_selector(heading || article_tag,
|
106
|
-
selector: 'a[href]:not([href=""])')
|
107
116
|
href = closest_anchor&.[]('href')&.split('#')&.first&.strip
|
108
117
|
Utils.build_absolute_url_from_relative(href, url) unless href.to_s.empty?
|
109
118
|
end
|
@@ -113,8 +122,12 @@ module Html2rss
|
|
113
122
|
end
|
114
123
|
|
115
124
|
def generate_id
|
116
|
-
[
|
117
|
-
|
125
|
+
[
|
126
|
+
article_tag['id'],
|
127
|
+
article_tag.at_css('[id]')&.attr('id'),
|
128
|
+
extract_url&.path,
|
129
|
+
extract_url&.query
|
130
|
+
].compact.reject(&:empty?).first
|
118
131
|
end
|
119
132
|
end
|
120
133
|
end
|
data/lib/html2rss/auto_source.rb
CHANGED
@@ -16,16 +16,18 @@ module Html2rss
|
|
16
16
|
|
17
17
|
SUPPORTED_URL_SCHEMES = %w[http https].to_set.freeze
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
19
|
+
##
|
20
|
+
# @param url [Addressable::URI] The URL to extract articles from.
|
21
|
+
# @param body [String] The body of the response.
|
22
|
+
# @param headers [Hash] The headers of the response.
|
23
|
+
def initialize(url, body:, headers: {})
|
24
|
+
raise ArgumentError, 'URL must be a Addressable::URI' unless url.is_a?(Addressable::URI)
|
25
|
+
raise ArgumentError, 'URL must be absolute' unless url.absolute?
|
26
|
+
raise UnsupportedUrlScheme, "#{url.scheme} not supported" unless SUPPORTED_URL_SCHEMES.include?(url.scheme)
|
27
|
+
|
28
|
+
@url = url
|
29
|
+
@body = body
|
30
|
+
@headers = headers
|
29
31
|
end
|
30
32
|
|
31
33
|
def build
|
@@ -34,6 +36,8 @@ module Html2rss
|
|
34
36
|
Reducer.call(articles, url:)
|
35
37
|
Cleanup.call(articles, url:, keep_different_domain: true)
|
36
38
|
|
39
|
+
channel.articles = articles
|
40
|
+
|
37
41
|
Html2rss::AutoSource::RssBuilder.new(
|
38
42
|
channel:,
|
39
43
|
articles:
|
@@ -57,21 +61,20 @@ module Html2rss
|
|
57
61
|
end
|
58
62
|
|
59
63
|
def channel
|
60
|
-
Channel.new(parsed_body,
|
64
|
+
@channel ||= Channel.new(parsed_body, headers: @headers, url:)
|
61
65
|
end
|
62
66
|
|
63
67
|
private
|
64
68
|
|
65
69
|
attr_reader :url
|
66
70
|
|
67
|
-
def response
|
68
|
-
@response ||= Html2rss::Utils.request_url(url)
|
69
|
-
end
|
70
|
-
|
71
|
-
# Parses the HTML body of the response using Nokogiri.
|
72
71
|
# @return [Nokogiri::HTML::Document]
|
73
72
|
def parsed_body
|
74
|
-
@parsed_body ||= Nokogiri.HTML(
|
73
|
+
@parsed_body ||= Nokogiri.HTML(@body)
|
74
|
+
.tap do |doc|
|
75
|
+
# Remove comments from the document
|
76
|
+
doc.xpath('//comment()').each(&:remove)
|
77
|
+
end.freeze
|
75
78
|
end
|
76
79
|
end
|
77
80
|
end
|
data/lib/html2rss/config.rb
CHANGED
@@ -18,9 +18,6 @@ module Html2rss
|
|
18
18
|
# Thrown when the feed config does not contain a value at `:channel`.
|
19
19
|
class ChannelMissing < Html2rss::Error; end
|
20
20
|
|
21
|
-
# Struct to store XML Stylesheet attributes
|
22
|
-
Stylesheet = Struct.new(:href, :type, :media, keyword_init: true)
|
23
|
-
|
24
21
|
def_delegator :@channel, :author
|
25
22
|
def_delegator :@channel, :ttl
|
26
23
|
def_delegator :@channel, :title
|
@@ -75,7 +72,7 @@ module Html2rss
|
|
75
72
|
#
|
76
73
|
# @return [Array<Html2rss::RssBuilder::Stylesheet>] Array of Stylesheet objects.
|
77
74
|
def stylesheets
|
78
|
-
@global.fetch(:stylesheets, []).map { |attributes| Stylesheet.new(attributes) }
|
75
|
+
@global.fetch(:stylesheets, []).map { |attributes| Html2rss::RssBuilder::Stylesheet.new(**attributes) }
|
79
76
|
end
|
80
77
|
|
81
78
|
# Provides read-only access to the channel object.
|
data/lib/html2rss/item.rb
CHANGED
@@ -19,7 +19,7 @@ module Html2rss
|
|
19
19
|
##
|
20
20
|
# Fetches items from a given URL using configuration settings.
|
21
21
|
#
|
22
|
-
# @param url [
|
22
|
+
# @param url [Addressable::URI] URL to fetch items from.
|
23
23
|
# @param config [Html2rss::Config] Configuration object.
|
24
24
|
# @return [Array<Html2rss::Item>] list of items fetched.
|
25
25
|
def self.from_url(url, config)
|
@@ -3,35 +3,50 @@
|
|
3
3
|
module Html2rss
|
4
4
|
module RssBuilder
|
5
5
|
##
|
6
|
-
#
|
6
|
+
# Represents a stylesheet.
|
7
7
|
class Stylesheet
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
class << self
|
9
|
+
##
|
10
|
+
# Adds the stylesheet XML tags to the RSS.
|
11
|
+
#
|
12
|
+
# @param maker [RSS::Maker::RSS20] RSS maker object.
|
13
|
+
# @param stylesheets [Array<Html2rss::RssBuilder::Stylesheet>] Array of stylesheets to add.
|
14
|
+
# @return [nil]
|
15
|
+
def add(maker, stylesheets)
|
16
|
+
stylesheets.each do |stylesheet|
|
17
|
+
add_stylesheet(maker, stylesheet)
|
18
|
+
end
|
17
19
|
end
|
18
|
-
end
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
21
|
+
private
|
22
|
+
|
23
|
+
##
|
24
|
+
# Adds a single Stylesheet to the RSS.
|
25
|
+
#
|
26
|
+
# @param maker [RSS::Maker::RSS20] RSS maker object.
|
27
|
+
# @param stylesheet [Html2rss::RssBuilder::Stylesheet] The stylesheet to add.
|
28
|
+
# @return [nil]
|
29
|
+
def add_stylesheet(maker, stylesheet)
|
30
|
+
maker.xml_stylesheets.new_xml_stylesheet do |xss|
|
31
|
+
xss.href = stylesheet.href
|
32
|
+
xss.type = stylesheet.type
|
33
|
+
xss.media = stylesheet.media
|
34
|
+
end
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
34
|
-
|
38
|
+
TYPES = ['text/css', 'text/xsl'].freeze
|
39
|
+
|
40
|
+
def initialize(href:, type:, media: 'all')
|
41
|
+
raise ArgumentError, 'stylesheet.href must be a String' unless href.is_a?(String)
|
42
|
+
raise ArgumentError, 'stylesheet.type invalid' unless TYPES.include?(type)
|
43
|
+
raise ArgumentError, 'stylesheet.media must be a String' unless media.is_a?(String)
|
44
|
+
|
45
|
+
@href = href
|
46
|
+
@type = type
|
47
|
+
@media = media
|
48
|
+
end
|
49
|
+
attr_reader :href, :type, :media
|
35
50
|
end
|
36
51
|
end
|
37
52
|
end
|
data/lib/html2rss/utils.rb
CHANGED
@@ -44,6 +44,7 @@ module Html2rss
|
|
44
44
|
#
|
45
45
|
# @param time_zone [String]
|
46
46
|
# @param default_time_zone [String]
|
47
|
+
# @yield block to execute with the given time zone
|
47
48
|
# @return [Object] whatever the given block returns
|
48
49
|
def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
|
49
50
|
raise ArgumentError, 'a block is required' unless block_given?
|
@@ -74,6 +75,11 @@ module Html2rss
|
|
74
75
|
# @param headers [Hash] additional HTTP request headers to use for the request
|
75
76
|
# @return [Faraday::Response] the HTTP response
|
76
77
|
def self.request_url(url, headers: {})
|
78
|
+
url = Addressable::URI.parse(url.to_s) unless url.is_a?(Addressable::URI)
|
79
|
+
|
80
|
+
raise ArgumentError, 'URL must be absolute' unless url.absolute?
|
81
|
+
raise ArgumentError, 'URL must not contain an @ characater' if url.to_s.include?('@')
|
82
|
+
|
77
83
|
Faraday.new(url:, headers:) do |faraday|
|
78
84
|
faraday.use Faraday::FollowRedirects::Middleware
|
79
85
|
faraday.adapter Faraday.default_adapter
|
data/lib/html2rss/version.rb
CHANGED
data/lib/html2rss.rb
CHANGED
@@ -5,8 +5,9 @@ require 'zeitwerk'
|
|
5
5
|
loader = Zeitwerk::Loader.for_gem
|
6
6
|
loader.setup
|
7
7
|
|
8
|
-
require '
|
8
|
+
require 'addressable'
|
9
9
|
require 'logger'
|
10
|
+
require 'yaml'
|
10
11
|
|
11
12
|
##
|
12
13
|
# The Html2rss namespace.
|
@@ -43,7 +44,7 @@ module Html2rss
|
|
43
44
|
# @param params [Hash] Dynamic parameters for the feed configuration.
|
44
45
|
# @return [RSS::Rss] RSS object generated from the configuration.
|
45
46
|
def self.feed_from_yaml_config(file, name = nil, global_config: {}, params: {})
|
46
|
-
yaml =
|
47
|
+
yaml = YAML.safe_load_file(file, symbolize_names: true)
|
47
48
|
feeds = yaml[CONFIG_KEY_FEEDS] || {}
|
48
49
|
|
49
50
|
feed_config = find_feed_config(yaml, feeds, name, global_config)
|
@@ -73,15 +74,6 @@ module Html2rss
|
|
73
74
|
RssBuilder.build(config)
|
74
75
|
end
|
75
76
|
|
76
|
-
##
|
77
|
-
# Loads and parses the YAML file.
|
78
|
-
#
|
79
|
-
# @param file [String] Path to the YAML file.
|
80
|
-
# @return [Hash] Parsed YAML content.
|
81
|
-
def self.load_yaml(file)
|
82
|
-
YAML.safe_load_file(file, symbolize_names: true)
|
83
|
-
end
|
84
|
-
|
85
77
|
##
|
86
78
|
# Builds the feed configuration based on the provided parameters.
|
87
79
|
#
|
@@ -109,8 +101,12 @@ module Html2rss
|
|
109
101
|
# @param url [String] the URL to automatically source the feed from
|
110
102
|
# @return [RSS::Rss]
|
111
103
|
def self.auto_source(url)
|
112
|
-
|
104
|
+
url = Addressable::URI.parse(url)
|
105
|
+
|
106
|
+
response = Html2rss::Utils.request_url(url)
|
107
|
+
|
108
|
+
Html2rss::AutoSource.new(url, body: response.body, headers: response.headers).build
|
113
109
|
end
|
114
110
|
|
115
|
-
private_class_method :
|
111
|
+
private_class_method :find_feed_config
|
116
112
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.15.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -251,6 +251,7 @@ files:
|
|
251
251
|
- lib/html2rss/auto_source/reducer.rb
|
252
252
|
- lib/html2rss/auto_source/rss_builder.rb
|
253
253
|
- lib/html2rss/auto_source/scraper.rb
|
254
|
+
- lib/html2rss/auto_source/scraper/html.rb
|
254
255
|
- lib/html2rss/auto_source/scraper/schema.rb
|
255
256
|
- lib/html2rss/auto_source/scraper/schema/base.rb
|
256
257
|
- lib/html2rss/auto_source/scraper/semantic_html.rb
|
@@ -279,7 +280,7 @@ licenses:
|
|
279
280
|
- MIT
|
280
281
|
metadata:
|
281
282
|
allowed_push_host: https://rubygems.org
|
282
|
-
changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.
|
283
|
+
changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.15.0
|
283
284
|
rubygems_mfa_required: 'true'
|
284
285
|
post_install_message:
|
285
286
|
rdoc_options: []
|
@@ -296,7 +297,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
296
297
|
- !ruby/object:Gem::Version
|
297
298
|
version: '0'
|
298
299
|
requirements: []
|
299
|
-
rubygems_version: 3.5.
|
300
|
+
rubygems_version: 3.5.16
|
300
301
|
signing_key:
|
301
302
|
specification_version: 4
|
302
303
|
summary: Generates RSS feeds from websites by scraping a URL and using CSS selectors
|