html2rss 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
# frozen_string_literal: true

module Html2rss
  ##
  # Global configuration defaults for the Html2rss gem.
  #
  # Every setter validates its argument and raises ArgumentError when the value
  # is rejected. Hash headers and stylesheets are stored as frozen copies, so a
  # stored value is replaced by assigning a new one rather than mutated.
  class Configuration
    # The valid symbol log levels.
    VALID_LOG_LEVELS = %i[debug info warn error fatal unknown].freeze

    # @return [Object] the logger
    attr_reader :logger

    # @return [Proc, #call, nil] the logger formatter
    attr_reader :logger_formatter

    # @return [Integer] the current log level, normalized to a Logger severity
    attr_reader :log_level

    # @return [Hash, Proc, nil] the globally configured headers
    attr_reader :headers

    # @return [Symbol, nil] the default strategy name
    attr_reader :default_strategy

    # @return [Integer, nil] the minimum TTL in minutes
    attr_reader :min_ttl

    # @return [Array<Hash>] the globally configured stylesheets
    attr_reader :stylesheets

    ##
    # Initializes a new Configuration instance with defaults.
    #
    # The logger writes to $stdout. The initial log level is read from the
    # LOG_LEVEL environment variable and falls back to :warn when it is unset.
    #
    # @raise [ArgumentError] if LOG_LEVEL holds an invalid log level
    def initialize
      @logger_formatter = proc do |severity, datetime, _progname, msg|
        "#{datetime} [#{severity}] #{msg}\n"
      end
      @logger = Logger.new($stdout)
      @logger.formatter = @logger_formatter
      self.log_level = ENV.fetch('LOG_LEVEL', :warn)
      @headers = nil
      @default_strategy = nil
      @min_ttl = nil
      @stylesheets = [].freeze
    end

    ##
    # Sets the logger and re-applies the configured level and formatter to it
    # when the logger supports the respective writer.
    #
    # @param logger [Object]
    # @return [Object] the logger
    def logger=(logger)
      @logger = logger
      @logger.level = @log_level if @logger.respond_to?(:level=)
      @logger.formatter = @logger_formatter if @logger_formatter && @logger.respond_to?(:formatter=)
    end

    ##
    # Sets the log level and forwards it to the current logger.
    #
    # @param level [Symbol, String, Integer] the new log level
    # @return [Integer] the normalized log level
    # @raise [ArgumentError] if the log level is invalid
    def log_level=(level)
      @log_level = normalize_log_level(level)
      @logger.level = @log_level if @logger.respond_to?(:level=)
    end

    ##
    # Sets the logger formatter and forwards it to the current logger.
    #
    # @param formatter [Proc, #call, nil] the new logger formatter
    # @return [Proc, #call, nil] the new logger formatter
    # @raise [ArgumentError] if formatter does not respond to #call
    def logger_formatter=(formatter)
      raise ArgumentError, 'formatter must respond to #call or be nil' if formatter && !formatter.respond_to?(:call)

      @logger_formatter = formatter
      @logger.formatter = @logger_formatter if @logger.respond_to?(:formatter=)
    end

    ##
    # Sets the global request headers.
    #
    # A Hash is stored as a frozen shallow copy; a callable is stored as given.
    #
    # @param headers [Hash, Proc, #call, nil] the HTTP request headers to globally apply
    # @return [Hash, Proc, #call, nil] the assigned headers
    # @raise [ArgumentError] if headers is not a Hash or callable
    def headers=(headers)
      if headers && !headers.is_a?(Hash) && !headers.respond_to?(:call)
        raise ArgumentError, 'headers must be a Hash or respond to #call'
      end

      @headers = headers.is_a?(Hash) ? headers.dup.freeze : headers
    end

    ##
    # Sets the default strategy.
    #
    # @param strategy [Symbol, String, nil] the strategy name
    # @return [Symbol, nil] the normalized strategy name
    # @raise [ArgumentError] if the strategy is not a Symbol/String or is not registered
    def default_strategy=(strategy)
      if strategy.nil?
        @default_strategy = nil
      else
        unless strategy.is_a?(Symbol) || strategy.is_a?(String)
          raise ArgumentError, 'strategy must be a Symbol or String'
        end

        normalized = strategy.to_sym
        raise ArgumentError, "unknown strategy: #{strategy}" unless RequestService.strategy_registered?(normalized)

        @default_strategy = normalized
      end
    end

    ##
    # Sets the minimum TTL in minutes.
    #
    # @param ttl [Integer, String, nil] the minimum TTL
    # @return [Integer, nil] the normalized minimum TTL
    # @raise [ArgumentError] if ttl is not a positive integer
    def min_ttl=(ttl)
      if ttl.nil?
        @min_ttl = nil
        return
      end

      # `exception: false` yields nil for anything Integer() cannot convert,
      # including non-finite floats, so every rejection is reported uniformly
      # as ArgumentError instead of leaking TypeError/FloatDomainError.
      minutes = Integer(ttl, exception: false)
      raise ArgumentError, "min_ttl must be a positive integer, got #{ttl.inspect}" unless minutes&.positive?

      @min_ttl = minutes
    end

    ##
    # Sets the global stylesheets.
    #
    # The array and each contained Hash are stored as frozen shallow copies.
    #
    # @param stylesheets [Array<Hash>] the XML stylesheet processing instructions to include in the generated feed
    # @return [Array<Hash>] the assigned stylesheets
    # @raise [ArgumentError] if stylesheets is not an Array of hashes
    def stylesheets=(stylesheets)
      raise ArgumentError, 'stylesheets must be an Array' unless stylesheets.is_a?(Array)
      raise ArgumentError, 'stylesheets must be an Array of Hashes' unless stylesheets.all?(Hash)

      @stylesheets = stylesheets.map { |h| h.dup.freeze }.freeze
    end

    private

    ##
    # Copy constructor for duplicating configuration.
    #
    # Ruby always treats #initialize_copy as private, so it lives in the
    # private section. Copied headers and stylesheets are re-frozen so that a
    # duplicate honours the same immutability the setters establish.
    #
    # NOTE(review): @logger is not duplicated here, so a copy and its original
    # share one logger object; assigning log_level or logger_formatter on
    # either one reconfigures that shared logger.
    #
    # @param other [Html2rss::Configuration] the original configuration
    # @return [void]
    def initialize_copy(other)
      super
      @headers = @headers.dup.freeze if @headers.is_a?(Hash)
      @stylesheets = @stylesheets.map { |sheet| sheet.dup.freeze }.freeze if @stylesheets.is_a?(Array)
    end

    # Converts a user-supplied level into a Logger severity integer.
    #
    # Integers must lie in 0..5 and are returned unchanged; anything else is
    # stringified, downcased and matched against VALID_LOG_LEVELS.
    #
    # @param level [Symbol, String, Integer] the requested log level
    # @return [Integer] the Logger severity
    # @raise [ArgumentError] if the level is out of range or unknown
    def normalize_log_level(level)
      if level.is_a?(Integer)
        raise ArgumentError, "invalid log level: #{level}" unless level.between?(0, 5)

        level
      else
        sym = level.to_s.downcase.to_sym
        raise ArgumentError, "invalid log level: #{level}" unless VALID_LOG_LEVELS.include?(sym)

        Logger.const_get(sym.upcase)
      end
    end
  end
end
@@ -0,0 +1,94 @@
# frozen_string_literal: true

module Html2rss
  class HtmlExtractor
    ##
    # Builds repeated-list article container candidates from generic HTML.
    #
    # Anchors are grouped by their index-free XPath; the most frequent paths
    # are then resolved back to nodes and walked up to an article container.
    class ListCandidates
      ##
      # Simplify an XPath selector by removing index notation.
      #
      # @param xpath [String] original XPath
      # @return [String] XPath without positional indexes
      def self.simplify_xpath(xpath)
        xpath.gsub(/\[\d+\]/, '')
      end

      # @param parsed_body [Nokogiri::HTML::Document] parsed document
      # @param minimum_selector_frequency [Integer] minimum repeated anchor path count
      # @param use_top_selectors [Integer] number of frequent anchor paths to inspect
      def initialize(parsed_body, minimum_selector_frequency:, use_top_selectors:)
        @parsed_body = parsed_body
        @minimum_selector_frequency = minimum_selector_frequency
        @use_top_selectors = use_top_selectors
      end

      ##
      # Yields each candidate container together with the anchor that selected it.
      #
      # @param anchor_filter [#call] predicate for scraper-specific anchor eligibility
      # @param boundary_condition [#call] predicate for article container boundary
      # @yieldparam article_tag [Nokogiri::XML::Node] candidate article container
      # @yieldparam selected_anchor [Nokogiri::XML::Node] anchor that made the container eligible
      # @return [Enumerator] when no block is given
      def each_article_tag(anchor_filter:, boundary_condition:)
        return enum_for(:each_article_tag, anchor_filter:, boundary_condition:) unless block_given?

        article_tags(anchor_filter:, boundary_condition:).each do |entry|
          yield entry[:article_tag], entry[:selected_anchor]
        end
      end

      private

      attr_reader :parsed_body, :minimum_selector_frequency, :use_top_selectors

      # Collects container/anchor pairs for every selected anchor path.
      def article_tags(anchor_filter:, boundary_condition:)
        selectors(anchor_filter:).flat_map do |selector|
          article_tags_for_selector(selector, boundary_condition)
        end
      end

      # Resolves one anchor path to its nodes and pairs each node with the
      # container found by walking up until the boundary condition holds.
      def article_tags_for_selector(selector, boundary_condition)
        parsed_body.xpath(selector).filter_map do |selected_tag|
          next if HtmlExtractor.ignored_container_path?(selected_tag)

          container = HtmlNavigator.parent_until_condition(selected_tag, boundary_condition)
          { article_tag: container, selected_anchor: selected_tag } if container
        end
      end

      # Picks the most frequent anchor paths that reach the minimum frequency.
      def selectors(anchor_filter:)
        frequent = anchor_counts(anchor_filter:).select do |_path, hits|
          hits >= minimum_selector_frequency
        end

        frequent.max_by(use_top_selectors) { |_path, hits| hits }
                .map { |path, _hits| path }
      end

      # Tallies index-free anchor paths, leaving out ignored container paths.
      def anchor_counts(anchor_filter:)
        each_anchor(anchor_filter:).each_with_object(Hash.new(0)) do |node, counts|
          path = self.class.simplify_xpath(node.path)
          next if HtmlExtractor.ignored_container_path?(path)

          counts[path] += 1
        end
      end

      # Yields every relevant anchor found while traversing the root node.
      def each_anchor(anchor_filter:)
        return enum_for(:each_anchor, anchor_filter:) unless block_given?

        root = traversal_root
        return unless root

        root.traverse do |node|
          yield node if relevant_anchor?(node, anchor_filter:)
        end
      end

      # An anchor is relevant when it is an element, matches the main anchor
      # selector and passes the caller-supplied filter.
      def relevant_anchor?(node, anchor_filter:)
        return false unless node.element?
        return false unless node.matches?(HtmlExtractor::MAIN_ANCHOR_SELECTOR)

        anchor_filter.call(node)
      end

      # Node the anchor traversal starts from.
      def traversal_root
        parsed_body.at_css('body, html') || parsed_body.root
      end
    end
  end
end
@@ -0,0 +1,257 @@
# frozen_string_literal: true

module Html2rss
  class HtmlExtractor
    ##
    # Builds ranked anchor facts for one semantic content container.
    #
    # Each anchor in the container is checked for eligibility, scored, and
    # deduplicated so that only the strongest anchor per destination remains.
    class SemanticAnchorCandidates
      # Anchor candidate plus scoring signals used by semantic anchor ranking.
      AnchorFacts = Data.define(
        :anchor,
        :text,
        :url,
        :destination,
        :segments,
        :meaningful_text,
        :content_like_destination,
        :heading_anchor,
        :heading_text_match,
        :score
      ) do
        # @param candidate [Candidate] eligible semantic anchor candidate
        # @return [AnchorFacts] serializable facts used for ranking and dedupe
        def self.from_candidate(candidate)
          new(
            **candidate.anchor_identity_attributes,
            **candidate.anchor_signal_attributes,
            score: Score.new(candidate).value
          )
        end
      end

      # Shared context for all anchors in one semantic container.
      class Context
        # Ancestor tags that usually indicate navigation/utility regions.
        UTILITY_LANDMARK_TAGS = %w[nav aside footer menu].freeze

        # @param container [Nokogiri::XML::Node] semantic container
        # @param link_heuristics [Html2rss::AutoSource::Scraper::LinkHeuristics] destination/text heuristics
        def initialize(container, link_heuristics:)
          @container = container
          @link_heuristics = link_heuristics
        end

        # @return [Nokogiri::XML::Node, nil] heading used to identify title anchors
        def heading
          # `defined?` also memoizes a nil result, so a container without any
          # heading is queried once instead of once per caller.
          return @heading if defined?(@heading)

          @heading = @container.at_css(HtmlExtractor::HEADING_TAGS.join(','))
        end

        # @return [String] visible heading text ('' when there is no heading)
        def heading_text
          @heading_text ||= visible_text(heading)
        end

        # @param node [Nokogiri::XML::Node, nil] node to extract text from
        # @return [String] visible text for the node
        def visible_text(node)
          return '' unless node

          HtmlExtractor.extract_visible_text(node).to_s.strip
        end

        # @param anchor [Nokogiri::XML::Node] anchor candidate
        # @return [Html2rss::AutoSource::Scraper::LinkHeuristics::DestinationFacts, nil] destination facts
        def destination_facts(anchor)
          @link_heuristics.destination_facts(anchor)
        end

        # @param text [String] visible anchor text
        # @return [Boolean] true when text is utility chrome
        def utility_text?(text)
          @link_heuristics.utility_text?(text)
        end

        # @param ancestors [Array<Nokogiri::XML::Node>]
        # @return [Boolean] true when the anchor lives inside navigation chrome
        def utility_landmark?(ancestors)
          ancestors.any? { |node| UTILITY_LANDMARK_TAGS.include?(node.name) }
        end
      end

      # One anchor plus the facts needed to decide whether it represents content.
      class Candidate
        attr_reader :anchor

        # @param anchor [Nokogiri::XML::Node] anchor candidate
        # @param context [Context] semantic container context
        def initialize(anchor, context)
          @anchor = anchor
          @context = context
        end

        # @return [AnchorFacts, nil] ranked anchor facts when the anchor is eligible
        def facts
          return unless destination_facts
          return if utility_text_suppressed? || ineligible_anchor?
          return unless representative_content_anchor?

          AnchorFacts.from_candidate(self)
        end

        # @return [Html2rss::AutoSource::Scraper::LinkHeuristics::DestinationFacts, nil] destination facts
        def destination_facts
          @destination_facts ||= @context.destination_facts(@anchor)
        end

        # @return [String] visible anchor text
        def text
          @text ||= @context.visible_text(@anchor)
        end

        # @return [Hash] anchor identity attributes used to build AnchorFacts
        def anchor_identity_attributes
          {
            anchor:,
            text:,
            url: destination_facts.url,
            destination: destination_facts.destination,
            segments: destination_facts.segments
          }
        end

        # @return [Hash] anchor signal attributes used to build AnchorFacts
        def anchor_signal_attributes
          {
            meaningful_text: meaningful_text?,
            content_like_destination: content_like_destination?,
            heading_anchor: heading_anchor?,
            heading_text_match: heading_text_match?
          }
        end

        # @return [Boolean] true when visible anchor text has words
        def meaningful_text?
          text.match?(/\p{Alnum}/)
        end

        # @return [Boolean] true when the destination route has content signals
        def content_like_destination?
          destination_facts.content_path
        end

        # @return [Boolean] true when the anchor is inside the selected heading
        def heading_anchor?
          # Memoized because eligibility checks, signal attributes and scoring
          # all ask for it; always a strict Boolean so AnchorFacts never
          # carries nil for this signal.
          return @heading_anchor if defined?(@heading_anchor)

          heading = @context.heading
          @heading_anchor = heading ? @anchor.ancestors.include?(heading) : false
        end

        # @return [Boolean] true when anchor text exactly matches heading text
        def heading_text_match?
          heading_text = @context.heading_text

          meaningful_text? &&
            heading_text.match?(/\p{Alnum}/) &&
            heading_text == text
        end

        private

        # Any one content signal is enough for the anchor to represent content.
        def representative_content_anchor?
          heading_anchor? || meaningful_text? || content_like_destination?
        end

        # Utility-looking text suppresses the anchor unless the destination
        # itself looks like content.
        def utility_text_suppressed?
          !content_like_destination? &&
            @context.utility_text?(text) &&
            (destination_facts.high_confidence_utility_destination || non_heading_weak_post?)
        end

        def non_heading_weak_post?
          !heading_anchor? && !destination_facts.strong_post_suffix
        end

        def ineligible_anchor?
          destination_facts.high_confidence_utility_destination ||
            icon_only_anchor? ||
            @context.utility_landmark?(@anchor.ancestors.to_a)
        end

        # Truthy when the anchor has no words but wraps an image or SVG.
        def icon_only_anchor?
          !meaningful_text? && @anchor.at_css('img, svg')
        end
      end

      # Scores an eligible semantic anchor candidate.
      class Score
        # Score weights keyed by candidate signal predicate.
        RULES = {
          heading_anchor?: 100,
          heading_text_match?: 20,
          meaningful_text?: 10,
          content_like_destination?: 10
        }.freeze

        # @param candidate [Candidate] eligible semantic anchor candidate
        def initialize(candidate)
          @candidate = candidate
        end

        # @return [Integer] ranking score, the sum of weights of all truthy signals
        def value
          RULES.sum { |predicate, weight| @candidate.public_send(predicate) ? weight : 0 }
        end
      end

      # Keeps the strongest semantic anchor fact for each destination.
      class DestinationWinners
        def initialize
          @winners = {}
        end

        # @return [Array<AnchorFacts>] strongest candidate per destination
        def to_a
          @winners.values
        end

        # @param facts [AnchorFacts] candidate anchor facts
        # @return [void]
        def add(facts)
          destination = facts.destination
          @winners[destination] = stronger_fact(@winners[destination], facts)
        end

        private

        # On equal scores the fact that was added first is kept.
        def stronger_fact(current, candidate)
          return candidate unless current

          current.score >= candidate.score ? current : candidate
        end
      end

      # @param container [Nokogiri::XML::Node] semantic container
      # @param link_heuristics [Html2rss::AutoSource::Scraper::LinkHeuristics] destination/text heuristics
      def initialize(container, link_heuristics:)
        @container = container
        @context = Context.new(container, link_heuristics:)
      end

      # @return [Array<AnchorFacts>] strongest candidate per destination
      def to_a
        @container.css(HtmlExtractor::MAIN_ANCHOR_SELECTOR)
                  .each_with_object(DestinationWinners.new) { |anchor, winners| add_anchor(anchor, winners) }
                  .to_a
      end

      private

      # Adds the anchor's facts to the winners unless it is ignored or ineligible.
      def add_anchor(anchor, winners)
        return if HtmlExtractor.ignored_container_path?(anchor)

        facts = Candidate.new(anchor, @context).facts
        winners.add(facts) if facts
      end
    end
  end
end
@@ -0,0 +1,70 @@
# frozen_string_literal: true

module Html2rss
  class HtmlExtractor
    ##
    # Collects semantic content containers from a parsed HTML document.
    #
    # Containers are gathered selector by selector, deduplicated by object
    # identity, and returned sorted by their position in the document traversal.
    class SemanticContainers
      # Candidate selectors used to locate extractable semantic content blocks.
      SELECTORS = [
        'article:not(:has(article))',
        'section:not(:has(section))',
        'li:not(:has(li))',
        'tr:not(:has(tr))',
        'div:not(:has(div))'
      ].freeze

      # @param parsed_body [Nokogiri::HTML::Document] parsed document
      # @return [Array<Nokogiri::XML::Node>] candidate semantic containers
      def self.call(parsed_body)
        new(parsed_body).call
      end

      # @param parsed_body [Nokogiri::HTML::Document] parsed document
      def initialize(parsed_body)
        @parsed_body = parsed_body
      end

      # @return [Array<Nokogiri::XML::Node>] candidate semantic containers
      def call
        collected = []
        SELECTORS.each { |selector| collect_selector_containers(selector, collected) }

        collected.sort_by { |container| document_order.fetch(container) }
      end

      private

      # Identity-keyed lookup from element node to its traversal index.
      #
      # NOTE(review): the index follows the order in which `traverse` yields
      # nodes — confirm that this is the intended document order.
      def document_order
        return @document_order if @document_order

        positions = {}
        position = 0
        @parsed_body.traverse do |node|
          next unless node.element?

          positions[node] = position
          position += 1
        end

        @document_order = positions.compare_by_identity
      end

      # Appends the selector's matches that are neither ignored nor already seen.
      def collect_selector_containers(selector, containers)
        @parsed_body.css(selector).each do |container|
          skip = HtmlExtractor.ignored_container_path?(container) || seen[container]
          next if skip

          seen[container] = true
          containers.push(container)
        end
      end

      # Identity-keyed registry of containers that were already collected.
      def seen
        @seen ||= {}.compare_by_identity
      end
    end
  end
end
@@ -7,6 +7,8 @@ module Html2rss
7
7
  class HtmlExtractor
8
8
  # Tags ignored when extracting visible text content from article containers.
9
9
  INVISIBLE_CONTENT_TAGS = %w[svg script noscript style template].to_set.freeze
10
+ # Element path pattern ignored when traversing candidate article containers.
11
+ IGNORED_CONTAINER_PATH = /(nav|footer|header|svg|script|style)/i
10
12
  # Heading tags used to prioritize title extraction.
11
13
  HEADING_TAGS = %w[h1 h2 h3 h4 h5 h6].freeze
12
14
  # Selector used to derive non-headline description nodes.
@@ -87,6 +89,15 @@ module Html2rss
87
89
 
88
90
  article_tag.at_css(MAIN_ANCHOR_SELECTOR)
89
91
  end
92
+
93
+ ##
94
+ # @param node [Nokogiri::XML::Node, String] node or path to test
95
+ # @return [Boolean] true when the node belongs to ignored DOM chrome
96
+ def ignored_container_path?(node)
97
+ path = node.respond_to?(:path) ? node.path : node.to_s
98
+
99
+ path.match?(IGNORED_CONTAINER_PATH)
100
+ end
90
101
  end
91
102
 
92
103
  def extract_url
@@ -41,13 +41,16 @@ module Html2rss
41
41
 
42
42
  # @return [Integer] cache time-to-live in minutes
43
43
  def ttl
44
- return overrides[:ttl] if overrides[:ttl]
45
-
46
- if (ttl = headers['cache-control']&.match(/max-age=(\d+)/)&.[](1))
47
- return ttl.to_i.fdiv(60).ceil
48
- end
49
-
50
- DEFAULT_TTL_IN_MINUTES
44
+ calculated = if overrides[:ttl]
45
+ overrides[:ttl].to_i
46
+ elsif (max_age = headers['cache-control']&.match(/max-age=(\d+)/)&.[](1))
47
+ max_age.to_i.fdiv(60).ceil
48
+ else
49
+ DEFAULT_TTL_IN_MINUTES
50
+ end
51
+
52
+ min_ttl = Html2rss.configuration.min_ttl
53
+ min_ttl ? [calculated, min_ttl].max : calculated
51
54
  end
52
55
 
53
56
  # @return [String, nil] ISO-like language code when available
data/lib/html2rss/url.rb CHANGED
@@ -43,6 +43,8 @@ module Html2rss
43
43
  base_uri.path = '/' if base_uri.path.empty?
44
44
 
45
45
  new(base_uri.join(url).normalize)
46
+ rescue Addressable::URI::InvalidURIError
47
+ raise ArgumentError, 'URL could not be parsed'
46
48
  end
47
49
 
48
50
  ##
@@ -4,6 +4,6 @@
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
6
  # Current application version.
7
- VERSION = '0.19.1'
7
+ VERSION = '0.20.0'
8
8
  public_constant :VERSION
9
9
  end