html2rss 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/html2rss.rb CHANGED
@@ -7,18 +7,21 @@ loader.inflector.inflect('cli' => 'CLI')
7
7
  loader.setup
8
8
 
9
9
  require 'logger'
10
+ require 'forwardable'
11
+ require 'html2rss/configuration'
10
12
 
11
13
  ##
12
14
  # The Html2rss namespace.
13
15
  module Html2rss
14
16
  ##
15
17
  # The logger instance.
16
- Log = Logger.new($stdout)
18
+ module Log
19
+ class << self
20
+ extend Forwardable
17
21
 
18
- Log.level = ENV.fetch('LOG_LEVEL', :warn).upcase.to_sym
19
-
20
- Log.formatter = proc do |severity, datetime, _progname, msg|
21
- "#{datetime} [#{severity}] #{msg}\n"
22
+ def_delegator 'Html2rss', :logger
23
+ def_delegators :logger, :debug, :info, :warn, :error, :fatal, :unknown, :level, :level=, :formatter, :formatter=
24
+ end
22
25
  end
23
26
 
24
27
  ##
@@ -75,6 +78,50 @@ module Html2rss
75
78
  json_feed(build_auto_source_config(url:, strategy:, items_selector:, max_redirects:, max_requests:))
76
79
  end
77
80
 
81
+ # rubocop:disable ThreadSafety/ClassInstanceVariable
82
+ class << self
83
+ ##
84
+ # @return [Html2rss::Configuration] the global configuration instance
85
+ def configuration
86
+ @configuration ||= Configuration.new.freeze
87
+ end
88
+
89
+ ##
90
+ # Configures global library defaults.
91
+ #
92
+ # @yieldparam config [Html2rss::Configuration]
93
+ # @return [Html2rss::Configuration] the frozen configuration
94
+ def configure
95
+ config = configuration.dup
96
+ yield config
97
+ @configuration = config.freeze
98
+ end
99
+
100
+ ##
101
+ # @return [Object] the logger
102
+ def logger
103
+ configuration.logger
104
+ end
105
+
106
+ ##
107
+ # @param logger [Object] the new logger
108
+ def logger=(logger)
109
+ configure { |config| config.logger = logger }
110
+ end
111
+
112
+ private
113
+
114
+ ##
115
+ # Resets the global configuration to defaults (mainly for testing).
116
+ #
117
+ # @return [void]
118
+ def reset_configuration!
119
+ @configuration = nil
120
+ logger.level = configuration.log_level if logger.respond_to?(:level=)
121
+ end
122
+ end
123
+ # rubocop:enable ThreadSafety/ClassInstanceVariable
124
+
78
125
  class << self
79
126
  private
80
127
 
@@ -103,6 +150,8 @@ module Html2rss
103
150
  keys
104
151
  end
105
152
  end
153
+
154
+ logger.level = configuration.log_level if logger.respond_to?(:level=)
106
155
  end
107
156
 
108
157
  loader.eager_load
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.1
4
+ version: 0.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
@@ -293,6 +293,7 @@ files:
293
293
  - lib/html2rss/auto_source/scraper.rb
294
294
  - lib/html2rss/auto_source/scraper/html.rb
295
295
  - lib/html2rss/auto_source/scraper/json_state.rb
296
+ - lib/html2rss/auto_source/scraper/link_heuristics.rb
296
297
  - lib/html2rss/auto_source/scraper/microdata.rb
297
298
  - lib/html2rss/auto_source/scraper/schema.rb
298
299
  - lib/html2rss/auto_source/scraper/schema/category_extractor.rb
@@ -301,6 +302,7 @@ files:
301
302
  - lib/html2rss/auto_source/scraper/schema/thing.rb
302
303
  - lib/html2rss/auto_source/scraper/semantic_html.rb
303
304
  - lib/html2rss/auto_source/scraper/semantic_html/anchor_selector.rb
305
+ - lib/html2rss/auto_source/scraper/semantic_html/deduplicator.rb
304
306
  - lib/html2rss/auto_source/scraper/wordpress_api.rb
305
307
  - lib/html2rss/auto_source/scraper/wordpress_api/page_scope.rb
306
308
  - lib/html2rss/auto_source/scraper/wordpress_api/posts_endpoint.rb
@@ -314,6 +316,7 @@ files:
314
316
  - lib/html2rss/config/request_headers.rb
315
317
  - lib/html2rss/config/schema.rb
316
318
  - lib/html2rss/config/validator.rb
319
+ - lib/html2rss/configuration.rb
317
320
  - lib/html2rss/error.rb
318
321
  - lib/html2rss/feed_pipeline.rb
319
322
  - lib/html2rss/feed_pipeline/auto_fallback.rb
@@ -322,6 +325,9 @@ files:
322
325
  - lib/html2rss/html_extractor/date_extractor.rb
323
326
  - lib/html2rss/html_extractor/enclosure_extractor.rb
324
327
  - lib/html2rss/html_extractor/image_extractor.rb
328
+ - lib/html2rss/html_extractor/list_candidates.rb
329
+ - lib/html2rss/html_extractor/semantic_anchor_candidates.rb
330
+ - lib/html2rss/html_extractor/semantic_containers.rb
325
331
  - lib/html2rss/html_navigator.rb
326
332
  - lib/html2rss/json_feed_builder.rb
327
333
  - lib/html2rss/json_feed_builder/item.rb
@@ -384,7 +390,7 @@ licenses:
384
390
  - MIT
385
391
  metadata:
386
392
  allowed_push_host: https://rubygems.org
387
- changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.19.1
393
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.20.0
388
394
  rubygems_mfa_required: 'true'
389
395
  rdoc_options: []
390
396
  require_paths:
@@ -400,7 +406,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
400
406
  - !ruby/object:Gem::Version
401
407
  version: '0'
402
408
  requirements: []
403
- rubygems_version: 4.0.6
409
+ rubygems_version: 4.0.10
404
410
  specification_version: 4
405
411
  summary: Generates RSS feeds from websites by scraping a URL and using CSS selectors
406
412
  to extract item.