html2rss 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8d51b095228dec5d98a1ff66ee7329171d2297bad67ca27a7930afe296b589e8
4
- data.tar.gz: 32bf56a05ed45a1bb8c3341f228f41d1441e65243c6a5d55908e90d002f8880b
3
+ metadata.gz: 3c8df87caea7cc1dd9855993e9dd4fcb4285b37a5c1f87fb48a3d85e75ce0a51
4
+ data.tar.gz: 8ab432f13d76f45e30ea65316669c324bcba46b5234a082a6f0670cc445a6958
5
5
  SHA512:
6
- metadata.gz: 0cbe761521ec3cd43bd25565f6425e6ced445814c30706d50d52c008ce6e3905a6f95b9f6d9dfa2faf90baf4bc2fbeb43741af5bc185e1488d31b55b9ffa3c75
7
- data.tar.gz: 353e1586ddde92858ea95a2eecda6d752975585038acc29e6d09ff97363e5429d15966a005e1f9c4a41e9ad83b972973cfadd9c840160849d530166e7dd84298
6
+ metadata.gz: 7115ee36e47ac65d19b13bb0f2ef0b19ebdb387d2e6fa53128e804b8d22d6a834517b88642f5c3e31e8850e344a2f94b6cb0de6619504603cfeacae52e47f3e2
7
+ data.tar.gz: b2c9f342cff237d9cafd4817f2570d18803fa9ba844ab306681ac02d12fb7cf147f3b40117ab21c78937325d80623ad27e25c6df4c8f8150fe81be2a07d12817
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ![html2rss logo](https://github.com/html2rss/html2rss/raw/master/support/logo.png)
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png) [![](http://img.shields.io/liberapay/goal/gildesmarais.svg?logo=liberapa)](https://liberapay.com/gildesmarais/donate)
3
+ [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png)
4
4
 
5
5
  `html2rss` is a Ruby gem that generates RSS 2.0 feeds from a _feed config_.
6
6
 
data/html2rss.gemspec CHANGED
@@ -10,23 +10,23 @@ Gem::Specification.new do |spec|
10
10
  spec.authors = ['Gil Desmarais']
11
11
  spec.email = ['html2rss@desmarais.de']
12
12
 
13
- spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
14
- spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
+ spec.summary = 'Generates RSS feeds from websites by scraping a URL and using CSS selectors to extract item.'
14
+ spec.description = 'Supports JSON content, custom HTTP headers, and post-processing of extracted content.'
15
15
  spec.homepage = 'https://github.com/html2rss/html2rss'
16
16
  spec.license = 'MIT'
17
17
  spec.required_ruby_version = '>= 3.1'
18
18
 
19
19
  if spec.respond_to?(:metadata)
20
20
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
21
- spec.metadata['changelog_uri'] = 'https://github.com/html2rss/html2rss/releases'
21
+ spec.metadata['changelog_uri'] = "#{spec.homepage}/releases/tag/v#{spec.version}"
22
22
  spec.metadata['rubygems_mfa_required'] = 'true'
23
23
  else
24
24
  raise 'RubyGems 2.0 or newer is required to protect against ' \
25
25
  'public gem pushes.'
26
26
  end
27
27
 
28
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
29
- f.match(%r{^(test|spec|features|support|docs|.github|.yardoc)/})
28
+ spec.files = `git ls-files -z`.split("\x0").select do |f|
29
+ f.match(%r{^(lib/|exe/|README.md|LICENSE|html2rss.gemspec)})
30
30
  end
31
31
  spec.bindir = 'exe'
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
@@ -6,7 +6,7 @@ module Html2rss
6
6
  module AttributePostProcessors
7
7
  ##
8
8
  # Error raised when an unknown post processor name is requested.
9
- class UnknownPostProcessorName < StandardError; end
9
+ class UnknownPostProcessorName < Html2rss::Error; end
10
10
 
11
11
  ##
12
12
  # Maps the post processor name to the class implementing the post processor.
@@ -11,6 +11,15 @@ module Html2rss
11
11
  # 1. the RSS channel attributes
12
12
  # 2. html2rss options like json or custom HTTP-headers for the request
13
13
  class Channel
14
+ ##
15
+ # @param config [Hash<Symbol, Object>]
16
+ # @return [Set<String>] the required parameter names
17
+ def self.required_params_for_config(config)
18
+ config.each_with_object(Set.new) do |(_, value), required_params|
19
+ required_params.merge(value.scan(/%<([\w_\d]+)>/).flatten) if value.is_a?(String)
20
+ end
21
+ end
22
+
14
23
  ##
15
24
  # @param channel [Hash<Symbol, Object>]
16
25
  # @param params [Hash]
@@ -77,15 +86,6 @@ module Html2rss
77
86
  config.fetch(:json, false)
78
87
  end
79
88
 
80
- ##
81
- # @param config [Hash<Symbol, Object>]
82
- # @return [Set<String>] the required parameter names
83
- def self.required_params_for_config(config)
84
- config.each_with_object(Set.new) do |(_, value), required_params|
85
- required_params.merge(value.scan(/%<([\w_\d]+)>/).flatten) if value.is_a?(String)
86
- end
87
- end
88
-
89
89
  private
90
90
 
91
91
  # @return [Hash<Symbol, Object>]
@@ -12,11 +12,11 @@ module Html2rss
12
12
  ##
13
13
  # The Error class to be thrown when a feed config requires params, but none
14
14
  # were passed to Config.
15
- class ParamsMissing < StandardError; end
15
+ class ParamsMissing < Html2rss::Error; end
16
16
 
17
17
  ##
18
18
  # Thrown when the feed config does not contain a value at `:channel`.
19
- class ChannelMissing < StandardError; end
19
+ class ChannelMissing < Html2rss::Error; end
20
20
 
21
21
  # Struct to store XML Stylesheet attributes
22
22
  Stylesheet = Struct.new(:href, :type, :media, keyword_init: true)
data/lib/html2rss/item.rb CHANGED
@@ -16,6 +16,21 @@ module Html2rss
16
16
  # Class to keep an Item's <enclosure>.
17
17
  Enclosure = Struct.new('Enclosure', :type, :bits_length, :url, keyword_init: true)
18
18
 
19
+ ##
20
+ # Fetches items from a given URL using configuration settings.
21
+ #
22
+ # @param url [String] URL to fetch items from.
23
+ # @param config [Html2rss::Config] Configuration object.
24
+ # @return [Array<Html2rss::Item>] list of items fetched.
25
+ def self.from_url(url, config)
26
+ body = Utils.request_body_from_url(url, convert_json_to_xml: config.json?, headers: config.headers)
27
+
28
+ Nokogiri.HTML(body)
29
+ .css(config.selector_string(Config::Selectors::ITEMS_SELECTOR_NAME))
30
+ .map { |xml| new(xml, config) }
31
+ .select(&:valid?)
32
+ end
33
+
19
34
  ##
20
35
  # @param xml [Nokogiri::XML::Element]
21
36
  # @param config [Html2rss::Config]
@@ -122,21 +137,6 @@ module Html2rss
122
137
  )
123
138
  end
124
139
 
125
- ##
126
- # Fetches items from a given URL using configuration settings.
127
- #
128
- # @param url [String] URL to fetch items from.
129
- # @param config [Html2rss::Config] Configuration object.
130
- # @return [Array<Html2rss::Item>] list of items fetched.
131
- def self.from_url(url, config)
132
- body = Utils.request_body_from_url(url, convert_json_to_xml: config.json?, headers: config.headers)
133
-
134
- Nokogiri.HTML(body)
135
- .css(config.selector_string(Config::Selectors::ITEMS_SELECTOR_NAME))
136
- .map { |xml| new(xml, config) }
137
- .select(&:valid?)
138
- end
139
-
140
140
  private
141
141
 
142
142
  # @return [Nokogiri::XML::Element] XML element representing the item.
@@ -6,7 +6,7 @@ module Html2rss
6
6
  module ItemExtractors
7
7
  ##
8
8
  # The Error class to be thrown when an unknown extractor name is requested.
9
- class UnknownExtractorName < StandardError; end
9
+ class UnknownExtractorName < Html2rss::Error; end
10
10
 
11
11
  ##
12
12
  # Maps the extractor name to the class implementing the extractor.
@@ -3,6 +3,6 @@
3
3
  ##
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
- VERSION = '0.10.0'
6
+ VERSION = '0.11.0'
7
7
  public_constant :VERSION
8
8
  end
data/lib/html2rss.rb CHANGED
@@ -10,6 +10,12 @@ require 'yaml'
10
10
  ##
11
11
  # The Html2rss namespace.
12
12
  module Html2rss
13
+ ##
14
+ # The Html2rss::Error base class.
15
+ class Error < StandardError; end
16
+
17
+ ##
18
+ # Key for the feeds configuration in the YAML file.
13
19
  CONFIG_KEY_FEEDS = :feeds
14
20
 
15
21
  ##
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-30 00:00:00.000000000 Z
11
+ date: 2024-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -204,8 +204,8 @@ dependencies:
204
204
  - - ">="
205
205
  - !ruby/object:Gem::Version
206
206
  version: '0'
207
- description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
208
- in return.
207
+ description: Supports JSON content, custom HTTP headers, and post-processing of extracted
208
+ content.
209
209
  email:
210
210
  - html2rss@desmarais.de
211
211
  executables:
@@ -213,17 +213,8 @@ executables:
213
213
  extensions: []
214
214
  extra_rdoc_files: []
215
215
  files:
216
- - ".gitignore"
217
- - ".mergify.yml"
218
- - ".rspec"
219
- - ".rubocop.yml"
220
- - ".yardopts"
221
- - Gemfile
222
- - Gemfile.lock
223
216
  - LICENSE
224
217
  - README.md
225
- - bin/console
226
- - bin/setup
227
218
  - exe/html2rss
228
219
  - html2rss.gemspec
229
220
  - lib/html2rss.rb
@@ -256,13 +247,12 @@ files:
256
247
  - lib/html2rss/rss_builder/stylesheet.rb
257
248
  - lib/html2rss/utils.rb
258
249
  - lib/html2rss/version.rb
259
- - rakefile.rb
260
250
  homepage: https://github.com/html2rss/html2rss
261
251
  licenses:
262
252
  - MIT
263
253
  metadata:
264
254
  allowed_push_host: https://rubygems.org
265
- changelog_uri: https://github.com/html2rss/html2rss/releases
255
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.11.0
266
256
  rubygems_mfa_required: 'true'
267
257
  post_install_message:
268
258
  rdoc_options: []
@@ -282,5 +272,6 @@ requirements: []
282
272
  rubygems_version: 3.5.11
283
273
  signing_key:
284
274
  specification_version: 4
285
- summary: Returns an RSS::Rss object by scraping a URL.
275
+ summary: Generates RSS feeds from websites by scraping a URL and using CSS selectors
276
+ to extract item.
286
277
  test_files: []
data/.gitignore DELETED
@@ -1,12 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /_yardoc/
4
- /coverage/
5
- /doc/
6
- /pkg/
7
- /spec/reports/
8
- /tmp/
9
-
10
- # rspec failure tracking
11
- .rspec_status
12
- docs/
data/.mergify.yml DELETED
@@ -1,15 +0,0 @@
1
- queue_rules:
2
- - name: dependabot
3
- conditions:
4
- - author=dependabot[bot]
5
- - status-success=test
6
- - base=master
7
-
8
- pull_request_rules:
9
- - name: automatic merge for Dependabot pull requests
10
- conditions:
11
- - author=dependabot[bot]
12
- actions:
13
- queue:
14
- method: squash
15
- name: dependabot
data/.rspec DELETED
@@ -1,4 +0,0 @@
1
- --format documentation
2
- --color
3
- --order random
4
- --require spec_helper
data/.rubocop.yml DELETED
@@ -1,30 +0,0 @@
1
- require:
2
- - rubocop-performance
3
- - rubocop-rspec
4
- - rubocop-md
5
- - rubocop-rake
6
-
7
- AllCops:
8
- DisplayCopNames: true
9
- NewCops: enable
10
- Exclude:
11
- - vendor/**/*
12
-
13
- Metrics/BlockLength:
14
- Exclude:
15
- - "spec/**/*_spec.rb"
16
- - html2rss.gemspec
17
-
18
- RSpec/NestedGroups:
19
- Exclude:
20
- - spec/html2rss_spec.rb
21
-
22
- RSpec/DescribeClass:
23
- Exclude:
24
- - spec/exe/**/*_spec.rb
25
-
26
- RSpec/NamedSubject:
27
- Enabled: false
28
-
29
- Naming/RescuedExceptionsVariableName:
30
- PreferredName: error
data/.yardopts DELETED
@@ -1,6 +0,0 @@
1
- --readme README.md
2
- --charset utf-8
3
- --no-private
4
- --exclude /coverage
5
- --exclude /support
6
- --output-dir docs/
data/Gemfile DELETED
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source 'https://rubygems.org'
4
-
5
- git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
-
7
- # Specify your gem's dependencies in html2rss.gemspec
8
- gemspec
9
-
10
- group :development, :test do
11
- gem 'byebug'
12
- gem 'rake'
13
- gem 'rspec', '~> 3.0'
14
- gem 'rubocop'
15
- gem 'rubocop-md'
16
- gem 'rubocop-performance'
17
- gem 'rubocop-rake'
18
- gem 'rubocop-rspec'
19
- gem 'vcr'
20
- gem 'yard'
21
- end
22
-
23
- group :test do
24
- gem 'simplecov', require: false
25
- end
data/Gemfile.lock DELETED
@@ -1,153 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- html2rss (0.10.0)
5
- addressable (~> 2.7)
6
- faraday (> 2.0.1, < 3.0)
7
- faraday-follow_redirects
8
- kramdown
9
- mime-types (> 3.0)
10
- nokogiri (>= 1.10, < 2.0)
11
- regexp_parser
12
- reverse_markdown (~> 2.0)
13
- rss
14
- sanitize (~> 6.0)
15
- thor
16
- tzinfo
17
- zeitwerk
18
-
19
- GEM
20
- remote: https://rubygems.org/
21
- specs:
22
- addressable (2.8.6)
23
- public_suffix (>= 2.0.2, < 6.0)
24
- ast (2.4.2)
25
- byebug (11.1.3)
26
- concurrent-ruby (1.2.3)
27
- crass (1.0.6)
28
- diff-lcs (1.5.1)
29
- docile (1.4.0)
30
- faraday (2.9.0)
31
- faraday-net_http (>= 2.0, < 3.2)
32
- faraday-follow_redirects (0.3.0)
33
- faraday (>= 1, < 3)
34
- faraday-net_http (3.1.0)
35
- net-http
36
- json (2.7.2)
37
- kramdown (2.4.0)
38
- rexml
39
- language_server-protocol (3.17.0.3)
40
- mime-types (3.5.2)
41
- mime-types-data (~> 3.2015)
42
- mime-types-data (3.2024.0305)
43
- mini_portile2 (2.8.6)
44
- net-http (0.4.1)
45
- uri
46
- nokogiri (1.16.5)
47
- mini_portile2 (~> 2.8.2)
48
- racc (~> 1.4)
49
- nokogiri (1.16.5-x86_64-darwin)
50
- racc (~> 1.4)
51
- nokogiri (1.16.5-x86_64-linux)
52
- racc (~> 1.4)
53
- parallel (1.24.0)
54
- parser (3.3.1.0)
55
- ast (~> 2.4.1)
56
- racc
57
- public_suffix (5.0.5)
58
- racc (1.7.3)
59
- rainbow (3.1.1)
60
- rake (13.2.1)
61
- regexp_parser (2.9.0)
62
- reverse_markdown (2.1.1)
63
- nokogiri
64
- rexml (3.3.2)
65
- strscan
66
- rspec (3.13.0)
67
- rspec-core (~> 3.13.0)
68
- rspec-expectations (~> 3.13.0)
69
- rspec-mocks (~> 3.13.0)
70
- rspec-core (3.13.0)
71
- rspec-support (~> 3.13.0)
72
- rspec-expectations (3.13.0)
73
- diff-lcs (>= 1.2.0, < 2.0)
74
- rspec-support (~> 3.13.0)
75
- rspec-mocks (3.13.0)
76
- diff-lcs (>= 1.2.0, < 2.0)
77
- rspec-support (~> 3.13.0)
78
- rspec-support (3.13.1)
79
- rss (0.3.0)
80
- rexml
81
- rubocop (1.63.4)
82
- json (~> 2.3)
83
- language_server-protocol (>= 3.17.0)
84
- parallel (~> 1.10)
85
- parser (>= 3.3.0.2)
86
- rainbow (>= 2.2.2, < 4.0)
87
- regexp_parser (>= 1.8, < 3.0)
88
- rexml (>= 3.2.5, < 4.0)
89
- rubocop-ast (>= 1.31.1, < 2.0)
90
- ruby-progressbar (~> 1.7)
91
- unicode-display_width (>= 2.4.0, < 3.0)
92
- rubocop-ast (1.31.3)
93
- parser (>= 3.3.1.0)
94
- rubocop-capybara (2.20.0)
95
- rubocop (~> 1.41)
96
- rubocop-factory_bot (2.25.1)
97
- rubocop (~> 1.41)
98
- rubocop-md (1.2.2)
99
- rubocop (>= 1.0)
100
- rubocop-performance (1.21.0)
101
- rubocop (>= 1.48.1, < 2.0)
102
- rubocop-ast (>= 1.31.1, < 2.0)
103
- rubocop-rake (0.6.0)
104
- rubocop (~> 1.0)
105
- rubocop-rspec (2.29.1)
106
- rubocop (~> 1.40)
107
- rubocop-capybara (~> 2.17)
108
- rubocop-factory_bot (~> 2.22)
109
- rubocop-rspec_rails (~> 2.28)
110
- rubocop-rspec_rails (2.28.3)
111
- rubocop (~> 1.40)
112
- ruby-progressbar (1.13.0)
113
- sanitize (6.1.0)
114
- crass (~> 1.0.2)
115
- nokogiri (>= 1.12.0)
116
- simplecov (0.22.0)
117
- docile (~> 1.1)
118
- simplecov-html (~> 0.11)
119
- simplecov_json_formatter (~> 0.1)
120
- simplecov-html (0.12.3)
121
- simplecov_json_formatter (0.1.4)
122
- strscan (3.1.0)
123
- thor (1.3.1)
124
- tzinfo (2.0.6)
125
- concurrent-ruby (~> 1.0)
126
- unicode-display_width (2.5.0)
127
- uri (0.13.0)
128
- vcr (6.2.0)
129
- yard (0.9.36)
130
- zeitwerk (2.6.13)
131
-
132
- PLATFORMS
133
- ruby
134
- x86_64-darwin
135
- x86_64-darwin-20
136
- x86_64-linux
137
-
138
- DEPENDENCIES
139
- byebug
140
- html2rss!
141
- rake
142
- rspec (~> 3.0)
143
- rubocop
144
- rubocop-md
145
- rubocop-performance
146
- rubocop-rake
147
- rubocop-rspec
148
- simplecov
149
- vcr
150
- yard
151
-
152
- BUNDLED WITH
153
- 2.4.1
data/bin/console DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'bundler/setup'
5
- require 'html2rss'
6
- require 'byebug'
7
-
8
- # You can add fixtures and/or initialization code here to make experimenting
9
- # with your gem easier. You can also use a different console, if you like.
10
-
11
- # (If you use this, don't forget to add pry to your Gemfile!)
12
- # require "pry"
13
- # Pry.start
14
-
15
- require 'irb'
16
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
data/rakefile.rb DELETED
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'bundler'
4
- require 'rake'
5
- require 'rspec'
6
- require 'rspec/core/rake_task'
7
-
8
- Bundler.setup
9
- Bundler::GemHelper.install_tasks
10
-
11
- task default: [:spec]
12
-
13
- desc 'Run all examples'
14
- RSpec::Core::RakeTask.new(:spec) do |t|
15
- t.ruby_opts = %w[-w]
16
- end