html2rss 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8d51b095228dec5d98a1ff66ee7329171d2297bad67ca27a7930afe296b589e8
4
- data.tar.gz: 32bf56a05ed45a1bb8c3341f228f41d1441e65243c6a5d55908e90d002f8880b
3
+ metadata.gz: 3c8df87caea7cc1dd9855993e9dd4fcb4285b37a5c1f87fb48a3d85e75ce0a51
4
+ data.tar.gz: 8ab432f13d76f45e30ea65316669c324bcba46b5234a082a6f0670cc445a6958
5
5
  SHA512:
6
- metadata.gz: 0cbe761521ec3cd43bd25565f6425e6ced445814c30706d50d52c008ce6e3905a6f95b9f6d9dfa2faf90baf4bc2fbeb43741af5bc185e1488d31b55b9ffa3c75
7
- data.tar.gz: 353e1586ddde92858ea95a2eecda6d752975585038acc29e6d09ff97363e5429d15966a005e1f9c4a41e9ad83b972973cfadd9c840160849d530166e7dd84298
6
+ metadata.gz: 7115ee36e47ac65d19b13bb0f2ef0b19ebdb387d2e6fa53128e804b8d22d6a834517b88642f5c3e31e8850e344a2f94b6cb0de6619504603cfeacae52e47f3e2
7
+ data.tar.gz: b2c9f342cff237d9cafd4817f2570d18803fa9ba844ab306681ac02d12fb7cf147f3b40117ab21c78937325d80623ad27e25c6df4c8f8150fe81be2a07d12817
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ![html2rss logo](https://github.com/html2rss/html2rss/raw/master/support/logo.png)
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png) [![](http://img.shields.io/liberapay/goal/gildesmarais.svg?logo=liberapa)](https://liberapay.com/gildesmarais/donate)
3
+ [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/) [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss) ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png)
4
4
 
5
5
  `html2rss` is a Ruby gem that generates RSS 2.0 feeds from a _feed config_.
6
6
 
data/html2rss.gemspec CHANGED
@@ -10,23 +10,23 @@ Gem::Specification.new do |spec|
10
10
  spec.authors = ['Gil Desmarais']
11
11
  spec.email = ['html2rss@desmarais.de']
12
12
 
13
- spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
14
- spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
+ spec.summary = 'Generates RSS feeds from websites by scraping a URL and using CSS selectors to extract item.'
14
+ spec.description = 'Supports JSON content, custom HTTP headers, and post-processing of extracted content.'
15
15
  spec.homepage = 'https://github.com/html2rss/html2rss'
16
16
  spec.license = 'MIT'
17
17
  spec.required_ruby_version = '>= 3.1'
18
18
 
19
19
  if spec.respond_to?(:metadata)
20
20
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
21
- spec.metadata['changelog_uri'] = 'https://github.com/html2rss/html2rss/releases'
21
+ spec.metadata['changelog_uri'] = "#{spec.homepage}/releases/tag/v#{spec.version}"
22
22
  spec.metadata['rubygems_mfa_required'] = 'true'
23
23
  else
24
24
  raise 'RubyGems 2.0 or newer is required to protect against ' \
25
25
  'public gem pushes.'
26
26
  end
27
27
 
28
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
29
- f.match(%r{^(test|spec|features|support|docs|.github|.yardoc)/})
28
+ spec.files = `git ls-files -z`.split("\x0").select do |f|
29
+ f.match(%r{^(lib/|exe/|README.md|LICENSE|html2rss.gemspec)})
30
30
  end
31
31
  spec.bindir = 'exe'
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
@@ -6,7 +6,7 @@ module Html2rss
6
6
  module AttributePostProcessors
7
7
  ##
8
8
  # Error raised when an unknown post processor name is requested.
9
- class UnknownPostProcessorName < StandardError; end
9
+ class UnknownPostProcessorName < Html2rss::Error; end
10
10
 
11
11
  ##
12
12
  # Maps the post processor name to the class implementing the post processor.
@@ -11,6 +11,15 @@ module Html2rss
11
11
  # 1. the RSS channel attributes
12
12
  # 2. html2rss options like json or custom HTTP-headers for the request
13
13
  class Channel
14
+ ##
15
+ # @param config [Hash<Symbol, Object>]
16
+ # @return [Set<String>] the required parameter names
17
+ def self.required_params_for_config(config)
18
+ config.each_with_object(Set.new) do |(_, value), required_params|
19
+ required_params.merge(value.scan(/%<([\w_\d]+)>/).flatten) if value.is_a?(String)
20
+ end
21
+ end
22
+
14
23
  ##
15
24
  # @param channel [Hash<Symbol, Object>]
16
25
  # @param params [Hash]
@@ -77,15 +86,6 @@ module Html2rss
77
86
  config.fetch(:json, false)
78
87
  end
79
88
 
80
- ##
81
- # @param config [Hash<Symbol, Object>]
82
- # @return [Set<String>] the required parameter names
83
- def self.required_params_for_config(config)
84
- config.each_with_object(Set.new) do |(_, value), required_params|
85
- required_params.merge(value.scan(/%<([\w_\d]+)>/).flatten) if value.is_a?(String)
86
- end
87
- end
88
-
89
89
  private
90
90
 
91
91
  # @return [Hash<Symbol, Object>]
@@ -12,11 +12,11 @@ module Html2rss
12
12
  ##
13
13
  # The Error class to be thrown when a feed config requires params, but none
14
14
  # were passed to Config.
15
- class ParamsMissing < StandardError; end
15
+ class ParamsMissing < Html2rss::Error; end
16
16
 
17
17
  ##
18
18
  # Thrown when the feed config does not contain a value at `:channel`.
19
- class ChannelMissing < StandardError; end
19
+ class ChannelMissing < Html2rss::Error; end
20
20
 
21
21
  # Struct to store XML Stylesheet attributes
22
22
  Stylesheet = Struct.new(:href, :type, :media, keyword_init: true)
data/lib/html2rss/item.rb CHANGED
@@ -16,6 +16,21 @@ module Html2rss
16
16
  # Class to keep an Item's <enclosure>.
17
17
  Enclosure = Struct.new('Enclosure', :type, :bits_length, :url, keyword_init: true)
18
18
 
19
+ ##
20
+ # Fetches items from a given URL using configuration settings.
21
+ #
22
+ # @param url [String] URL to fetch items from.
23
+ # @param config [Html2rss::Config] Configuration object.
24
+ # @return [Array<Html2rss::Item>] list of items fetched.
25
+ def self.from_url(url, config)
26
+ body = Utils.request_body_from_url(url, convert_json_to_xml: config.json?, headers: config.headers)
27
+
28
+ Nokogiri.HTML(body)
29
+ .css(config.selector_string(Config::Selectors::ITEMS_SELECTOR_NAME))
30
+ .map { |xml| new(xml, config) }
31
+ .select(&:valid?)
32
+ end
33
+
19
34
  ##
20
35
  # @param xml [Nokogiri::XML::Element]
21
36
  # @param config [Html2rss::Config]
@@ -122,21 +137,6 @@ module Html2rss
122
137
  )
123
138
  end
124
139
 
125
- ##
126
- # Fetches items from a given URL using configuration settings.
127
- #
128
- # @param url [String] URL to fetch items from.
129
- # @param config [Html2rss::Config] Configuration object.
130
- # @return [Array<Html2rss::Item>] list of items fetched.
131
- def self.from_url(url, config)
132
- body = Utils.request_body_from_url(url, convert_json_to_xml: config.json?, headers: config.headers)
133
-
134
- Nokogiri.HTML(body)
135
- .css(config.selector_string(Config::Selectors::ITEMS_SELECTOR_NAME))
136
- .map { |xml| new(xml, config) }
137
- .select(&:valid?)
138
- end
139
-
140
140
  private
141
141
 
142
142
  # @return [Nokogiri::XML::Element] XML element representing the item.
@@ -6,7 +6,7 @@ module Html2rss
6
6
  module ItemExtractors
7
7
  ##
8
8
  # The Error class to be thrown when an unknown extractor name is requested.
9
- class UnknownExtractorName < StandardError; end
9
+ class UnknownExtractorName < Html2rss::Error; end
10
10
 
11
11
  ##
12
12
  # Maps the extractor name to the class implementing the extractor.
@@ -3,6 +3,6 @@
3
3
  ##
4
4
  # The Html2rss namespace.
5
5
  module Html2rss
6
- VERSION = '0.10.0'
6
+ VERSION = '0.11.0'
7
7
  public_constant :VERSION
8
8
  end
data/lib/html2rss.rb CHANGED
@@ -10,6 +10,12 @@ require 'yaml'
10
10
  ##
11
11
  # The Html2rss namespace.
12
12
  module Html2rss
13
+ ##
14
+ # The Html2rss::Error base class.
15
+ class Error < StandardError; end
16
+
17
+ ##
18
+ # Key for the feeds configuration in the YAML file.
13
19
  CONFIG_KEY_FEEDS = :feeds
14
20
 
15
21
  ##
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-30 00:00:00.000000000 Z
11
+ date: 2024-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -204,8 +204,8 @@ dependencies:
204
204
  - - ">="
205
205
  - !ruby/object:Gem::Version
206
206
  version: '0'
207
- description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
208
- in return.
207
+ description: Supports JSON content, custom HTTP headers, and post-processing of extracted
208
+ content.
209
209
  email:
210
210
  - html2rss@desmarais.de
211
211
  executables:
@@ -213,17 +213,8 @@ executables:
213
213
  extensions: []
214
214
  extra_rdoc_files: []
215
215
  files:
216
- - ".gitignore"
217
- - ".mergify.yml"
218
- - ".rspec"
219
- - ".rubocop.yml"
220
- - ".yardopts"
221
- - Gemfile
222
- - Gemfile.lock
223
216
  - LICENSE
224
217
  - README.md
225
- - bin/console
226
- - bin/setup
227
218
  - exe/html2rss
228
219
  - html2rss.gemspec
229
220
  - lib/html2rss.rb
@@ -256,13 +247,12 @@ files:
256
247
  - lib/html2rss/rss_builder/stylesheet.rb
257
248
  - lib/html2rss/utils.rb
258
249
  - lib/html2rss/version.rb
259
- - rakefile.rb
260
250
  homepage: https://github.com/html2rss/html2rss
261
251
  licenses:
262
252
  - MIT
263
253
  metadata:
264
254
  allowed_push_host: https://rubygems.org
265
- changelog_uri: https://github.com/html2rss/html2rss/releases
255
+ changelog_uri: https://github.com/html2rss/html2rss/releases/tag/v0.11.0
266
256
  rubygems_mfa_required: 'true'
267
257
  post_install_message:
268
258
  rdoc_options: []
@@ -282,5 +272,6 @@ requirements: []
282
272
  rubygems_version: 3.5.11
283
273
  signing_key:
284
274
  specification_version: 4
285
- summary: Returns an RSS::Rss object by scraping a URL.
275
+ summary: Generates RSS feeds from websites by scraping a URL and using CSS selectors
276
+ to extract item.
286
277
  test_files: []
data/.gitignore DELETED
@@ -1,12 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /_yardoc/
4
- /coverage/
5
- /doc/
6
- /pkg/
7
- /spec/reports/
8
- /tmp/
9
-
10
- # rspec failure tracking
11
- .rspec_status
12
- docs/
data/.mergify.yml DELETED
@@ -1,15 +0,0 @@
1
- queue_rules:
2
- - name: dependabot
3
- conditions:
4
- - author=dependabot[bot]
5
- - status-success=test
6
- - base=master
7
-
8
- pull_request_rules:
9
- - name: automatic merge for Dependabot pull requests
10
- conditions:
11
- - author=dependabot[bot]
12
- actions:
13
- queue:
14
- method: squash
15
- name: dependabot
data/.rspec DELETED
@@ -1,4 +0,0 @@
1
- --format documentation
2
- --color
3
- --order random
4
- --require spec_helper
data/.rubocop.yml DELETED
@@ -1,30 +0,0 @@
1
- require:
2
- - rubocop-performance
3
- - rubocop-rspec
4
- - rubocop-md
5
- - rubocop-rake
6
-
7
- AllCops:
8
- DisplayCopNames: true
9
- NewCops: enable
10
- Exclude:
11
- - vendor/**/*
12
-
13
- Metrics/BlockLength:
14
- Exclude:
15
- - "spec/**/*_spec.rb"
16
- - html2rss.gemspec
17
-
18
- RSpec/NestedGroups:
19
- Exclude:
20
- - spec/html2rss_spec.rb
21
-
22
- RSpec/DescribeClass:
23
- Exclude:
24
- - spec/exe/**/*_spec.rb
25
-
26
- RSpec/NamedSubject:
27
- Enabled: false
28
-
29
- Naming/RescuedExceptionsVariableName:
30
- PreferredName: error
data/.yardopts DELETED
@@ -1,6 +0,0 @@
1
- --readme README.md
2
- --charset utf-8
3
- --no-private
4
- --exclude /coverage
5
- --exclude /support
6
- --output-dir docs/
data/Gemfile DELETED
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source 'https://rubygems.org'
4
-
5
- git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
-
7
- # Specify your gem's dependencies in html2rss.gemspec
8
- gemspec
9
-
10
- group :development, :test do
11
- gem 'byebug'
12
- gem 'rake'
13
- gem 'rspec', '~> 3.0'
14
- gem 'rubocop'
15
- gem 'rubocop-md'
16
- gem 'rubocop-performance'
17
- gem 'rubocop-rake'
18
- gem 'rubocop-rspec'
19
- gem 'vcr'
20
- gem 'yard'
21
- end
22
-
23
- group :test do
24
- gem 'simplecov', require: false
25
- end
data/Gemfile.lock DELETED
@@ -1,153 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- html2rss (0.10.0)
5
- addressable (~> 2.7)
6
- faraday (> 2.0.1, < 3.0)
7
- faraday-follow_redirects
8
- kramdown
9
- mime-types (> 3.0)
10
- nokogiri (>= 1.10, < 2.0)
11
- regexp_parser
12
- reverse_markdown (~> 2.0)
13
- rss
14
- sanitize (~> 6.0)
15
- thor
16
- tzinfo
17
- zeitwerk
18
-
19
- GEM
20
- remote: https://rubygems.org/
21
- specs:
22
- addressable (2.8.6)
23
- public_suffix (>= 2.0.2, < 6.0)
24
- ast (2.4.2)
25
- byebug (11.1.3)
26
- concurrent-ruby (1.2.3)
27
- crass (1.0.6)
28
- diff-lcs (1.5.1)
29
- docile (1.4.0)
30
- faraday (2.9.0)
31
- faraday-net_http (>= 2.0, < 3.2)
32
- faraday-follow_redirects (0.3.0)
33
- faraday (>= 1, < 3)
34
- faraday-net_http (3.1.0)
35
- net-http
36
- json (2.7.2)
37
- kramdown (2.4.0)
38
- rexml
39
- language_server-protocol (3.17.0.3)
40
- mime-types (3.5.2)
41
- mime-types-data (~> 3.2015)
42
- mime-types-data (3.2024.0305)
43
- mini_portile2 (2.8.6)
44
- net-http (0.4.1)
45
- uri
46
- nokogiri (1.16.5)
47
- mini_portile2 (~> 2.8.2)
48
- racc (~> 1.4)
49
- nokogiri (1.16.5-x86_64-darwin)
50
- racc (~> 1.4)
51
- nokogiri (1.16.5-x86_64-linux)
52
- racc (~> 1.4)
53
- parallel (1.24.0)
54
- parser (3.3.1.0)
55
- ast (~> 2.4.1)
56
- racc
57
- public_suffix (5.0.5)
58
- racc (1.7.3)
59
- rainbow (3.1.1)
60
- rake (13.2.1)
61
- regexp_parser (2.9.0)
62
- reverse_markdown (2.1.1)
63
- nokogiri
64
- rexml (3.3.2)
65
- strscan
66
- rspec (3.13.0)
67
- rspec-core (~> 3.13.0)
68
- rspec-expectations (~> 3.13.0)
69
- rspec-mocks (~> 3.13.0)
70
- rspec-core (3.13.0)
71
- rspec-support (~> 3.13.0)
72
- rspec-expectations (3.13.0)
73
- diff-lcs (>= 1.2.0, < 2.0)
74
- rspec-support (~> 3.13.0)
75
- rspec-mocks (3.13.0)
76
- diff-lcs (>= 1.2.0, < 2.0)
77
- rspec-support (~> 3.13.0)
78
- rspec-support (3.13.1)
79
- rss (0.3.0)
80
- rexml
81
- rubocop (1.63.4)
82
- json (~> 2.3)
83
- language_server-protocol (>= 3.17.0)
84
- parallel (~> 1.10)
85
- parser (>= 3.3.0.2)
86
- rainbow (>= 2.2.2, < 4.0)
87
- regexp_parser (>= 1.8, < 3.0)
88
- rexml (>= 3.2.5, < 4.0)
89
- rubocop-ast (>= 1.31.1, < 2.0)
90
- ruby-progressbar (~> 1.7)
91
- unicode-display_width (>= 2.4.0, < 3.0)
92
- rubocop-ast (1.31.3)
93
- parser (>= 3.3.1.0)
94
- rubocop-capybara (2.20.0)
95
- rubocop (~> 1.41)
96
- rubocop-factory_bot (2.25.1)
97
- rubocop (~> 1.41)
98
- rubocop-md (1.2.2)
99
- rubocop (>= 1.0)
100
- rubocop-performance (1.21.0)
101
- rubocop (>= 1.48.1, < 2.0)
102
- rubocop-ast (>= 1.31.1, < 2.0)
103
- rubocop-rake (0.6.0)
104
- rubocop (~> 1.0)
105
- rubocop-rspec (2.29.1)
106
- rubocop (~> 1.40)
107
- rubocop-capybara (~> 2.17)
108
- rubocop-factory_bot (~> 2.22)
109
- rubocop-rspec_rails (~> 2.28)
110
- rubocop-rspec_rails (2.28.3)
111
- rubocop (~> 1.40)
112
- ruby-progressbar (1.13.0)
113
- sanitize (6.1.0)
114
- crass (~> 1.0.2)
115
- nokogiri (>= 1.12.0)
116
- simplecov (0.22.0)
117
- docile (~> 1.1)
118
- simplecov-html (~> 0.11)
119
- simplecov_json_formatter (~> 0.1)
120
- simplecov-html (0.12.3)
121
- simplecov_json_formatter (0.1.4)
122
- strscan (3.1.0)
123
- thor (1.3.1)
124
- tzinfo (2.0.6)
125
- concurrent-ruby (~> 1.0)
126
- unicode-display_width (2.5.0)
127
- uri (0.13.0)
128
- vcr (6.2.0)
129
- yard (0.9.36)
130
- zeitwerk (2.6.13)
131
-
132
- PLATFORMS
133
- ruby
134
- x86_64-darwin
135
- x86_64-darwin-20
136
- x86_64-linux
137
-
138
- DEPENDENCIES
139
- byebug
140
- html2rss!
141
- rake
142
- rspec (~> 3.0)
143
- rubocop
144
- rubocop-md
145
- rubocop-performance
146
- rubocop-rake
147
- rubocop-rspec
148
- simplecov
149
- vcr
150
- yard
151
-
152
- BUNDLED WITH
153
- 2.4.1
data/bin/console DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'bundler/setup'
5
- require 'html2rss'
6
- require 'byebug'
7
-
8
- # You can add fixtures and/or initialization code here to make experimenting
9
- # with your gem easier. You can also use a different console, if you like.
10
-
11
- # (If you use this, don't forget to add pry to your Gemfile!)
12
- # require "pry"
13
- # Pry.start
14
-
15
- require 'irb'
16
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
data/rakefile.rb DELETED
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'bundler'
4
- require 'rake'
5
- require 'rspec'
6
- require 'rspec/core/rake_task'
7
-
8
- Bundler.setup
9
- Bundler::GemHelper.install_tasks
10
-
11
- task default: [:spec]
12
-
13
- desc 'Run all examples'
14
- RSpec::Core::RakeTask.new(:spec) do |t|
15
- t.ruby_opts = %w[-w]
16
- end