html2rss 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +13 -42
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +116 -94
  7. data/README.md +326 -253
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +16 -21
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +46 -51
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -78
  26. data/lib/html2rss/item.rb +118 -40
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -9
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +6 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +54 -150
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -210
  45. data/lib/html2rss/feed_builder.rb +0 -75
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
data/lib/html2rss.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zeitwerk'
2
4
 
3
5
  loader = Zeitwerk::Loader.for_gem
@@ -7,35 +9,33 @@ require 'yaml'
7
9
 
8
10
  ##
9
11
  # The Html2rss namespace.
10
- # Request HTML from an URL and transform it to a RSS 2.0 object.
11
12
  module Html2rss
13
+ CONFIG_KEY_FEEDS = :feeds
14
+
12
15
  ##
13
- # Returns a RSS object which is generated from the provided file.
14
- #
15
- # `file_path`: a File object of a YAML file
16
- # `name`: the of the feed
16
+ # Returns an RSS object generated from the provided YAML file configuration.
17
17
  #
18
18
  # Example:
19
19
  #
20
20
  # feed = Html2rss.feed_from_yaml_config(File.join(['spec', 'config.test.yml']), 'nuxt-releases')
21
21
  # # => #<RSS::Rss:0x00007fb2f6331228
22
- # @return [RSS:Rss]
23
- def self.feed_from_yaml_config(file, name)
24
- # rubocop:disable Security/YAMLLoad
25
- yaml = YAML.load(File.open(file))
26
- # rubocop:enable Security/YAMLLoad
22
+ #
23
+ # @param file [String] Path to the YAML file.
24
+ # @param name [String, Symbol, nil] Name of the feed in the YAML file.
25
+ # @param global_config [Hash] Global options (e.g., HTTP headers).
26
+ # @param params [Hash] Dynamic parameters for the feed configuration.
27
+ # @return [RSS::Rss] RSS object generated from the configuration.
28
+ def self.feed_from_yaml_config(file, name = nil, global_config: {}, params: {})
29
+ yaml = load_yaml(file)
30
+ feeds = yaml[CONFIG_KEY_FEEDS] || {}
27
31
 
28
- feed_config = yaml['feeds'][name]
29
- global_config = yaml.reject { |key| key == 'feeds' }
32
+ feed_config = find_feed_config(yaml, feeds, name, global_config)
30
33
 
31
- config = Config.new(feed_config, global_config)
32
- feed(config)
34
+ feed(Config.new(feed_config, global_config, params))
33
35
  end
34
36
 
35
37
  ##
36
- # Returns a RSS object which is generated from the provided config.
37
- #
38
- # `config`: can be a Hash or an instance of Html2rss::Config.
38
+ # Returns an RSS object generated from the provided configuration.
39
39
  #
40
40
  # Example:
41
41
  #
@@ -48,11 +48,42 @@ module Html2rss
48
48
  # }
49
49
  # )
50
50
  # # => #<RSS::Rss:0x00007fb2f48d14a0 ...>
51
- # @return [RSS:Rss]
51
+ #
52
+ # @param config [Hash<Symbol, Object>, Html2rss::Config] Feed configuration.
53
+ # @return [RSS::Rss] RSS object generated from the configuration.
52
54
  def self.feed(config)
53
55
  config = Config.new(config) unless config.is_a?(Config)
56
+ RssBuilder.build(config)
57
+ end
54
58
 
55
- feed = FeedBuilder.new config
56
- feed.rss
59
+ ##
60
+ # Loads and parses the YAML file.
61
+ #
62
+ # @param file [String] Path to the YAML file.
63
+ # @return [Hash] Parsed YAML content.
64
+ def self.load_yaml(file)
65
+ YAML.safe_load_file(file, symbolize_names: true)
57
66
  end
67
+
68
+ ##
69
+ # Builds the feed configuration based on the provided parameters.
70
+ #
71
+ # @param yaml [Hash] Parsed YAML content.
72
+ # @param feeds [Hash] Feeds from the YAML content.
73
+ # @param feed_name [String, Symbol, nil] Name of the feed in the YAML file.
74
+ # @param global_config [Hash] Global options (e.g., HTTP headers).
75
+ # @return [Hash] Feed configuration.
76
+ def self.find_feed_config(yaml, feeds, feed_name, global_config)
77
+ return yaml unless feed_name
78
+
79
+ feed_name = feed_name.to_sym
80
+ if feeds.key?(feed_name)
81
+ global_config.merge!(yaml.reject { |key| key == CONFIG_KEY_FEEDS })
82
+ feeds[feed_name]
83
+ else
84
+ yaml
85
+ end
86
+ end
87
+
88
+ private_class_method :load_yaml, :find_feed_config
58
89
  end
data/rakefile.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler'
4
+ require 'rake'
5
+ require 'rspec'
6
+ require 'rspec/core/rake_task'
7
+
8
+ Bundler.setup
9
+ Bundler::GemHelper.install_tasks
10
+
11
+ task default: [:spec]
12
+
13
+ desc 'Run all examples'
14
+ RSpec::Core::RakeTask.new(:spec) do |t|
15
+ t.ruby_opts = %w[-w]
16
+ end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-09 00:00:00.000000000 Z
11
+ date: 2024-07-30 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: activesupport
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '5.0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '5.0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: addressable
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -38,48 +24,40 @@ dependencies:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
26
  version: '2.7'
41
- - !ruby/object:Gem::Dependency
42
- name: builder
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
27
  - !ruby/object:Gem::Dependency
56
28
  name: faraday
57
29
  requirement: !ruby/object:Gem::Requirement
58
30
  requirements:
59
- - - "~>"
31
+ - - ">"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.1
34
+ - - "<"
60
35
  - !ruby/object:Gem::Version
61
- version: '0.15'
36
+ version: '3.0'
62
37
  type: :runtime
63
38
  prerelease: false
64
39
  version_requirements: !ruby/object:Gem::Requirement
65
40
  requirements:
66
- - - "~>"
41
+ - - ">"
42
+ - !ruby/object:Gem::Version
43
+ version: 2.0.1
44
+ - - "<"
67
45
  - !ruby/object:Gem::Version
68
- version: '0.15'
46
+ version: '3.0'
69
47
  - !ruby/object:Gem::Dependency
70
- name: faraday_middleware
48
+ name: faraday-follow_redirects
71
49
  requirement: !ruby/object:Gem::Requirement
72
50
  requirements:
73
- - - "~>"
51
+ - - ">="
74
52
  - !ruby/object:Gem::Version
75
- version: '0.13'
53
+ version: '0'
76
54
  type: :runtime
77
55
  prerelease: false
78
56
  version_requirements: !ruby/object:Gem::Requirement
79
57
  requirements:
80
- - - "~>"
58
+ - - ">="
81
59
  - !ruby/object:Gem::Version
82
- version: '0.13'
60
+ version: '0'
83
61
  - !ruby/object:Gem::Dependency
84
62
  name: kramdown
85
63
  requirement: !ruby/object:Gem::Requirement
@@ -129,35 +107,7 @@ dependencies:
129
107
  - !ruby/object:Gem::Version
130
108
  version: '2.0'
131
109
  - !ruby/object:Gem::Dependency
132
- name: reverse_markdown
133
- requirement: !ruby/object:Gem::Requirement
134
- requirements:
135
- - - "~>"
136
- - !ruby/object:Gem::Version
137
- version: '1.3'
138
- type: :runtime
139
- prerelease: false
140
- version_requirements: !ruby/object:Gem::Requirement
141
- requirements:
142
- - - "~>"
143
- - !ruby/object:Gem::Version
144
- version: '1.3'
145
- - !ruby/object:Gem::Dependency
146
- name: sanitize
147
- requirement: !ruby/object:Gem::Requirement
148
- requirements:
149
- - - "~>"
150
- - !ruby/object:Gem::Version
151
- version: '5.0'
152
- type: :runtime
153
- prerelease: false
154
- version_requirements: !ruby/object:Gem::Requirement
155
- requirements:
156
- - - "~>"
157
- - !ruby/object:Gem::Version
158
- version: '5.0'
159
- - !ruby/object:Gem::Dependency
160
- name: to_regexp
110
+ name: regexp_parser
161
111
  requirement: !ruby/object:Gem::Requirement
162
112
  requirements:
163
113
  - - ">="
@@ -171,41 +121,27 @@ dependencies:
171
121
  - !ruby/object:Gem::Version
172
122
  version: '0'
173
123
  - !ruby/object:Gem::Dependency
174
- name: zeitwerk
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - ">="
178
- - !ruby/object:Gem::Version
179
- version: '0'
180
- type: :runtime
181
- prerelease: false
182
- version_requirements: !ruby/object:Gem::Requirement
183
- requirements:
184
- - - ">="
185
- - !ruby/object:Gem::Version
186
- version: '0'
187
- - !ruby/object:Gem::Dependency
188
- name: bundler
124
+ name: reverse_markdown
189
125
  requirement: !ruby/object:Gem::Requirement
190
126
  requirements:
191
127
  - - "~>"
192
128
  - !ruby/object:Gem::Version
193
- version: '1.16'
194
- type: :development
129
+ version: '2.0'
130
+ type: :runtime
195
131
  prerelease: false
196
132
  version_requirements: !ruby/object:Gem::Requirement
197
133
  requirements:
198
134
  - - "~>"
199
135
  - !ruby/object:Gem::Version
200
- version: '1.16'
136
+ version: '2.0'
201
137
  - !ruby/object:Gem::Dependency
202
- name: byebug
138
+ name: rss
203
139
  requirement: !ruby/object:Gem::Requirement
204
140
  requirements:
205
141
  - - ">="
206
142
  - !ruby/object:Gem::Version
207
143
  version: '0'
208
- type: :development
144
+ type: :runtime
209
145
  prerelease: false
210
146
  version_requirements: !ruby/object:Gem::Requirement
211
147
  requirements:
@@ -213,69 +149,27 @@ dependencies:
213
149
  - !ruby/object:Gem::Version
214
150
  version: '0'
215
151
  - !ruby/object:Gem::Dependency
216
- name: rspec
152
+ name: sanitize
217
153
  requirement: !ruby/object:Gem::Requirement
218
154
  requirements:
219
155
  - - "~>"
220
156
  - !ruby/object:Gem::Version
221
- version: '3.0'
222
- type: :development
157
+ version: '6.0'
158
+ type: :runtime
223
159
  prerelease: false
224
160
  version_requirements: !ruby/object:Gem::Requirement
225
161
  requirements:
226
162
  - - "~>"
227
163
  - !ruby/object:Gem::Version
228
- version: '3.0'
164
+ version: '6.0'
229
165
  - !ruby/object:Gem::Dependency
230
- name: rubocop
166
+ name: thor
231
167
  requirement: !ruby/object:Gem::Requirement
232
168
  requirements:
233
169
  - - ">="
234
170
  - !ruby/object:Gem::Version
235
171
  version: '0'
236
- type: :development
237
- prerelease: false
238
- version_requirements: !ruby/object:Gem::Requirement
239
- requirements:
240
- - - ">="
241
- - !ruby/object:Gem::Version
242
- version: '0'
243
- - !ruby/object:Gem::Dependency
244
- name: rubocop-performance
245
- requirement: !ruby/object:Gem::Requirement
246
- requirements:
247
- - - ">="
248
- - !ruby/object:Gem::Version
249
- version: '0'
250
- type: :development
251
- prerelease: false
252
- version_requirements: !ruby/object:Gem::Requirement
253
- requirements:
254
- - - ">="
255
- - !ruby/object:Gem::Version
256
- version: '0'
257
- - !ruby/object:Gem::Dependency
258
- name: rubocop-rspec
259
- requirement: !ruby/object:Gem::Requirement
260
- requirements:
261
- - - ">="
262
- - !ruby/object:Gem::Version
263
- version: '0'
264
- type: :development
265
- prerelease: false
266
- version_requirements: !ruby/object:Gem::Requirement
267
- requirements:
268
- - - ">="
269
- - !ruby/object:Gem::Version
270
- version: '0'
271
- - !ruby/object:Gem::Dependency
272
- name: simplecov
273
- requirement: !ruby/object:Gem::Requirement
274
- requirements:
275
- - - ">="
276
- - !ruby/object:Gem::Version
277
- version: '0'
278
- type: :development
172
+ type: :runtime
279
173
  prerelease: false
280
174
  version_requirements: !ruby/object:Gem::Requirement
281
175
  requirements:
@@ -283,13 +177,13 @@ dependencies:
283
177
  - !ruby/object:Gem::Version
284
178
  version: '0'
285
179
  - !ruby/object:Gem::Dependency
286
- name: vcr
180
+ name: tzinfo
287
181
  requirement: !ruby/object:Gem::Requirement
288
182
  requirements:
289
183
  - - ">="
290
184
  - !ruby/object:Gem::Version
291
185
  version: '0'
292
- type: :development
186
+ type: :runtime
293
187
  prerelease: false
294
188
  version_requirements: !ruby/object:Gem::Requirement
295
189
  requirements:
@@ -297,13 +191,13 @@ dependencies:
297
191
  - !ruby/object:Gem::Version
298
192
  version: '0'
299
193
  - !ruby/object:Gem::Dependency
300
- name: yard
194
+ name: zeitwerk
301
195
  requirement: !ruby/object:Gem::Requirement
302
196
  requirements:
303
197
  - - ">="
304
198
  - !ruby/object:Gem::Version
305
199
  version: '0'
306
- type: :development
200
+ type: :runtime
307
201
  prerelease: false
308
202
  version_requirements: !ruby/object:Gem::Requirement
309
203
  requirements:
@@ -314,52 +208,62 @@ description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss insta
314
208
  in return.
315
209
  email:
316
210
  - html2rss@desmarais.de
317
- executables: []
211
+ executables:
212
+ - html2rss
318
213
  extensions: []
319
214
  extra_rdoc_files: []
320
215
  files:
321
216
  - ".gitignore"
217
+ - ".mergify.yml"
322
218
  - ".rspec"
323
219
  - ".rubocop.yml"
324
- - ".travis.yml"
325
220
  - ".yardopts"
326
- - CHANGELOG.md
327
221
  - Gemfile
328
222
  - Gemfile.lock
329
223
  - LICENSE
330
224
  - README.md
331
225
  - bin/console
332
226
  - bin/setup
227
+ - exe/html2rss
333
228
  - html2rss.gemspec
334
229
  - lib/html2rss.rb
335
230
  - lib/html2rss/attribute_post_processors.rb
336
231
  - lib/html2rss/attribute_post_processors/gsub.rb
337
232
  - lib/html2rss/attribute_post_processors/html_to_markdown.rb
233
+ - lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
234
+ - lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb
338
235
  - lib/html2rss/attribute_post_processors/markdown_to_html.rb
339
236
  - lib/html2rss/attribute_post_processors/parse_time.rb
340
237
  - lib/html2rss/attribute_post_processors/parse_uri.rb
341
238
  - lib/html2rss/attribute_post_processors/sanitize_html.rb
342
239
  - lib/html2rss/attribute_post_processors/substring.rb
343
240
  - lib/html2rss/attribute_post_processors/template.rb
241
+ - lib/html2rss/cli.rb
344
242
  - lib/html2rss/config.rb
345
- - lib/html2rss/feed_builder.rb
243
+ - lib/html2rss/config/channel.rb
244
+ - lib/html2rss/config/selectors.rb
346
245
  - lib/html2rss/item.rb
347
246
  - lib/html2rss/item_extractors.rb
348
247
  - lib/html2rss/item_extractors/attribute.rb
349
- - lib/html2rss/item_extractors/current_time.rb
350
248
  - lib/html2rss/item_extractors/href.rb
351
249
  - lib/html2rss/item_extractors/html.rb
352
250
  - lib/html2rss/item_extractors/static.rb
353
251
  - lib/html2rss/item_extractors/text.rb
252
+ - lib/html2rss/object_to_xml_converter.rb
253
+ - lib/html2rss/rss_builder.rb
254
+ - lib/html2rss/rss_builder/channel.rb
255
+ - lib/html2rss/rss_builder/item.rb
256
+ - lib/html2rss/rss_builder/stylesheet.rb
354
257
  - lib/html2rss/utils.rb
355
258
  - lib/html2rss/version.rb
356
- - support/logo.png
357
- homepage: https://github.com/gildesmarais/html2rss
259
+ - rakefile.rb
260
+ homepage: https://github.com/html2rss/html2rss
358
261
  licenses:
359
262
  - MIT
360
263
  metadata:
361
264
  allowed_push_host: https://rubygems.org
362
- changelog_uri: https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md
265
+ changelog_uri: https://github.com/html2rss/html2rss/releases
266
+ rubygems_mfa_required: 'true'
363
267
  post_install_message:
364
268
  rdoc_options: []
365
269
  require_paths:
@@ -368,14 +272,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
368
272
  requirements:
369
273
  - - ">="
370
274
  - !ruby/object:Gem::Version
371
- version: 2.4.4
275
+ version: '3.1'
372
276
  required_rubygems_version: !ruby/object:Gem::Requirement
373
277
  requirements:
374
278
  - - ">="
375
279
  - !ruby/object:Gem::Version
376
280
  version: '0'
377
281
  requirements: []
378
- rubygems_version: 3.0.6
282
+ rubygems_version: 3.5.11
379
283
  signing_key:
380
284
  specification_version: 4
381
285
  summary: Returns an RSS::Rss object by scraping a URL.
data/.travis.yml DELETED
@@ -1,25 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- cache: bundler
4
-
5
- before_install:
6
- - gem update --system
7
- - gem install bundler
8
-
9
- bundler_args: "--jobs=3 --retry=3"
10
-
11
- rvm:
12
- - 2.4.9
13
- - 2.5.7
14
- - 2.6.5
15
-
16
- script:
17
- - bundle exec rubocop -F
18
- - bundle exec rspec
19
-
20
- deploy:
21
- provider: rubygems
22
- api_key:
23
- secure: bM3Yl8iWdB1Amra3Bm6bIH/mTwHcRhZrX8etFFbJANxIbkhzUOyTKcDMYiWUVM/mBzzv0NOuRejrDR6R0v7E2udrKcLQFCBtv7HqPAXIlkEEyxZy+M1kTqcPzP872E+ZKTn93vCzbiXBLYoMmqgCzqvcO87IBYNzTURHkfFjaYJJdVyZ5EVtbpXf4FhBvuQf9LTk/ocClgwYeuqd+45lO7qHoPatsvbY0vCOfKaiwkdOkBt+hjc56awcYSc9CXn0DCatebPQmQmdrqFd8fKgyCatWS3n+8TPmvzVfNJe44wg3oNfHbWruP85I2LE9ei1iG+iGQIF60fMhGgMJ4EM3REXDE5Mg+GA5uJcgH9Poirut3Ih65jtAzYNGohlmEmc7ysKc0dmG1O3ndwrHjh5KePrOAGDaW6QKG+m5ebIZ+mgrEA+ZVU1mjDM8FlbSKAayoPloslZdllSv7miwGzh6xrHWGQSCURZAkygFh+Kd+Kg1eVlEs+n6aObod82mEOfBPvWPacOrE2fY4B0ocFOKotZBCZSD0ZIixlyslRTnmcJfpRNlYLsQ56oy5uPNUccPQ86NSmmE+qbRdPCLQCKLPm2iYBgOa5iQrfHR/fUgcO0skAZiW4o9QflDgIFS/G+BE6FMHIvjkKA6Ae4KbqGzlF5pGFdo6p4MhlvubwjsVI=
24
- on:
25
- tags: true