html2rss 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +11 -145
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +111 -97
  7. data/README.md +323 -270
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +15 -20
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -82
  26. data/lib/html2rss/item.rb +118 -42
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -10
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +5 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +51 -154
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -221
  45. data/lib/html2rss/feed_builder.rb +0 -81
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
data/lib/html2rss.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zeitwerk'
2
4
 
3
5
  loader = Zeitwerk::Loader.for_gem
@@ -7,35 +9,33 @@ require 'yaml'
7
9
 
8
10
  ##
9
11
  # The Html2rss namespace.
10
- # Request HTML from an URL and transform it to a RSS 2.0 object.
11
12
  module Html2rss
13
+ CONFIG_KEY_FEEDS = :feeds
14
+
12
15
  ##
13
- # Returns a RSS object which is generated from the provided file.
14
- #
15
- # `file_path`: a File object of a YAML file
16
- # `name`: the of the feed
16
+ # Returns an RSS object generated from the provided YAML file configuration.
17
17
  #
18
18
  # Example:
19
19
  #
20
20
  # feed = Html2rss.feed_from_yaml_config(File.join(['spec', 'config.test.yml']), 'nuxt-releases')
21
21
  # # => #<RSS::Rss:0x00007fb2f6331228
22
- # @return [RSS:Rss]
23
- def self.feed_from_yaml_config(file, name)
24
- # rubocop:disable Security/YAMLLoad
25
- yaml = YAML.load(File.open(file))
26
- # rubocop:enable Security/YAMLLoad
22
+ #
23
+ # @param file [String] Path to the YAML file.
24
+ # @param name [String, Symbol, nil] Name of the feed in the YAML file.
25
+ # @param global_config [Hash] Global options (e.g., HTTP headers).
26
+ # @param params [Hash] Dynamic parameters for the feed configuration.
27
+ # @return [RSS::Rss] RSS object generated from the configuration.
28
+ def self.feed_from_yaml_config(file, name = nil, global_config: {}, params: {})
29
+ yaml = load_yaml(file)
30
+ feeds = yaml[CONFIG_KEY_FEEDS] || {}
27
31
 
28
- feed_config = yaml['feeds'][name]
29
- global_config = yaml.reject { |key| key == 'feeds' }
32
+ feed_config = find_feed_config(yaml, feeds, name, global_config)
30
33
 
31
- config = Config.new(feed_config, global_config)
32
- feed(config)
34
+ feed(Config.new(feed_config, global_config, params))
33
35
  end
34
36
 
35
37
  ##
36
- # Returns a RSS object which is generated from the provided config.
37
- #
38
- # `config`: can be a Hash or an instance of Html2rss::Config.
38
+ # Returns an RSS object generated from the provided configuration.
39
39
  #
40
40
  # Example:
41
41
  #
@@ -48,11 +48,42 @@ module Html2rss
48
48
  # }
49
49
  # )
50
50
  # # => #<RSS::Rss:0x00007fb2f48d14a0 ...>
51
- # @return [RSS:Rss]
51
+ #
52
+ # @param config [Hash<Symbol, Object>, Html2rss::Config] Feed configuration.
53
+ # @return [RSS::Rss] RSS object generated from the configuration.
52
54
  def self.feed(config)
53
55
  config = Config.new(config) unless config.is_a?(Config)
56
+ RssBuilder.build(config)
57
+ end
54
58
 
55
- feed = FeedBuilder.new config
56
- feed.rss
59
+ ##
60
+ # Loads and parses the YAML file.
61
+ #
62
+ # @param file [String] Path to the YAML file.
63
+ # @return [Hash] Parsed YAML content.
64
+ def self.load_yaml(file)
65
+ YAML.safe_load_file(file, symbolize_names: true)
57
66
  end
67
+
68
+ ##
69
+ # Builds the feed configuration based on the provided parameters.
70
+ #
71
+ # @param yaml [Hash] Parsed YAML content.
72
+ # @param feeds [Hash] Feeds from the YAML content.
73
+ # @param feed_name [String, Symbol, nil] Name of the feed in the YAML file.
74
+ # @param global_config [Hash] Global options (e.g., HTTP headers).
75
+ # @return [Hash] Feed configuration.
76
+ def self.find_feed_config(yaml, feeds, feed_name, global_config)
77
+ return yaml unless feed_name
78
+
79
+ feed_name = feed_name.to_sym
80
+ if feeds.key?(feed_name)
81
+ global_config.merge!(yaml.reject { |key| key == CONFIG_KEY_FEEDS })
82
+ feeds[feed_name]
83
+ else
84
+ yaml
85
+ end
86
+ end
87
+
88
+ private_class_method :load_yaml, :find_feed_config
58
89
  end
data/rakefile.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler'
4
+ require 'rake'
5
+ require 'rspec'
6
+ require 'rspec/core/rake_task'
7
+
8
+ Bundler.setup
9
+ Bundler::GemHelper.install_tasks
10
+
11
+ task default: [:spec]
12
+
13
+ desc 'Run all examples'
14
+ RSpec::Core::RakeTask.new(:spec) do |t|
15
+ t.ruby_opts = %w[-w]
16
+ end
metadata CHANGED
@@ -1,35 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-19 00:00:00.000000000 Z
11
+ date: 2024-07-30 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: activesupport
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '5'
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '7'
23
- type: :runtime
24
- prerelease: false
25
- version_requirements: !ruby/object:Gem::Requirement
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- version: '5'
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '7'
33
13
  - !ruby/object:Gem::Dependency
34
14
  name: addressable
35
15
  requirement: !ruby/object:Gem::Requirement
@@ -44,36 +24,28 @@ dependencies:
44
24
  - - "~>"
45
25
  - !ruby/object:Gem::Version
46
26
  version: '2.7'
47
- - !ruby/object:Gem::Dependency
48
- name: builder
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- version: '0'
61
27
  - !ruby/object:Gem::Dependency
62
28
  name: faraday
63
29
  requirement: !ruby/object:Gem::Requirement
64
30
  requirements:
65
- - - "~>"
31
+ - - ">"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.1
34
+ - - "<"
66
35
  - !ruby/object:Gem::Version
67
- version: '1.0'
36
+ version: '3.0'
68
37
  type: :runtime
69
38
  prerelease: false
70
39
  version_requirements: !ruby/object:Gem::Requirement
71
40
  requirements:
72
- - - "~>"
41
+ - - ">"
73
42
  - !ruby/object:Gem::Version
74
- version: '1.0'
43
+ version: 2.0.1
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.0'
75
47
  - !ruby/object:Gem::Dependency
76
- name: faraday_middleware
48
+ name: faraday-follow_redirects
77
49
  requirement: !ruby/object:Gem::Requirement
78
50
  requirements:
79
51
  - - ">="
@@ -135,35 +107,7 @@ dependencies:
135
107
  - !ruby/object:Gem::Version
136
108
  version: '2.0'
137
109
  - !ruby/object:Gem::Dependency
138
- name: reverse_markdown
139
- requirement: !ruby/object:Gem::Requirement
140
- requirements:
141
- - - "~>"
142
- - !ruby/object:Gem::Version
143
- version: '2.0'
144
- type: :runtime
145
- prerelease: false
146
- version_requirements: !ruby/object:Gem::Requirement
147
- requirements:
148
- - - "~>"
149
- - !ruby/object:Gem::Version
150
- version: '2.0'
151
- - !ruby/object:Gem::Dependency
152
- name: sanitize
153
- requirement: !ruby/object:Gem::Requirement
154
- requirements:
155
- - - "~>"
156
- - !ruby/object:Gem::Version
157
- version: '5.0'
158
- type: :runtime
159
- prerelease: false
160
- version_requirements: !ruby/object:Gem::Requirement
161
- requirements:
162
- - - "~>"
163
- - !ruby/object:Gem::Version
164
- version: '5.0'
165
- - !ruby/object:Gem::Dependency
166
- name: to_regexp
110
+ name: regexp_parser
167
111
  requirement: !ruby/object:Gem::Requirement
168
112
  requirements:
169
113
  - - ">="
@@ -177,41 +121,27 @@ dependencies:
177
121
  - !ruby/object:Gem::Version
178
122
  version: '0'
179
123
  - !ruby/object:Gem::Dependency
180
- name: zeitwerk
124
+ name: reverse_markdown
181
125
  requirement: !ruby/object:Gem::Requirement
182
126
  requirements:
183
- - - ">="
127
+ - - "~>"
184
128
  - !ruby/object:Gem::Version
185
- version: '0'
129
+ version: '2.0'
186
130
  type: :runtime
187
131
  prerelease: false
188
132
  version_requirements: !ruby/object:Gem::Requirement
189
133
  requirements:
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- version: '0'
193
- - !ruby/object:Gem::Dependency
194
- name: bundler
195
- requirement: !ruby/object:Gem::Requirement
196
- requirements:
197
- - - ">="
198
- - !ruby/object:Gem::Version
199
- version: '0'
200
- type: :development
201
- prerelease: false
202
- version_requirements: !ruby/object:Gem::Requirement
203
- requirements:
204
- - - ">="
134
+ - - "~>"
205
135
  - !ruby/object:Gem::Version
206
- version: '0'
136
+ version: '2.0'
207
137
  - !ruby/object:Gem::Dependency
208
- name: byebug
138
+ name: rss
209
139
  requirement: !ruby/object:Gem::Requirement
210
140
  requirements:
211
141
  - - ">="
212
142
  - !ruby/object:Gem::Version
213
143
  version: '0'
214
- type: :development
144
+ type: :runtime
215
145
  prerelease: false
216
146
  version_requirements: !ruby/object:Gem::Requirement
217
147
  requirements:
@@ -219,69 +149,27 @@ dependencies:
219
149
  - !ruby/object:Gem::Version
220
150
  version: '0'
221
151
  - !ruby/object:Gem::Dependency
222
- name: rspec
152
+ name: sanitize
223
153
  requirement: !ruby/object:Gem::Requirement
224
154
  requirements:
225
155
  - - "~>"
226
156
  - !ruby/object:Gem::Version
227
- version: '3.0'
228
- type: :development
157
+ version: '6.0'
158
+ type: :runtime
229
159
  prerelease: false
230
160
  version_requirements: !ruby/object:Gem::Requirement
231
161
  requirements:
232
162
  - - "~>"
233
163
  - !ruby/object:Gem::Version
234
- version: '3.0'
164
+ version: '6.0'
235
165
  - !ruby/object:Gem::Dependency
236
- name: rubocop
166
+ name: thor
237
167
  requirement: !ruby/object:Gem::Requirement
238
168
  requirements:
239
169
  - - ">="
240
170
  - !ruby/object:Gem::Version
241
171
  version: '0'
242
- type: :development
243
- prerelease: false
244
- version_requirements: !ruby/object:Gem::Requirement
245
- requirements:
246
- - - ">="
247
- - !ruby/object:Gem::Version
248
- version: '0'
249
- - !ruby/object:Gem::Dependency
250
- name: rubocop-performance
251
- requirement: !ruby/object:Gem::Requirement
252
- requirements:
253
- - - ">="
254
- - !ruby/object:Gem::Version
255
- version: '0'
256
- type: :development
257
- prerelease: false
258
- version_requirements: !ruby/object:Gem::Requirement
259
- requirements:
260
- - - ">="
261
- - !ruby/object:Gem::Version
262
- version: '0'
263
- - !ruby/object:Gem::Dependency
264
- name: rubocop-rspec
265
- requirement: !ruby/object:Gem::Requirement
266
- requirements:
267
- - - ">="
268
- - !ruby/object:Gem::Version
269
- version: '0'
270
- type: :development
271
- prerelease: false
272
- version_requirements: !ruby/object:Gem::Requirement
273
- requirements:
274
- - - ">="
275
- - !ruby/object:Gem::Version
276
- version: '0'
277
- - !ruby/object:Gem::Dependency
278
- name: simplecov
279
- requirement: !ruby/object:Gem::Requirement
280
- requirements:
281
- - - ">="
282
- - !ruby/object:Gem::Version
283
- version: '0'
284
- type: :development
172
+ type: :runtime
285
173
  prerelease: false
286
174
  version_requirements: !ruby/object:Gem::Requirement
287
175
  requirements:
@@ -289,13 +177,13 @@ dependencies:
289
177
  - !ruby/object:Gem::Version
290
178
  version: '0'
291
179
  - !ruby/object:Gem::Dependency
292
- name: vcr
180
+ name: tzinfo
293
181
  requirement: !ruby/object:Gem::Requirement
294
182
  requirements:
295
183
  - - ">="
296
184
  - !ruby/object:Gem::Version
297
185
  version: '0'
298
- type: :development
186
+ type: :runtime
299
187
  prerelease: false
300
188
  version_requirements: !ruby/object:Gem::Requirement
301
189
  requirements:
@@ -303,13 +191,13 @@ dependencies:
303
191
  - !ruby/object:Gem::Version
304
192
  version: '0'
305
193
  - !ruby/object:Gem::Dependency
306
- name: yard
194
+ name: zeitwerk
307
195
  requirement: !ruby/object:Gem::Requirement
308
196
  requirements:
309
197
  - - ">="
310
198
  - !ruby/object:Gem::Version
311
199
  version: '0'
312
- type: :development
200
+ type: :runtime
313
201
  prerelease: false
314
202
  version_requirements: !ruby/object:Gem::Requirement
315
203
  requirements:
@@ -320,52 +208,62 @@ description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss insta
320
208
  in return.
321
209
  email:
322
210
  - html2rss@desmarais.de
323
- executables: []
211
+ executables:
212
+ - html2rss
324
213
  extensions: []
325
214
  extra_rdoc_files: []
326
215
  files:
327
216
  - ".gitignore"
217
+ - ".mergify.yml"
328
218
  - ".rspec"
329
219
  - ".rubocop.yml"
330
- - ".travis.yml"
331
220
  - ".yardopts"
332
- - CHANGELOG.md
333
221
  - Gemfile
334
222
  - Gemfile.lock
335
223
  - LICENSE
336
224
  - README.md
337
225
  - bin/console
338
226
  - bin/setup
227
+ - exe/html2rss
339
228
  - html2rss.gemspec
340
229
  - lib/html2rss.rb
341
230
  - lib/html2rss/attribute_post_processors.rb
342
231
  - lib/html2rss/attribute_post_processors/gsub.rb
343
232
  - lib/html2rss/attribute_post_processors/html_to_markdown.rb
233
+ - lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
234
+ - lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb
344
235
  - lib/html2rss/attribute_post_processors/markdown_to_html.rb
345
236
  - lib/html2rss/attribute_post_processors/parse_time.rb
346
237
  - lib/html2rss/attribute_post_processors/parse_uri.rb
347
238
  - lib/html2rss/attribute_post_processors/sanitize_html.rb
348
239
  - lib/html2rss/attribute_post_processors/substring.rb
349
240
  - lib/html2rss/attribute_post_processors/template.rb
241
+ - lib/html2rss/cli.rb
350
242
  - lib/html2rss/config.rb
351
- - lib/html2rss/feed_builder.rb
243
+ - lib/html2rss/config/channel.rb
244
+ - lib/html2rss/config/selectors.rb
352
245
  - lib/html2rss/item.rb
353
246
  - lib/html2rss/item_extractors.rb
354
247
  - lib/html2rss/item_extractors/attribute.rb
355
- - lib/html2rss/item_extractors/current_time.rb
356
248
  - lib/html2rss/item_extractors/href.rb
357
249
  - lib/html2rss/item_extractors/html.rb
358
250
  - lib/html2rss/item_extractors/static.rb
359
251
  - lib/html2rss/item_extractors/text.rb
252
+ - lib/html2rss/object_to_xml_converter.rb
253
+ - lib/html2rss/rss_builder.rb
254
+ - lib/html2rss/rss_builder/channel.rb
255
+ - lib/html2rss/rss_builder/item.rb
256
+ - lib/html2rss/rss_builder/stylesheet.rb
360
257
  - lib/html2rss/utils.rb
361
258
  - lib/html2rss/version.rb
362
- - support/logo.png
363
- homepage: https://github.com/gildesmarais/html2rss
259
+ - rakefile.rb
260
+ homepage: https://github.com/html2rss/html2rss
364
261
  licenses:
365
262
  - MIT
366
263
  metadata:
367
264
  allowed_push_host: https://rubygems.org
368
- changelog_uri: https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md
265
+ changelog_uri: https://github.com/html2rss/html2rss/releases
266
+ rubygems_mfa_required: 'true'
369
267
  post_install_message:
370
268
  rdoc_options: []
371
269
  require_paths:
@@ -374,15 +272,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
374
272
  requirements:
375
273
  - - ">="
376
274
  - !ruby/object:Gem::Version
377
- version: 2.5.0
275
+ version: '3.1'
378
276
  required_rubygems_version: !ruby/object:Gem::Requirement
379
277
  requirements:
380
278
  - - ">="
381
279
  - !ruby/object:Gem::Version
382
280
  version: '0'
383
281
  requirements: []
384
- rubyforge_project:
385
- rubygems_version: 2.7.7
282
+ rubygems_version: 3.5.11
386
283
  signing_key:
387
284
  specification_version: 4
388
285
  summary: Returns an RSS::Rss object by scraping a URL.
data/.travis.yml DELETED
@@ -1,25 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- cache: bundler
4
-
5
- before_install:
6
- - gem update --system
7
- - gem install bundler
8
-
9
- bundler_args: "--jobs=3 --retry=3"
10
-
11
- rvm:
12
- - 2.5.8
13
- - 2.6.6
14
- - 2.7.1
15
-
16
- script:
17
- - bundle exec rubocop -F
18
- - bundle exec rspec
19
-
20
- deploy:
21
- provider: rubygems
22
- api_key:
23
- secure: bM3Yl8iWdB1Amra3Bm6bIH/mTwHcRhZrX8etFFbJANxIbkhzUOyTKcDMYiWUVM/mBzzv0NOuRejrDR6R0v7E2udrKcLQFCBtv7HqPAXIlkEEyxZy+M1kTqcPzP872E+ZKTn93vCzbiXBLYoMmqgCzqvcO87IBYNzTURHkfFjaYJJdVyZ5EVtbpXf4FhBvuQf9LTk/ocClgwYeuqd+45lO7qHoPatsvbY0vCOfKaiwkdOkBt+hjc56awcYSc9CXn0DCatebPQmQmdrqFd8fKgyCatWS3n+8TPmvzVfNJe44wg3oNfHbWruP85I2LE9ei1iG+iGQIF60fMhGgMJ4EM3REXDE5Mg+GA5uJcgH9Poirut3Ih65jtAzYNGohlmEmc7ysKc0dmG1O3ndwrHjh5KePrOAGDaW6QKG+m5ebIZ+mgrEA+ZVU1mjDM8FlbSKAayoPloslZdllSv7miwGzh6xrHWGQSCURZAkygFh+Kd+Kg1eVlEs+n6aObod82mEOfBPvWPacOrE2fY4B0ocFOKotZBCZSD0ZIixlyslRTnmcJfpRNlYLsQ56oy5uPNUccPQ86NSmmE+qbRdPCLQCKLPm2iYBgOa5iQrfHR/fUgcO0skAZiW4o9QflDgIFS/G+BE6FMHIvjkKA6Ae4KbqGzlF5pGFdo6p4MhlvubwjsVI=
24
- on:
25
- tags: true