html2rss 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.mergify.yml +15 -0
  4. data/.rubocop.yml +11 -145
  5. data/Gemfile +19 -2
  6. data/Gemfile.lock +111 -97
  7. data/README.md +323 -270
  8. data/bin/console +1 -0
  9. data/exe/html2rss +6 -0
  10. data/html2rss.gemspec +15 -20
  11. data/lib/html2rss/attribute_post_processors/gsub.rb +30 -8
  12. data/lib/html2rss/attribute_post_processors/html_to_markdown.rb +7 -2
  13. data/lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb +27 -0
  14. data/lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb +41 -0
  15. data/lib/html2rss/attribute_post_processors/markdown_to_html.rb +11 -2
  16. data/lib/html2rss/attribute_post_processors/parse_time.rb +11 -4
  17. data/lib/html2rss/attribute_post_processors/parse_uri.rb +12 -2
  18. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +40 -44
  19. data/lib/html2rss/attribute_post_processors/substring.rb +14 -4
  20. data/lib/html2rss/attribute_post_processors/template.rb +36 -12
  21. data/lib/html2rss/attribute_post_processors.rb +28 -5
  22. data/lib/html2rss/cli.rb +29 -0
  23. data/lib/html2rss/config/channel.rb +117 -0
  24. data/lib/html2rss/config/selectors.rb +91 -0
  25. data/lib/html2rss/config.rb +71 -82
  26. data/lib/html2rss/item.rb +118 -42
  27. data/lib/html2rss/item_extractors/attribute.rb +20 -7
  28. data/lib/html2rss/item_extractors/href.rb +20 -4
  29. data/lib/html2rss/item_extractors/html.rb +18 -6
  30. data/lib/html2rss/item_extractors/static.rb +18 -7
  31. data/lib/html2rss/item_extractors/text.rb +17 -5
  32. data/lib/html2rss/item_extractors.rb +75 -10
  33. data/lib/html2rss/object_to_xml_converter.rb +56 -0
  34. data/lib/html2rss/rss_builder/channel.rb +21 -0
  35. data/lib/html2rss/rss_builder/item.rb +83 -0
  36. data/lib/html2rss/rss_builder/stylesheet.rb +37 -0
  37. data/lib/html2rss/rss_builder.rb +96 -0
  38. data/lib/html2rss/utils.rb +94 -19
  39. data/lib/html2rss/version.rb +5 -1
  40. data/lib/html2rss.rb +51 -20
  41. data/rakefile.rb +16 -0
  42. metadata +51 -154
  43. data/.travis.yml +0 -25
  44. data/CHANGELOG.md +0 -221
  45. data/lib/html2rss/feed_builder.rb +0 -81
  46. data/lib/html2rss/item_extractors/current_time.rb +0 -21
  47. data/support/logo.png +0 -0
data/lib/html2rss.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'zeitwerk'
2
4
 
3
5
  loader = Zeitwerk::Loader.for_gem
@@ -7,35 +9,33 @@ require 'yaml'
7
9
 
8
10
  ##
9
11
  # The Html2rss namespace.
10
- # Request HTML from an URL and transform it to a RSS 2.0 object.
11
12
  module Html2rss
13
+ CONFIG_KEY_FEEDS = :feeds
14
+
12
15
  ##
13
- # Returns a RSS object which is generated from the provided file.
14
- #
15
- # `file_path`: a File object of a YAML file
16
- # `name`: the of the feed
16
+ # Returns an RSS object generated from the provided YAML file configuration.
17
17
  #
18
18
  # Example:
19
19
  #
20
20
  # feed = Html2rss.feed_from_yaml_config(File.join(['spec', 'config.test.yml']), 'nuxt-releases')
21
21
  # # => #<RSS::Rss:0x00007fb2f6331228
22
- # @return [RSS:Rss]
23
- def self.feed_from_yaml_config(file, name)
24
- # rubocop:disable Security/YAMLLoad
25
- yaml = YAML.load(File.open(file))
26
- # rubocop:enable Security/YAMLLoad
22
+ #
23
+ # @param file [String] Path to the YAML file.
24
+ # @param name [String, Symbol, nil] Name of the feed in the YAML file.
25
+ # @param global_config [Hash] Global options (e.g., HTTP headers).
26
+ # @param params [Hash] Dynamic parameters for the feed configuration.
27
+ # @return [RSS::Rss] RSS object generated from the configuration.
28
+ def self.feed_from_yaml_config(file, name = nil, global_config: {}, params: {})
29
+ yaml = load_yaml(file)
30
+ feeds = yaml[CONFIG_KEY_FEEDS] || {}
27
31
 
28
- feed_config = yaml['feeds'][name]
29
- global_config = yaml.reject { |key| key == 'feeds' }
32
+ feed_config = find_feed_config(yaml, feeds, name, global_config)
30
33
 
31
- config = Config.new(feed_config, global_config)
32
- feed(config)
34
+ feed(Config.new(feed_config, global_config, params))
33
35
  end
34
36
 
35
37
  ##
36
- # Returns a RSS object which is generated from the provided config.
37
- #
38
- # `config`: can be a Hash or an instance of Html2rss::Config.
38
+ # Returns an RSS object generated from the provided configuration.
39
39
  #
40
40
  # Example:
41
41
  #
@@ -48,11 +48,42 @@ module Html2rss
48
48
  # }
49
49
  # )
50
50
  # # => #<RSS::Rss:0x00007fb2f48d14a0 ...>
51
- # @return [RSS:Rss]
51
+ #
52
+ # @param config [Hash<Symbol, Object>, Html2rss::Config] Feed configuration.
53
+ # @return [RSS::Rss] RSS object generated from the configuration.
52
54
  def self.feed(config)
53
55
  config = Config.new(config) unless config.is_a?(Config)
56
+ RssBuilder.build(config)
57
+ end
54
58
 
55
- feed = FeedBuilder.new config
56
- feed.rss
59
+ ##
60
+ # Loads and parses the YAML file.
61
+ #
62
+ # @param file [String] Path to the YAML file.
63
+ # @return [Hash] Parsed YAML content.
64
+ def self.load_yaml(file)
65
+ YAML.safe_load_file(file, symbolize_names: true)
57
66
  end
67
+
68
+ ##
69
+ # Builds the feed configuration based on the provided parameters.
70
+ #
71
+ # @param yaml [Hash] Parsed YAML content.
72
+ # @param feeds [Hash] Feeds from the YAML content.
73
+ # @param feed_name [String, Symbol, nil] Name of the feed in the YAML file.
74
+ # @param global_config [Hash] Global options (e.g., HTTP headers).
75
+ # @return [Hash] Feed configuration.
76
+ def self.find_feed_config(yaml, feeds, feed_name, global_config)
77
+ return yaml unless feed_name
78
+
79
+ feed_name = feed_name.to_sym
80
+ if feeds.key?(feed_name)
81
+ global_config.merge!(yaml.reject { |key| key == CONFIG_KEY_FEEDS })
82
+ feeds[feed_name]
83
+ else
84
+ yaml
85
+ end
86
+ end
87
+
88
+ private_class_method :load_yaml, :find_feed_config
58
89
  end
data/rakefile.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler'
4
+ require 'rake'
5
+ require 'rspec'
6
+ require 'rspec/core/rake_task'
7
+
8
+ Bundler.setup
9
+ Bundler::GemHelper.install_tasks
10
+
11
+ task default: [:spec]
12
+
13
+ desc 'Run all examples'
14
+ RSpec::Core::RakeTask.new(:spec) do |t|
15
+ t.ruby_opts = %w[-w]
16
+ end
metadata CHANGED
@@ -1,35 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-19 00:00:00.000000000 Z
11
+ date: 2024-07-30 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: activesupport
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '5'
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '7'
23
- type: :runtime
24
- prerelease: false
25
- version_requirements: !ruby/object:Gem::Requirement
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- version: '5'
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '7'
33
13
  - !ruby/object:Gem::Dependency
34
14
  name: addressable
35
15
  requirement: !ruby/object:Gem::Requirement
@@ -44,36 +24,28 @@ dependencies:
44
24
  - - "~>"
45
25
  - !ruby/object:Gem::Version
46
26
  version: '2.7'
47
- - !ruby/object:Gem::Dependency
48
- name: builder
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - ">="
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- version: '0'
61
27
  - !ruby/object:Gem::Dependency
62
28
  name: faraday
63
29
  requirement: !ruby/object:Gem::Requirement
64
30
  requirements:
65
- - - "~>"
31
+ - - ">"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.1
34
+ - - "<"
66
35
  - !ruby/object:Gem::Version
67
- version: '1.0'
36
+ version: '3.0'
68
37
  type: :runtime
69
38
  prerelease: false
70
39
  version_requirements: !ruby/object:Gem::Requirement
71
40
  requirements:
72
- - - "~>"
41
+ - - ">"
73
42
  - !ruby/object:Gem::Version
74
- version: '1.0'
43
+ version: 2.0.1
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.0'
75
47
  - !ruby/object:Gem::Dependency
76
- name: faraday_middleware
48
+ name: faraday-follow_redirects
77
49
  requirement: !ruby/object:Gem::Requirement
78
50
  requirements:
79
51
  - - ">="
@@ -135,35 +107,7 @@ dependencies:
135
107
  - !ruby/object:Gem::Version
136
108
  version: '2.0'
137
109
  - !ruby/object:Gem::Dependency
138
- name: reverse_markdown
139
- requirement: !ruby/object:Gem::Requirement
140
- requirements:
141
- - - "~>"
142
- - !ruby/object:Gem::Version
143
- version: '2.0'
144
- type: :runtime
145
- prerelease: false
146
- version_requirements: !ruby/object:Gem::Requirement
147
- requirements:
148
- - - "~>"
149
- - !ruby/object:Gem::Version
150
- version: '2.0'
151
- - !ruby/object:Gem::Dependency
152
- name: sanitize
153
- requirement: !ruby/object:Gem::Requirement
154
- requirements:
155
- - - "~>"
156
- - !ruby/object:Gem::Version
157
- version: '5.0'
158
- type: :runtime
159
- prerelease: false
160
- version_requirements: !ruby/object:Gem::Requirement
161
- requirements:
162
- - - "~>"
163
- - !ruby/object:Gem::Version
164
- version: '5.0'
165
- - !ruby/object:Gem::Dependency
166
- name: to_regexp
110
+ name: regexp_parser
167
111
  requirement: !ruby/object:Gem::Requirement
168
112
  requirements:
169
113
  - - ">="
@@ -177,41 +121,27 @@ dependencies:
177
121
  - !ruby/object:Gem::Version
178
122
  version: '0'
179
123
  - !ruby/object:Gem::Dependency
180
- name: zeitwerk
124
+ name: reverse_markdown
181
125
  requirement: !ruby/object:Gem::Requirement
182
126
  requirements:
183
- - - ">="
127
+ - - "~>"
184
128
  - !ruby/object:Gem::Version
185
- version: '0'
129
+ version: '2.0'
186
130
  type: :runtime
187
131
  prerelease: false
188
132
  version_requirements: !ruby/object:Gem::Requirement
189
133
  requirements:
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- version: '0'
193
- - !ruby/object:Gem::Dependency
194
- name: bundler
195
- requirement: !ruby/object:Gem::Requirement
196
- requirements:
197
- - - ">="
198
- - !ruby/object:Gem::Version
199
- version: '0'
200
- type: :development
201
- prerelease: false
202
- version_requirements: !ruby/object:Gem::Requirement
203
- requirements:
204
- - - ">="
134
+ - - "~>"
205
135
  - !ruby/object:Gem::Version
206
- version: '0'
136
+ version: '2.0'
207
137
  - !ruby/object:Gem::Dependency
208
- name: byebug
138
+ name: rss
209
139
  requirement: !ruby/object:Gem::Requirement
210
140
  requirements:
211
141
  - - ">="
212
142
  - !ruby/object:Gem::Version
213
143
  version: '0'
214
- type: :development
144
+ type: :runtime
215
145
  prerelease: false
216
146
  version_requirements: !ruby/object:Gem::Requirement
217
147
  requirements:
@@ -219,69 +149,27 @@ dependencies:
219
149
  - !ruby/object:Gem::Version
220
150
  version: '0'
221
151
  - !ruby/object:Gem::Dependency
222
- name: rspec
152
+ name: sanitize
223
153
  requirement: !ruby/object:Gem::Requirement
224
154
  requirements:
225
155
  - - "~>"
226
156
  - !ruby/object:Gem::Version
227
- version: '3.0'
228
- type: :development
157
+ version: '6.0'
158
+ type: :runtime
229
159
  prerelease: false
230
160
  version_requirements: !ruby/object:Gem::Requirement
231
161
  requirements:
232
162
  - - "~>"
233
163
  - !ruby/object:Gem::Version
234
- version: '3.0'
164
+ version: '6.0'
235
165
  - !ruby/object:Gem::Dependency
236
- name: rubocop
166
+ name: thor
237
167
  requirement: !ruby/object:Gem::Requirement
238
168
  requirements:
239
169
  - - ">="
240
170
  - !ruby/object:Gem::Version
241
171
  version: '0'
242
- type: :development
243
- prerelease: false
244
- version_requirements: !ruby/object:Gem::Requirement
245
- requirements:
246
- - - ">="
247
- - !ruby/object:Gem::Version
248
- version: '0'
249
- - !ruby/object:Gem::Dependency
250
- name: rubocop-performance
251
- requirement: !ruby/object:Gem::Requirement
252
- requirements:
253
- - - ">="
254
- - !ruby/object:Gem::Version
255
- version: '0'
256
- type: :development
257
- prerelease: false
258
- version_requirements: !ruby/object:Gem::Requirement
259
- requirements:
260
- - - ">="
261
- - !ruby/object:Gem::Version
262
- version: '0'
263
- - !ruby/object:Gem::Dependency
264
- name: rubocop-rspec
265
- requirement: !ruby/object:Gem::Requirement
266
- requirements:
267
- - - ">="
268
- - !ruby/object:Gem::Version
269
- version: '0'
270
- type: :development
271
- prerelease: false
272
- version_requirements: !ruby/object:Gem::Requirement
273
- requirements:
274
- - - ">="
275
- - !ruby/object:Gem::Version
276
- version: '0'
277
- - !ruby/object:Gem::Dependency
278
- name: simplecov
279
- requirement: !ruby/object:Gem::Requirement
280
- requirements:
281
- - - ">="
282
- - !ruby/object:Gem::Version
283
- version: '0'
284
- type: :development
172
+ type: :runtime
285
173
  prerelease: false
286
174
  version_requirements: !ruby/object:Gem::Requirement
287
175
  requirements:
@@ -289,13 +177,13 @@ dependencies:
289
177
  - !ruby/object:Gem::Version
290
178
  version: '0'
291
179
  - !ruby/object:Gem::Dependency
292
- name: vcr
180
+ name: tzinfo
293
181
  requirement: !ruby/object:Gem::Requirement
294
182
  requirements:
295
183
  - - ">="
296
184
  - !ruby/object:Gem::Version
297
185
  version: '0'
298
- type: :development
186
+ type: :runtime
299
187
  prerelease: false
300
188
  version_requirements: !ruby/object:Gem::Requirement
301
189
  requirements:
@@ -303,13 +191,13 @@ dependencies:
303
191
  - !ruby/object:Gem::Version
304
192
  version: '0'
305
193
  - !ruby/object:Gem::Dependency
306
- name: yard
194
+ name: zeitwerk
307
195
  requirement: !ruby/object:Gem::Requirement
308
196
  requirements:
309
197
  - - ">="
310
198
  - !ruby/object:Gem::Version
311
199
  version: '0'
312
- type: :development
200
+ type: :runtime
313
201
  prerelease: false
314
202
  version_requirements: !ruby/object:Gem::Requirement
315
203
  requirements:
@@ -320,52 +208,62 @@ description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss insta
320
208
  in return.
321
209
  email:
322
210
  - html2rss@desmarais.de
323
- executables: []
211
+ executables:
212
+ - html2rss
324
213
  extensions: []
325
214
  extra_rdoc_files: []
326
215
  files:
327
216
  - ".gitignore"
217
+ - ".mergify.yml"
328
218
  - ".rspec"
329
219
  - ".rubocop.yml"
330
- - ".travis.yml"
331
220
  - ".yardopts"
332
- - CHANGELOG.md
333
221
  - Gemfile
334
222
  - Gemfile.lock
335
223
  - LICENSE
336
224
  - README.md
337
225
  - bin/console
338
226
  - bin/setup
227
+ - exe/html2rss
339
228
  - html2rss.gemspec
340
229
  - lib/html2rss.rb
341
230
  - lib/html2rss/attribute_post_processors.rb
342
231
  - lib/html2rss/attribute_post_processors/gsub.rb
343
232
  - lib/html2rss/attribute_post_processors/html_to_markdown.rb
233
+ - lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
234
+ - lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb
344
235
  - lib/html2rss/attribute_post_processors/markdown_to_html.rb
345
236
  - lib/html2rss/attribute_post_processors/parse_time.rb
346
237
  - lib/html2rss/attribute_post_processors/parse_uri.rb
347
238
  - lib/html2rss/attribute_post_processors/sanitize_html.rb
348
239
  - lib/html2rss/attribute_post_processors/substring.rb
349
240
  - lib/html2rss/attribute_post_processors/template.rb
241
+ - lib/html2rss/cli.rb
350
242
  - lib/html2rss/config.rb
351
- - lib/html2rss/feed_builder.rb
243
+ - lib/html2rss/config/channel.rb
244
+ - lib/html2rss/config/selectors.rb
352
245
  - lib/html2rss/item.rb
353
246
  - lib/html2rss/item_extractors.rb
354
247
  - lib/html2rss/item_extractors/attribute.rb
355
- - lib/html2rss/item_extractors/current_time.rb
356
248
  - lib/html2rss/item_extractors/href.rb
357
249
  - lib/html2rss/item_extractors/html.rb
358
250
  - lib/html2rss/item_extractors/static.rb
359
251
  - lib/html2rss/item_extractors/text.rb
252
+ - lib/html2rss/object_to_xml_converter.rb
253
+ - lib/html2rss/rss_builder.rb
254
+ - lib/html2rss/rss_builder/channel.rb
255
+ - lib/html2rss/rss_builder/item.rb
256
+ - lib/html2rss/rss_builder/stylesheet.rb
360
257
  - lib/html2rss/utils.rb
361
258
  - lib/html2rss/version.rb
362
- - support/logo.png
363
- homepage: https://github.com/gildesmarais/html2rss
259
+ - rakefile.rb
260
+ homepage: https://github.com/html2rss/html2rss
364
261
  licenses:
365
262
  - MIT
366
263
  metadata:
367
264
  allowed_push_host: https://rubygems.org
368
- changelog_uri: https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md
265
+ changelog_uri: https://github.com/html2rss/html2rss/releases
266
+ rubygems_mfa_required: 'true'
369
267
  post_install_message:
370
268
  rdoc_options: []
371
269
  require_paths:
@@ -374,15 +272,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
374
272
  requirements:
375
273
  - - ">="
376
274
  - !ruby/object:Gem::Version
377
- version: 2.5.0
275
+ version: '3.1'
378
276
  required_rubygems_version: !ruby/object:Gem::Requirement
379
277
  requirements:
380
278
  - - ">="
381
279
  - !ruby/object:Gem::Version
382
280
  version: '0'
383
281
  requirements: []
384
- rubyforge_project:
385
- rubygems_version: 2.7.7
282
+ rubygems_version: 3.5.11
386
283
  signing_key:
387
284
  specification_version: 4
388
285
  summary: Returns an RSS::Rss object by scraping a URL.
data/.travis.yml DELETED
@@ -1,25 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- cache: bundler
4
-
5
- before_install:
6
- - gem update --system
7
- - gem install bundler
8
-
9
- bundler_args: "--jobs=3 --retry=3"
10
-
11
- rvm:
12
- - 2.5.8
13
- - 2.6.6
14
- - 2.7.1
15
-
16
- script:
17
- - bundle exec rubocop -F
18
- - bundle exec rspec
19
-
20
- deploy:
21
- provider: rubygems
22
- api_key:
23
- secure: bM3Yl8iWdB1Amra3Bm6bIH/mTwHcRhZrX8etFFbJANxIbkhzUOyTKcDMYiWUVM/mBzzv0NOuRejrDR6R0v7E2udrKcLQFCBtv7HqPAXIlkEEyxZy+M1kTqcPzP872E+ZKTn93vCzbiXBLYoMmqgCzqvcO87IBYNzTURHkfFjaYJJdVyZ5EVtbpXf4FhBvuQf9LTk/ocClgwYeuqd+45lO7qHoPatsvbY0vCOfKaiwkdOkBt+hjc56awcYSc9CXn0DCatebPQmQmdrqFd8fKgyCatWS3n+8TPmvzVfNJe44wg3oNfHbWruP85I2LE9ei1iG+iGQIF60fMhGgMJ4EM3REXDE5Mg+GA5uJcgH9Poirut3Ih65jtAzYNGohlmEmc7ysKc0dmG1O3ndwrHjh5KePrOAGDaW6QKG+m5ebIZ+mgrEA+ZVU1mjDM8FlbSKAayoPloslZdllSv7miwGzh6xrHWGQSCURZAkygFh+Kd+Kg1eVlEs+n6aObod82mEOfBPvWPacOrE2fY4B0ocFOKotZBCZSD0ZIixlyslRTnmcJfpRNlYLsQ56oy5uPNUccPQ86NSmmE+qbRdPCLQCKLPm2iYBgOa5iQrfHR/fUgcO0skAZiW4o9QflDgIFS/G+BE6FMHIvjkKA6Ae4KbqGzlF5pGFdo6p4MhlvubwjsVI=
24
- on:
25
- tags: true