html2rss 0.6.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,100 +1,131 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.6.0)
5
- activesupport (~> 5.0)
4
+ html2rss (0.9.0)
5
+ activesupport (>= 5, < 7)
6
+ addressable (~> 2.7)
6
7
  builder
7
- faraday (~> 0.15)
8
- faraday_middleware (~> 0.13)
9
- hashie (~> 3.6)
8
+ faraday (~> 1.0)
9
+ faraday_middleware
10
+ kramdown
11
+ mime-types (> 3.0)
10
12
  nokogiri (>= 1.10, < 2.0)
11
- reverse_markdown (~> 1.3)
13
+ reverse_markdown (~> 2.0)
12
14
  sanitize (~> 5.0)
15
+ to_regexp
16
+ zeitwerk
13
17
 
14
18
  GEM
15
19
  remote: https://rubygems.org/
16
20
  specs:
17
- activesupport (5.2.3)
21
+ activesupport (6.0.3.2)
18
22
  concurrent-ruby (~> 1.0, >= 1.0.2)
19
23
  i18n (>= 0.7, < 2)
20
24
  minitest (~> 5.1)
21
25
  tzinfo (~> 1.1)
22
- ast (2.4.0)
23
- builder (3.2.3)
24
- byebug (11.0.1)
25
- concurrent-ruby (1.1.5)
26
- crass (1.0.4)
26
+ zeitwerk (~> 2.2, >= 2.2.2)
27
+ addressable (2.7.0)
28
+ public_suffix (>= 2.0.2, < 5.0)
29
+ ast (2.4.1)
30
+ builder (3.2.4)
31
+ byebug (11.1.3)
32
+ concurrent-ruby (1.1.6)
33
+ coveralls (0.7.2)
34
+ multi_json (~> 1.3)
35
+ rest-client (= 1.6.7)
36
+ simplecov (>= 0.7)
37
+ term-ansicolor (= 1.2.2)
38
+ thor (= 0.18.1)
39
+ crass (1.0.6)
27
40
  diff-lcs (1.3)
28
41
  docile (1.3.2)
29
- faraday (0.16.2)
42
+ faraday (1.0.1)
30
43
  multipart-post (>= 1.2, < 3)
31
- faraday_middleware (0.13.1)
32
- faraday (>= 0.7.4, < 1.0)
33
- hashie (3.6.0)
34
- i18n (1.7.0)
44
+ faraday_middleware (1.0.0)
45
+ faraday (~> 1.0)
46
+ i18n (1.8.3)
35
47
  concurrent-ruby (~> 1.0)
36
- jaro_winkler (1.5.3)
37
- json (2.2.0)
48
+ kramdown (2.2.1)
49
+ rexml
50
+ mime-types (3.3.1)
51
+ mime-types-data (~> 3.2015)
52
+ mime-types-data (3.2020.0512)
38
53
  mini_portile2 (2.4.0)
39
- minitest (5.12.2)
54
+ minitest (5.14.1)
55
+ multi_json (1.14.1)
40
56
  multipart-post (2.1.1)
41
- nokogiri (1.10.4)
57
+ nokogiri (1.10.9)
42
58
  mini_portile2 (~> 2.4.0)
43
- nokogumbo (2.0.1)
59
+ nokogumbo (2.0.2)
44
60
  nokogiri (~> 1.8, >= 1.8.4)
45
- parallel (1.17.0)
46
- parser (2.6.5.0)
61
+ parallel (1.19.2)
62
+ parser (2.7.1.3)
47
63
  ast (~> 2.4.0)
64
+ public_suffix (4.0.5)
48
65
  rainbow (3.0.0)
49
- reverse_markdown (1.3.0)
66
+ regexp_parser (1.7.1)
67
+ rest-client (1.6.7)
68
+ mime-types (>= 1.16)
69
+ reverse_markdown (2.0.0)
50
70
  nokogiri
51
- rspec (3.8.0)
52
- rspec-core (~> 3.8.0)
53
- rspec-expectations (~> 3.8.0)
54
- rspec-mocks (~> 3.8.0)
55
- rspec-core (3.8.2)
56
- rspec-support (~> 3.8.0)
57
- rspec-expectations (3.8.5)
71
+ rexml (3.2.4)
72
+ rspec (3.9.0)
73
+ rspec-core (~> 3.9.0)
74
+ rspec-expectations (~> 3.9.0)
75
+ rspec-mocks (~> 3.9.0)
76
+ rspec-core (3.9.2)
77
+ rspec-support (~> 3.9.3)
78
+ rspec-expectations (3.9.2)
58
79
  diff-lcs (>= 1.2.0, < 2.0)
59
- rspec-support (~> 3.8.0)
60
- rspec-mocks (3.8.2)
80
+ rspec-support (~> 3.9.0)
81
+ rspec-mocks (3.9.1)
61
82
  diff-lcs (>= 1.2.0, < 2.0)
62
- rspec-support (~> 3.8.0)
63
- rspec-support (3.8.3)
64
- rubocop (0.75.0)
65
- jaro_winkler (~> 1.5.1)
83
+ rspec-support (~> 3.9.0)
84
+ rspec-support (3.9.3)
85
+ rubocop (0.85.1)
66
86
  parallel (~> 1.10)
67
- parser (>= 2.6)
87
+ parser (>= 2.7.0.1)
68
88
  rainbow (>= 2.2.2, < 4.0)
89
+ regexp_parser (>= 1.7)
90
+ rexml
91
+ rubocop-ast (>= 0.0.3)
69
92
  ruby-progressbar (~> 1.7)
70
- unicode-display_width (>= 1.4.0, < 1.7)
71
- rubocop-performance (1.5.0)
93
+ unicode-display_width (>= 1.4.0, < 2.0)
94
+ rubocop-ast (0.0.3)
95
+ parser (>= 2.7.0.1)
96
+ rubocop-performance (1.6.1)
72
97
  rubocop (>= 0.71.0)
73
- rubocop-rspec (1.36.0)
98
+ rubocop-rspec (1.40.0)
74
99
  rubocop (>= 0.68.1)
75
100
  ruby-progressbar (1.10.1)
76
- sanitize (5.1.0)
101
+ sanitize (5.2.1)
77
102
  crass (~> 1.0.2)
78
103
  nokogiri (>= 1.8.0)
79
104
  nokogumbo (~> 2.0)
80
- simplecov (0.17.1)
105
+ simplecov (0.18.5)
81
106
  docile (~> 1.1)
82
- json (>= 1.8, < 3)
83
- simplecov-html (~> 0.10.0)
84
- simplecov-html (0.10.2)
107
+ simplecov-html (~> 0.11)
108
+ simplecov-html (0.12.2)
109
+ term-ansicolor (1.2.2)
110
+ tins (~> 0.8)
111
+ thor (0.18.1)
85
112
  thread_safe (0.3.6)
86
- tzinfo (1.2.5)
113
+ tins (0.13.2)
114
+ to_regexp (0.2.1)
115
+ tzinfo (1.2.7)
87
116
  thread_safe (~> 0.1)
88
- unicode-display_width (1.6.0)
89
- vcr (5.0.0)
90
- yard (0.9.20)
117
+ unicode-display_width (1.7.0)
118
+ vcr (6.0.0)
119
+ yard (0.9.25)
120
+ zeitwerk (2.3.0)
91
121
 
92
122
  PLATFORMS
93
123
  ruby
94
124
 
95
125
  DEPENDENCIES
96
- bundler (~> 1.16)
126
+ bundler
97
127
  byebug
128
+ coveralls
98
129
  html2rss!
99
130
  rspec (~> 3.0)
100
131
  rubocop
@@ -105,4 +136,4 @@ DEPENDENCIES
105
136
  yard
106
137
 
107
138
  BUNDLED WITH
108
- 1.17.2
139
+ 2.1.4
data/README.md CHANGED
@@ -2,63 +2,375 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss)
4
4
  [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](http://rubygems.org/gems/html2rss/)
5
- [API docs on RubyDoc.info](https://www.rubydoc.info/gems/html2rss)
5
+ [![Coverage Status](https://coveralls.io/repos/github/gildesmarais/html2rss/badge.svg?branch=master)](https://coveralls.io/github/gildesmarais/html2rss?branch=master)
6
+ [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://www.rubydoc.info/gems/html2rss)
7
+ ![Retro Badge: valid RSS](https://validator.w3.org/feed/images/valid-rss-rogers.png)
8
+ [![](http://img.shields.io/liberapay/goal/gildesmarais.svg?logo=liberapay)](https://liberapay.com/gildesmarais/donate)
6
9
 
7
- Request HTML from an URL and transform it to a Ruby RSS 2.0 object.
10
+ **Searching for a ready to use app which serves generated feeds via HTTP?**
11
+ [Head over to `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
8
12
 
9
- **Are you searching for a ready to use "website to RSS" solution?**
10
- [Check out `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
13
+ This Ruby gem builds RSS 2.0 feeds from a _feed config_.
11
14
 
12
- Each website needs a _feed config_ which contains the URL to scrape and
13
- CSS selectors to extract the required information (like title, URL, ...).
14
- This gem provides [extractors](https://github.com/gildesmarais/html2rss/blob/master/lib/html2rss/item_extractors) (e.g. extract the information from an HTML attribute)
15
- and chainable [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/attribute_post_processors) to make information retrieval even easier.
15
+ With the _feed config_ containing the URL to scrape and
16
+ CSS selectors for information extraction (like title, URL, ...), your RSS feed gets built.
17
+ [Extractors](#using-extractors) and chain-able [post processors](#using-post-processors)
18
+ make information extraction, processing and sanitizing a breeze.
19
+ [Scraping JSON](#scraping-and-handling-json-responses) responses and
20
+ [setting HTTP request headers](#set-any-http-header-in-the-request) is
21
+ supported, too.
16
22
 
17
23
  ## Installation
18
24
 
19
- Add this line to your application's Gemfile: `gem 'html2rss'`
20
- Then execute: `bundle`
25
+ | 🤩 Like it? | Star it! ⭐️ |
26
+ | ---------------------------------------------: | -------------------- |
27
+ | Add this line to your application's `Gemfile`: | `gem 'html2rss'` |
28
+ | Then execute: | `bundle` |
29
+ | In your code: | `require 'html2rss'` |
30
+
31
+ 😍 Love it? Feel free [to donate](https://liberapay.com/gildesmarais/donate). Thank you! 💓
32
+
33
+ ## Building a feed config
34
+
35
+ Here's a minimal working example:
36
+
37
+ ```ruby
38
+ require 'html2rss'
39
+
40
+ rss =
41
+ Html2rss.feed(
42
+ channel: { url: 'https://stackoverflow.com/questions' },
43
+ selectors: {
44
+ items: { selector: '#hot-network-questions > ul > li' },
45
+ title: { selector: 'a' },
46
+ link: { selector: 'a', extractor: 'href' }
47
+ }
48
+ )
49
+
50
+ puts rss
51
+ ```
52
+
53
+ A _feed config_ consists of a `channel` and a `selectors` Hash.
54
+ The contents of both hashes are explained below.
55
+
56
+ **Looks too complicated?** See [`html2rss-configs`](https://github.com/gildesmarais/html2rss-configs) for ready-made feed configs!
57
+
58
+ ### The `channel`
59
+
60
+ | attribute | | type | default | remark |
61
+ | ------------- | -------- | ------- | -------------: | ------------------------------------------ |
62
+ | `url` | required | String | | |
63
+ | `title` | optional | String | auto-generated | |
64
+ | `description` | optional | String | auto-generated | |
65
+ | `ttl` | optional | Integer | `360` | TTL in _minutes_ |
66
+ | `time_zone` | optional | String | `'UTC'` | TimeZone name |
67
+ | `language` | optional | String | `'en'` | Language code |
68
+ | `author` | optional | String | | Format: `email (Name)` |
69
+ | `headers` | optional | Hash | `{}` | Set HTTP request headers. See notes below. |
70
+ | `json` | optional | Boolean | `false` | Handle JSON response. See notes below. |
71
+
72
+ ### The `selectors`
73
+
74
+ You must provide an `items` selector hash which contains the CSS selector.
75
+ `items` needs to return a collection of HTML tags.
76
+ The other selectors are scoped to the tags of the items' collection.
77
+
78
+ To build a
79
+ [valid RSS 2.0 item](http://www.rssboard.org/rss-profile#element-channel-item)
80
+ each item has to have at least a `title` or a `description`.
81
+
82
+ Your `selectors` can contain arbitrary selector names, but only these
83
+ will make it into the RSS feed:
84
+
85
+ | RSS 2.0 tag | name in `html2rss` | remark |
86
+ | ------------- | ------------------ | --------------------------- |
87
+ | `title` | `title` | |
88
+ | `description` | `description` | Supports HTML. |
89
+ | `link` | `link` | A URL. |
90
+ | `author` | `author` | |
91
+ | `category` | `categories` | See notes below. |
92
+ | `enclosure` | `enclosure` | See notes below. |
93
+ | `pubDate` | `update` | An instance of `Time`. |
94
+ | `guid` | `guid` | Generated from the `title`. |
95
+ | `comments` | `comments` | A URL. |
96
+ | `source` | ~~source~~ | Not yet supported. |
97
+
98
+ ### The `selector` hash
99
+
100
+ Your selector hash can have these attributes:
101
+
102
+ | name | value |
103
+ | -------------- | -------------------------------------------------------- |
104
+ | `selector` | The CSS selector to select the tag with the information. |
105
+ | `extractor` | Name of the extractor. See notes below. |
106
+ | `post_process` | A hash or array of hashes. See notes below. |
107
+
108
+ #### Reverse ordering of items
109
+
110
+ The `items` selector hash can have an `order` attribute.
111
+ If the value is `reverse` the order of items in the RSS will be reversed.
112
+
113
+ <details>
114
+ <summary>See a YAML feed config example</summary>
115
+
116
+ ```yml
117
+ channel:
118
+   # ... omitted
119
+ selectors:
120
+ items:
121
+ selector: 'ul > li'
122
+ order: 'reverse'
123
+   # ... omitted
124
+ ```
125
+
126
+ </details>
127
+
128
+ ## Using extractors
129
+
130
+ Extractors help with extracting the information from the selected HTML tag.
131
+
132
+ - The default extractor is `text`, which returns the tag's inner text.
133
+ - The `html` extractor returns the tag's outer HTML.
134
+ - The `href` extractor returns a URL from the tag's `href` attribute and corrects relative ones to absolute ones.
135
+ - The `attribute` extractor returns the value of that tag's attribute.
136
+ - The `static` extractor returns the configured static value (it doesn't extract anything).
137
+ - [See file list of extractors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/item_extractors).
138
+
139
+ Extractors can require additional attributes on the selector hash.
140
+ 👉 [Read their docs for usage examples](https://www.rubydoc.info/gems/html2rss/Html2rss/ItemExtractors).
141
+
142
+ <details>
143
+ <summary>See a Ruby example</summary>
144
+
145
+ ```ruby
146
+ Html2rss.feed(
147
+ channel: {}, selectors: { link: { selector: 'a', extractor: 'href' } }
148
+ )
149
+ ```
150
+
151
+ </details>
152
+
153
+ <details>
154
+ <summary>See a YAML feed config example</summary>
155
+
156
+ ```yml
157
+ channel:
158
+   # ... omitted
159
+ selectors:
160
+   # ... omitted
161
+ link:
162
+ selector: 'a'
163
+ extractor: 'href'
164
+ ```
165
+
166
+ </details>
167
+
168
+ ## Using post processors
169
+
170
+ Extracted information can be further manipulated with post processors.
171
+
172
+ | name | |
173
+ | ------------------ | ------------------------------------------------------------------------------------- |
174
+ | `gsub` | Allows global substitution operations on Strings (Regexp or simple pattern). |
175
+ | `html_to_markdown` | HTML to Markdown, using [reverse_markdown](https://github.com/xijo/reverse_markdown). |
176
+ | `markdown_to_html` | Converts Markdown to HTML, using [kramdown](https://github.com/gettalong/kramdown). |
177
+ | `parse_time` | Parses a String containing a time in a time zone. |
178
+ | `parse_uri` | Parses a String as URL. |
179
+ | `sanitize_html` | Strips unsafe and unneeded HTML and adds security-related attributes. |
180
+ | `substring` | Cuts a part off of a String, starting at a position. |
181
+ | `template` | Based on a template, it creates a new String filled with other selectors values. |
182
+
183
+ ⚠️ Always make use of the `sanitize_html` post processor for HTML content. _Never trust the internet!_ ⚠️
184
+
185
+ - [See file list of post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/attribute_post_processors).
186
+
187
+ 👉 [Read their docs for usage examples.](https://www.rubydoc.info/gems/html2rss/Html2rss/AttributePostProcessors)
188
+
189
+ <details>
190
+ <summary>See a Ruby example</summary>
21
191
 
22
192
  ```ruby
23
- rss = Html2rss.feed(
24
- channel: { title: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com/questions' },
193
+ Html2rss.feed(
194
+ channel: {},
25
195
  selectors: {
26
- items: { selector: '#hot-network-questions > ul > li' },
27
- title: { selector: 'a' },
28
- link: { selector: 'a', extractor: 'href' }
196
+ description: {
197
+ selector: '.content', post_process: { name: 'sanitize_html' }
198
+ }
29
199
  }
30
200
  )
201
+ ```
202
+
203
+ </details>
204
+
205
+ <details>
206
+ <summary>See a YAML feed config example</summary>
31
207
 
32
- puts rss.to_s
208
+ ```yml
209
+ channel:
210
+   # ... omitted
211
+ selectors:
212
+   # ... omitted
213
+ description:
214
+ selector: '.content'
215
+ post_process:
216
+ - name: sanitize_html
33
217
  ```
34
218
 
35
- ## Usage with a YAML config file
219
+ </details>
220
+
221
+ ### Chaining post processors
222
+
223
+ Pass an array to `post_process` to chain the post processors.
224
+
225
+ <details>
226
+ <summary>YAML example: build the description from a template String (in Markdown) and convert that Markdown to HTML</summary>
227
+
228
+ ```yml
229
+ channel:
230
+   # ... omitted
231
+ selectors:
232
+   # ... omitted
233
+ price:
234
+ selector: '.price'
235
+ description:
236
+ selector: '.section'
237
+ post_process:
238
+ - name: template
239
+ string: |
240
+ # %{self}
241
+
242
+ Price: %{price}
243
+ - name: markdown_to_html
244
+ ```
245
+
246
+ Note the use of `|` for a multi-line String in YAML.
247
+
248
+ </details>
249
+
250
+ ## Adding `<category>` tags to an item
251
+
252
+ The `categories` selector takes an array of selector names. Each value of those
253
+ selectors will become a `<category>` on the RSS item.
254
+
255
+ <details>
256
+ <summary>See a Ruby example</summary>
257
+
258
+ ```ruby
259
+ Html2rss.feed(
260
+ channel: {},
261
+ selectors: {
262
+ genre: {
263
+ # ... omitted
264
+ selector: '.genre'
265
+ },
266
+ branch: { selector: '.branch' },
267
+ categories: %i[genre branch]
268
+ }
269
+ )
270
+ ```
271
+
272
+ </details>
273
+
274
+ <details>
275
+ <summary>See a YAML feed config example</summary>
276
+
277
+ ```yml
278
+ channel:
279
+   # ... omitted
280
+ selectors:
281
+ # ... omitted
282
+ genre:
283
+ selector: ".genre"
284
+ branch:
285
+ selector: ".branch"
286
+ categories:
287
+ - genre
288
+ - branch
289
+ ```
290
+
291
+ </details>
36
292
 
37
- Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
293
+ ## Adding an `<enclosure>` tag to an item
38
294
 
39
- `Html2rss.feed_from_yaml_config(File.join(['spec', 'config.test.yml']), 'nuxt-releases')` returns
295
+ An enclosure can be any file, e.g. an image, audio or video.
40
296
 
41
- an `RSS:Rss` object.
297
+ The `enclosure` selector needs to return a URL of the content to enclose. If the extracted URL is relative, it will be converted to an absolute one using the channel's URL as base.
42
298
 
43
- **Too complicated?** See [`html2rss-configs`](https://github.com/gildesmarais/html2rss-configs) for ready-made feed configs!
299
+ Since `html2rss` does no further inspection of the enclosure, its support comes with trade-offs:
44
300
 
45
- ## Scraping JSON
301
+ 1. The content-type is guessed from the file extension of the URL.
302
+ 2. If the content-type guessing fails, it will default to `application/octet-stream`.
303
+ 3. The content-length will always be undetermined and thus stated as `0` bytes.
304
+
305
+ Read the [RSS 2.0 spec](http://www.rssboard.org/rss-profile#element-channel-item-enclosure) for further information on enclosing content.
306
+
307
+ <details>
308
+ <summary>See a Ruby example</summary>
309
+
310
+ ```ruby
311
+ Html2rss.feed(
312
+ channel: {},
313
+ selectors: {
314
+ enclosure: { selector: 'img', extractor: 'attribute', attribute: 'src' }
315
+ }
316
+ )
317
+ ```
46
318
 
47
- Since 0.5.0 it is possible to scrape and process JSON.
319
+ </details>
320
+
321
+ <details>
322
+ <summary>See a YAML feed config example</summary>
323
+
324
+ ```yml
325
+ channel:
326
+   # ... omitted
327
+ selectors:
328
+   # ... omitted
329
+ enclosure:
330
+ selector: "img"
331
+ extractor: "attribute"
332
+ attribute: "src"
333
+ ```
334
+
335
+ </details>
336
+
337
+ ## Scraping and handling JSON responses
338
+
339
+ Although this gem is called **html**​*2rss*, it's possible to scrape and process JSON.
48
340
 
49
341
  Adding `json: true` to the channel config will convert the JSON response to XML.
50
342
 
51
- Feed config:
343
+ <details>
344
+ <summary>See a Ruby example</summary>
345
+
346
+ ```ruby
347
+ Html2rss.feed(
348
+ channel: {
349
+ url: 'https://example.com', json: true
350
+ },
351
+ selectors: {} # ... omitted
352
+ )
353
+ ```
354
+
355
+ </details>
356
+
357
+ <details>
358
+ <summary>See a YAML feed config example</summary>
52
359
 
53
360
  ```yaml
54
361
  channel:
55
362
  url: https://example.com
56
- title: "Example with JSON"
57
363
  json: true
58
- # ...
364
+ selectors:
365
+   # ... omitted
59
366
  ```
60
367
 
61
- Imagine this HTTP response:
368
+ </details>
369
+
370
+ <details>
371
+ <summary>See example of a converted JSON object</summary>
372
+
373
+ This JSON object:
62
374
 
63
375
  ```json
64
376
  {
@@ -66,58 +378,160 @@ Imagine this HTTP response:
66
378
  }
67
379
  ```
68
380
 
69
- will be converted to:
381
+ converts to:
70
382
 
71
383
  ```xml
72
- <html>
384
+ <hash>
73
385
  <data>
74
386
  <datum>
75
387
  <title>Headline</title>
76
388
  <url>https://example.com</url>
77
389
  </datum>
78
390
  </data>
79
- </html>
391
+ </hash>
392
+ ```
393
+
394
+ Your items selector would be `data > datum`, the item's `link` selector would be `url`.
395
+
396
+ Find further information in [ActiveSupport's `Hash.to_xml` documentation](https://apidock.com/rails/Hash/to_xml).
397
+
398
+ </details>
399
+
400
+ <details>
401
+ <summary>See example of a converted JSON array</summary>
402
+
403
+ This JSON array:
404
+
405
+ ```json
406
+ [{ "title": "Headline", "url": "https://example.com" }]
80
407
  ```
81
408
 
82
- Your items selector would be `data > datum`, the item's link selector would be `url`.
409
+ converts to:
83
410
 
84
- Under the hood it uses ActiveSupport's [`Hash.to_xml`](https://apidock.com/rails/Hash/to_xml) core extension for the JSON to XML conversion.
411
+ ```xml
412
+ <objects>
413
+ <object>
414
+ <title>Headline</title>
415
+ <url>https://example.com</url>
416
+ </object>
417
+ </objects>
418
+ ```
419
+
420
+ Your items selector would be `objects > object`, the item's `link` selector would be `url`.
421
+
422
+ Find further information in [ActiveSupport's `Array.to_xml` documentation](https://apidock.com/rails/Array/to_xml).
423
+
424
+ </details>
85
425
 
86
426
  ## Set any HTTP header in the request
87
427
 
88
428
  You can add any HTTP headers to the request to the channel URL.
89
- You can use this to e.g. have Cookie or Authorization information being sent or to overwrite the User-Agent.
429
+ Use this to e.g. have Cookie or Authorization information sent or to spoof the User-Agent.
430
+
431
+ <details>
432
+ <summary>See a Ruby example</summary>
433
+
434
+ ```ruby
435
+ Html2rss.feed(
436
+ channel: {
437
+ url: 'https://example.com',
438
+ headers: {
439
+ "User-Agent": "html2rss-request",
440
+ "X-Something": "Foobar",
441
+ "Authorization": "Token deadbea7",
442
+ "Cookie": "monster=MeWantCookie"
443
+ }
444
+ },
445
+ selectors: {}
446
+ )
447
+ ```
448
+
449
+ </details>
450
+
451
+ <details>
452
+ <summary>See a YAML feed config example</summary>
90
453
 
91
454
  ```yaml
92
455
  channel:
93
456
  url: https://example.com
94
- title: "Example with http headers"
95
457
  headers:
96
458
  "User-Agent": "html2rss-request"
97
459
  "X-Something": "Foobar"
98
460
  "Authorization": "Token deadbea7"
99
461
  "Cookie": "monster=MeWantCookie"
100
- # ...
462
+ selectors:
463
+   # ...
101
464
  ```
102
465
 
103
- The headers provided by the channel will be merged into the global headers.
466
+ </details>
104
467
 
105
- ## Development
468
+ The headers provided by the channel are merged into the global headers.
106
469
 
107
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
470
+ ## Usage with a YAML config file
108
471
 
109
- ## Contributing
472
+ This step is not required to work with this gem. If you're using
473
+ [`html2rss-web`](https://github.com/gildesmarais/html2rss-web)
474
+ and want to create your private feed configs, keep on reading!
475
+
476
+ First, create your YAML file, e.g. called `feeds.yml`.
477
+ This file will contain your global config and feed configs.
478
+
479
+ Example:
480
+
481
+ ```yml
482
+ headers:
483
+ 'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1"
484
+ feeds:
485
+ myfeed:
486
+ channel:
487
+ selectors:
488
+ myotherfeed:
489
+ channel:
490
+ selectors:
491
+ ```
110
492
 
111
- Bug reports and pull requests are welcome on GitHub at https://github.com/gildesmarais/html2rss.
493
+ Your feed configs go below `feeds`. Everything else is part of the global config.
112
494
 
113
- ## Releasing a new version
495
+ Build your feeds like this:
496
+
497
+ ```ruby
498
+ require 'html2rss'
499
+
500
+ myfeed = Html2rss.feed_from_yaml_config('feeds.yml', 'myfeed')
501
+ myotherfeed = Html2rss.feed_from_yaml_config('feeds.yml', 'myotherfeed')
502
+ ```
503
+
504
+ Find a full example of a `feeds.yml` at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
505
+
506
+ ## Gotchas and tips & tricks
507
+
508
+ - Check that the channel URL does not redirect to a mobile page with a different markup structure.
509
+ - Do not rely on your web browser's developer console. `html2rss` does not execute JavaScript.
510
+ - Fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems efficient (`curl URL | pup`).
511
+ - [CSS selectors are quite versatile, here's an overview.](https://www.w3.org/TR/selectors-4/#overview)
512
+
513
+ ## Development
514
+
515
+ After checking out the repository, run `bin/setup` to install dependencies. Then, run `bundle exec rspec` to run the tests.
516
+ You can also run `bin/console` for an interactive prompt that will allow you to experiment.
517
+
518
+ <details>
519
+ <summary>Releasing a new version</summary>
114
520
 
115
521
  1. `git pull`
116
522
  2. increase version in `lib/html2rss/version.rb`
117
523
  3. `bundle`
118
- 4. commit the changes
119
- 5. `git tag v....`
120
- 6. [`standard-changelog -f`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog)
121
- 7. `git add CHANGELOG.md && git commit --amend`
122
- 8. `git tag v.... -f`
123
- 9. `git push && git push --tags`
524
+ 4. `git add Gemfile.lock lib/html2rss/version.rb`
525
+ 5. `VERSION=$(ruby -e 'require "./lib/html2rss/version.rb"; puts Html2rss::VERSION')`
526
+ 6. `git commit -m "chore: release $VERSION"`
527
+ 7. `git tag v$VERSION`
528
+ 8. [`standard-changelog -f`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog)
529
+ 9. `git add CHANGELOG.md && git commit --amend`
530
+ 10. `git tag v$VERSION -f`
531
+ 11. `git push && git push --tags`
532
+
533
+ </details>
534
+
535
+ ## Contributing
536
+
537
+ Bug reports and pull requests are welcome on GitHub at https://github.com/gildesmarais/html2rss.