html2rss 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38b547bdfe0799d71348690ae20d29d74d88c3cbb0828cdd807a382dec14f895
4
- data.tar.gz: 8e9ae611d6bbf0174bd9038c3aa7a7b214ec8ffa152e990b2224e98d5022bec0
3
+ metadata.gz: 134aacbabf1b61aadcdb6d96757d329bde20429027f97c2c3bbd0328bb02cd7b
4
+ data.tar.gz: 2e6ef3c8a38df3e54983670b17d574bfe771069f662414e58a5c0572ab922b1f
5
5
  SHA512:
6
- metadata.gz: c0c4dc94d9e339d054ec4ee1c586ac21355cdfa1c9664dd05ef01ca474668f7e483baec35bb8abfd1971eff296b7b5c270816c992ec9fc6abd51a89a7ce28000
7
- data.tar.gz: 3f45fb28e10360055ead9b757b4972bd2c52ad3285b94b506c3a6ce66b7bd5ec6364da63cc83b066fab6bc69396ed7408108536b5b9178d4a4a34c28436b4d2d
6
+ metadata.gz: 4e3b80225d14b44820f742f484a418e2d0b90060f7e329f2c89a2786795a1d87bf372f194e873d4a109d8c3649399e0716a41f65479c1b60ef9b8202154a8e48
7
+ data.tar.gz: ed5c095e4457d11208fa67634e526f1345c64f9d48d48df3eb11b8a5ebeb9cb203a62ae329f5388491d57435b1326bf25771349e3a374dc424094d3da60822e0
@@ -39,6 +39,9 @@ Style/BracesAroundHashParameters:
39
39
  Style/HashSyntax:
40
40
  Enabled: true
41
41
 
42
+ Style/FormatStringToken:
43
+ Enabled: false
44
+
42
45
  Layout/SpaceInsideParens:
43
46
  Enabled: true
44
47
 
@@ -1,4 +1,16 @@
1
- # [](https://github.com/gildesmarais/html2rss/compare/v0.8.0...v) (2019-11-02)
1
+ # [](https://github.com/gildesmarais/html2rss/compare/v0.8.1...v) (2019-11-08)
2
+
3
+
4
+
5
+ ## [0.8.1](https://github.com/gildesmarais/html2rss/compare/v0.8.0...v0.8.1) (2019-11-08)
6
+
7
+
8
+ ### Features
9
+
10
+ * auto generate nicer channel's title and description ([#63](https://github.com/gildesmarais/html2rss/issues/63)) ([6db28f6](https://github.com/gildesmarais/html2rss/commit/6db28f6))
11
+ * change default ttl to 360 ([#65](https://github.com/gildesmarais/html2rss/issues/65)) ([605c8db](https://github.com/gildesmarais/html2rss/commit/605c8db))
12
+ * **config:** improve generation of channel.title from channel.url ([#68](https://github.com/gildesmarais/html2rss/issues/68)) ([bc8ecbb](https://github.com/gildesmarais/html2rss/commit/bc8ecbb))
13
+ * **parse_uri:** squish url to not fail on url with padding spaces ([#67](https://github.com/gildesmarais/html2rss/issues/67)) ([e349449](https://github.com/gildesmarais/html2rss/commit/e349449))
2
14
 
3
15
 
4
16
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.8.0)
4
+ html2rss (0.8.1)
5
5
  activesupport (~> 5.0)
6
6
  builder
7
7
  faraday (~> 0.15)
@@ -27,12 +27,12 @@ GEM
27
27
  builder (3.2.3)
28
28
  byebug (11.0.1)
29
29
  concurrent-ruby (1.1.5)
30
- coveralls (0.7.2)
31
- multi_json (~> 1.3)
32
- rest-client (= 1.6.7)
33
- simplecov (>= 0.7)
34
- term-ansicolor (= 1.2.2)
35
- thor (= 0.18.1)
30
+ coveralls (0.8.23)
31
+ json (>= 1.8, < 3)
32
+ simplecov (~> 0.16.1)
33
+ term-ansicolor (~> 1.3)
34
+ thor (>= 0.19.4, < 2.0)
35
+ tins (~> 1.6)
36
36
  crass (1.0.5)
37
37
  diff-lcs (1.3)
38
38
  docile (1.3.2)
@@ -43,7 +43,7 @@ GEM
43
43
  hashie (3.6.0)
44
44
  i18n (1.7.0)
45
45
  concurrent-ruby (~> 1.0)
46
- jaro_winkler (1.5.3)
46
+ jaro_winkler (1.5.4)
47
47
  json (2.2.0)
48
48
  kramdown (2.1.0)
49
49
  mime-types (3.3)
@@ -51,7 +51,6 @@ GEM
51
51
  mime-types-data (3.2019.1009)
52
52
  mini_portile2 (2.4.0)
53
53
  minitest (5.13.0)
54
- multi_json (1.14.1)
55
54
  multipart-post (2.1.1)
56
55
  nokogiri (1.10.5)
57
56
  mini_portile2 (~> 2.4.0)
@@ -61,8 +60,6 @@ GEM
61
60
  parser (2.6.5.0)
62
61
  ast (~> 2.4.0)
63
62
  rainbow (3.0.0)
64
- rest-client (1.6.7)
65
- mime-types (>= 1.16)
66
63
  reverse_markdown (1.3.0)
67
64
  nokogiri
68
65
  rspec (3.9.0)
@@ -78,7 +75,7 @@ GEM
78
75
  diff-lcs (>= 1.2.0, < 2.0)
79
76
  rspec-support (~> 3.9.0)
80
77
  rspec-support (3.9.0)
81
- rubocop (0.75.0)
78
+ rubocop (0.76.0)
82
79
  jaro_winkler (~> 1.5.1)
83
80
  parallel (~> 1.10)
84
81
  parser (>= 2.6)
@@ -94,16 +91,16 @@ GEM
94
91
  crass (~> 1.0.2)
95
92
  nokogiri (>= 1.8.0)
96
93
  nokogumbo (~> 2.0)
97
- simplecov (0.17.1)
94
+ simplecov (0.16.1)
98
95
  docile (~> 1.1)
99
96
  json (>= 1.8, < 3)
100
97
  simplecov-html (~> 0.10.0)
101
98
  simplecov-html (0.10.2)
102
- term-ansicolor (1.2.2)
103
- tins (~> 0.8)
104
- thor (0.18.1)
99
+ term-ansicolor (1.7.1)
100
+ tins (~> 1.0)
101
+ thor (0.20.3)
105
102
  thread_safe (0.3.6)
106
- tins (0.13.2)
103
+ tins (1.22.0)
107
104
  to_regexp (0.2.1)
108
105
  tzinfo (1.2.5)
109
106
  thread_safe (~> 0.1)
data/README.md CHANGED
@@ -15,7 +15,7 @@ With the _feed config_ containing the URL to scrape and
15
15
  CSS selectors for information extraction (like title, URL, ...) your RSS builds.
16
16
  [Extractors](#using-extractors) and chain-able [post processors](#using-post-processors)
17
17
  make information extraction, processing and sanitizing a breeze.
18
- [Scraping JSON](#scraping-json) responses and
18
+ [Scraping JSON](#scraping-and-handling-json-responses) responses and
19
19
  [setting HTTP request headers](#set-any-http-header-in-the-request) is
20
20
  supported, too.
21
21
 
@@ -36,10 +36,7 @@ require 'html2rss'
36
36
 
37
37
  rss =
38
38
  Html2rss.feed(
39
- channel: {
40
- title: 'StackOverflow: Hot Network Questions',
41
- url: 'https://stackoverflow.com/questions'
42
- },
39
+ channel: { url: 'https://stackoverflow.com/questions' },
43
40
  selectors: {
44
41
  items: { selector: '#hot-network-questions > ul > li' },
45
42
  title: { selector: 'a' },
@@ -57,13 +54,15 @@ The contents of both hashes are explained below.
57
54
 
58
55
  ### The `channel`
59
56
 
60
- | attribute | | type | remark |
61
- | ------------- | -------- | ------- | ----------------------- |
62
- | `title` | required | String | |
63
- | `url` | required | String | |
64
- | `ttl` | optional | Integer | time to live in minutes |
65
- | `description` | optional | String | |
66
- | `headers` | optional | Hash | See notes below. |
57
+ | attribute | | type | default | remark |
58
+ | ------------- | -------- | ------- | -------------: | ------------------------------------------ |
59
+ | `url` | required | String | | |
60
+ | `title` | optional | String | auto-generated | |
61
+ | `description` | optional | String | auto-generated | |
62
+ | `ttl` | optional | Integer | `360` | TTL in _minutes_ |
63
+ | `time_zone` | optional | String | `'UTC'` | TimeZone name |
64
+ | `headers` | optional | Hash | `{}` | Set HTTP request headers. See notes below. |
65
+ | `json` | optional | Boolean | `false` | Handle JSON response. See notes below. |
67
66
 
68
67
  ### The `selectors`
69
68
 
@@ -78,18 +77,18 @@ each item has to have at least a `title` or a `description`.
78
77
  Your `selectors` can contain arbitrary selector names, but only these
79
78
  will make it into the RSS feed:
80
79
 
81
- | RSS 2.0 tag | name in html2rss | remark |
82
- | ------------- | ---------------- | --------------------------- |
83
- | `title` | `title` | |
84
- | `description` | `description` | Supports HTML. |
85
- | `link` | `link` | A URL. |
86
- | `author` | `author` | |
87
- | `category` | `categories` | See notes below. |
88
- | `enclosure` | `enclosure` | See notes below. |
89
- | `pubDate` | `update` | An instance of `Time`. |
90
- | `guid` | `guid` | Generated from the `title`. |
91
- | `comments` | `comments` | A URL. |
92
- | `source` | ~~source~~ | Not yet supported. |
80
+ | RSS 2.0 tag | name in `html2rss` | remark |
81
+ | ------------- | ------------------ | --------------------------- |
82
+ | `title` | `title` | |
83
+ | `description` | `description` | Supports HTML. |
84
+ | `link` | `link` | A URL. |
85
+ | `author` | `author` | |
86
+ | `category` | `categories` | See notes below. |
87
+ | `enclosure` | `enclosure` | See notes below. |
88
+ | `pubDate` | `update` | An instance of `Time`. |
89
+ | `guid` | `guid` | Generated from the `title`. |
90
+ | `comments` | `comments` | A URL. |
91
+ | `source` | ~~source~~ | Not yet supported. |
93
92
 
94
93
  ### The `selector` hash
95
94
 
@@ -225,7 +224,7 @@ Note the use of `|` for a multi-line String in YAML.
225
224
 
226
225
  ## Adding `<category>` tags to an item
227
226
 
228
- The `categories` selector takes an array of selector names. The value of those
227
+ The `categories` selector takes an array of selector names. Each value of those
229
228
  selectors will become a `<category>` on the RSS item.
230
229
 
231
230
  <details>
@@ -268,11 +267,11 @@ selectors:
268
267
 
269
268
  ## Adding an `<enclosure>` tag to an item
270
269
 
271
- An enclosure can be 'anything', e.g. a image, audio or video file.
270
+ An enclosure can be any file, e.g. an image, audio or video.
272
271
 
273
272
  The `enclosure` selector needs to return a URL of the content to enclose. If the extracted URL is relative, it will be converted to an absolute one using the channel's URL as base.
274
273
 
275
- Since html2rss does no further inspection of the enclosure, its support comes with trade-offs:
274
+ Since `html2rss` does no further inspection of the enclosure, its support comes with trade-offs:
276
275
 
277
276
  1. The content-type is guessed from the file extension of the URL.
278
277
  2. If the content-type guessing fails, it will default to `application/octet-stream`.
@@ -310,7 +309,7 @@ selectors:
310
309
 
311
310
  </details>
312
311
 
313
- ## Scraping JSON
312
+ ## Scraping and handling JSON responses
314
313
 
315
314
  Although this gem is called **html**​*2rss*, it's possible to scrape and process JSON.
316
315
 
@@ -485,7 +484,7 @@ Find a full example of a `config.yml` at [`spec/config.test.yml`](https://github
485
484
  ## Gotchas and tips & tricks
486
485
 
487
486
  - Check that the channel URL does not redirect to a mobile page with a different markup structure.
488
- - Do not rely on your web browser's developer console. html2rss does not execute JavaScript.
487
+ - Do not rely on your web browser's developer console. `html2rss` does not execute JavaScript.
489
488
  - Fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems efficient (`curl URL | pup`).
490
489
  - [CSS selectors are quite versatile, here's an overview.](https://www.w3.org/TR/selectors-4/#overview)
491
490
 
@@ -38,10 +38,7 @@ module Html2rss
38
38
  ##
39
39
  # @return [String] formatted in Markdown
40
40
  def get
41
- SanitizeHtml.new(
42
- Kramdown::Document.new(@value).to_html,
43
- @env
44
- ).get
41
+ SanitizeHtml.new(Kramdown::Document.new(@value).to_html, @env).get
45
42
  end
46
43
  end
47
44
  end
@@ -5,7 +5,7 @@ module Html2rss
5
5
  #
6
6
  # Imagine this HTML structure:
7
7
  #
8
- # <span>http://why-not-use-a-link.uh</span>
8
+ # <span>http://why-not-use-a-link.uh </span>
9
9
  #
10
10
  # YAML usage example:
11
11
  #
@@ -26,7 +26,7 @@ module Html2rss
26
26
  ##
27
27
  # @return [String]
28
28
  def get
29
- URI(@value).to_s
29
+ URI(@value.to_s.split(' ').join).to_s
30
30
  end
31
31
  end
32
32
  end
@@ -14,11 +14,19 @@ module Html2rss
14
14
  end
15
15
 
16
16
  def ttl
17
- channel_config.fetch 'ttl', 3600
17
+ channel_config.fetch 'ttl', 360
18
18
  end
19
19
 
20
20
  def title
21
- channel_config.fetch 'title', 'html2rss generated title'
21
+ channel_config.fetch 'title' do
22
+ uri = URI(url)
23
+
24
+ nicer_path = uri.path.split('/')
25
+ nicer_path.reject! { |p| p == '' }
26
+ nicer_path.map!(&:titleize)
27
+
28
+ nicer_path.any? ? "#{uri.host}: #{nicer_path.join(' ')}" : uri.host
29
+ end
22
30
  end
23
31
 
24
32
  def language
@@ -26,7 +34,7 @@ module Html2rss
26
34
  end
27
35
 
28
36
  def description
29
- channel_config.fetch 'description', 'A description of my html2rss feed.'
37
+ channel_config.fetch 'description', "Latest items from #{url}."
30
38
  end
31
39
 
32
40
  def url
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.8.0'.freeze
2
+ VERSION = '0.8.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-02 00:00:00.000000000 Z
11
+ date: 2019-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport