html2rss 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38b547bdfe0799d71348690ae20d29d74d88c3cbb0828cdd807a382dec14f895
4
- data.tar.gz: 8e9ae611d6bbf0174bd9038c3aa7a7b214ec8ffa152e990b2224e98d5022bec0
3
+ metadata.gz: 134aacbabf1b61aadcdb6d96757d329bde20429027f97c2c3bbd0328bb02cd7b
4
+ data.tar.gz: 2e6ef3c8a38df3e54983670b17d574bfe771069f662414e58a5c0572ab922b1f
5
5
  SHA512:
6
- metadata.gz: c0c4dc94d9e339d054ec4ee1c586ac21355cdfa1c9664dd05ef01ca474668f7e483baec35bb8abfd1971eff296b7b5c270816c992ec9fc6abd51a89a7ce28000
7
- data.tar.gz: 3f45fb28e10360055ead9b757b4972bd2c52ad3285b94b506c3a6ce66b7bd5ec6364da63cc83b066fab6bc69396ed7408108536b5b9178d4a4a34c28436b4d2d
6
+ metadata.gz: 4e3b80225d14b44820f742f484a418e2d0b90060f7e329f2c89a2786795a1d87bf372f194e873d4a109d8c3649399e0716a41f65479c1b60ef9b8202154a8e48
7
+ data.tar.gz: ed5c095e4457d11208fa67634e526f1345c64f9d48d48df3eb11b8a5ebeb9cb203a62ae329f5388491d57435b1326bf25771349e3a374dc424094d3da60822e0
@@ -39,6 +39,9 @@ Style/BracesAroundHashParameters:
39
39
  Style/HashSyntax:
40
40
  Enabled: true
41
41
 
42
+ Style/FormatStringToken:
43
+ Enabled: false
44
+
42
45
  Layout/SpaceInsideParens:
43
46
  Enabled: true
44
47
 
@@ -1,4 +1,16 @@
1
- # [](https://github.com/gildesmarais/html2rss/compare/v0.8.0...v) (2019-11-02)
1
+ # [](https://github.com/gildesmarais/html2rss/compare/v0.8.1...v) (2019-11-08)
2
+
3
+
4
+
5
+ ## [0.8.1](https://github.com/gildesmarais/html2rss/compare/v0.8.0...v0.8.1) (2019-11-08)
6
+
7
+
8
+ ### Features
9
+
10
+ * auto generate nicer channel's title and description ([#63](https://github.com/gildesmarais/html2rss/issues/63)) ([6db28f6](https://github.com/gildesmarais/html2rss/commit/6db28f6))
11
+ * change default ttl to 360 ([#65](https://github.com/gildesmarais/html2rss/issues/65)) ([605c8db](https://github.com/gildesmarais/html2rss/commit/605c8db))
12
+ * **config:** improve generation of channel.title from channel.url ([#68](https://github.com/gildesmarais/html2rss/issues/68)) ([bc8ecbb](https://github.com/gildesmarais/html2rss/commit/bc8ecbb))
13
+ * **parse_uri:** squish url to not fail on url with padding spaces ([#67](https://github.com/gildesmarais/html2rss/issues/67)) ([e349449](https://github.com/gildesmarais/html2rss/commit/e349449))
2
14
 
3
15
 
4
16
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.8.0)
4
+ html2rss (0.8.1)
5
5
  activesupport (~> 5.0)
6
6
  builder
7
7
  faraday (~> 0.15)
@@ -27,12 +27,12 @@ GEM
27
27
  builder (3.2.3)
28
28
  byebug (11.0.1)
29
29
  concurrent-ruby (1.1.5)
30
- coveralls (0.7.2)
31
- multi_json (~> 1.3)
32
- rest-client (= 1.6.7)
33
- simplecov (>= 0.7)
34
- term-ansicolor (= 1.2.2)
35
- thor (= 0.18.1)
30
+ coveralls (0.8.23)
31
+ json (>= 1.8, < 3)
32
+ simplecov (~> 0.16.1)
33
+ term-ansicolor (~> 1.3)
34
+ thor (>= 0.19.4, < 2.0)
35
+ tins (~> 1.6)
36
36
  crass (1.0.5)
37
37
  diff-lcs (1.3)
38
38
  docile (1.3.2)
@@ -43,7 +43,7 @@ GEM
43
43
  hashie (3.6.0)
44
44
  i18n (1.7.0)
45
45
  concurrent-ruby (~> 1.0)
46
- jaro_winkler (1.5.3)
46
+ jaro_winkler (1.5.4)
47
47
  json (2.2.0)
48
48
  kramdown (2.1.0)
49
49
  mime-types (3.3)
@@ -51,7 +51,6 @@ GEM
51
51
  mime-types-data (3.2019.1009)
52
52
  mini_portile2 (2.4.0)
53
53
  minitest (5.13.0)
54
- multi_json (1.14.1)
55
54
  multipart-post (2.1.1)
56
55
  nokogiri (1.10.5)
57
56
  mini_portile2 (~> 2.4.0)
@@ -61,8 +60,6 @@ GEM
61
60
  parser (2.6.5.0)
62
61
  ast (~> 2.4.0)
63
62
  rainbow (3.0.0)
64
- rest-client (1.6.7)
65
- mime-types (>= 1.16)
66
63
  reverse_markdown (1.3.0)
67
64
  nokogiri
68
65
  rspec (3.9.0)
@@ -78,7 +75,7 @@ GEM
78
75
  diff-lcs (>= 1.2.0, < 2.0)
79
76
  rspec-support (~> 3.9.0)
80
77
  rspec-support (3.9.0)
81
- rubocop (0.75.0)
78
+ rubocop (0.76.0)
82
79
  jaro_winkler (~> 1.5.1)
83
80
  parallel (~> 1.10)
84
81
  parser (>= 2.6)
@@ -94,16 +91,16 @@ GEM
94
91
  crass (~> 1.0.2)
95
92
  nokogiri (>= 1.8.0)
96
93
  nokogumbo (~> 2.0)
97
- simplecov (0.17.1)
94
+ simplecov (0.16.1)
98
95
  docile (~> 1.1)
99
96
  json (>= 1.8, < 3)
100
97
  simplecov-html (~> 0.10.0)
101
98
  simplecov-html (0.10.2)
102
- term-ansicolor (1.2.2)
103
- tins (~> 0.8)
104
- thor (0.18.1)
99
+ term-ansicolor (1.7.1)
100
+ tins (~> 1.0)
101
+ thor (0.20.3)
105
102
  thread_safe (0.3.6)
106
- tins (0.13.2)
103
+ tins (1.22.0)
107
104
  to_regexp (0.2.1)
108
105
  tzinfo (1.2.5)
109
106
  thread_safe (~> 0.1)
data/README.md CHANGED
@@ -15,7 +15,7 @@ With the _feed config_ containing the URL to scrape and
15
15
  CSS selectors for information extraction (like title, URL, ...) your RSS builds.
16
16
  [Extractors](#using-extractors) and chain-able [post processors](#using-post-processors)
17
17
  make information extraction, processing and sanitizing a breeze.
18
- [Scraping JSON](#scraping-json) responses and
18
+ [Scraping JSON](#scraping-and-handling-json-responses) responses and
19
19
  [setting HTTP request headers](#set-any-http-header-in-the-request) is
20
20
  supported, too.
21
21
 
@@ -36,10 +36,7 @@ require 'html2rss'
36
36
 
37
37
  rss =
38
38
  Html2rss.feed(
39
- channel: {
40
- title: 'StackOverflow: Hot Network Questions',
41
- url: 'https://stackoverflow.com/questions'
42
- },
39
+ channel: { url: 'https://stackoverflow.com/questions' },
43
40
  selectors: {
44
41
  items: { selector: '#hot-network-questions > ul > li' },
45
42
  title: { selector: 'a' },
@@ -57,13 +54,15 @@ The contents of both hashes are explained below.
57
54
 
58
55
  ### The `channel`
59
56
 
60
- | attribute | | type | remark |
61
- | ------------- | -------- | ------- | ----------------------- |
62
- | `title` | required | String | |
63
- | `url` | required | String | |
64
- | `ttl` | optional | Integer | time to live in minutes |
65
- | `description` | optional | String | |
66
- | `headers` | optional | Hash | See notes below. |
57
+ | attribute | | type | default | remark |
58
+ | ------------- | -------- | ------- | -------------: | ------------------------------------------ |
59
+ | `url` | required | String | | |
60
+ | `title` | optional | String | auto-generated | |
61
+ | `description` | optional | String | auto-generated | |
62
+ | `ttl` | optional | Integer | `360` | TTL in _minutes_ |
63
+ | `time_zone` | optional | String | `'UTC'` | TimeZone name |
64
+ | `headers` | optional | Hash | `{}` | Set HTTP request headers. See notes below. |
65
+ | `json` | optional | Boolean | `false` | Handle JSON response. See notes below. |
67
66
 
68
67
  ### The `selectors`
69
68
 
@@ -78,18 +77,18 @@ each item has to have at least a `title` or a `description`.
78
77
  Your `selectors` can contain arbitrary selector names, but only these
79
78
  will make it into the RSS feed:
80
79
 
81
- | RSS 2.0 tag | name in html2rss | remark |
82
- | ------------- | ---------------- | --------------------------- |
83
- | `title` | `title` | |
84
- | `description` | `description` | Supports HTML. |
85
- | `link` | `link` | A URL. |
86
- | `author` | `author` | |
87
- | `category` | `categories` | See notes below. |
88
- | `enclosure` | `enclosure` | See notes below. |
89
- | `pubDate` | `update` | An instance of `Time`. |
90
- | `guid` | `guid` | Generated from the `title`. |
91
- | `comments` | `comments` | A URL. |
92
- | `source` | ~~source~~ | Not yet supported. |
80
+ | RSS 2.0 tag | name in `html2rss` | remark |
81
+ | ------------- | ------------------ | --------------------------- |
82
+ | `title` | `title` | |
83
+ | `description` | `description` | Supports HTML. |
84
+ | `link` | `link` | A URL. |
85
+ | `author` | `author` | |
86
+ | `category` | `categories` | See notes below. |
87
+ | `enclosure` | `enclosure` | See notes below. |
88
+ | `pubDate` | `update` | An instance of `Time`. |
89
+ | `guid` | `guid` | Generated from the `title`. |
90
+ | `comments` | `comments` | A URL. |
91
+ | `source` | ~~source~~ | Not yet supported. |
93
92
 
94
93
  ### The `selector` hash
95
94
 
@@ -225,7 +224,7 @@ Note the use of `|` for a multi-line String in YAML.
225
224
 
226
225
  ## Adding `<category>` tags to an item
227
226
 
228
- The `categories` selector takes an array of selector names. The value of those
227
+ The `categories` selector takes an array of selector names. Each value of those
229
228
  selectors will become a `<category>` on the RSS item.
230
229
 
231
230
  <details>
@@ -268,11 +267,11 @@ selectors:
268
267
 
269
268
  ## Adding an `<enclosure>` tag to an item
270
269
 
271
- An enclosure can be 'anything', e.g. a image, audio or video file.
270
+ An enclosure can be any file, e.g. an image, audio or video.
272
271
 
273
272
  The `enclosure` selector needs to return a URL of the content to enclose. If the extracted URL is relative, it will be converted to an absolute one using the channel's URL as base.
274
273
 
275
- Since html2rss does no further inspection of the enclosure, its support comes with trade-offs:
274
+ Since `html2rss` does no further inspection of the enclosure, its support comes with trade-offs:
276
275
 
277
276
  1. The content-type is guessed from the file extension of the URL.
278
277
  2. If the content-type guessing fails, it will default to `application/octet-stream`.
@@ -310,7 +309,7 @@ selectors:
310
309
 
311
310
  </details>
312
311
 
313
- ## Scraping JSON
312
+ ## Scraping and handling JSON responses
314
313
 
315
314
  Although this gem is called **html**​*2rss*, it's possible to scrape and process JSON.
316
315
 
@@ -485,7 +484,7 @@ Find a full example of a `config.yml` at [`spec/config.test.yml`](https://github
485
484
  ## Gotchas and tips & tricks
486
485
 
487
486
  - Check that the channel URL does not redirect to a mobile page with a different markup structure.
488
- - Do not rely on your web browser's developer console. html2rss does not execute JavaScript.
487
+ - Do not rely on your web browser's developer console. `html2rss` does not execute JavaScript.
489
488
  - Fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems efficient (`curl URL | pup`).
490
489
  - [CSS selectors are quite versatile, here's an overview.](https://www.w3.org/TR/selectors-4/#overview)
491
490
 
@@ -38,10 +38,7 @@ module Html2rss
38
38
  ##
39
39
  # @return [String] HTML rendered from the Markdown value, passed through SanitizeHtml
40
40
  def get
41
- SanitizeHtml.new(
42
- Kramdown::Document.new(@value).to_html,
43
- @env
44
- ).get
41
+ SanitizeHtml.new(Kramdown::Document.new(@value).to_html, @env).get
45
42
  end
46
43
  end
47
44
  end
@@ -5,7 +5,7 @@ module Html2rss
5
5
  #
6
6
  # Imagine this HTML structure:
7
7
  #
8
- # <span>http://why-not-use-a-link.uh</span>
8
+ # <span>http://why-not-use-a-link.uh </span>
9
9
  #
10
10
  # YAML usage example:
11
11
  #
@@ -26,7 +26,7 @@ module Html2rss
26
26
  ##
27
27
  # @return [String]
28
28
  def get
29
- URI(@value).to_s
29
+ URI(@value.to_s.split(' ').join).to_s
30
30
  end
31
31
  end
32
32
  end
@@ -14,11 +14,19 @@ module Html2rss
14
14
  end
15
15
 
16
16
  def ttl
17
- channel_config.fetch 'ttl', 3600
17
+ channel_config.fetch 'ttl', 360
18
18
  end
19
19
 
20
20
  def title
21
- channel_config.fetch 'title', 'html2rss generated title'
21
+ channel_config.fetch 'title' do
22
+ uri = URI(url)
23
+
24
+ nicer_path = uri.path.split('/')
25
+ nicer_path.reject! { |p| p == '' }
26
+ nicer_path.map!(&:titleize)
27
+
28
+ nicer_path.any? ? "#{uri.host}: #{nicer_path.join(' ')}" : uri.host
29
+ end
22
30
  end
23
31
 
24
32
  def language
@@ -26,7 +34,7 @@ module Html2rss
26
34
  end
27
35
 
28
36
  def description
29
- channel_config.fetch 'description', 'A description of my html2rss feed.'
37
+ channel_config.fetch 'description', "Latest items from #{url}."
30
38
  end
31
39
 
32
40
  def url
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.8.0'.freeze
2
+ VERSION = '0.8.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-02 00:00:00.000000000 Z
11
+ date: 2019-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport