html2rss 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2d23c5f38ec394aa7a095ed72fe438bbb7480e56e3f1466afccf8b15640ad3ea
4
- data.tar.gz: 9cf409fb2b00a21964e91ee81882dc626b47a9a37a5dd7817b59048197f44e45
3
+ metadata.gz: 3d7642c21f46ea6086a3ead39b2923fa4c09ddc821719e2c40936846a83e5438
4
+ data.tar.gz: 8d6efa4ed03261f11e9ec302ea0cda860e83b74d78203c48b6650f1a7d9c106c
5
5
  SHA512:
6
- metadata.gz: 4650b3337acea8b93a0d73279e401c3790b67ff9e311e21782e1a892e58d5dd156ce7c649e961b36031d046d4c272042c5ecba6e185fc91a42718ef345aea40c
7
- data.tar.gz: 76a3d9517ea0f5e6f6f12b1de803cbe1bc1cc5b7f5401a8bf5c3cf3de544b3580ab23bfe9be8964e5fb5b1da8b35184ce1bfea418808c447812b8d99def8897c
6
+ metadata.gz: f6f1a3c52d9689b18d04a7b0c772ab0b9742202ca2e292b8265f7d26d85989f7201da9ab3de2db81258b82b8271c5789176976ea6c67c42edcb4739a556386f3
7
+ data.tar.gz: 8d892ec1d5761d6c5338e77f36a86debb2ccc1f1f7bac98ba83489c2aa8a7fe0638a45d355ff6fbd295a417d4df25f9ffc932fd15dd70f5465b0b61ee566cec0
@@ -1,3 +1,6 @@
1
+ require:
2
+ - 'rubocop-performance'
3
+
1
4
  AllCops:
2
5
  TargetRubyVersion: 2.4
3
6
  DisplayCopNames: true
@@ -9,10 +9,9 @@ before_install:
9
9
  bundler_args: "--jobs=3 --retry=3"
10
10
 
11
11
  rvm:
12
- - 2.3.8
13
- - 2.4.5
14
- - 2.5.3
15
- - 2.6.1
12
+ - 2.4.6
13
+ - 2.5.4
14
+ - 2.6.3
16
15
 
17
16
  script:
18
17
  - bundle exec rspec
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.2.1)
4
+ html2rss (0.3.0)
5
5
  faraday (~> 0.15)
6
6
  faraday_middleware (~> 0.13)
7
7
  nokogiri (>= 1.10)
@@ -10,34 +10,50 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- byebug (10.0.2)
13
+ ast (2.4.0)
14
+ byebug (11.0.1)
14
15
  crass (1.0.4)
15
16
  diff-lcs (1.3)
16
- docile (1.3.1)
17
+ docile (1.3.2)
17
18
  faraday (0.15.4)
18
19
  multipart-post (>= 1.2, < 3)
19
- faraday_middleware (0.13.0)
20
+ faraday_middleware (0.13.1)
20
21
  faraday (>= 0.7.4, < 1.0)
21
- json (2.1.0)
22
+ jaro_winkler (1.5.3)
23
+ json (2.2.0)
22
24
  mini_portile2 (2.4.0)
23
- multipart-post (2.0.0)
24
- nokogiri (1.10.1)
25
+ multipart-post (2.1.1)
26
+ nokogiri (1.10.3)
25
27
  mini_portile2 (~> 2.4.0)
26
28
  nokogumbo (2.0.1)
27
29
  nokogiri (~> 1.8, >= 1.8.4)
30
+ parallel (1.17.0)
31
+ parser (2.6.3.0)
32
+ ast (~> 2.4.0)
33
+ rainbow (3.0.0)
28
34
  rspec (3.8.0)
29
35
  rspec-core (~> 3.8.0)
30
36
  rspec-expectations (~> 3.8.0)
31
37
  rspec-mocks (~> 3.8.0)
32
- rspec-core (3.8.0)
38
+ rspec-core (3.8.1)
33
39
  rspec-support (~> 3.8.0)
34
- rspec-expectations (3.8.2)
40
+ rspec-expectations (3.8.4)
35
41
  diff-lcs (>= 1.2.0, < 2.0)
36
42
  rspec-support (~> 3.8.0)
37
- rspec-mocks (3.8.0)
43
+ rspec-mocks (3.8.1)
38
44
  diff-lcs (>= 1.2.0, < 2.0)
39
45
  rspec-support (~> 3.8.0)
40
- rspec-support (3.8.0)
46
+ rspec-support (3.8.2)
47
+ rubocop (0.71.0)
48
+ jaro_winkler (~> 1.5.1)
49
+ parallel (~> 1.10)
50
+ parser (>= 2.6)
51
+ rainbow (>= 2.2.2, < 4.0)
52
+ ruby-progressbar (~> 1.7)
53
+ unicode-display_width (>= 1.4.0, < 1.7)
54
+ rubocop-performance (1.4.0)
55
+ rubocop (>= 0.71.0)
56
+ ruby-progressbar (1.10.1)
41
57
  sanitize (5.0.0)
42
58
  crass (~> 1.0.2)
43
59
  nokogiri (>= 1.8.0)
@@ -47,18 +63,21 @@ GEM
47
63
  json (>= 1.8, < 3)
48
64
  simplecov-html (~> 0.10.0)
49
65
  simplecov-html (0.10.2)
50
- vcr (4.0.0)
66
+ unicode-display_width (1.6.0)
67
+ vcr (5.0.0)
51
68
 
52
69
  PLATFORMS
53
70
  ruby
54
71
 
55
72
  DEPENDENCIES
56
73
  bundler (~> 1.16)
57
- byebug (~> 10.0)
74
+ byebug
58
75
  html2rss!
59
76
  rspec (~> 3.0)
77
+ rubocop
78
+ rubocop-performance
60
79
  simplecov
61
- vcr (~> 4.0)
80
+ vcr
62
81
 
63
82
  BUNDLED WITH
64
- 1.16.6
83
+ 1.17.2
data/README.md CHANGED
@@ -1,34 +1,23 @@
1
1
  ![html2rss logo](https://github.com/gildesmarais/html2rss/raw/master/support/logo.png)
2
2
 
3
- # html2rss [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss)
3
+ # html2rss [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss) [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](https://badge.fury.io/rb/html2rss)
4
4
 
5
- Request and convert an HTML document to an RSS feed via a config object.
6
- The config contains the URL to scrape and the selectors needed to extract
7
- the required information. This gem provides some extractors (e.g. extract
8
- the information from an HTML attribute).
5
+ Request HTML from a URL and transform it to a Ruby RSS 2.0 object.
9
6
 
10
- Please always check the website's Terms of Service before if its allowed to
11
- scrape their content!
7
+ **Are you searching for a ready-to-use "website to RSS" solution?**
8
+ [Check out `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
12
9
 
13
- ## Installation
14
-
15
- Add this line to your application's Gemfile:
16
-
17
- ```ruby
18
- gem 'html2rss'
19
- ```
20
-
21
- And then execute:
22
-
23
- $ bundle
24
-
25
- Or install it yourself as:
10
+ Each website needs a feed config which contains the URL to scrape and
11
+ CSS selectors to extract the required information (like title, URL, ...).
12
+ This gem provides [extractors](https://github.com/gildesmarais/html2rss/blob/master/lib/html2rss/item_extractor.rb) (e.g. extract the information from an HTML attribute)
13
+ and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/attribute_post_processors) to make information retrieval even easier.
26
14
 
27
- $ gem install html2rss
15
+ ## Installation
28
16
 
29
- ## Usage
17
+ Add this line to your application's Gemfile: `gem 'html2rss'`
18
+ And then execute: `bundle`
30
19
 
31
- ## Usage with a YAML file
20
+ ## Usage with a YAML config file
32
21
 
33
22
  Create a YAML config file. Find an example at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
34
23
 
@@ -36,15 +25,7 @@ Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://
36
25
 
37
26
  an `RSS::Rss` object.
38
27
 
39
- ## Usage in a web application
40
-
41
- Find a minimal Sintra app which exposes your feeds to HTTP endpoints here:
42
- [gildesmarais/html2rss-web](https://github.com/gildesmarais/html2rss-web)
43
-
44
- ### Tips and tricks
45
-
46
- - Check that the channel url does not redirect to a mobile page
47
- - fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems quite efficient
28
+ **Too complicated?** See [`html2rss-configs`](https://github.com/gildesmarais/html2rss-configs) for ready-made feed configs!
48
29
 
49
30
  ## Development
50
31
 
@@ -54,10 +35,15 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
54
35
 
55
36
  Bug reports and pull requests are welcome on GitHub at https://github.com/gildesmarais/html2rss.
56
37
 
57
- ## Changelog generation
38
+ ## Releasing a new version
58
39
 
59
- The `CHANGELOG.md` can be generated automatically with [`standard-changelog`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog).
40
+ 1. increase version in `lib/version.rb`
41
+ 2. `bundle`
42
+ 3. commit the changes
43
+ 4. `git tag v....`
44
+ 5. `git push; git push --tags`
45
+ 6. update the changelog, commit and push
60
46
 
61
- ## License
47
+ ### Changelog generation
62
48
 
63
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
49
+ The `CHANGELOG.md` can be generated automatically with [`standard-changelog`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog).
@@ -33,8 +33,10 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'nokogiri', '>= 1.10'
34
34
  spec.add_dependency 'sanitize', '~> 5.0'
35
35
  spec.add_development_dependency 'bundler', '~> 1.16'
36
- spec.add_development_dependency 'byebug', '~> 10.0'
36
+ spec.add_development_dependency 'byebug'
37
37
  spec.add_development_dependency 'rspec', '~> 3.0'
38
+ spec.add_development_dependency 'rubocop'
39
+ spec.add_development_dependency 'rubocop-performance'
38
40
  spec.add_development_dependency 'simplecov'
39
- spec.add_development_dependency 'vcr', '~> 4.0'
41
+ spec.add_development_dependency 'vcr'
40
42
  end
@@ -5,7 +5,11 @@ require 'yaml'
5
5
 
6
6
  module Html2rss
7
7
  def self.feed_from_yaml_config(file, name)
8
- config = Config.new(YAML.load(File.open(file)).freeze, name)
8
+ yaml = YAML.load(File.open(file))
9
+ feed_config = yaml['feeds'][name]
10
+ global_config = yaml.reject { |k| k == 'feeds' }
11
+
12
+ config = Config.new(feed_config, global_config)
9
13
  feed(config)
10
14
  end
11
15
 
@@ -1,11 +1,11 @@
1
1
  module Html2rss
2
2
  class Config
3
- attr_reader :feed_config, :channel_config
3
+ attr_reader :feed_config, :channel_config, :global_config
4
4
 
5
- def initialize(config, name)
6
- @config = config
7
- @feed_config = @config['feeds'][name.to_s]
8
- @channel_config = @feed_config['channel']
5
+ def initialize(feed_config, global_config = {})
6
+ @global_config = global_config
7
+ @feed_config = feed_config
8
+ @channel_config = feed_config.fetch('channel', {})
9
9
  end
10
10
 
11
11
  def author
@@ -34,7 +34,7 @@ module Html2rss
34
34
  alias link url
35
35
 
36
36
  def headers
37
- @config.fetch('headers', {})
37
+ global_config.fetch('headers', {})
38
38
  end
39
39
 
40
40
  def options(name)
@@ -11,7 +11,7 @@ module Html2rss
11
11
  uri = URI(href)
12
12
  else
13
13
  uri = URI(options['channel']['url'])
14
- uri.path = path.start_with?('/') ? path : "/#{path}"
14
+ uri.path = path.to_s.start_with?('/') ? path : "/#{path}"
15
15
  uri.query = query
16
16
  end
17
17
 
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.2.2'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-31 00:00:00.000000000 Z
11
+ date: 2019-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -84,16 +84,16 @@ dependencies:
84
84
  name: byebug
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - "~>"
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - "~>"
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
96
+ version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rspec
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +108,34 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '3.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rubocop
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop-performance
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: simplecov
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -126,16 +154,16 @@ dependencies:
126
154
  name: vcr
127
155
  requirement: !ruby/object:Gem::Requirement
128
156
  requirements:
129
- - - "~>"
157
+ - - ">="
130
158
  - !ruby/object:Gem::Version
131
- version: '4.0'
159
+ version: '0'
132
160
  type: :development
133
161
  prerelease: false
134
162
  version_requirements: !ruby/object:Gem::Requirement
135
163
  requirements:
136
- - - "~>"
164
+ - - ">="
137
165
  - !ruby/object:Gem::Version
138
- version: '4.0'
166
+ version: '0'
139
167
  description: |-
140
168
  Create your config object, include the url to scrape,
141
169
  some selectors and get a RSS2 feed in return.
@@ -191,7 +219,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
219
  - !ruby/object:Gem::Version
192
220
  version: '0'
193
221
  requirements: []
194
- rubygems_version: 3.0.2
222
+ rubygems_version: 3.0.4
195
223
  signing_key:
196
224
  specification_version: 4
197
225
  summary: Generate RSS feeds by scraping websites by providing a config.