html2rss 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2d23c5f38ec394aa7a095ed72fe438bbb7480e56e3f1466afccf8b15640ad3ea
4
- data.tar.gz: 9cf409fb2b00a21964e91ee81882dc626b47a9a37a5dd7817b59048197f44e45
3
+ metadata.gz: 3d7642c21f46ea6086a3ead39b2923fa4c09ddc821719e2c40936846a83e5438
4
+ data.tar.gz: 8d6efa4ed03261f11e9ec302ea0cda860e83b74d78203c48b6650f1a7d9c106c
5
5
  SHA512:
6
- metadata.gz: 4650b3337acea8b93a0d73279e401c3790b67ff9e311e21782e1a892e58d5dd156ce7c649e961b36031d046d4c272042c5ecba6e185fc91a42718ef345aea40c
7
- data.tar.gz: 76a3d9517ea0f5e6f6f12b1de803cbe1bc1cc5b7f5401a8bf5c3cf3de544b3580ab23bfe9be8964e5fb5b1da8b35184ce1bfea418808c447812b8d99def8897c
6
+ metadata.gz: f6f1a3c52d9689b18d04a7b0c772ab0b9742202ca2e292b8265f7d26d85989f7201da9ab3de2db81258b82b8271c5789176976ea6c67c42edcb4739a556386f3
7
+ data.tar.gz: 8d892ec1d5761d6c5338e77f36a86debb2ccc1f1f7bac98ba83489c2aa8a7fe0638a45d355ff6fbd295a417d4df25f9ffc932fd15dd70f5465b0b61ee566cec0
@@ -1,3 +1,6 @@
1
+ require:
2
+ - 'rubocop-performance'
3
+
1
4
  AllCops:
2
5
  TargetRubyVersion: 2.4
3
6
  DisplayCopNames: true
@@ -9,10 +9,9 @@ before_install:
9
9
  bundler_args: "--jobs=3 --retry=3"
10
10
 
11
11
  rvm:
12
- - 2.3.8
13
- - 2.4.5
14
- - 2.5.3
15
- - 2.6.1
12
+ - 2.4.6
13
+ - 2.5.4
14
+ - 2.6.3
16
15
 
17
16
  script:
18
17
  - bundle exec rspec
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.2.1)
4
+ html2rss (0.3.0)
5
5
  faraday (~> 0.15)
6
6
  faraday_middleware (~> 0.13)
7
7
  nokogiri (>= 1.10)
@@ -10,34 +10,50 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- byebug (10.0.2)
13
+ ast (2.4.0)
14
+ byebug (11.0.1)
14
15
  crass (1.0.4)
15
16
  diff-lcs (1.3)
16
- docile (1.3.1)
17
+ docile (1.3.2)
17
18
  faraday (0.15.4)
18
19
  multipart-post (>= 1.2, < 3)
19
- faraday_middleware (0.13.0)
20
+ faraday_middleware (0.13.1)
20
21
  faraday (>= 0.7.4, < 1.0)
21
- json (2.1.0)
22
+ jaro_winkler (1.5.3)
23
+ json (2.2.0)
22
24
  mini_portile2 (2.4.0)
23
- multipart-post (2.0.0)
24
- nokogiri (1.10.1)
25
+ multipart-post (2.1.1)
26
+ nokogiri (1.10.3)
25
27
  mini_portile2 (~> 2.4.0)
26
28
  nokogumbo (2.0.1)
27
29
  nokogiri (~> 1.8, >= 1.8.4)
30
+ parallel (1.17.0)
31
+ parser (2.6.3.0)
32
+ ast (~> 2.4.0)
33
+ rainbow (3.0.0)
28
34
  rspec (3.8.0)
29
35
  rspec-core (~> 3.8.0)
30
36
  rspec-expectations (~> 3.8.0)
31
37
  rspec-mocks (~> 3.8.0)
32
- rspec-core (3.8.0)
38
+ rspec-core (3.8.1)
33
39
  rspec-support (~> 3.8.0)
34
- rspec-expectations (3.8.2)
40
+ rspec-expectations (3.8.4)
35
41
  diff-lcs (>= 1.2.0, < 2.0)
36
42
  rspec-support (~> 3.8.0)
37
- rspec-mocks (3.8.0)
43
+ rspec-mocks (3.8.1)
38
44
  diff-lcs (>= 1.2.0, < 2.0)
39
45
  rspec-support (~> 3.8.0)
40
- rspec-support (3.8.0)
46
+ rspec-support (3.8.2)
47
+ rubocop (0.71.0)
48
+ jaro_winkler (~> 1.5.1)
49
+ parallel (~> 1.10)
50
+ parser (>= 2.6)
51
+ rainbow (>= 2.2.2, < 4.0)
52
+ ruby-progressbar (~> 1.7)
53
+ unicode-display_width (>= 1.4.0, < 1.7)
54
+ rubocop-performance (1.4.0)
55
+ rubocop (>= 0.71.0)
56
+ ruby-progressbar (1.10.1)
41
57
  sanitize (5.0.0)
42
58
  crass (~> 1.0.2)
43
59
  nokogiri (>= 1.8.0)
@@ -47,18 +63,21 @@ GEM
47
63
  json (>= 1.8, < 3)
48
64
  simplecov-html (~> 0.10.0)
49
65
  simplecov-html (0.10.2)
50
- vcr (4.0.0)
66
+ unicode-display_width (1.6.0)
67
+ vcr (5.0.0)
51
68
 
52
69
  PLATFORMS
53
70
  ruby
54
71
 
55
72
  DEPENDENCIES
56
73
  bundler (~> 1.16)
57
- byebug (~> 10.0)
74
+ byebug
58
75
  html2rss!
59
76
  rspec (~> 3.0)
77
+ rubocop
78
+ rubocop-performance
60
79
  simplecov
61
- vcr (~> 4.0)
80
+ vcr
62
81
 
63
82
  BUNDLED WITH
64
- 1.16.6
83
+ 1.17.2
data/README.md CHANGED
@@ -1,34 +1,23 @@
1
1
  ![html2rss logo](https://github.com/gildesmarais/html2rss/raw/master/support/logo.png)
2
2
 
3
- # html2rss [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss)
3
+ # html2rss [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss) [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](https://badge.fury.io/rb/html2rss)
4
4
 
5
- Request and convert an HTML document to an RSS feed via a config object.
6
- The config contains the URL to scrape and the selectors needed to extract
7
- the required information. This gem provides some extractors (e.g. extract
8
- the information from an HTML attribute).
5
+ Request HTML from a URL and transform it to a Ruby RSS 2.0 object.
9
6
 
10
- Please always check the website's Terms of Service before if its allowed to
11
- scrape their content!
7
+ **Are you searching for a ready-to-use "website to RSS" solution?**
8
+ [Check out `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
12
9
 
13
- ## Installation
14
-
15
- Add this line to your application's Gemfile:
16
-
17
- ```ruby
18
- gem 'html2rss'
19
- ```
20
-
21
- And then execute:
22
-
23
- $ bundle
24
-
25
- Or install it yourself as:
10
+ Each website needs a feed config which contains the URL to scrape and
11
+ CSS selectors to extract the required information (like title, URL, ...).
12
+ This gem provides [extractors](https://github.com/gildesmarais/html2rss/blob/master/lib/html2rss/item_extractor.rb) (e.g. extract the information from an HTML attribute)
13
+ and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/attribute_post_processors) to make information retrieval even easier.
26
14
 
27
- $ gem install html2rss
15
+ ## Installation
28
16
 
29
- ## Usage
17
+ Add this line to your application's Gemfile: `gem 'html2rss'`
18
+ And then execute: `bundle`
30
19
 
31
- ## Usage with a YAML file
20
+ ## Usage with a YAML config file
32
21
 
33
22
  Create a YAML config file. Find an example at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
34
23
 
@@ -36,15 +25,7 @@ Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://
36
25
 
37
26
  an `RSS::Rss` object.
38
27
 
39
- ## Usage in a web application
40
-
41
- Find a minimal Sintra app which exposes your feeds to HTTP endpoints here:
42
- [gildesmarais/html2rss-web](https://github.com/gildesmarais/html2rss-web)
43
-
44
- ### Tips and tricks
45
-
46
- - Check that the channel url does not redirect to a mobile page
47
- - fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems quite efficient
28
+ **Too complicated?** See [`html2rss-configs`](https://github.com/gildesmarais/html2rss-configs) for ready-made feed configs!
48
29
 
49
30
  ## Development
50
31
 
@@ -54,10 +35,15 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
54
35
 
55
36
  Bug reports and pull requests are welcome on GitHub at https://github.com/gildesmarais/html2rss.
56
37
 
57
- ## Changelog generation
38
+ ## Releasing a new version
58
39
 
59
- The `CHANGELOG.md` can be generated automatically with [`standard-changelog`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog).
40
+ 1. increase version in `lib/version.rb`
41
+ 2. `bundle`
42
+ 3. commit the changes
43
+ 4. `git tag v....`
44
+ 5. `git push; git push --tags`
45
+ 6. update the changelog, commit and push
60
46
 
61
- ## License
47
+ ### Changelog generation
62
48
 
63
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
49
+ The `CHANGELOG.md` can be generated automatically with [`standard-changelog`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog).
@@ -33,8 +33,10 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'nokogiri', '>= 1.10'
34
34
  spec.add_dependency 'sanitize', '~> 5.0'
35
35
  spec.add_development_dependency 'bundler', '~> 1.16'
36
- spec.add_development_dependency 'byebug', '~> 10.0'
36
+ spec.add_development_dependency 'byebug'
37
37
  spec.add_development_dependency 'rspec', '~> 3.0'
38
+ spec.add_development_dependency 'rubocop'
39
+ spec.add_development_dependency 'rubocop-performance'
38
40
  spec.add_development_dependency 'simplecov'
39
- spec.add_development_dependency 'vcr', '~> 4.0'
41
+ spec.add_development_dependency 'vcr'
40
42
  end
@@ -5,7 +5,11 @@ require 'yaml'
5
5
 
6
6
  module Html2rss
7
7
  def self.feed_from_yaml_config(file, name)
8
- config = Config.new(YAML.load(File.open(file)).freeze, name)
8
+ yaml = YAML.load(File.open(file))
9
+ feed_config = yaml['feeds'][name]
10
+ global_config = yaml.reject { |k| k == 'feeds' }
11
+
12
+ config = Config.new(feed_config, global_config)
9
13
  feed(config)
10
14
  end
11
15
 
@@ -1,11 +1,11 @@
1
1
  module Html2rss
2
2
  class Config
3
- attr_reader :feed_config, :channel_config
3
+ attr_reader :feed_config, :channel_config, :global_config
4
4
 
5
- def initialize(config, name)
6
- @config = config
7
- @feed_config = @config['feeds'][name.to_s]
8
- @channel_config = @feed_config['channel']
5
+ def initialize(feed_config, global_config = {})
6
+ @global_config = global_config
7
+ @feed_config = feed_config
8
+ @channel_config = feed_config.fetch('channel', {})
9
9
  end
10
10
 
11
11
  def author
@@ -34,7 +34,7 @@ module Html2rss
34
34
  alias link url
35
35
 
36
36
  def headers
37
- @config.fetch('headers', {})
37
+ global_config.fetch('headers', {})
38
38
  end
39
39
 
40
40
  def options(name)
@@ -11,7 +11,7 @@ module Html2rss
11
11
  uri = URI(href)
12
12
  else
13
13
  uri = URI(options['channel']['url'])
14
- uri.path = path.start_with?('/') ? path : "/#{path}"
14
+ uri.path = path.to_s.start_with?('/') ? path : "/#{path}"
15
15
  uri.query = query
16
16
  end
17
17
 
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.2.2'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-31 00:00:00.000000000 Z
11
+ date: 2019-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -84,16 +84,16 @@ dependencies:
84
84
  name: byebug
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - "~>"
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - "~>"
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
96
+ version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rspec
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +108,34 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '3.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rubocop
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop-performance
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: simplecov
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -126,16 +154,16 @@ dependencies:
126
154
  name: vcr
127
155
  requirement: !ruby/object:Gem::Requirement
128
156
  requirements:
129
- - - "~>"
157
+ - - ">="
130
158
  - !ruby/object:Gem::Version
131
- version: '4.0'
159
+ version: '0'
132
160
  type: :development
133
161
  prerelease: false
134
162
  version_requirements: !ruby/object:Gem::Requirement
135
163
  requirements:
136
- - - "~>"
164
+ - - ">="
137
165
  - !ruby/object:Gem::Version
138
- version: '4.0'
166
+ version: '0'
139
167
  description: |-
140
168
  Create your config object, include the url to scrape,
141
169
  some selectors and get a RSS2 feed in return.
@@ -191,7 +219,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
219
  - !ruby/object:Gem::Version
192
220
  version: '0'
193
221
  requirements: []
194
- rubygems_version: 3.0.2
222
+ rubygems_version: 3.0.4
195
223
  signing_key:
196
224
  specification_version: 4
197
225
  summary: Generate RSS feeds by scraping websites by providing a config.