html2rss 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -4
- data/Gemfile.lock +34 -15
- data/README.md +22 -36
- data/html2rss.gemspec +4 -2
- data/lib/html2rss.rb +5 -1
- data/lib/html2rss/config.rb +6 -6
- data/lib/html2rss/item_extractor.rb +1 -1
- data/lib/html2rss/version.rb +1 -1
- metadata +39 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d7642c21f46ea6086a3ead39b2923fa4c09ddc821719e2c40936846a83e5438
|
4
|
+
data.tar.gz: 8d6efa4ed03261f11e9ec302ea0cda860e83b74d78203c48b6650f1a7d9c106c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f6f1a3c52d9689b18d04a7b0c772ab0b9742202ca2e292b8265f7d26d85989f7201da9ab3de2db81258b82b8271c5789176976ea6c67c42edcb4739a556386f3
|
7
|
+
data.tar.gz: 8d892ec1d5761d6c5338e77f36a86debb2ccc1f1f7bac98ba83489c2aa8a7fe0638a45d355ff6fbd295a417d4df25f9ffc932fd15dd70f5465b0b61ee566cec0
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2rss (0.
|
4
|
+
html2rss (0.3.0)
|
5
5
|
faraday (~> 0.15)
|
6
6
|
faraday_middleware (~> 0.13)
|
7
7
|
nokogiri (>= 1.10)
|
@@ -10,34 +10,50 @@ PATH
|
|
10
10
|
GEM
|
11
11
|
remote: https://rubygems.org/
|
12
12
|
specs:
|
13
|
-
|
13
|
+
ast (2.4.0)
|
14
|
+
byebug (11.0.1)
|
14
15
|
crass (1.0.4)
|
15
16
|
diff-lcs (1.3)
|
16
|
-
docile (1.3.
|
17
|
+
docile (1.3.2)
|
17
18
|
faraday (0.15.4)
|
18
19
|
multipart-post (>= 1.2, < 3)
|
19
|
-
faraday_middleware (0.13.
|
20
|
+
faraday_middleware (0.13.1)
|
20
21
|
faraday (>= 0.7.4, < 1.0)
|
21
|
-
|
22
|
+
jaro_winkler (1.5.3)
|
23
|
+
json (2.2.0)
|
22
24
|
mini_portile2 (2.4.0)
|
23
|
-
multipart-post (2.
|
24
|
-
nokogiri (1.10.
|
25
|
+
multipart-post (2.1.1)
|
26
|
+
nokogiri (1.10.3)
|
25
27
|
mini_portile2 (~> 2.4.0)
|
26
28
|
nokogumbo (2.0.1)
|
27
29
|
nokogiri (~> 1.8, >= 1.8.4)
|
30
|
+
parallel (1.17.0)
|
31
|
+
parser (2.6.3.0)
|
32
|
+
ast (~> 2.4.0)
|
33
|
+
rainbow (3.0.0)
|
28
34
|
rspec (3.8.0)
|
29
35
|
rspec-core (~> 3.8.0)
|
30
36
|
rspec-expectations (~> 3.8.0)
|
31
37
|
rspec-mocks (~> 3.8.0)
|
32
|
-
rspec-core (3.8.
|
38
|
+
rspec-core (3.8.1)
|
33
39
|
rspec-support (~> 3.8.0)
|
34
|
-
rspec-expectations (3.8.
|
40
|
+
rspec-expectations (3.8.4)
|
35
41
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
42
|
rspec-support (~> 3.8.0)
|
37
|
-
rspec-mocks (3.8.
|
43
|
+
rspec-mocks (3.8.1)
|
38
44
|
diff-lcs (>= 1.2.0, < 2.0)
|
39
45
|
rspec-support (~> 3.8.0)
|
40
|
-
rspec-support (3.8.
|
46
|
+
rspec-support (3.8.2)
|
47
|
+
rubocop (0.71.0)
|
48
|
+
jaro_winkler (~> 1.5.1)
|
49
|
+
parallel (~> 1.10)
|
50
|
+
parser (>= 2.6)
|
51
|
+
rainbow (>= 2.2.2, < 4.0)
|
52
|
+
ruby-progressbar (~> 1.7)
|
53
|
+
unicode-display_width (>= 1.4.0, < 1.7)
|
54
|
+
rubocop-performance (1.4.0)
|
55
|
+
rubocop (>= 0.71.0)
|
56
|
+
ruby-progressbar (1.10.1)
|
41
57
|
sanitize (5.0.0)
|
42
58
|
crass (~> 1.0.2)
|
43
59
|
nokogiri (>= 1.8.0)
|
@@ -47,18 +63,21 @@ GEM
|
|
47
63
|
json (>= 1.8, < 3)
|
48
64
|
simplecov-html (~> 0.10.0)
|
49
65
|
simplecov-html (0.10.2)
|
50
|
-
|
66
|
+
unicode-display_width (1.6.0)
|
67
|
+
vcr (5.0.0)
|
51
68
|
|
52
69
|
PLATFORMS
|
53
70
|
ruby
|
54
71
|
|
55
72
|
DEPENDENCIES
|
56
73
|
bundler (~> 1.16)
|
57
|
-
byebug
|
74
|
+
byebug
|
58
75
|
html2rss!
|
59
76
|
rspec (~> 3.0)
|
77
|
+
rubocop
|
78
|
+
rubocop-performance
|
60
79
|
simplecov
|
61
|
-
vcr
|
80
|
+
vcr
|
62
81
|
|
63
82
|
BUNDLED WITH
|
64
|
-
1.
|
83
|
+
1.17.2
|
data/README.md
CHANGED
@@ -1,34 +1,23 @@
|
|
1
1
|
![html2rss logo](https://github.com/gildesmarais/html2rss/raw/master/support/logo.png)
|
2
2
|
|
3
|
-
# html2rss [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss)
|
3
|
+
# html2rss [![Build Status](https://travis-ci.org/gildesmarais/html2rss.svg?branch=master)](https://travis-ci.org/gildesmarais/html2rss) [![Gem Version](https://badge.fury.io/rb/html2rss.svg)](https://badge.fury.io/rb/html2rss)
|
4
4
|
|
5
|
-
Request
|
6
|
-
The config contains the URL to scrape and the selectors needed to extract
|
7
|
-
the required information. This gem provides some extractors (e.g. extract
|
8
|
-
the information from an HTML attribute).
|
5
|
+
Request HTML from a URL and transform it into a Ruby RSS 2.0 object.
|
9
6
|
|
10
|
-
|
11
|
-
|
7
|
+
**Are you searching for a ready to use "website to RSS" solution?**
|
8
|
+
[Check out `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
|
12
9
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
```ruby
|
18
|
-
gem 'html2rss'
|
19
|
-
```
|
20
|
-
|
21
|
-
And then execute:
|
22
|
-
|
23
|
-
$ bundle
|
24
|
-
|
25
|
-
Or install it yourself as:
|
10
|
+
Each website needs a feed config which contains the URL to scrape and
|
11
|
+
CSS selectors to extract the required information (like title, URL, ...).
|
12
|
+
This gem provides [extractors](https://github.com/gildesmarais/html2rss/blob/master/lib/html2rss/item_extractor.rb) (e.g. extract the information from an HTML attribute)
|
13
|
+
and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/attribute_post_processors) to make information retrieval even easier.
|
26
14
|
|
27
|
-
|
15
|
+
## Installation
|
28
16
|
|
29
|
-
|
17
|
+
Add this line to your application's Gemfile: `gem 'html2rss'`
|
18
|
+
And then execute: `bundle`
|
30
19
|
|
31
|
-
## Usage with a YAML file
|
20
|
+
## Usage with a YAML config file
|
32
21
|
|
33
22
|
Create a YAML config file. Find an example at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
|
34
23
|
|
@@ -36,15 +25,7 @@ Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://
|
|
36
25
|
|
37
26
|
an `RSS::Rss` object.
|
38
27
|
|
39
|
-
|
40
|
-
|
41
|
-
Find a minimal Sinatra app which exposes your feeds to HTTP endpoints here:
|
42
|
-
[gildesmarais/html2rss-web](https://github.com/gildesmarais/html2rss-web)
|
43
|
-
|
44
|
-
### Tips and tricks
|
45
|
-
|
46
|
-
- Check that the channel url does not redirect to a mobile page
|
47
|
-
- fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems quite efficient
|
28
|
+
**Too complicated?** See [`html2rss-configs`](https://github.com/gildesmarais/html2rss-configs) for ready-made feed configs!
|
48
29
|
|
49
30
|
## Development
|
50
31
|
|
@@ -54,10 +35,15 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
54
35
|
|
55
36
|
Bug reports and pull requests are welcome on GitHub at https://github.com/gildesmarais/html2rss.
|
56
37
|
|
57
|
-
##
|
38
|
+
## Releasing a new version
|
58
39
|
|
59
|
-
|
40
|
+
1. increase version in `lib/html2rss/version.rb`
|
41
|
+
2. `bundle`
|
42
|
+
3. commit the changes
|
43
|
+
4. `git tag v....`
|
44
|
+
5. `git push; git push --tags`
|
45
|
+
6. update the changelog, commit and push
|
60
46
|
|
61
|
-
|
47
|
+
### Changelog generation
|
62
48
|
|
63
|
-
The
|
49
|
+
The `CHANGELOG.md` can be generated automatically with [`standard-changelog`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog).
|
data/html2rss.gemspec
CHANGED
@@ -33,8 +33,10 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.add_dependency 'nokogiri', '>= 1.10'
|
34
34
|
spec.add_dependency 'sanitize', '~> 5.0'
|
35
35
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
36
|
-
spec.add_development_dependency 'byebug'
|
36
|
+
spec.add_development_dependency 'byebug'
|
37
37
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
38
|
+
spec.add_development_dependency 'rubocop'
|
39
|
+
spec.add_development_dependency 'rubocop-performance'
|
38
40
|
spec.add_development_dependency 'simplecov'
|
39
|
-
spec.add_development_dependency 'vcr'
|
41
|
+
spec.add_development_dependency 'vcr'
|
40
42
|
end
|
data/lib/html2rss.rb
CHANGED
@@ -5,7 +5,11 @@ require 'yaml'
|
|
5
5
|
|
6
6
|
module Html2rss
|
7
7
|
def self.feed_from_yaml_config(file, name)
|
8
|
-
|
8
|
+
yaml = YAML.load(File.open(file))
|
9
|
+
feed_config = yaml['feeds'][name]
|
10
|
+
global_config = yaml.reject { |k| k == 'feeds' }
|
11
|
+
|
12
|
+
config = Config.new(feed_config, global_config)
|
9
13
|
feed(config)
|
10
14
|
end
|
11
15
|
|
data/lib/html2rss/config.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
module Html2rss
|
2
2
|
class Config
|
3
|
-
attr_reader :feed_config, :channel_config
|
3
|
+
attr_reader :feed_config, :channel_config, :global_config
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
7
|
-
@feed_config =
|
8
|
-
@channel_config =
|
5
|
+
def initialize(feed_config, global_config = {})
|
6
|
+
@global_config = global_config
|
7
|
+
@feed_config = feed_config
|
8
|
+
@channel_config = feed_config.fetch('channel', {})
|
9
9
|
end
|
10
10
|
|
11
11
|
def author
|
@@ -34,7 +34,7 @@ module Html2rss
|
|
34
34
|
alias link url
|
35
35
|
|
36
36
|
def headers
|
37
|
-
|
37
|
+
global_config.fetch('headers', {})
|
38
38
|
end
|
39
39
|
|
40
40
|
def options(name)
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -84,16 +84,16 @@ dependencies:
|
|
84
84
|
name: byebug
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
96
|
+
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: rspec
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +108,34 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '3.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rubocop
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop-performance
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: simplecov
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -126,16 +154,16 @@ dependencies:
|
|
126
154
|
name: vcr
|
127
155
|
requirement: !ruby/object:Gem::Requirement
|
128
156
|
requirements:
|
129
|
-
- - "
|
157
|
+
- - ">="
|
130
158
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
159
|
+
version: '0'
|
132
160
|
type: :development
|
133
161
|
prerelease: false
|
134
162
|
version_requirements: !ruby/object:Gem::Requirement
|
135
163
|
requirements:
|
136
|
-
- - "
|
164
|
+
- - ">="
|
137
165
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
166
|
+
version: '0'
|
139
167
|
description: |-
|
140
168
|
Create your config object, include the url to scrape,
|
141
169
|
some selectors and get a RSS2 feed in return.
|
@@ -191,7 +219,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
191
219
|
- !ruby/object:Gem::Version
|
192
220
|
version: '0'
|
193
221
|
requirements: []
|
194
|
-
rubygems_version: 3.0.
|
222
|
+
rubygems_version: 3.0.4
|
195
223
|
signing_key:
|
196
224
|
specification_version: 4
|
197
225
|
summary: Generate RSS feeds by scraping websites by providing a config.
|