html2rss 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -4
- data/Gemfile.lock +34 -15
- data/README.md +22 -36
- data/html2rss.gemspec +4 -2
- data/lib/html2rss.rb +5 -1
- data/lib/html2rss/config.rb +6 -6
- data/lib/html2rss/item_extractor.rb +1 -1
- data/lib/html2rss/version.rb +1 -1
- metadata +39 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3d7642c21f46ea6086a3ead39b2923fa4c09ddc821719e2c40936846a83e5438
|
|
4
|
+
data.tar.gz: 8d6efa4ed03261f11e9ec302ea0cda860e83b74d78203c48b6650f1a7d9c106c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f6f1a3c52d9689b18d04a7b0c772ab0b9742202ca2e292b8265f7d26d85989f7201da9ab3de2db81258b82b8271c5789176976ea6c67c42edcb4739a556386f3
|
|
7
|
+
data.tar.gz: 8d892ec1d5761d6c5338e77f36a86debb2ccc1f1f7bac98ba83489c2aa8a7fe0638a45d355ff6fbd295a417d4df25f9ffc932fd15dd70f5465b0b61ee566cec0
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
html2rss (0.2.2)
|
|
4
|
+
html2rss (0.3.0)
|
|
5
5
|
faraday (~> 0.15)
|
|
6
6
|
faraday_middleware (~> 0.13)
|
|
7
7
|
nokogiri (>= 1.10)
|
|
@@ -10,34 +10,50 @@ PATH
|
|
|
10
10
|
GEM
|
|
11
11
|
remote: https://rubygems.org/
|
|
12
12
|
specs:
|
|
13
|
-
|
|
13
|
+
ast (2.4.0)
|
|
14
|
+
byebug (11.0.1)
|
|
14
15
|
crass (1.0.4)
|
|
15
16
|
diff-lcs (1.3)
|
|
16
|
-
docile (1.3.
|
|
17
|
+
docile (1.3.2)
|
|
17
18
|
faraday (0.15.4)
|
|
18
19
|
multipart-post (>= 1.2, < 3)
|
|
19
|
-
faraday_middleware (0.13.
|
|
20
|
+
faraday_middleware (0.13.1)
|
|
20
21
|
faraday (>= 0.7.4, < 1.0)
|
|
21
|
-
|
|
22
|
+
jaro_winkler (1.5.3)
|
|
23
|
+
json (2.2.0)
|
|
22
24
|
mini_portile2 (2.4.0)
|
|
23
|
-
multipart-post (2.
|
|
24
|
-
nokogiri (1.10.
|
|
25
|
+
multipart-post (2.1.1)
|
|
26
|
+
nokogiri (1.10.3)
|
|
25
27
|
mini_portile2 (~> 2.4.0)
|
|
26
28
|
nokogumbo (2.0.1)
|
|
27
29
|
nokogiri (~> 1.8, >= 1.8.4)
|
|
30
|
+
parallel (1.17.0)
|
|
31
|
+
parser (2.6.3.0)
|
|
32
|
+
ast (~> 2.4.0)
|
|
33
|
+
rainbow (3.0.0)
|
|
28
34
|
rspec (3.8.0)
|
|
29
35
|
rspec-core (~> 3.8.0)
|
|
30
36
|
rspec-expectations (~> 3.8.0)
|
|
31
37
|
rspec-mocks (~> 3.8.0)
|
|
32
|
-
rspec-core (3.8.
|
|
38
|
+
rspec-core (3.8.1)
|
|
33
39
|
rspec-support (~> 3.8.0)
|
|
34
|
-
rspec-expectations (3.8.
|
|
40
|
+
rspec-expectations (3.8.4)
|
|
35
41
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
36
42
|
rspec-support (~> 3.8.0)
|
|
37
|
-
rspec-mocks (3.8.
|
|
43
|
+
rspec-mocks (3.8.1)
|
|
38
44
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
39
45
|
rspec-support (~> 3.8.0)
|
|
40
|
-
rspec-support (3.8.
|
|
46
|
+
rspec-support (3.8.2)
|
|
47
|
+
rubocop (0.71.0)
|
|
48
|
+
jaro_winkler (~> 1.5.1)
|
|
49
|
+
parallel (~> 1.10)
|
|
50
|
+
parser (>= 2.6)
|
|
51
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
52
|
+
ruby-progressbar (~> 1.7)
|
|
53
|
+
unicode-display_width (>= 1.4.0, < 1.7)
|
|
54
|
+
rubocop-performance (1.4.0)
|
|
55
|
+
rubocop (>= 0.71.0)
|
|
56
|
+
ruby-progressbar (1.10.1)
|
|
41
57
|
sanitize (5.0.0)
|
|
42
58
|
crass (~> 1.0.2)
|
|
43
59
|
nokogiri (>= 1.8.0)
|
|
@@ -47,18 +63,21 @@ GEM
|
|
|
47
63
|
json (>= 1.8, < 3)
|
|
48
64
|
simplecov-html (~> 0.10.0)
|
|
49
65
|
simplecov-html (0.10.2)
|
|
50
|
-
|
|
66
|
+
unicode-display_width (1.6.0)
|
|
67
|
+
vcr (5.0.0)
|
|
51
68
|
|
|
52
69
|
PLATFORMS
|
|
53
70
|
ruby
|
|
54
71
|
|
|
55
72
|
DEPENDENCIES
|
|
56
73
|
bundler (~> 1.16)
|
|
57
|
-
byebug
|
|
74
|
+
byebug
|
|
58
75
|
html2rss!
|
|
59
76
|
rspec (~> 3.0)
|
|
77
|
+
rubocop
|
|
78
|
+
rubocop-performance
|
|
60
79
|
simplecov
|
|
61
|
-
vcr
|
|
80
|
+
vcr
|
|
62
81
|
|
|
63
82
|
BUNDLED WITH
|
|
64
|
-
1.
|
|
83
|
+
1.17.2
|
data/README.md
CHANGED
|
@@ -1,34 +1,23 @@
|
|
|
1
1
|

|
|
2
2
|
|
|
3
|
-
# html2rss [Build Status](https://travis-ci.org/gildesmarais/html2rss)
|
|
3
|
+
# html2rss [Build Status](https://travis-ci.org/gildesmarais/html2rss) [Gem Version](https://badge.fury.io/rb/html2rss)
|
|
4
4
|
|
|
5
|
-
Request
|
|
6
|
-
The config contains the URL to scrape and the selectors needed to extract
|
|
7
|
-
the required information. This gem provides some extractors (e.g. extract
|
|
8
|
-
the information from an HTML attribute).
|
|
5
|
+
Request HTML from an URL and transform it to a Ruby RSS 2.0 object.
|
|
9
6
|
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
**Are you searching for a ready to use "website to RSS" solution?**
|
|
8
|
+
[Check out `html2rss-web`!](https://github.com/gildesmarais/html2rss-web)
|
|
12
9
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
```ruby
|
|
18
|
-
gem 'html2rss'
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
And then execute:
|
|
22
|
-
|
|
23
|
-
$ bundle
|
|
24
|
-
|
|
25
|
-
Or install it yourself as:
|
|
10
|
+
Each website needs a feed config which contains the URL to scrape and
|
|
11
|
+
CSS selectors to extract the required information (like title, URL, ...).
|
|
12
|
+
This gem provides [extractors](https://github.com/gildesmarais/html2rss/blob/master/lib/html2rss/item_extractor.rb) (e.g. extract the information from an HTML attribute)
|
|
13
|
+
and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/html2rss/attribute_post_processors) to make information retrieval even easier.
|
|
26
14
|
|
|
27
|
-
|
|
15
|
+
## Installation
|
|
28
16
|
|
|
29
|
-
|
|
17
|
+
Add this line to your application's Gemfile: `gem 'html2rss'`
|
|
18
|
+
And then execute: `bundle`
|
|
30
19
|
|
|
31
|
-
## Usage with a YAML file
|
|
20
|
+
## Usage with a YAML config file
|
|
32
21
|
|
|
33
22
|
Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
|
|
34
23
|
|
|
@@ -36,15 +25,7 @@ Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://
|
|
|
36
25
|
|
|
37
26
|
an `RSS:Rss` object.
|
|
38
27
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
Find a minimal Sintra app which exposes your feeds to HTTP endpoints here:
|
|
42
|
-
[gildesmarais/html2rss-web](https://github.com/gildesmarais/html2rss-web)
|
|
43
|
-
|
|
44
|
-
### Tips and tricks
|
|
45
|
-
|
|
46
|
-
- Check that the channel url does not redirect to a mobile page
|
|
47
|
-
- fiddling with [`curl`](https://github.com/curl/curl) and [`pup`](https://github.com/ericchiang/pup) to find the selectors seems quite efficient
|
|
28
|
+
**Too complicated?** See [`html2rss-configs`](https://github.com/gildesmarais/html2rss-configs) for ready-made feed configs!
|
|
48
29
|
|
|
49
30
|
## Development
|
|
50
31
|
|
|
@@ -54,10 +35,15 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
|
54
35
|
|
|
55
36
|
Bug reports and pull requests are welcome on GitHub at https://github.com/gildesmarais/html2rss.
|
|
56
37
|
|
|
57
|
-
##
|
|
38
|
+
## Releasing a new version
|
|
58
39
|
|
|
59
|
-
|
|
40
|
+
1. increase version in `lib/version.rb`
|
|
41
|
+
2. `bundle`
|
|
42
|
+
3. commit the changes
|
|
43
|
+
4. `git tag v....`
|
|
44
|
+
5. `git push; git push --tags`
|
|
45
|
+
6. update the changelog, commit and push
|
|
60
46
|
|
|
61
|
-
|
|
47
|
+
### Changelog generation
|
|
62
48
|
|
|
63
|
-
The
|
|
49
|
+
The `CHANGELOG.md` can be generated automatically with [`standard-changelog`](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/standard-changelog).
|
data/html2rss.gemspec
CHANGED
|
@@ -33,8 +33,10 @@ Gem::Specification.new do |spec|
|
|
|
33
33
|
spec.add_dependency 'nokogiri', '>= 1.10'
|
|
34
34
|
spec.add_dependency 'sanitize', '~> 5.0'
|
|
35
35
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
|
36
|
-
spec.add_development_dependency 'byebug'
|
|
36
|
+
spec.add_development_dependency 'byebug'
|
|
37
37
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
38
|
+
spec.add_development_dependency 'rubocop'
|
|
39
|
+
spec.add_development_dependency 'rubocop-performance'
|
|
38
40
|
spec.add_development_dependency 'simplecov'
|
|
39
|
-
spec.add_development_dependency 'vcr'
|
|
41
|
+
spec.add_development_dependency 'vcr'
|
|
40
42
|
end
|
data/lib/html2rss.rb
CHANGED
|
@@ -5,7 +5,11 @@ require 'yaml'
|
|
|
5
5
|
|
|
6
6
|
module Html2rss
|
|
7
7
|
def self.feed_from_yaml_config(file, name)
|
|
8
|
-
|
|
8
|
+
yaml = YAML.load(File.open(file))
|
|
9
|
+
feed_config = yaml['feeds'][name]
|
|
10
|
+
global_config = yaml.reject { |k| k == 'feeds' }
|
|
11
|
+
|
|
12
|
+
config = Config.new(feed_config, global_config)
|
|
9
13
|
feed(config)
|
|
10
14
|
end
|
|
11
15
|
|
data/lib/html2rss/config.rb
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
module Html2rss
|
|
2
2
|
class Config
|
|
3
|
-
attr_reader :feed_config, :channel_config
|
|
3
|
+
attr_reader :feed_config, :channel_config, :global_config
|
|
4
4
|
|
|
5
|
-
def initialize(
|
|
6
|
-
@
|
|
7
|
-
@feed_config =
|
|
8
|
-
@channel_config =
|
|
5
|
+
def initialize(feed_config, global_config = {})
|
|
6
|
+
@global_config = global_config
|
|
7
|
+
@feed_config = feed_config
|
|
8
|
+
@channel_config = feed_config.fetch('channel', {})
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
def author
|
|
@@ -34,7 +34,7 @@ module Html2rss
|
|
|
34
34
|
alias link url
|
|
35
35
|
|
|
36
36
|
def headers
|
|
37
|
-
|
|
37
|
+
global_config.fetch('headers', {})
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
def options(name)
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html2rss
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Gil Desmarais
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-
|
|
11
|
+
date: 2019-06-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|
|
@@ -84,16 +84,16 @@ dependencies:
|
|
|
84
84
|
name: byebug
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
86
86
|
requirements:
|
|
87
|
-
- - "
|
|
87
|
+
- - ">="
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: '
|
|
89
|
+
version: '0'
|
|
90
90
|
type: :development
|
|
91
91
|
prerelease: false
|
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
|
94
|
-
- - "
|
|
94
|
+
- - ">="
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
|
-
version: '
|
|
96
|
+
version: '0'
|
|
97
97
|
- !ruby/object:Gem::Dependency
|
|
98
98
|
name: rspec
|
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -108,6 +108,34 @@ dependencies:
|
|
|
108
108
|
- - "~>"
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: '3.0'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: rubocop
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0'
|
|
118
|
+
type: :development
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '0'
|
|
125
|
+
- !ruby/object:Gem::Dependency
|
|
126
|
+
name: rubocop-performance
|
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - ">="
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: '0'
|
|
132
|
+
type: :development
|
|
133
|
+
prerelease: false
|
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
+
requirements:
|
|
136
|
+
- - ">="
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
version: '0'
|
|
111
139
|
- !ruby/object:Gem::Dependency
|
|
112
140
|
name: simplecov
|
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -126,16 +154,16 @@ dependencies:
|
|
|
126
154
|
name: vcr
|
|
127
155
|
requirement: !ruby/object:Gem::Requirement
|
|
128
156
|
requirements:
|
|
129
|
-
- - "
|
|
157
|
+
- - ">="
|
|
130
158
|
- !ruby/object:Gem::Version
|
|
131
|
-
version: '
|
|
159
|
+
version: '0'
|
|
132
160
|
type: :development
|
|
133
161
|
prerelease: false
|
|
134
162
|
version_requirements: !ruby/object:Gem::Requirement
|
|
135
163
|
requirements:
|
|
136
|
-
- - "
|
|
164
|
+
- - ">="
|
|
137
165
|
- !ruby/object:Gem::Version
|
|
138
|
-
version: '
|
|
166
|
+
version: '0'
|
|
139
167
|
description: |-
|
|
140
168
|
Create your config object, include the url to scrape,
|
|
141
169
|
some selectors and get a RSS2 feed in return.
|
|
@@ -191,7 +219,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
191
219
|
- !ruby/object:Gem::Version
|
|
192
220
|
version: '0'
|
|
193
221
|
requirements: []
|
|
194
|
-
rubygems_version: 3.0.
|
|
222
|
+
rubygems_version: 3.0.4
|
|
195
223
|
signing_key:
|
|
196
224
|
specification_version: 4
|
|
197
225
|
summary: Generate RSS feeds by scraping websites by providing a config.
|