html2rss 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/Gemfile.lock +4 -2
- data/README.md +13 -0
- data/html2rss.gemspec +4 -4
- data/lib/html2rss.rb +19 -0
- data/lib/html2rss/config.rb +11 -4
- data/lib/html2rss/item.rb +1 -3
- data/lib/html2rss/item_extractor.rb +16 -4
- data/lib/html2rss/version.rb +1 -1
- metadata +25 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a7c25abdeeccbeb69e1b7942c2f8dd9c7da543a090cc1108debcf3370b88989
|
4
|
+
data.tar.gz: 34dc564b089dfec7e2a220288484dd24fe407743301fcb4c2d74f37c35a2325b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2168f96899c9a53b9b55010c1746ce0dca9e78ec5004903d837fb333b1a158b7c124eaf08ef34f2cb6515164d9dc20a3c16c47fe672c4ba80b1241cf3decdd3
|
7
|
+
data.tar.gz: 0fb315100e1cc6b55d81b363186b3e994e54c0c58e3381ca5bc3c5884fd5b993e0364db311eea64688cdaa72e5c865055b45e8f0268db48773355c035083e9b6
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
+
# [0.3.1](https://github.com/gildesmarais/html2rss/compare/v0.3.0...v0.3.1) (2019-06-23)
|
2
|
+
|
3
|
+
|
4
|
+
### Features
|
5
|
+
|
6
|
+
* handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad824))
|
7
|
+
* support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d))
|
8
|
+
|
9
|
+
|
10
|
+
# [0.3.0](https://github.com/gildesmarais/html2rss/compare/v0.2.2...v0.3.0) (2019-06-20)
|
11
|
+
|
12
|
+
|
13
|
+
### Features
|
14
|
+
|
15
|
+
* add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e06329))
|
16
|
+
* change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746))
|
17
|
+
|
18
|
+
|
19
|
+
|
1
20
|
# [0.2.1](https://github.com/gildesmarais/html2rss/compare/v0.2.0...v0.2.1) (2018-11-18)
|
2
21
|
|
3
22
|
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2rss (0.3.0)
|
4
|
+
html2rss (0.3.1)
|
5
5
|
faraday (~> 0.15)
|
6
6
|
faraday_middleware (~> 0.13)
|
7
|
-
|
7
|
+
hashie (~> 3.6)
|
8
|
+
nokogiri (>= 1.10, < 2.0)
|
8
9
|
sanitize (~> 5.0)
|
9
10
|
|
10
11
|
GEM
|
@@ -19,6 +20,7 @@ GEM
|
|
19
20
|
multipart-post (>= 1.2, < 3)
|
20
21
|
faraday_middleware (0.13.1)
|
21
22
|
faraday (>= 0.7.4, < 1.0)
|
23
|
+
hashie (3.6.0)
|
22
24
|
jaro_winkler (1.5.3)
|
23
25
|
json (2.2.0)
|
24
26
|
mini_portile2 (2.4.0)
|
data/README.md
CHANGED
@@ -17,6 +17,19 @@ and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/h
|
|
17
17
|
Add this line to your application's Gemfile: `gem 'html2rss'`
|
18
18
|
And then execute: `bundle`
|
19
19
|
|
20
|
+
```ruby
|
21
|
+
rss = Html2rss.feed(
|
22
|
+
channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
|
23
|
+
selectors: {
|
24
|
+
items: { selector: '#hot-network-questions > ul > li' },
|
25
|
+
title: { selector: 'a' },
|
26
|
+
link: { selector: 'a', extractor: 'href' }
|
27
|
+
}
|
28
|
+
)
|
29
|
+
|
30
|
+
puts rss.to_s
|
31
|
+
```
|
32
|
+
|
20
33
|
## Usage with a YAML config file
|
21
34
|
|
22
35
|
Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
|
data/html2rss.gemspec
CHANGED
@@ -8,9 +8,8 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ['Gil Desmarais']
|
9
9
|
spec.email = ['html2rss@desmarais.de']
|
10
10
|
|
11
|
-
spec.summary = '
|
12
|
-
spec.description = '
|
13
|
-
some selectors and get a RSS2 feed in return.'
|
11
|
+
spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
|
12
|
+
spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
|
14
13
|
spec.homepage = 'https://github.com/gildesmarais/html2rss'
|
15
14
|
spec.license = 'MIT'
|
16
15
|
|
@@ -30,7 +29,8 @@ Gem::Specification.new do |spec|
|
|
30
29
|
|
31
30
|
spec.add_dependency 'faraday', '~> 0.15'
|
32
31
|
spec.add_dependency 'faraday_middleware', '~> 0.13'
|
33
|
-
spec.add_dependency 'nokogiri', '>= 1.10'
|
32
|
+
spec.add_dependency 'hashie', '~> 3.6'
|
33
|
+
spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
|
34
34
|
spec.add_dependency 'sanitize', '~> 5.0'
|
35
35
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
36
36
|
spec.add_development_dependency 'byebug'
|
data/lib/html2rss.rb
CHANGED
@@ -5,7 +5,10 @@ require 'yaml'
|
|
5
5
|
|
6
6
|
module Html2rss
|
7
7
|
def self.feed_from_yaml_config(file, name)
|
8
|
+
# rubocop:disable Security/YAMLLoad
|
8
9
|
yaml = YAML.load(File.open(file))
|
10
|
+
# rubocop:enable Security/YAMLLoad
|
11
|
+
|
9
12
|
feed_config = yaml['feeds'][name]
|
10
13
|
global_config = yaml.reject { |k| k == 'feeds' }
|
11
14
|
|
@@ -13,7 +16,23 @@ module Html2rss
|
|
13
16
|
feed(config)
|
14
17
|
end
|
15
18
|
|
19
|
+
##
|
20
|
+
# Returns the RSS object, which is generated from the provided config.
|
21
|
+
#
|
22
|
+
# `config`: can be a Hash or an instance of Html2rss::Config.
|
23
|
+
#
|
24
|
+
# = Example with a Ruby Hash
|
25
|
+
# Html2rss.feed(
|
26
|
+
# channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
|
27
|
+
# selectors: {
|
28
|
+
# items: { selector: '#hot-network-questions > ul > li' },
|
29
|
+
# title: { selector: 'a' },
|
30
|
+
# link: { selector: 'a', extractor: 'href' }
|
31
|
+
# }
|
32
|
+
# )
|
16
33
|
def self.feed(config)
|
34
|
+
config = Config.new(config) unless config.is_a?(Config)
|
35
|
+
|
17
36
|
feed = FeedBuilder.new config
|
18
37
|
feed.rss
|
19
38
|
end
|
data/lib/html2rss/config.rb
CHANGED
@@ -1,11 +1,18 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
|
1
3
|
module Html2rss
|
2
4
|
class Config
|
3
5
|
attr_reader :feed_config, :channel_config, :global_config
|
4
6
|
|
7
|
+
class IndifferentAccessHash < Hash
|
8
|
+
include Hashie::Extensions::MergeInitializer
|
9
|
+
include Hashie::Extensions::IndifferentAccess
|
10
|
+
end
|
11
|
+
|
5
12
|
def initialize(feed_config, global_config = {})
|
6
|
-
@global_config = global_config
|
7
|
-
@feed_config = feed_config
|
8
|
-
@channel_config = feed_config.fetch('channel', {})
|
13
|
+
@global_config = IndifferentAccessHash.new global_config
|
14
|
+
@feed_config = IndifferentAccessHash.new feed_config
|
15
|
+
@channel_config = IndifferentAccessHash.new @feed_config.fetch('channel', {})
|
9
16
|
end
|
10
17
|
|
11
18
|
def author
|
@@ -13,7 +20,7 @@ module Html2rss
|
|
13
20
|
end
|
14
21
|
|
15
22
|
def ttl
|
16
|
-
|
23
|
+
channel_config.fetch 'ttl', 3600
|
17
24
|
end
|
18
25
|
|
19
26
|
def title
|
data/lib/html2rss/item.rb
CHANGED
data/lib/html2rss/item_extractor.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
module Html2rss
|
2
2
|
module ItemExtractor
|
3
|
-
TEXT = proc { |xml, options|
|
4
|
-
|
3
|
+
TEXT = proc { |xml, options|
|
4
|
+
element(xml, options)&.text&.strip&.split&.join(' ')
|
5
|
+
}
|
6
|
+
|
7
|
+
ATTRIBUTE = proc { |xml, options|
|
8
|
+
element(xml, options).attr(options['attribute']).to_s
|
9
|
+
}
|
5
10
|
|
6
11
|
HREF = proc { |xml, options|
|
7
|
-
href = xml
|
12
|
+
href = element(xml, options).attr('href').to_s
|
8
13
|
path, query = href.split('?')
|
9
14
|
|
10
15
|
if href.start_with?('http')
|
@@ -18,8 +23,15 @@ module Html2rss
|
|
18
23
|
uri
|
19
24
|
}
|
20
25
|
|
21
|
-
HTML = proc { |xml, options|
|
26
|
+
HTML = proc { |xml, options|
|
27
|
+
element(xml, options).to_s
|
28
|
+
}
|
29
|
+
|
22
30
|
STATIC = proc { |_xml, options| options['static'] }
|
23
31
|
CURRENT_TIME = proc { |_xml, _options| Time.new }
|
32
|
+
|
33
|
+
def self.element(xml, options)
|
34
|
+
options['selector'] ? xml.css(options['selector']) : xml
|
35
|
+
end
|
24
36
|
end
|
25
37
|
end
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-20 00:00:00.000000000 Z
|
11
|
+
date: 2019-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0.13'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hashie
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.6'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: nokogiri
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +59,9 @@ dependencies:
|
|
45
59
|
- - ">="
|
46
60
|
- !ruby/object:Gem::Version
|
47
61
|
version: '1.10'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '2.0'
|
48
65
|
type: :runtime
|
49
66
|
prerelease: false
|
50
67
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -52,6 +69,9 @@ dependencies:
|
|
52
69
|
- - ">="
|
53
70
|
- !ruby/object:Gem::Version
|
54
71
|
version: '1.10'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.0'
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: sanitize
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,9 +184,8 @@ dependencies:
|
|
164
184
|
- - ">="
|
165
185
|
- !ruby/object:Gem::Version
|
166
186
|
version: '0'
|
167
|
-
description:
|
168
|
-
|
169
|
-
some selectors and get a RSS2 feed in return.
|
187
|
+
description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
|
188
|
+
in return.
|
170
189
|
email:
|
171
190
|
- html2rss@desmarais.de
|
172
191
|
executables: []
|
@@ -222,5 +241,5 @@ requirements: []
|
|
222
241
|
rubygems_version: 3.0.4
|
223
242
|
signing_key:
|
224
243
|
specification_version: 4
|
225
|
-
summary:
|
244
|
+
summary: Returns an RSS::Rss object by scraping a URL.
|
226
245
|
test_files: []
|