html2rss 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/Gemfile.lock +4 -2
- data/README.md +13 -0
- data/html2rss.gemspec +4 -4
- data/lib/html2rss.rb +19 -0
- data/lib/html2rss/config.rb +11 -4
- data/lib/html2rss/item.rb +1 -3
- data/lib/html2rss/item_extractor.rb +16 -4
- data/lib/html2rss/version.rb +1 -1
- metadata +25 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a7c25abdeeccbeb69e1b7942c2f8dd9c7da543a090cc1108debcf3370b88989
|
4
|
+
data.tar.gz: 34dc564b089dfec7e2a220288484dd24fe407743301fcb4c2d74f37c35a2325b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2168f96899c9a53b9b55010c1746ce0dca9e78ec5004903d837fb333b1a158b7c124eaf08ef34f2cb6515164d9dc20a3c16c47fe672c4ba80b1241cf3decdd3
|
7
|
+
data.tar.gz: 0fb315100e1cc6b55d81b363186b3e994e54c0c58e3381ca5bc3c5884fd5b993e0364db311eea64688cdaa72e5c865055b45e8f0268db48773355c035083e9b6
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
+
# [0.3.1](https://github.com/gildesmarais/html2rss/compare/v0.3.0...v0.3.1) (2019-06-23)
|
2
|
+
|
3
|
+
|
4
|
+
### Features
|
5
|
+
|
6
|
+
* handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad824))
|
7
|
+
* support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d))
|
8
|
+
|
9
|
+
|
10
|
+
# [0.3.0](https://github.com/gildesmarais/html2rss/compare/v0.2.2...v0.3.0) (2019-06-20)
|
11
|
+
|
12
|
+
|
13
|
+
### Features
|
14
|
+
|
15
|
+
* add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e06329))
|
16
|
+
* change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746))
|
17
|
+
|
18
|
+
|
19
|
+
|
1
20
|
# [0.2.1](https://github.com/gildesmarais/html2rss/compare/v0.2.0...v0.2.1) (2018-11-18)
|
2
21
|
|
3
22
|
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2rss (0.3.0)
|
4
|
+
html2rss (0.3.1)
|
5
5
|
faraday (~> 0.15)
|
6
6
|
faraday_middleware (~> 0.13)
|
7
|
-
|
7
|
+
hashie (~> 3.6)
|
8
|
+
nokogiri (>= 1.10, < 2.0)
|
8
9
|
sanitize (~> 5.0)
|
9
10
|
|
10
11
|
GEM
|
@@ -19,6 +20,7 @@ GEM
|
|
19
20
|
multipart-post (>= 1.2, < 3)
|
20
21
|
faraday_middleware (0.13.1)
|
21
22
|
faraday (>= 0.7.4, < 1.0)
|
23
|
+
hashie (3.6.0)
|
22
24
|
jaro_winkler (1.5.3)
|
23
25
|
json (2.2.0)
|
24
26
|
mini_portile2 (2.4.0)
|
data/README.md
CHANGED
@@ -17,6 +17,19 @@ and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/h
|
|
17
17
|
Add this line to your application's Gemfile: `gem 'html2rss'`
|
18
18
|
And then execute: `bundle`
|
19
19
|
|
20
|
+
```ruby
|
21
|
+
rss = Html2rss.feed(
|
22
|
+
channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
|
23
|
+
selectors: {
|
24
|
+
items: { selector: '#hot-network-questions > ul > li' },
|
25
|
+
title: { selector: 'a' },
|
26
|
+
link: { selector: 'a', extractor: 'href' }
|
27
|
+
}
|
28
|
+
)
|
29
|
+
|
30
|
+
puts rss.to_s
|
31
|
+
```
|
32
|
+
|
20
33
|
## Usage with a YAML config file
|
21
34
|
|
22
35
|
Create a YAML config file. Find an example at [`rspec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
|
data/html2rss.gemspec
CHANGED
@@ -8,9 +8,8 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ['Gil Desmarais']
|
9
9
|
spec.email = ['html2rss@desmarais.de']
|
10
10
|
|
11
|
-
spec.summary = '
|
12
|
-
spec.description = '
|
13
|
-
some selectors and get a RSS2 feed in return.'
|
11
|
+
spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
|
12
|
+
spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
|
14
13
|
spec.homepage = 'https://github.com/gildesmarais/html2rss'
|
15
14
|
spec.license = 'MIT'
|
16
15
|
|
@@ -30,7 +29,8 @@ Gem::Specification.new do |spec|
|
|
30
29
|
|
31
30
|
spec.add_dependency 'faraday', '~> 0.15'
|
32
31
|
spec.add_dependency 'faraday_middleware', '~> 0.13'
|
33
|
-
spec.add_dependency 'nokogiri', '>= 1.10'
|
32
|
+
spec.add_dependency 'hashie', '~> 3.6'
|
33
|
+
spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
|
34
34
|
spec.add_dependency 'sanitize', '~> 5.0'
|
35
35
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
36
36
|
spec.add_development_dependency 'byebug'
|
data/lib/html2rss.rb
CHANGED
@@ -5,7 +5,10 @@ require 'yaml'
|
|
5
5
|
|
6
6
|
module Html2rss
|
7
7
|
def self.feed_from_yaml_config(file, name)
|
8
|
+
# rubocop:disable Security/YAMLLoad
|
8
9
|
yaml = YAML.load(File.open(file))
|
10
|
+
# rubocop:enable Security/YAMLLoad
|
11
|
+
|
9
12
|
feed_config = yaml['feeds'][name]
|
10
13
|
global_config = yaml.reject { |k| k == 'feeds' }
|
11
14
|
|
@@ -13,7 +16,23 @@ module Html2rss
|
|
13
16
|
feed(config)
|
14
17
|
end
|
15
18
|
|
19
|
+
##
|
20
|
+
# Returns the RSS object, which is generated from the provided config.
|
21
|
+
#
|
22
|
+
# `config`: can be a Hash or an instance of Html2rss::Config.
|
23
|
+
#
|
24
|
+
# = Example with a Ruby Hash
|
25
|
+
# Html2rss.feed(
|
26
|
+
# channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
|
27
|
+
# selectors: {
|
28
|
+
# items: { selector: '#hot-network-questions > ul > li' },
|
29
|
+
# title: { selector: 'a' },
|
30
|
+
# link: { selector: 'a', extractor: 'href' }
|
31
|
+
# }
|
32
|
+
# )
|
16
33
|
def self.feed(config)
|
34
|
+
config = Config.new(config) unless config.is_a?(Config)
|
35
|
+
|
17
36
|
feed = FeedBuilder.new config
|
18
37
|
feed.rss
|
19
38
|
end
|
data/lib/html2rss/config.rb
CHANGED
@@ -1,11 +1,18 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
|
1
3
|
module Html2rss
|
2
4
|
class Config
|
3
5
|
attr_reader :feed_config, :channel_config, :global_config
|
4
6
|
|
7
|
+
class IndifferentAccessHash < Hash
|
8
|
+
include Hashie::Extensions::MergeInitializer
|
9
|
+
include Hashie::Extensions::IndifferentAccess
|
10
|
+
end
|
11
|
+
|
5
12
|
def initialize(feed_config, global_config = {})
|
6
|
-
@global_config = global_config
|
7
|
-
@feed_config = feed_config
|
8
|
-
@channel_config = feed_config.fetch('channel', {})
|
13
|
+
@global_config = IndifferentAccessHash.new global_config
|
14
|
+
@feed_config = IndifferentAccessHash.new feed_config
|
15
|
+
@channel_config = IndifferentAccessHash.new @feed_config.fetch('channel', {})
|
9
16
|
end
|
10
17
|
|
11
18
|
def author
|
@@ -13,7 +20,7 @@ module Html2rss
|
|
13
20
|
end
|
14
21
|
|
15
22
|
def ttl
|
16
|
-
|
23
|
+
channel_config.fetch 'ttl', 3600
|
17
24
|
end
|
18
25
|
|
19
26
|
def title
|
data/lib/html2rss/item.rb
CHANGED
data/lib/html2rss/item_extractor.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
module Html2rss
|
2
2
|
module ItemExtractor
|
3
|
-
TEXT = proc { |xml, options|
|
4
|
-
|
3
|
+
TEXT = proc { |xml, options|
|
4
|
+
element(xml, options)&.text&.strip&.split&.join(' ')
|
5
|
+
}
|
6
|
+
|
7
|
+
ATTRIBUTE = proc { |xml, options|
|
8
|
+
element(xml, options).attr(options['attribute']).to_s
|
9
|
+
}
|
5
10
|
|
6
11
|
HREF = proc { |xml, options|
|
7
|
-
href = xml
|
12
|
+
href = element(xml, options).attr('href').to_s
|
8
13
|
path, query = href.split('?')
|
9
14
|
|
10
15
|
if href.start_with?('http')
|
@@ -18,8 +23,15 @@ module Html2rss
|
|
18
23
|
uri
|
19
24
|
}
|
20
25
|
|
21
|
-
HTML = proc { |xml, options|
|
26
|
+
HTML = proc { |xml, options|
|
27
|
+
element(xml, options).to_s
|
28
|
+
}
|
29
|
+
|
22
30
|
STATIC = proc { |_xml, options| options['static'] }
|
23
31
|
CURRENT_TIME = proc { |_xml, _options| Time.new }
|
32
|
+
|
33
|
+
def self.element(xml, options)
|
34
|
+
options['selector'] ? xml.css(options['selector']) : xml
|
35
|
+
end
|
24
36
|
end
|
25
37
|
end
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-20 00:00:00.000000000 Z
|
11
|
+
date: 2019-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0.13'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hashie
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.6'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: nokogiri
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +59,9 @@ dependencies:
|
|
45
59
|
- - ">="
|
46
60
|
- !ruby/object:Gem::Version
|
47
61
|
version: '1.10'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '2.0'
|
48
65
|
type: :runtime
|
49
66
|
prerelease: false
|
50
67
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -52,6 +69,9 @@ dependencies:
|
|
52
69
|
- - ">="
|
53
70
|
- !ruby/object:Gem::Version
|
54
71
|
version: '1.10'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.0'
|
55
75
|
- !ruby/object:Gem::Dependency
|
56
76
|
name: sanitize
|
57
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,9 +184,8 @@ dependencies:
|
|
164
184
|
- - ">="
|
165
185
|
- !ruby/object:Gem::Version
|
166
186
|
version: '0'
|
167
|
-
description:
|
168
|
-
|
169
|
-
some selectors and get a RSS2 feed in return.
|
187
|
+
description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
|
188
|
+
in return.
|
170
189
|
email:
|
171
190
|
- html2rss@desmarais.de
|
172
191
|
executables: []
|
@@ -222,5 +241,5 @@ requirements: []
|
|
222
241
|
rubygems_version: 3.0.4
|
223
242
|
signing_key:
|
224
243
|
specification_version: 4
|
225
|
-
summary:
|
244
|
+
summary: Returns an RSS::Rss object by scraping a URL.
|
226
245
|
test_files: []
|