html2rss 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d7642c21f46ea6086a3ead39b2923fa4c09ddc821719e2c40936846a83e5438
4
- data.tar.gz: 8d6efa4ed03261f11e9ec302ea0cda860e83b74d78203c48b6650f1a7d9c106c
3
+ metadata.gz: 6a7c25abdeeccbeb69e1b7942c2f8dd9c7da543a090cc1108debcf3370b88989
4
+ data.tar.gz: 34dc564b089dfec7e2a220288484dd24fe407743301fcb4c2d74f37c35a2325b
5
5
  SHA512:
6
- metadata.gz: f6f1a3c52d9689b18d04a7b0c772ab0b9742202ca2e292b8265f7d26d85989f7201da9ab3de2db81258b82b8271c5789176976ea6c67c42edcb4739a556386f3
7
- data.tar.gz: 8d892ec1d5761d6c5338e77f36a86debb2ccc1f1f7bac98ba83489c2aa8a7fe0638a45d355ff6fbd295a417d4df25f9ffc932fd15dd70f5465b0b61ee566cec0
6
+ metadata.gz: a2168f96899c9a53b9b55010c1746ce0dca9e78ec5004903d837fb333b1a158b7c124eaf08ef34f2cb6515164d9dc20a3c16c47fe672c4ba80b1241cf3decdd3
7
+ data.tar.gz: 0fb315100e1cc6b55d81b363186b3e994e54c0c58e3381ca5bc3c5884fd5b993e0364db311eea64688cdaa72e5c865055b45e8f0268db48773355c035083e9b6
@@ -1,3 +1,22 @@
1
+ # [0.3.1](https://github.com/gildesmarais/html2rss/compare/v0.3.0...v0.3.1) (2019-06-23)
2
+
3
+
4
+ ### Features
5
+
6
+ * handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad824))
7
+ * support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d))
8
+
9
+
10
+ # [0.3.0](https://github.com/gildesmarais/html2rss/compare/v0.2.2...v0.3.0) (2019-06-20)
11
+
12
+
13
+ ### Features
14
+
15
+ * add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e06329))
16
+ * change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746))
17
+
18
+
19
+
1
20
  # [0.2.1](https://github.com/gildesmarais/html2rss/compare/v0.2.0...v0.2.1) (2018-11-18)
2
21
 
3
22
 
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.3.0)
4
+ html2rss (0.3.1)
5
5
  faraday (~> 0.15)
6
6
  faraday_middleware (~> 0.13)
7
- nokogiri (>= 1.10)
7
+ hashie (~> 3.6)
8
+ nokogiri (>= 1.10, < 2.0)
8
9
  sanitize (~> 5.0)
9
10
 
10
11
  GEM
@@ -19,6 +20,7 @@ GEM
19
20
  multipart-post (>= 1.2, < 3)
20
21
  faraday_middleware (0.13.1)
21
22
  faraday (>= 0.7.4, < 1.0)
23
+ hashie (3.6.0)
22
24
  jaro_winkler (1.5.3)
23
25
  json (2.2.0)
24
26
  mini_portile2 (2.4.0)
data/README.md CHANGED
@@ -17,6 +17,19 @@ and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/h
17
17
  Add this line to your application's Gemfile: `gem 'html2rss'`
18
18
  And then execute: `bundle`
19
19
 
20
+ ```ruby
21
+ rss = Html2rss.feed(
22
+ channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
23
+ selectors: {
24
+ items: { selector: '#hot-network-questions > ul > li' },
25
+ title: { selector: 'a' },
26
+ link: { selector: 'a', extractor: 'href' }
27
+ }
28
+ )
29
+
30
+ puts rss.to_s
31
+ ```
32
+
20
33
  ## Usage with a YAML config file
21
34
 
22
35
  Create a YAML config file. Find an example at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
@@ -8,9 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ['Gil Desmarais']
9
9
  spec.email = ['html2rss@desmarais.de']
10
10
 
11
- spec.summary = 'Generate RSS feeds by scraping websites by providing a config.'
12
- spec.description = 'Create your config object, include the url to scrape,
13
- some selectors and get a RSS2 feed in return.'
11
+ spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
12
+ spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
14
13
  spec.homepage = 'https://github.com/gildesmarais/html2rss'
15
14
  spec.license = 'MIT'
16
15
 
@@ -30,7 +29,8 @@ Gem::Specification.new do |spec|
30
29
 
31
30
  spec.add_dependency 'faraday', '~> 0.15'
32
31
  spec.add_dependency 'faraday_middleware', '~> 0.13'
33
- spec.add_dependency 'nokogiri', '>= 1.10'
32
+ spec.add_dependency 'hashie', '~> 3.6'
33
+ spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
34
34
  spec.add_dependency 'sanitize', '~> 5.0'
35
35
  spec.add_development_dependency 'bundler', '~> 1.16'
36
36
  spec.add_development_dependency 'byebug'
@@ -5,7 +5,10 @@ require 'yaml'
5
5
 
6
6
  module Html2rss
7
7
  def self.feed_from_yaml_config(file, name)
8
+ # rubocop:disable Security/YAMLLoad
8
9
  yaml = YAML.load(File.open(file))
10
+ # rubocop:enable Security/YAMLLoad
11
+
9
12
  feed_config = yaml['feeds'][name]
10
13
  global_config = yaml.reject { |k| k == 'feeds' }
11
14
 
@@ -13,7 +16,23 @@ module Html2rss
13
16
  feed(config)
14
17
  end
15
18
 
19
+ ##
20
+ # Returns the RSS object, which is generated from the provided config.
21
+ #
22
+ # `config`: can be a Hash or an instance of Html2rss::Config.
23
+ #
24
+ # = Example with a Ruby Hash
25
+ # Html2rss.feed(
26
+ # channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
27
+ # selectors: {
28
+ # items: { selector: '#hot-network-questions > ul > li' },
29
+ # title: { selector: 'a' },
30
+ # link: { selector: 'a', extractor: 'href' }
31
+ # }
32
+ # )
16
33
  def self.feed(config)
34
+ config = Config.new(config) unless config.is_a?(Config)
35
+
17
36
  feed = FeedBuilder.new config
18
37
  feed.rss
19
38
  end
@@ -1,11 +1,18 @@
1
+ require 'hashie'
2
+
1
3
  module Html2rss
2
4
  class Config
3
5
  attr_reader :feed_config, :channel_config, :global_config
4
6
 
7
+ class IndifferentAccessHash < Hash
8
+ include Hashie::Extensions::MergeInitializer
9
+ include Hashie::Extensions::IndifferentAccess
10
+ end
11
+
5
12
  def initialize(feed_config, global_config = {})
6
- @global_config = global_config
7
- @feed_config = feed_config
8
- @channel_config = feed_config.fetch('channel', {})
13
+ @global_config = IndifferentAccessHash.new global_config
14
+ @feed_config = IndifferentAccessHash.new feed_config
15
+ @channel_config = IndifferentAccessHash.new @feed_config.fetch('channel', {})
9
16
  end
10
17
 
11
18
  def author
@@ -13,7 +20,7 @@ module Html2rss
13
20
  end
14
21
 
15
22
  def ttl
16
- (channel_config.fetch 'ttl').to_i || nil
23
+ channel_config.fetch 'ttl', 3600
17
24
  end
18
25
 
19
26
  def title
@@ -38,9 +38,7 @@ module Html2rss
38
38
  end
39
39
 
40
40
  def valid?
41
- return false if [title.to_s, description.to_s].join('') == ''
42
-
43
- true
41
+ [title.to_s, description.to_s].join('') != ''
44
42
  end
45
43
 
46
44
  def categories
@@ -1,10 +1,15 @@
1
1
  module Html2rss
2
2
  module ItemExtractor
3
- TEXT = proc { |xml, options| xml.css(options['selector'])&.text&.strip }
4
- ATTRIBUTE = proc { |xml, options| xml.css(options['selector']).attr(options['attribute']).to_s }
3
+ TEXT = proc { |xml, options|
4
+ element(xml, options)&.text&.strip&.split&.join(' ')
5
+ }
6
+
7
+ ATTRIBUTE = proc { |xml, options|
8
+ element(xml, options).attr(options['attribute']).to_s
9
+ }
5
10
 
6
11
  HREF = proc { |xml, options|
7
- href = xml.css(options['selector']).attr('href').to_s
12
+ href = element(xml, options).attr('href').to_s
8
13
  path, query = href.split('?')
9
14
 
10
15
  if href.start_with?('http')
@@ -18,8 +23,15 @@ module Html2rss
18
23
  uri
19
24
  }
20
25
 
21
- HTML = proc { |xml, options| xml.css(options['selector']).to_s }
26
+ HTML = proc { |xml, options|
27
+ element(xml, options).to_s
28
+ }
29
+
22
30
  STATIC = proc { |_xml, options| options['static'] }
23
31
  CURRENT_TIME = proc { |_xml, _options| Time.new }
32
+
33
+ def self.element(xml, options)
34
+ options['selector'] ? xml.css(options['selector']) : xml
35
+ end
24
36
  end
25
37
  end
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.3.0'.freeze
2
+ VERSION = '0.3.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-20 00:00:00.000000000 Z
11
+ date: 2019-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.13'
41
+ - !ruby/object:Gem::Dependency
42
+ name: hashie
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: nokogiri
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +59,9 @@ dependencies:
45
59
  - - ">="
46
60
  - !ruby/object:Gem::Version
47
61
  version: '1.10'
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '2.0'
48
65
  type: :runtime
49
66
  prerelease: false
50
67
  version_requirements: !ruby/object:Gem::Requirement
@@ -52,6 +69,9 @@ dependencies:
52
69
  - - ">="
53
70
  - !ruby/object:Gem::Version
54
71
  version: '1.10'
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.0'
55
75
  - !ruby/object:Gem::Dependency
56
76
  name: sanitize
57
77
  requirement: !ruby/object:Gem::Requirement
@@ -164,9 +184,8 @@ dependencies:
164
184
  - - ">="
165
185
  - !ruby/object:Gem::Version
166
186
  version: '0'
167
- description: |-
168
- Create your config object, include the url to scrape,
169
- some selectors and get a RSS2 feed in return.
187
+ description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
188
+ in return.
170
189
  email:
171
190
  - html2rss@desmarais.de
172
191
  executables: []
@@ -222,5 +241,5 @@ requirements: []
222
241
  rubygems_version: 3.0.4
223
242
  signing_key:
224
243
  specification_version: 4
225
- summary: Generate RSS feeds by scraping websites by providing a config.
244
+ summary: Returns an RSS::Rss object by scraping a URL.
226
245
  test_files: []