html2rss 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d7642c21f46ea6086a3ead39b2923fa4c09ddc821719e2c40936846a83e5438
4
- data.tar.gz: 8d6efa4ed03261f11e9ec302ea0cda860e83b74d78203c48b6650f1a7d9c106c
3
+ metadata.gz: 6a7c25abdeeccbeb69e1b7942c2f8dd9c7da543a090cc1108debcf3370b88989
4
+ data.tar.gz: 34dc564b089dfec7e2a220288484dd24fe407743301fcb4c2d74f37c35a2325b
5
5
  SHA512:
6
- metadata.gz: f6f1a3c52d9689b18d04a7b0c772ab0b9742202ca2e292b8265f7d26d85989f7201da9ab3de2db81258b82b8271c5789176976ea6c67c42edcb4739a556386f3
7
- data.tar.gz: 8d892ec1d5761d6c5338e77f36a86debb2ccc1f1f7bac98ba83489c2aa8a7fe0638a45d355ff6fbd295a417d4df25f9ffc932fd15dd70f5465b0b61ee566cec0
6
+ metadata.gz: a2168f96899c9a53b9b55010c1746ce0dca9e78ec5004903d837fb333b1a158b7c124eaf08ef34f2cb6515164d9dc20a3c16c47fe672c4ba80b1241cf3decdd3
7
+ data.tar.gz: 0fb315100e1cc6b55d81b363186b3e994e54c0c58e3381ca5bc3c5884fd5b993e0364db311eea64688cdaa72e5c865055b45e8f0268db48773355c035083e9b6
@@ -1,3 +1,22 @@
1
+ # [0.3.1](https://github.com/gildesmarais/html2rss/compare/v0.3.0...v0.3.1) (2019-06-23)
2
+
3
+
4
+ ### Features
5
+
6
+ * handle string and symbol keys in config hashes ([#15](https://github.com/gildesmarais/html2rss/issues/15)) ([93ad824](https://github.com/gildesmarais/html2rss/commit/93ad824))
7
+ * support attributes without selector, fallback to root element then ([#16](https://github.com/gildesmarais/html2rss/issues/16)) ([d99ae3d](https://github.com/gildesmarais/html2rss/commit/d99ae3d))
8
+
9
+
10
+ # [0.3.0](https://github.com/gildesmarais/html2rss/compare/v0.2.2...v0.3.0) (2019-06-20)
11
+
12
+
13
+ ### Features
14
+
15
+ * add rubocop and update development deps ([#13](https://github.com/gildesmarais/html2rss/issues/13)) ([6e06329](https://github.com/gildesmarais/html2rss/commit/6e06329))
16
+ * change Config constructor arguments ([#14](https://github.com/gildesmarais/html2rss/issues/14)) ([21f8746](https://github.com/gildesmarais/html2rss/commit/21f8746))
17
+
18
+
19
+
1
20
  # [0.2.1](https://github.com/gildesmarais/html2rss/compare/v0.2.0...v0.2.1) (2018-11-18)
2
21
 
3
22
 
@@ -1,10 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.3.0)
4
+ html2rss (0.3.1)
5
5
  faraday (~> 0.15)
6
6
  faraday_middleware (~> 0.13)
7
- nokogiri (>= 1.10)
7
+ hashie (~> 3.6)
8
+ nokogiri (>= 1.10, < 2.0)
8
9
  sanitize (~> 5.0)
9
10
 
10
11
  GEM
@@ -19,6 +20,7 @@ GEM
19
20
  multipart-post (>= 1.2, < 3)
20
21
  faraday_middleware (0.13.1)
21
22
  faraday (>= 0.7.4, < 1.0)
23
+ hashie (3.6.0)
22
24
  jaro_winkler (1.5.3)
23
25
  json (2.2.0)
24
26
  mini_portile2 (2.4.0)
data/README.md CHANGED
@@ -17,6 +17,19 @@ and [post processors](https://github.com/gildesmarais/html2rss/tree/master/lib/h
17
17
  Add this line to your application's Gemfile: `gem 'html2rss'`
18
18
  And then execute: `bundle`
19
19
 
20
+ ```ruby
21
+ rss = Html2rss.feed(
22
+ channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
23
+ selectors: {
24
+ items: { selector: '#hot-network-questions > ul > li' },
25
+ title: { selector: 'a' },
26
+ link: { selector: 'a', extractor: 'href' }
27
+ }
28
+ )
29
+
30
+ puts rss.to_s
31
+ ```
32
+
20
33
  ## Usage with a YAML config file
21
34
 
22
35
  Create a YAML config file. Find an example at [`spec/config.test.yml`](https://github.com/gildesmarais/html2rss/blob/master/spec/config.test.yml).
@@ -8,9 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ['Gil Desmarais']
9
9
  spec.email = ['html2rss@desmarais.de']
10
10
 
11
- spec.summary = 'Generate RSS feeds by scraping websites by providing a config.'
12
- spec.description = 'Create your config object, include the url to scrape,
13
- some selectors and get a RSS2 feed in return.'
11
+ spec.summary = 'Returns an RSS::Rss object by scraping a URL.'
12
+ spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
14
13
  spec.homepage = 'https://github.com/gildesmarais/html2rss'
15
14
  spec.license = 'MIT'
16
15
 
@@ -30,7 +29,8 @@ Gem::Specification.new do |spec|
30
29
 
31
30
  spec.add_dependency 'faraday', '~> 0.15'
32
31
  spec.add_dependency 'faraday_middleware', '~> 0.13'
33
- spec.add_dependency 'nokogiri', '>= 1.10'
32
+ spec.add_dependency 'hashie', '~> 3.6'
33
+ spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
34
34
  spec.add_dependency 'sanitize', '~> 5.0'
35
35
  spec.add_development_dependency 'bundler', '~> 1.16'
36
36
  spec.add_development_dependency 'byebug'
@@ -5,7 +5,10 @@ require 'yaml'
5
5
 
6
6
  module Html2rss
7
7
  def self.feed_from_yaml_config(file, name)
8
+ # rubocop:disable Security/YAMLLoad
8
9
  yaml = YAML.load(File.open(file))
10
+ # rubocop:enable Security/YAMLLoad
11
+
9
12
  feed_config = yaml['feeds'][name]
10
13
  global_config = yaml.reject { |k| k == 'feeds' }
11
14
 
@@ -13,7 +16,23 @@ module Html2rss
13
16
  feed(config)
14
17
  end
15
18
 
19
+ ##
20
+ # Returns the RSS object, which is generated from the provided config.
21
+ #
22
+ # `config`: can be a Hash or an instance of Html2rss::Config.
23
+ #
24
+ # = Example with a Ruby Hash
25
+ # Html2rss.feed(
26
+ # channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
27
+ # selectors: {
28
+ # items: { selector: '#hot-network-questions > ul > li' },
29
+ # title: { selector: 'a' },
30
+ # link: { selector: 'a', extractor: 'href' }
31
+ # }
32
+ # )
16
33
  def self.feed(config)
34
+ config = Config.new(config) unless config.is_a?(Config)
35
+
17
36
  feed = FeedBuilder.new config
18
37
  feed.rss
19
38
  end
@@ -1,11 +1,18 @@
1
+ require 'hashie'
2
+
1
3
  module Html2rss
2
4
  class Config
3
5
  attr_reader :feed_config, :channel_config, :global_config
4
6
 
7
+ class IndifferentAccessHash < Hash
8
+ include Hashie::Extensions::MergeInitializer
9
+ include Hashie::Extensions::IndifferentAccess
10
+ end
11
+
5
12
  def initialize(feed_config, global_config = {})
6
- @global_config = global_config
7
- @feed_config = feed_config
8
- @channel_config = feed_config.fetch('channel', {})
13
+ @global_config = IndifferentAccessHash.new global_config
14
+ @feed_config = IndifferentAccessHash.new feed_config
15
+ @channel_config = IndifferentAccessHash.new @feed_config.fetch('channel', {})
9
16
  end
10
17
 
11
18
  def author
@@ -13,7 +20,7 @@ module Html2rss
13
20
  end
14
21
 
15
22
  def ttl
16
- (channel_config.fetch 'ttl').to_i || nil
23
+ channel_config.fetch 'ttl', 3600
17
24
  end
18
25
 
19
26
  def title
@@ -38,9 +38,7 @@ module Html2rss
38
38
  end
39
39
 
40
40
  def valid?
41
- return false if [title.to_s, description.to_s].join('') == ''
42
-
43
- true
41
+ [title.to_s, description.to_s].join('') != ''
44
42
  end
45
43
 
46
44
  def categories
@@ -1,10 +1,15 @@
1
1
  module Html2rss
2
2
  module ItemExtractor
3
- TEXT = proc { |xml, options| xml.css(options['selector'])&.text&.strip }
4
- ATTRIBUTE = proc { |xml, options| xml.css(options['selector']).attr(options['attribute']).to_s }
3
+ TEXT = proc { |xml, options|
4
+ element(xml, options)&.text&.strip&.split&.join(' ')
5
+ }
6
+
7
+ ATTRIBUTE = proc { |xml, options|
8
+ element(xml, options).attr(options['attribute']).to_s
9
+ }
5
10
 
6
11
  HREF = proc { |xml, options|
7
- href = xml.css(options['selector']).attr('href').to_s
12
+ href = element(xml, options).attr('href').to_s
8
13
  path, query = href.split('?')
9
14
 
10
15
  if href.start_with?('http')
@@ -18,8 +23,15 @@ module Html2rss
18
23
  uri
19
24
  }
20
25
 
21
- HTML = proc { |xml, options| xml.css(options['selector']).to_s }
26
+ HTML = proc { |xml, options|
27
+ element(xml, options).to_s
28
+ }
29
+
22
30
  STATIC = proc { |_xml, options| options['static'] }
23
31
  CURRENT_TIME = proc { |_xml, _options| Time.new }
32
+
33
+ def self.element(xml, options)
34
+ options['selector'] ? xml.css(options['selector']) : xml
35
+ end
24
36
  end
25
37
  end
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.3.0'.freeze
2
+ VERSION = '0.3.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-20 00:00:00.000000000 Z
11
+ date: 2019-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.13'
41
+ - !ruby/object:Gem::Dependency
42
+ name: hashie
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: nokogiri
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +59,9 @@ dependencies:
45
59
  - - ">="
46
60
  - !ruby/object:Gem::Version
47
61
  version: '1.10'
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '2.0'
48
65
  type: :runtime
49
66
  prerelease: false
50
67
  version_requirements: !ruby/object:Gem::Requirement
@@ -52,6 +69,9 @@ dependencies:
52
69
  - - ">="
53
70
  - !ruby/object:Gem::Version
54
71
  version: '1.10'
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.0'
55
75
  - !ruby/object:Gem::Dependency
56
76
  name: sanitize
57
77
  requirement: !ruby/object:Gem::Requirement
@@ -164,9 +184,8 @@ dependencies:
164
184
  - - ">="
165
185
  - !ruby/object:Gem::Version
166
186
  version: '0'
167
- description: |-
168
- Create your config object, include the url to scrape,
169
- some selectors and get a RSS2 feed in return.
187
+ description: Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance
188
+ in return.
170
189
  email:
171
190
  - html2rss@desmarais.de
172
191
  executables: []
@@ -222,5 +241,5 @@ requirements: []
222
241
  rubygems_version: 3.0.4
223
242
  signing_key:
224
243
  specification_version: 4
225
- summary: Generate RSS feeds by scraping websites by providing a config.
244
+ summary: Returns an RSS::Rss object by scraping a URL.
226
245
  test_files: []