html2rss 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1590d66348bc64be15db1b614963cf7d9032a438fca94590cb6e1f70bf6149ed
4
- data.tar.gz: 371f1459e0a623b9b62c7cd73f94536762cb7e0abbde2e63a836f82dd06742a2
3
+ metadata.gz: fdb35d5375dda349c92fb7ccaed71747de5e49b2adfb3a1cfca9a9c251aa86a9
4
+ data.tar.gz: 6532e1df3c67108c6ae8bf7eaef7a6d5eb228c4d3020f99aa02f8a9d507a625e
5
5
  SHA512:
6
- metadata.gz: 660f15157d054016e1dc91269d8686d85359e1b9b83cad69c47accb7d3be0e2b8a168802f9d29c8b213c0a4443822c862a9c461fc6e40284371c405ec6425f92
7
- data.tar.gz: 36943075ad772b28f7e4c3b8d4a9d1faef6fb0c61ceea7be7dc177d90fc43839d81e44d2cbc215acdaad476d250d584d612a0c10d201fded859e30488f353eb6
6
+ metadata.gz: 86fbfe46d9ec7b0f21bc0e756da94ed96ecfa635f858336424034875a646f81c08e64610bf8b478d9e0212ea272ea06ee13d6bf33c32cf01dd25363e4ba44868
7
+ data.tar.gz: dfad9fc0ba59cc8ecd323299b330c5cc6831336da7fd913f4f72a52ee807861d4b604927e73477f33e353314f2107477457cfa4f320024cd63c19e4b0a577cd5
@@ -22,7 +22,7 @@ Metrics/MethodLength:
22
22
  Max: 15
23
23
 
24
24
  Documentation:
25
- Enabled: false
25
+ Enabled: true
26
26
 
27
27
  Style/BlockDelimiters:
28
28
  Enabled: true
@@ -9,11 +9,12 @@ before_install:
9
9
  bundler_args: "--jobs=3 --retry=3"
10
10
 
11
11
  rvm:
12
- - 2.4.7
13
- - 2.5.6
14
- - 2.6.4
12
+ - 2.4.9
13
+ - 2.5.7
14
+ - 2.6.5
15
15
 
16
16
  script:
17
+ - bundle exec rubocop -F
17
18
  - bundle exec rspec
18
19
 
19
20
  deploy:
@@ -1,4 +1,20 @@
1
- # [](https://github.com/gildesmarais/html2rss/compare/v0.5.2...v) (2019-09-19)
1
+ # [](https://github.com/gildesmarais/html2rss/compare/v0.6.0...v) (2019-10-05)
2
+
3
+
4
+
5
+ # [0.6.0](https://github.com/gildesmarais/html2rss/compare/v0.5.2...v0.6.0) (2019-10-05)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/b0ca780))
11
+
12
+
13
+ ### Features
14
+
15
+ * **ci:** run rubocop on ci ([#40](https://github.com/gildesmarais/html2rss/issues/40)) ([f4ec8d1](https://github.com/gildesmarais/html2rss/commit/f4ec8d1))
16
+ * memoize ItemExtractor lookups ([#45](https://github.com/gildesmarais/html2rss/issues/45)) ([e88321c](https://github.com/gildesmarais/html2rss/commit/e88321c))
17
+ * support setting of request headers in feed config ([#41](https://github.com/gildesmarais/html2rss/issues/41)) ([a7aca11](https://github.com/gildesmarais/html2rss/commit/a7aca11)), closes [#38](https://github.com/gildesmarais/html2rss/issues/38)
2
18
 
3
19
 
4
20
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.5.2)
4
+ html2rss (0.6.0)
5
5
  activesupport (~> 5.0)
6
6
  builder
7
7
  faraday (~> 0.15)
@@ -26,24 +26,24 @@ GEM
26
26
  crass (1.0.4)
27
27
  diff-lcs (1.3)
28
28
  docile (1.3.2)
29
- faraday (0.15.4)
29
+ faraday (0.16.2)
30
30
  multipart-post (>= 1.2, < 3)
31
31
  faraday_middleware (0.13.1)
32
32
  faraday (>= 0.7.4, < 1.0)
33
33
  hashie (3.6.0)
34
- i18n (1.6.0)
34
+ i18n (1.7.0)
35
35
  concurrent-ruby (~> 1.0)
36
36
  jaro_winkler (1.5.3)
37
37
  json (2.2.0)
38
38
  mini_portile2 (2.4.0)
39
- minitest (5.11.3)
39
+ minitest (5.12.2)
40
40
  multipart-post (2.1.1)
41
41
  nokogiri (1.10.4)
42
42
  mini_portile2 (~> 2.4.0)
43
43
  nokogumbo (2.0.1)
44
44
  nokogiri (~> 1.8, >= 1.8.4)
45
45
  parallel (1.17.0)
46
- parser (2.6.4.1)
46
+ parser (2.6.5.0)
47
47
  ast (~> 2.4.0)
48
48
  rainbow (3.0.0)
49
49
  reverse_markdown (1.3.0)
@@ -54,24 +54,24 @@ GEM
54
54
  rspec-mocks (~> 3.8.0)
55
55
  rspec-core (3.8.2)
56
56
  rspec-support (~> 3.8.0)
57
- rspec-expectations (3.8.4)
57
+ rspec-expectations (3.8.5)
58
58
  diff-lcs (>= 1.2.0, < 2.0)
59
59
  rspec-support (~> 3.8.0)
60
- rspec-mocks (3.8.1)
60
+ rspec-mocks (3.8.2)
61
61
  diff-lcs (>= 1.2.0, < 2.0)
62
62
  rspec-support (~> 3.8.0)
63
- rspec-support (3.8.2)
64
- rubocop (0.74.0)
63
+ rspec-support (3.8.3)
64
+ rubocop (0.75.0)
65
65
  jaro_winkler (~> 1.5.1)
66
66
  parallel (~> 1.10)
67
67
  parser (>= 2.6)
68
68
  rainbow (>= 2.2.2, < 4.0)
69
69
  ruby-progressbar (~> 1.7)
70
70
  unicode-display_width (>= 1.4.0, < 1.7)
71
- rubocop-performance (1.4.1)
71
+ rubocop-performance (1.5.0)
72
72
  rubocop (>= 0.71.0)
73
- rubocop-rspec (1.35.0)
74
- rubocop (>= 0.60.0)
73
+ rubocop-rspec (1.36.0)
74
+ rubocop (>= 0.68.1)
75
75
  ruby-progressbar (1.10.1)
76
76
  sanitize (5.1.0)
77
77
  crass (~> 1.0.2)
data/README.md CHANGED
@@ -48,7 +48,17 @@ Since 0.5.0 it is possible to scrape and process JSON.
48
48
 
49
49
  Adding `json: true` to the channel config will convert the JSON response to XML.
50
50
 
51
- Example:
51
+ Feed config:
52
+
53
+ ```yaml
54
+ channel:
55
+ url: https://example.com
56
+ title: "Example with JSON"
57
+ json: true
58
+ # ...
59
+ ```
60
+
61
+ Imagine this HTTP response:
52
62
 
53
63
  ```json
54
64
  {
@@ -73,6 +83,25 @@ Your items selector would be `data > datum`, the item's link selector would be `
73
83
 
74
84
  Under the hood it uses ActiveSupport's [`Hash.to_xml`](https://apidock.com/rails/Hash/to_xml) core extension for the JSON to XML conversion.
75
85
 
86
+ ## Set any HTTP header in the request
87
+
88
+ You can add any HTTP headers to the request to the channel URL.
89
+ You can use this, for example, to send Cookie or Authorization information or to override the User-Agent.
90
+
91
+ ```yaml
92
+ channel:
93
+ url: https://example.com
94
+ title: "Example with http headers"
95
+ headers:
96
+ "User-Agent": "html2rss-request"
97
+ "X-Something": "Foobar"
98
+ "Authorization": "Token deadbea7"
99
+ "Cookie": "monster=MeWantCookie"
100
+ # ...
101
+ ```
102
+
103
+ The headers provided by the channel will be merged into the global headers.
104
+
76
105
  ## Development
77
106
 
78
107
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -12,9 +12,11 @@ Gem::Specification.new do |spec|
12
12
  spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
13
  spec.homepage = 'https://github.com/gildesmarais/html2rss'
14
14
  spec.license = 'MIT'
15
+ spec.required_ruby_version = '>= 2.4.0'
15
16
 
16
17
  if spec.respond_to?(:metadata)
17
18
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
19
+ spec.metadata['changelog_uri'] = 'https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md'
18
20
  else
19
21
  raise 'RubyGems 2.0 or newer is required to protect against ' \
20
22
  'public gem pushes.'
@@ -4,6 +4,9 @@ require 'html2rss/version'
4
4
  require 'html2rss/utils'
5
5
  require 'yaml'
6
6
 
7
+ ##
8
+ # The Html2rss namespace.
9
+ # Request HTML from a URL and transform it to an RSS 2.0 object.
7
10
  module Html2rss
8
11
  ##
9
12
  # Returns a RSS object which is generated from the provided file.
@@ -1,4 +1,7 @@
1
1
  module Html2rss
2
+ ##
3
+ # The Config class abstracts from the config data structure and
4
+ # provides default values.
2
5
  class Config
3
6
  def initialize(feed_config, global_config = {})
4
7
  @global_config = Utils::IndifferentAccessHash.new global_config
@@ -40,7 +43,7 @@ module Html2rss
40
43
  end
41
44
 
42
45
  def headers
43
- global_config.fetch('headers', {})
46
+ global_config.fetch('headers', {}).merge(channel_config.fetch('headers', {}))
44
47
  end
45
48
 
46
49
  def attribute_options(name)
@@ -2,6 +2,13 @@ require 'rss'
2
2
  require_relative 'item'
3
3
 
4
4
  module Html2rss
5
+ ##
6
+ # The purpose is to build the feed, consisting of
7
+ #
8
+ # - the 'channel' and
9
+ # - the 'item'
10
+ #
11
+ # parts.
5
12
  class FeedBuilder
6
13
  def initialize(config)
7
14
  @config = config
@@ -11,10 +18,10 @@ module Html2rss
11
18
  # @return [RSS::Rss]
12
19
  def rss
13
20
  RSS::Maker.make('2.0') do |maker|
14
- add_channel_to_maker(maker)
21
+ add_channel(maker)
15
22
 
16
23
  feed_items.map do |feed_item|
17
- add_item_to_items(feed_item, maker.items)
24
+ add_item(feed_item, maker.items.new_item)
18
25
  end
19
26
  end
20
27
  end
@@ -23,7 +30,7 @@ module Html2rss
23
30
 
24
31
  attr_reader :config
25
32
 
26
- def add_channel_to_maker(maker)
33
+ def add_channel(maker)
27
34
  %i[language author title description link ttl].each do |attribute_name|
28
35
  maker.channel.public_send("#{attribute_name}=".to_sym, config.public_send(attribute_name))
29
36
  end
@@ -33,24 +40,20 @@ module Html2rss
33
40
  end
34
41
 
35
42
  def feed_items
36
- @feed_items ||= Item.from_url config.url, config
43
+ @feed_items ||= Item.from_url(config.url, config).keep_if(&:valid?)
37
44
  end
38
45
 
39
- def add_item_to_items(feed_item, items)
40
- return unless feed_item.valid?
41
-
42
- items.new_item do |rss_item|
43
- feed_item.available_attributes.each do |attribute_name|
44
- rss_item.public_send("#{attribute_name}=".to_sym, feed_item.public_send(attribute_name))
45
- end
46
-
47
- feed_item.categories.each do |category|
48
- rss_item.categories.new_category.content = category
49
- end
46
+ def add_item(feed_item, rss_item)
47
+ feed_item.available_attributes.each do |attribute_name|
48
+ rss_item.public_send("#{attribute_name}=".to_sym, feed_item.public_send(attribute_name))
49
+ end
50
50
 
51
- rss_item.guid.content = Digest::SHA1.hexdigest(feed_item.title)
52
- rss_item.guid.isPermaLink = false
51
+ feed_item.categories.each do |category|
52
+ rss_item.categories.new_category.content = category
53
53
  end
54
+
55
+ rss_item.guid.content = Digest::SHA1.hexdigest(feed_item.title)
56
+ rss_item.guid.isPermaLink = false
54
57
  end
55
58
  end
56
59
  end
@@ -6,6 +6,9 @@ require_relative 'item_extractors'
6
6
  require_relative 'attribute_post_processors'
7
7
 
8
8
  module Html2rss
9
+ ##
10
+ # Takes the selected Nokogiri::HTML and responds to accessor names
11
+ # defined in the feed config.
9
12
  class Item
10
13
  def initialize(xml, config)
11
14
  @xml = xml
@@ -12,11 +12,12 @@ module Html2rss
12
12
  DEFAULT = 'text'.freeze
13
13
 
14
14
  def self.get_extractor(name)
15
- name ||= DEFAULT
16
- camel_cased_name = name.split('_').map(&:capitalize).join
17
- class_name = ['Html2rss', 'ItemExtractors', camel_cased_name].join('::')
15
+ @extractors = Hash.new do |hash, key|
16
+ camel_cased_name = key.split('_').map(&:capitalize).join
17
+ class_name = ['Html2rss', 'ItemExtractors', camel_cased_name].join('::')
18
18
 
19
- Object.const_get(class_name)
19
+ hash[key] = Object.const_get(class_name)
20
+ end[name || DEFAULT]
20
21
  end
21
22
 
22
23
  ##
@@ -5,6 +5,8 @@ require 'json'
5
5
  require 'nokogiri'
6
6
 
7
7
  module Html2rss
8
+ ##
9
+ # The collecting tank for utility methods.
8
10
  module Utils
9
11
  ##
10
12
  # A Hash with indifferent access, built with {https://github.com/intridea/hashie Hashie}.
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.5.2'.freeze
2
+ VERSION = '0.6.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-19 00:00:00.000000000 Z
11
+ date: 2019-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -301,6 +301,7 @@ licenses:
301
301
  - MIT
302
302
  metadata:
303
303
  allowed_push_host: https://rubygems.org
304
+ changelog_uri: https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md
304
305
  post_install_message:
305
306
  rdoc_options: []
306
307
  require_paths:
@@ -309,15 +310,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
309
310
  requirements:
310
311
  - - ">="
311
312
  - !ruby/object:Gem::Version
312
- version: '0'
313
+ version: 2.4.0
313
314
  required_rubygems_version: !ruby/object:Gem::Requirement
314
315
  requirements:
315
316
  - - ">="
316
317
  - !ruby/object:Gem::Version
317
318
  version: '0'
318
319
  requirements: []
319
- rubyforge_project:
320
- rubygems_version: 2.7.7
320
+ rubygems_version: 3.0.6
321
321
  signing_key:
322
322
  specification_version: 4
323
323
  summary: Returns an RSS::Rss object by scraping a URL.