html2rss 0.5.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1590d66348bc64be15db1b614963cf7d9032a438fca94590cb6e1f70bf6149ed
4
- data.tar.gz: 371f1459e0a623b9b62c7cd73f94536762cb7e0abbde2e63a836f82dd06742a2
3
+ metadata.gz: fdb35d5375dda349c92fb7ccaed71747de5e49b2adfb3a1cfca9a9c251aa86a9
4
+ data.tar.gz: 6532e1df3c67108c6ae8bf7eaef7a6d5eb228c4d3020f99aa02f8a9d507a625e
5
5
  SHA512:
6
- metadata.gz: 660f15157d054016e1dc91269d8686d85359e1b9b83cad69c47accb7d3be0e2b8a168802f9d29c8b213c0a4443822c862a9c461fc6e40284371c405ec6425f92
7
- data.tar.gz: 36943075ad772b28f7e4c3b8d4a9d1faef6fb0c61ceea7be7dc177d90fc43839d81e44d2cbc215acdaad476d250d584d612a0c10d201fded859e30488f353eb6
6
+ metadata.gz: 86fbfe46d9ec7b0f21bc0e756da94ed96ecfa635f858336424034875a646f81c08e64610bf8b478d9e0212ea272ea06ee13d6bf33c32cf01dd25363e4ba44868
7
+ data.tar.gz: dfad9fc0ba59cc8ecd323299b330c5cc6831336da7fd913f4f72a52ee807861d4b604927e73477f33e353314f2107477457cfa4f320024cd63c19e4b0a577cd5
@@ -22,7 +22,7 @@ Metrics/MethodLength:
22
22
  Max: 15
23
23
 
24
24
  Documentation:
25
- Enabled: false
25
+ Enabled: true
26
26
 
27
27
  Style/BlockDelimiters:
28
28
  Enabled: true
@@ -9,11 +9,12 @@ before_install:
9
9
  bundler_args: "--jobs=3 --retry=3"
10
10
 
11
11
  rvm:
12
- - 2.4.7
13
- - 2.5.6
14
- - 2.6.4
12
+ - 2.4.9
13
+ - 2.5.7
14
+ - 2.6.5
15
15
 
16
16
  script:
17
+ - bundle exec rubocop -F
17
18
  - bundle exec rspec
18
19
 
19
20
  deploy:
@@ -1,4 +1,20 @@
1
- # [](https://github.com/gildesmarais/html2rss/compare/v0.5.2...v) (2019-09-19)
1
+ # [](https://github.com/gildesmarais/html2rss/compare/v0.6.0...v) (2019-10-05)
2
+
3
+
4
+
5
+ # [0.6.0](https://github.com/gildesmarais/html2rss/compare/v0.5.2...v0.6.0) (2019-10-05)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * **specs:** simplecov does not exclude files from spec/ ([#44](https://github.com/gildesmarais/html2rss/issues/44)) ([b0ca780](https://github.com/gildesmarais/html2rss/commit/b0ca780))
11
+
12
+
13
+ ### Features
14
+
15
+ * **ci:** run rubocop on ci ([#40](https://github.com/gildesmarais/html2rss/issues/40)) ([f4ec8d1](https://github.com/gildesmarais/html2rss/commit/f4ec8d1))
16
+ * memoize ItemExtractor lookups ([#45](https://github.com/gildesmarais/html2rss/issues/45)) ([e88321c](https://github.com/gildesmarais/html2rss/commit/e88321c))
17
+ * support setting of request headers in feed config ([#41](https://github.com/gildesmarais/html2rss/issues/41)) ([a7aca11](https://github.com/gildesmarais/html2rss/commit/a7aca11)), closes [#38](https://github.com/gildesmarais/html2rss/issues/38)
2
18
 
3
19
 
4
20
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2rss (0.5.2)
4
+ html2rss (0.6.0)
5
5
  activesupport (~> 5.0)
6
6
  builder
7
7
  faraday (~> 0.15)
@@ -26,24 +26,24 @@ GEM
26
26
  crass (1.0.4)
27
27
  diff-lcs (1.3)
28
28
  docile (1.3.2)
29
- faraday (0.15.4)
29
+ faraday (0.16.2)
30
30
  multipart-post (>= 1.2, < 3)
31
31
  faraday_middleware (0.13.1)
32
32
  faraday (>= 0.7.4, < 1.0)
33
33
  hashie (3.6.0)
34
- i18n (1.6.0)
34
+ i18n (1.7.0)
35
35
  concurrent-ruby (~> 1.0)
36
36
  jaro_winkler (1.5.3)
37
37
  json (2.2.0)
38
38
  mini_portile2 (2.4.0)
39
- minitest (5.11.3)
39
+ minitest (5.12.2)
40
40
  multipart-post (2.1.1)
41
41
  nokogiri (1.10.4)
42
42
  mini_portile2 (~> 2.4.0)
43
43
  nokogumbo (2.0.1)
44
44
  nokogiri (~> 1.8, >= 1.8.4)
45
45
  parallel (1.17.0)
46
- parser (2.6.4.1)
46
+ parser (2.6.5.0)
47
47
  ast (~> 2.4.0)
48
48
  rainbow (3.0.0)
49
49
  reverse_markdown (1.3.0)
@@ -54,24 +54,24 @@ GEM
54
54
  rspec-mocks (~> 3.8.0)
55
55
  rspec-core (3.8.2)
56
56
  rspec-support (~> 3.8.0)
57
- rspec-expectations (3.8.4)
57
+ rspec-expectations (3.8.5)
58
58
  diff-lcs (>= 1.2.0, < 2.0)
59
59
  rspec-support (~> 3.8.0)
60
- rspec-mocks (3.8.1)
60
+ rspec-mocks (3.8.2)
61
61
  diff-lcs (>= 1.2.0, < 2.0)
62
62
  rspec-support (~> 3.8.0)
63
- rspec-support (3.8.2)
64
- rubocop (0.74.0)
63
+ rspec-support (3.8.3)
64
+ rubocop (0.75.0)
65
65
  jaro_winkler (~> 1.5.1)
66
66
  parallel (~> 1.10)
67
67
  parser (>= 2.6)
68
68
  rainbow (>= 2.2.2, < 4.0)
69
69
  ruby-progressbar (~> 1.7)
70
70
  unicode-display_width (>= 1.4.0, < 1.7)
71
- rubocop-performance (1.4.1)
71
+ rubocop-performance (1.5.0)
72
72
  rubocop (>= 0.71.0)
73
- rubocop-rspec (1.35.0)
74
- rubocop (>= 0.60.0)
73
+ rubocop-rspec (1.36.0)
74
+ rubocop (>= 0.68.1)
75
75
  ruby-progressbar (1.10.1)
76
76
  sanitize (5.1.0)
77
77
  crass (~> 1.0.2)
data/README.md CHANGED
@@ -48,7 +48,17 @@ Since 0.5.0 it is possible to scrape and process JSON.
48
48
 
49
49
  Adding `json: true` to the channel config will convert the JSON response to XML.
50
50
 
51
- Example:
51
+ Feed config:
52
+
53
+ ```yaml
54
+ channel:
55
+ url: https://example.com
56
+ title: "Example with JSON"
57
+ json: true
58
+ # ...
59
+ ```
60
+
61
+ Imagine this HTTP response:
52
62
 
53
63
  ```json
54
64
  {
@@ -73,6 +83,25 @@ Your items selector would be `data > datum`, the item's link selector would be `
73
83
 
74
84
  Under the hood it uses ActiveSupport's [`Hash.to_xml`](https://apidock.com/rails/Hash/to_xml) core extension for the JSON to XML conversion.
75
85
 
86
+ ## Set any HTTP header in the request
87
+
88
+ You can add any HTTP headers to the request to the channel URL.
89
+ You can use this, for example, to send Cookie or Authorization information or to override the User-Agent.
90
+
91
+ ```yaml
92
+ channel:
93
+ url: https://example.com
94
+ title: "Example with http headers"
95
+ headers:
96
+ "User-Agent": "html2rss-request"
97
+ "X-Something": "Foobar"
98
+ "Authorization": "Token deadbea7"
99
+ "Cookie": "monster=MeWantCookie"
100
+ # ...
101
+ ```
102
+
103
+ The headers provided by the channel will be merged into the global headers.
104
+
76
105
  ## Development
77
106
 
78
107
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -12,9 +12,11 @@ Gem::Specification.new do |spec|
12
12
  spec.description = 'Give the URL to scrape and some CSS selectors. Get a RSS::Rss instance in return.'
13
13
  spec.homepage = 'https://github.com/gildesmarais/html2rss'
14
14
  spec.license = 'MIT'
15
+ spec.required_ruby_version = '>= 2.4.0'
15
16
 
16
17
  if spec.respond_to?(:metadata)
17
18
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'
19
+ spec.metadata['changelog_uri'] = 'https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md'
18
20
  else
19
21
  raise 'RubyGems 2.0 or newer is required to protect against ' \
20
22
  'public gem pushes.'
@@ -4,6 +4,9 @@ require 'html2rss/version'
4
4
  require 'html2rss/utils'
5
5
  require 'yaml'
6
6
 
7
+ ##
8
+ # The Html2rss namespace.
9
+ # Requests HTML from a URL and transforms it into an RSS 2.0 object.
7
10
  module Html2rss
8
11
  ##
9
12
  # Returns a RSS object which is generated from the provided file.
@@ -1,4 +1,7 @@
1
1
  module Html2rss
2
+ ##
3
+ # The Config class abstracts from the config data structure and
4
+ # provides default values.
2
5
  class Config
3
6
  def initialize(feed_config, global_config = {})
4
7
  @global_config = Utils::IndifferentAccessHash.new global_config
@@ -40,7 +43,7 @@ module Html2rss
40
43
  end
41
44
 
42
45
  def headers
43
- global_config.fetch('headers', {})
46
+ global_config.fetch('headers', {}).merge(channel_config.fetch('headers', {}))
44
47
  end
45
48
 
46
49
  def attribute_options(name)
@@ -2,6 +2,13 @@ require 'rss'
2
2
  require_relative 'item'
3
3
 
4
4
  module Html2rss
5
+ ##
6
+ # The purpose is to build the feed, consisting of
7
+ #
8
+ # - the 'channel' and
9
+ # - the 'item'
10
+ #
11
+ # parts.
5
12
  class FeedBuilder
6
13
  def initialize(config)
7
14
  @config = config
@@ -11,10 +18,10 @@ module Html2rss
11
18
  # @return [RSS::Rss]
12
19
  def rss
13
20
  RSS::Maker.make('2.0') do |maker|
14
- add_channel_to_maker(maker)
21
+ add_channel(maker)
15
22
 
16
23
  feed_items.map do |feed_item|
17
- add_item_to_items(feed_item, maker.items)
24
+ add_item(feed_item, maker.items.new_item)
18
25
  end
19
26
  end
20
27
  end
@@ -23,7 +30,7 @@ module Html2rss
23
30
 
24
31
  attr_reader :config
25
32
 
26
- def add_channel_to_maker(maker)
33
+ def add_channel(maker)
27
34
  %i[language author title description link ttl].each do |attribute_name|
28
35
  maker.channel.public_send("#{attribute_name}=".to_sym, config.public_send(attribute_name))
29
36
  end
@@ -33,24 +40,20 @@ module Html2rss
33
40
  end
34
41
 
35
42
  def feed_items
36
- @feed_items ||= Item.from_url config.url, config
43
+ @feed_items ||= Item.from_url(config.url, config).keep_if(&:valid?)
37
44
  end
38
45
 
39
- def add_item_to_items(feed_item, items)
40
- return unless feed_item.valid?
41
-
42
- items.new_item do |rss_item|
43
- feed_item.available_attributes.each do |attribute_name|
44
- rss_item.public_send("#{attribute_name}=".to_sym, feed_item.public_send(attribute_name))
45
- end
46
-
47
- feed_item.categories.each do |category|
48
- rss_item.categories.new_category.content = category
49
- end
46
+ def add_item(feed_item, rss_item)
47
+ feed_item.available_attributes.each do |attribute_name|
48
+ rss_item.public_send("#{attribute_name}=".to_sym, feed_item.public_send(attribute_name))
49
+ end
50
50
 
51
- rss_item.guid.content = Digest::SHA1.hexdigest(feed_item.title)
52
- rss_item.guid.isPermaLink = false
51
+ feed_item.categories.each do |category|
52
+ rss_item.categories.new_category.content = category
53
53
  end
54
+
55
+ rss_item.guid.content = Digest::SHA1.hexdigest(feed_item.title)
56
+ rss_item.guid.isPermaLink = false
54
57
  end
55
58
  end
56
59
  end
@@ -6,6 +6,9 @@ require_relative 'item_extractors'
6
6
  require_relative 'attribute_post_processors'
7
7
 
8
8
  module Html2rss
9
+ ##
10
+ # Takes the selected Nokogiri::HTML and responds to accessor names
11
+ # defined in the feed config.
9
12
  class Item
10
13
  def initialize(xml, config)
11
14
  @xml = xml
@@ -12,11 +12,12 @@ module Html2rss
12
12
  DEFAULT = 'text'.freeze
13
13
 
14
14
  def self.get_extractor(name)
15
- name ||= DEFAULT
16
- camel_cased_name = name.split('_').map(&:capitalize).join
17
- class_name = ['Html2rss', 'ItemExtractors', camel_cased_name].join('::')
15
+ @extractors = Hash.new do |hash, key|
16
+ camel_cased_name = key.split('_').map(&:capitalize).join
17
+ class_name = ['Html2rss', 'ItemExtractors', camel_cased_name].join('::')
18
18
 
19
- Object.const_get(class_name)
19
+ hash[key] = Object.const_get(class_name)
20
+ end[name || DEFAULT]
20
21
  end
21
22
 
22
23
  ##
@@ -5,6 +5,8 @@ require 'json'
5
5
  require 'nokogiri'
6
6
 
7
7
  module Html2rss
8
+ ##
9
+ # A collection of utility methods.
8
10
  module Utils
9
11
  ##
10
12
  # A Hash with indifferent access, built with {https://github.com/intridea/hashie Hashie}.
@@ -1,3 +1,3 @@
1
1
  module Html2rss
2
- VERSION = '0.5.2'.freeze
2
+ VERSION = '0.6.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2rss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gil Desmarais
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-19 00:00:00.000000000 Z
11
+ date: 2019-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -301,6 +301,7 @@ licenses:
301
301
  - MIT
302
302
  metadata:
303
303
  allowed_push_host: https://rubygems.org
304
+ changelog_uri: https://github.com/gildesmarais/html2rss/blob/master/CHANGELOG.md
304
305
  post_install_message:
305
306
  rdoc_options: []
306
307
  require_paths:
@@ -309,15 +310,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
309
310
  requirements:
310
311
  - - ">="
311
312
  - !ruby/object:Gem::Version
312
- version: '0'
313
+ version: 2.4.0
313
314
  required_rubygems_version: !ruby/object:Gem::Requirement
314
315
  requirements:
315
316
  - - ">="
316
317
  - !ruby/object:Gem::Version
317
318
  version: '0'
318
319
  requirements: []
319
- rubyforge_project:
320
- rubygems_version: 2.7.7
320
+ rubygems_version: 3.0.6
321
321
  signing_key:
322
322
  specification_version: 4
323
323
  summary: Returns an RSS::Rss object by scraping a URL.