weneedfeed 0.7.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e666f142f72459d43fd7c6fe071740d1fce11dbac4605bcd1d7fbcc8766187ab
4
- data.tar.gz: a6fdf6ce28e9c610d9f47fa747eaedc4d0198c59c70fd5b736d99dbeaf90d7df
3
+ metadata.gz: 7aeb8844141a877a1c8e17c1aa1549d030a9384d1fbc01219a75a45891823830
4
+ data.tar.gz: e707d7fdec8299c0b5cf76c0628ab58a5e8b1c1aec443fe2cc00524671df68b5
5
5
  SHA512:
6
- metadata.gz: e07f3fe6d8d712e20384310e1bee1eca48065ccf2c0a400aaccd6e9d0f0d219b011f7944c34d11870bb5e3b83750f17de267d8eff260926916daaff1b9f08f35
7
- data.tar.gz: 792f22dbcda78c4df3f0d738fe6dee28dae5a64eb075f8a1603da6d0c9d5ca68bba3337374f3f2d6dd93e798a6c1f57c4436a9ff34425ee84366d94b56c89903
6
+ metadata.gz: a3db194daf276a2abff61e6c7a92cc137404b6bfb640df7db77a770316e4f10f6cc57b72172bcdda94eceb1b4db726bfc0547719452d097dd33d320394e7f6ea
7
+ data.tar.gz: 14cfe2f49aa515f396d4ae598bb07eaeab91cec57d1f5201e68d04770bb6d762b86e346700030a0f7fdd2b9fd57b81e64ff3982c03bdbb34b02e73f34a83f03f
@@ -7,14 +7,24 @@ on:
7
7
  - master
8
8
 
9
9
  jobs:
10
- build:
11
- runs-on: ubuntu-18.04
10
+ rspec:
11
+ runs-on: ubuntu-20.04
12
12
  steps:
13
13
  - uses: actions/checkout@v2
14
+ - uses: ruby/setup-ruby@v1
15
+ with:
16
+ bundler-cache: true
17
+ ruby-version: 2.7.2
18
+ - run: bundle exec rspec --force-color
19
+ rubocop:
20
+ runs-on: ubuntu-20.04
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ with:
24
+ ref: ${{ github.event.pull_request.head.sha }}
14
25
  - uses: ruby/setup-ruby@v1
15
26
  with:
16
27
  bundler-cache: true
17
28
  ruby-version: 2.7.2
18
29
  - uses: r7kamura/rubocop-problem-matchers-action@v1
19
30
  - run: bundle exec rubocop --parallel
20
- - run: bundle exec rspec --force-color
@@ -7,6 +7,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.9.1 - 2020-12-03
11
+
12
+ ### Fixed
13
+
14
+ - Fix escaping bug when escaping HTML and generating XML.
15
+
16
+ ## 0.9.0 - 2020-12-03
17
+
18
+ ### Added
19
+
20
+ - Add item_image_selector.
21
+
22
+ ## 0.8.0 - 2020-11-25
23
+
24
+ ### Added
25
+
26
+ - Add page description for writing channel description.
27
+
28
+ ### Changed
29
+
30
+ - Change channel link content from feeds index URL to page URL.
31
+
32
+ ## 0.7.2 - 2020-11-25
33
+
34
+ ### Fixed
35
+
36
+ - Fix time parse error caused by HTML entities.
37
+
38
+ ## 0.7.1 - 2020-11-25
39
+
40
+ ### Fixed
41
+
42
+ - Fix error when no title element found.
43
+
10
44
  ## 0.7.0 - 2020-11-23
11
45
 
12
46
  ### Added
@@ -1,12 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.7.0)
4
+ weneedfeed (0.9.1)
5
5
  activesupport
6
6
  builder
7
7
  faraday
8
8
  faraday_middleware
9
9
  hibana (>= 0.2)
10
+ mimemagic
10
11
  nokogiri
11
12
  rack-capture (>= 0.4.0)
12
13
  thor
@@ -44,6 +45,7 @@ GEM
44
45
  tilt
45
46
  i18n (1.8.5)
46
47
  concurrent-ruby (~> 1.0)
48
+ mimemagic (0.3.5)
47
49
  mini_portile2 (2.4.0)
48
50
  minitest (5.14.2)
49
51
  multipart-post (2.1.1)
@@ -106,7 +108,7 @@ GEM
106
108
  addressable (>= 2.3.6)
107
109
  crack (>= 0.3.2)
108
110
  hashdiff (>= 0.4.0, < 2.0.0)
109
- zeitwerk (2.4.1)
111
+ zeitwerk (2.4.2)
110
112
 
111
113
  PLATFORMS
112
114
  ruby
data/README.md CHANGED
@@ -27,24 +27,28 @@ gem install weneedfeed
27
27
 
28
28
  ## Schema
29
29
 
30
- You need to write a schema file named with `weneedfeed.yml` to use this gem.
30
+ Weneedfeed requires a `weneedfeed.yml` file that describes URLs and selectors.
31
31
 
32
32
  ### Example
33
33
 
34
34
  ```yaml
35
35
  pages:
36
36
  - id: example1
37
- title: Example feed with CSS Selector
37
+ title: Example 1
38
+ description: Example feed with CSS Selector
38
39
  url: http://example.com/1
39
40
  item_selector: li
40
41
  item_description_selector: p:nth-child(3)
42
+ item_image_selector: img
41
43
  item_link_selector: a
42
44
  item_time_selector: time
43
45
  item_title_selector: p:nth-child(2)
44
46
  - id: example2
45
- title: Example feed with XPath
47
+ title: Example 2
48
+ description: Example feed with XPath
46
49
  url: http://example.com/2
47
50
  item_selector: //li
51
+ item_image_selector: .//img
48
52
  item_description_selector: .//p[3]
49
53
  item_link_selector: .//a
50
54
  item_time_selector: .//time
@@ -56,7 +60,7 @@ pages:
56
60
  Feed ID.
57
61
 
58
62
  - required
59
- - Used for feed URL.
63
+ - Used for feed URL and `<link>` element in `<channel>` element.
60
64
 
61
65
  ### `title`
62
66
 
@@ -65,6 +69,13 @@ Feed title.
65
69
  - required
66
70
  - Used for RSS `<title>` element in `<channel>` element.
67
71
 
72
+ ### `description`
73
+
74
+ Feed description.
75
+
76
+ - optional
77
+ - Used for RSS `<description>` element in `<channel>` element.
78
+
68
79
  ### `url`
69
80
 
70
81
  HTML source URL.
@@ -100,6 +111,13 @@ CSS or XPath selector to find element with description information in each item.
100
111
  - optional
101
112
  - Used for `<description>` in `<item>`.
102
113
 
114
+ ### `item_image_selector`
115
+
116
+ CSS or XPath selector to find `<img>` element in each item.
117
+
118
+ - optional
119
+ - Used for `<enclosure>` in `<item>`.
120
+
103
121
  ### `item_time_selector`
104
122
 
105
123
  CSS or XPath selector to find element with datetime information in each item.
@@ -11,7 +11,9 @@ module Weneedfeed
11
11
  end
12
12
 
13
13
  scraping = ::Weneedfeed::Scraping.new(
14
+ description: page_schema.description,
14
15
  item_description_selector: page_schema.item_description_selector,
16
+ item_image_selector: page_schema.item_image_selector,
15
17
  item_link_selector: page_schema.item_link_selector,
16
18
  item_time_selector: page_schema.item_time_selector,
17
19
  item_title_selector: page_schema.item_title_selector,
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'mimemagic'
4
+
3
5
  module Weneedfeed
4
6
  class Item
5
7
  class << self
@@ -16,6 +18,7 @@ module Weneedfeed
16
18
  end
17
19
 
18
20
  # @param [String, nil] description_selector
21
+ # @param [String, nil] image_selector
19
22
  # @param [String, nil] link_selector
20
23
  # @param [Nokogiri::Node] node
21
24
  # @param [String] time_selector
@@ -23,6 +26,7 @@ module Weneedfeed
23
26
  # @param [String] url
24
27
  def initialize(
25
28
  description_selector:,
29
+ image_selector:,
26
30
  link_selector:,
27
31
  node:,
28
32
  time_selector:,
@@ -30,6 +34,7 @@ module Weneedfeed
30
34
  url:
31
35
  )
32
36
  @description_selector = description_selector
37
+ @image_selector = image_selector
33
38
  @link_selector = link_selector
34
39
  @node = node
35
40
  @time_selector = time_selector
@@ -44,6 +49,29 @@ module Weneedfeed
44
49
  @node.at(@description_selector)&.inner_html
45
50
  end
46
51
 
52
+ # @return [String, nil]
53
+ def image_mime_type
54
+ return unless image_url
55
+
56
+ ::MimeMagic.by_path(image_url)&.type
57
+ end
58
+
59
+ # @return [String, nil]
60
+ def image_path_or_url
61
+ return unless @image_selector
62
+
63
+ @node.at(@image_selector)&.[]('src')
64
+ end
65
+
66
+ def image_url
67
+ return unless image_path_or_url
68
+
69
+ ::URI.join(
70
+ @url,
71
+ image_path_or_url
72
+ ).to_s
73
+ end
74
+
47
75
  # @return [String]
48
76
  def link
49
77
  ::URI.join(
@@ -73,9 +101,9 @@ module Weneedfeed
73
101
  self.class.parse_time(string)
74
102
  end
75
103
 
76
- # @return [String]
104
+ # @return [String, nil]
77
105
  def title
78
- @node.at(@title_selector).inner_text
106
+ @node.at(@title_selector)&.inner_text
79
107
  end
80
108
 
81
109
  private
@@ -90,7 +118,7 @@ module Weneedfeed
90
118
  node = time_node
91
119
  return unless node
92
120
 
93
- node['datetime'] || node.inner_html
121
+ node['datetime'] || node.inner_text
94
122
  end
95
123
  end
96
124
  end
@@ -2,13 +2,18 @@
2
2
 
3
3
  module Weneedfeed
4
4
  class Page
5
+ # @return [String, nil]
6
+ attr_reader :description
7
+
5
8
  # @return [String]
6
9
  attr_reader :title
7
10
 
8
11
  # @return [String]
9
12
  attr_reader :url
10
13
 
14
+ # @param [String, nil] description
11
15
  # @param [String, nil] item_description_selector
16
+ # @param [String, nil] item_image_selector
12
17
  # @param [String] item_link_selector
13
18
  # @param [String, nil] item_time_selector
14
19
  # @param [String] item_title_selector
@@ -17,7 +22,9 @@ module Weneedfeed
17
22
  # @param [String] title
18
23
  # @param [String] url
19
24
  def initialize(
25
+ description:,
20
26
  item_description_selector:,
27
+ item_image_selector:,
21
28
  item_link_selector:,
22
29
  item_time_selector:,
23
30
  item_title_selector:,
@@ -26,7 +33,9 @@ module Weneedfeed
26
33
  title:,
27
34
  url:
28
35
  )
36
+ @description = description
29
37
  @item_description_selector = item_description_selector
38
+ @item_image_selector = item_image_selector
30
39
  @item_link_selector = item_link_selector
31
40
  @item_time_selector = item_time_selector
32
41
  @item_title_selector = item_title_selector
@@ -41,6 +50,7 @@ module Weneedfeed
41
50
  @node.search(@item_selector).map do |node|
42
51
  ::Weneedfeed::Item.new(
43
52
  description_selector: @item_description_selector,
53
+ image_selector: @item_image_selector,
44
54
  link_selector: @item_link_selector,
45
55
  node: node,
46
56
  time_selector: @item_time_selector,
@@ -2,8 +2,10 @@
2
2
 
3
3
  module Weneedfeed
4
4
  PageSchema = Struct.new(
5
+ :description,
5
6
  :id,
6
7
  :item_description_selector,
8
+ :item_image_selector,
7
9
  :item_link_selector,
8
10
  :item_time_selector,
9
11
  :item_title_selector,
@@ -24,8 +24,10 @@ module Weneedfeed
24
24
  def page_schemata
25
25
  @raw['pages'].map do |hash|
26
26
  ::Weneedfeed::PageSchema.new(
27
+ description: hash['description'],
27
28
  id: hash['id'],
28
29
  item_description_selector: hash['item_description_selector'],
30
+ item_image_selector: hash['item_image_selector'],
29
31
  item_link_selector: hash['item_link_selector'],
30
32
  item_time_selector: hash['item_time_selector'],
31
33
  item_title_selector: hash['item_title_selector'],
@@ -14,7 +14,9 @@ module Weneedfeed
14
14
  end
15
15
  end
16
16
 
17
+ # @param [String, nil] description
17
18
  # @param [String, nil] item_description_selector
19
+ # @param [String, nil] item_image_selector
18
20
  # @param [String] item_link_selector
19
21
  # @param [String, nil] item_time_selector
20
22
  # @param [String] item_title_selector
@@ -22,7 +24,9 @@ module Weneedfeed
22
24
  # @param [String] title
23
25
  # @param [String] url
24
26
  def initialize(
27
+ description:,
25
28
  item_description_selector:,
29
+ item_image_selector:,
26
30
  item_link_selector:,
27
31
  item_time_selector:,
28
32
  item_title_selector:,
@@ -30,7 +34,9 @@ module Weneedfeed
30
34
  title:,
31
35
  url:
32
36
  )
37
+ @description = description
33
38
  @item_description_selector = item_description_selector
39
+ @item_image_selector = item_image_selector
34
40
  @item_link_selector = item_link_selector
35
41
  @item_time_selector = item_time_selector
36
42
  @item_title_selector = item_title_selector
@@ -42,8 +48,10 @@ module Weneedfeed
42
48
  # @return [Weneedfeed::Page]
43
49
  def call
44
50
  ::Weneedfeed::Page.new(
51
+ description: @description,
45
52
  node: parsed_body,
46
53
  item_description_selector: @item_description_selector,
54
+ item_image_selector: @item_image_selector,
47
55
  item_selector: @item_selector,
48
56
  item_link_selector: @item_link_selector,
49
57
  item_time_selector: @item_time_selector,
@@ -57,12 +65,16 @@ module Weneedfeed
57
65
 
58
66
  # @return [Nokogiri::Node]
59
67
  def parsed_body
60
- ::Nokogiri::XML.parse(response.body)
68
+ if response.headers['Content-Type']&.include?('application/json')
69
+ ::Nokogiri::XML.parse(response.body)
70
+ else
71
+ ::Nokogiri::HTML.parse(response.body)
72
+ end
61
73
  end
62
74
 
63
75
  # @return [Faraday::Response]
64
76
  def response
65
- self.class.faraday_connection.get(@url)
77
+ @response ||= self.class.faraday_connection.get(@url)
66
78
  end
67
79
  end
68
80
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.7.0'
4
+ VERSION = '0.9.1'
5
5
  end
@@ -4,9 +4,9 @@
4
4
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
5
5
  <channel>
6
6
  <title><![CDATA[<%= @page.title %>]]></title>
7
- <link><%= "#{request.base_url}#{top_page_path}" %></link>
7
+ <link><%= "#{@page.url}" %></link>
8
8
  <atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
9
- <description><![CDATA[Recent content on <%= @page.title %>]]></description>
9
+ <description><![CDATA[<%= @page.description %>]]></description>
10
10
  <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
11
11
  <% items.each do |item| %>
12
12
  <item>
@@ -18,6 +18,9 @@
18
18
  <description><![CDATA[<%= item.description %>]]></description>
19
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
20
20
  <guid isPermaLink="true"><%= item.link %></guid>
21
+ <% if item.image_url %>
22
+ <enclosure url=<%= item.image_url.encode(xml: :attr) %> length="0" type="<%= item.image_mime_type %>"/>
23
+ <% end %>
21
24
  </item>
22
25
  <% end %>
23
26
  </channel>
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.add_runtime_dependency 'faraday'
31
31
  spec.add_runtime_dependency 'faraday_middleware'
32
32
  spec.add_runtime_dependency 'hibana', '>= 0.2'
33
+ spec.add_runtime_dependency 'mimemagic'
33
34
  spec.add_runtime_dependency 'nokogiri'
34
35
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
35
36
  spec.add_runtime_dependency 'thor'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-23 00:00:00.000000000 Z
11
+ date: 2020-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: mimemagic
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: nokogiri
85
99
  requirement: !ruby/object:Gem::Requirement