weneedfeed 0.7.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 95dd3b68968422630f470e196bc5d72dc3994955d3e1ec98ce8696f1a4484227
4
- data.tar.gz: 60341f1fd574dbe7ed491f1ee304d521401ce0d3cc4ecce21477abbcd076d2b4
3
+ metadata.gz: 42d4185b6423aab2b4120f63152efd3a1f21b22b78caed8c9e8f797f37432853
4
+ data.tar.gz: fccbdc509d5e945d1c69db3b8dd5ab266e097e10b8ca19f753037656c411eff8
5
5
  SHA512:
6
- metadata.gz: 8f9efbb13b594e7f3921f5a221d59e5c3d72ba02f59e4d6f05bc3611a1977a0f1f7b0e216714021f6a47abace051b46da0ea5d0df69021b91f61ff496589110f
7
- data.tar.gz: 00e8744d224a3dca697a46769632071656a3c6e2ff0a9da018ac4d0d5aa5bf6265dbf636c6b24adc2e074d239b38c509c9b1ff207d938fce3359e716d46dbd91
6
+ metadata.gz: d39391a66a6d38b8c264b2bdda57b1da9d88c9b41acabd81976adde18822e37c2e4e64c1b915ca4e2cf9105e12f2299e34657350237a2cbd026a94f801105189
7
+ data.tar.gz: a16bb4796e0254bbd251d6de8885118a385e54b43de2c8851a6122e389ef9bdc22cd8b08c30aa5d0e7f676f8c57c692bb9e8f898f6b88cd3b06a4190785ffd10
@@ -7,14 +7,24 @@ on:
7
7
  - master
8
8
 
9
9
  jobs:
10
- build:
11
- runs-on: ubuntu-18.04
10
+ rspec:
11
+ runs-on: ubuntu-20.04
12
12
  steps:
13
13
  - uses: actions/checkout@v2
14
+ - uses: ruby/setup-ruby@v1
15
+ with:
16
+ bundler-cache: true
17
+ ruby-version: 2.7.2
18
+ - run: bundle exec rspec --force-color
19
+ rubocop:
20
+ runs-on: ubuntu-20.04
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ with:
24
+ ref: ${{ github.event.pull_request.head.sha }}
14
25
  - uses: ruby/setup-ruby@v1
15
26
  with:
16
27
  bundler-cache: true
17
28
  ruby-version: 2.7.2
18
29
  - uses: r7kamura/rubocop-problem-matchers-action@v1
19
30
  - run: bundle exec rubocop --parallel
20
- - run: bundle exec rspec --force-color
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.9.2
11
+
12
+ ### Fixed
13
+
14
+ - Fix item image URL on JSON source.
15
+ - Fix item image MIME type detection.
16
+
17
+ ## 0.9.1 - 2020-12-03
18
+
19
+ ### Fixed
20
+
21
+ - Fix escape bug in escaping HTML and generating XML.
22
+
23
+ ## 0.9.0 - 2020-12-03
24
+
25
+ ### Added
26
+
27
+ - Add item_image_selector.
28
+
29
+ ## 0.8.0 - 2020-11-25
30
+
31
+ ### Added
32
+
33
+ - Add page description for writing channel description.
34
+
35
+ ### Changed
36
+
37
+ - Change channel link content from feeds index URL to page URL.
38
+
39
+ ## 0.7.2 - 2020-11-25
40
+
41
+ ### Fixed
42
+
43
+ - Fix time parse error caused by HTML entities in the time text.
44
+
10
45
  ## 0.7.1 - 2020-11-25
11
46
 
12
47
  ### Fixed
@@ -1,12 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.7.1)
4
+ weneedfeed (0.9.2)
5
5
  activesupport
6
6
  builder
7
7
  faraday
8
8
  faraday_middleware
9
9
  hibana (>= 0.2)
10
+ mimemagic
10
11
  nokogiri
11
12
  rack-capture (>= 0.4.0)
12
13
  thor
@@ -44,6 +45,7 @@ GEM
44
45
  tilt
45
46
  i18n (1.8.5)
46
47
  concurrent-ruby (~> 1.0)
48
+ mimemagic (0.3.5)
47
49
  mini_portile2 (2.4.0)
48
50
  minitest (5.14.2)
49
51
  multipart-post (2.1.1)
@@ -106,7 +108,7 @@ GEM
106
108
  addressable (>= 2.3.6)
107
109
  crack (>= 0.3.2)
108
110
  hashdiff (>= 0.4.0, < 2.0.0)
109
- zeitwerk (2.4.1)
111
+ zeitwerk (2.4.2)
110
112
 
111
113
  PLATFORMS
112
114
  ruby
data/README.md CHANGED
@@ -27,24 +27,28 @@ gem install weneedfeed
27
27
 
28
28
  ## Schema
29
29
 
30
- You need to write a schema file named with `weneedfeed.yml` to use this gem.
30
+ Weneedfeed requires a `weneedfeed.yml` file that describes URLs and selectors.
31
31
 
32
32
  ### Example
33
33
 
34
34
  ```yaml
35
35
  pages:
36
36
  - id: example1
37
- title: Example feed with CSS Selector
37
+ title: Example 1
38
+ description: Example feed with CSS Selector
38
39
  url: http://example.com/1
39
40
  item_selector: li
40
41
  item_description_selector: p:nth-child(3)
42
+ item_image_selector: img
41
43
  item_link_selector: a
42
44
  item_time_selector: time
43
45
  item_title_selector: p:nth-child(2)
44
46
  - id: example2
45
- title: Example feed with XPath
47
+ title: Example 2
48
+ description: Example feed with XPath
46
49
  url: http://example.com/2
47
50
  item_selector: //li
51
+ item_image_selector: .//img
48
52
  item_description_selector: .//p[3]
49
53
  item_link_selector: .//a
50
54
  item_time_selector: .//time
@@ -56,7 +60,7 @@ pages:
56
60
  Feed ID.
57
61
 
58
62
  - required
59
- - Used for feed URL.
63
+ - Used for feed URL and `<link>` element in `<channel>` element.
60
64
 
61
65
  ### `title`
62
66
 
@@ -65,6 +69,13 @@ Feed title.
65
69
  - required
66
70
  - Used for RSS `<title>` element in `<channel>` element.
67
71
 
72
+ ### `description`
73
+
74
+ Feed description.
75
+
76
+ - optional
77
+ - Used for RSS `<description>` element in `<channel>` element.
78
+
68
79
  ### `url`
69
80
 
70
81
  HTML source URL.
@@ -100,6 +111,13 @@ CSS or XPath selector to find element with description information in each item.
100
111
  - optional
101
112
  - Used for `<description>` in `<item>`.
102
113
 
114
+ ### `item_image_selector`
115
+
116
+ CSS or XPath selector to find `<img>` element in each item.
117
+
118
+ - optional
119
+ - Used for `<enclosure>` in `<item>`.
120
+
103
121
  ### `item_time_selector`
104
122
 
105
123
  CSS or XPath selector to find element with datetime information in each item.
@@ -11,7 +11,9 @@ module Weneedfeed
11
11
  end
12
12
 
13
13
  scraping = ::Weneedfeed::Scraping.new(
14
+ description: page_schema.description,
14
15
  item_description_selector: page_schema.item_description_selector,
16
+ item_image_selector: page_schema.item_image_selector,
15
17
  item_link_selector: page_schema.item_link_selector,
16
18
  item_time_selector: page_schema.item_time_selector,
17
19
  item_title_selector: page_schema.item_title_selector,
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'mimemagic'
4
+ require 'uri'
5
+
3
6
  module Weneedfeed
4
7
  class Item
5
8
  class << self
@@ -16,6 +19,7 @@ module Weneedfeed
16
19
  end
17
20
 
18
21
  # @param [String, nil] description_selector
22
+ # @param [String, nil] image_selector
19
23
  # @param [String, nil] link_selector
20
24
  # @param [Nokogiri::Node] node
21
25
  # @param [String] time_selector
@@ -23,6 +27,7 @@ module Weneedfeed
23
27
  # @param [String] url
24
28
  def initialize(
25
29
  description_selector:,
30
+ image_selector:,
26
31
  link_selector:,
27
32
  node:,
28
33
  time_selector:,
@@ -30,6 +35,7 @@ module Weneedfeed
30
35
  url:
31
36
  )
32
37
  @description_selector = description_selector
38
+ @image_selector = image_selector
33
39
  @link_selector = link_selector
34
40
  @node = node
35
41
  @time_selector = time_selector
@@ -44,6 +50,37 @@ module Weneedfeed
44
50
  @node.at(@description_selector)&.inner_html
45
51
  end
46
52
 
53
+ # @return [String, nil]
54
+ def image_mime_type
55
+ return unless image_url
56
+
57
+ uri = ::URI.parse(image_url)
58
+ ::MimeMagic.by_path(uri.path)&.type
59
+ end
60
+
61
+ # @return [String, nil]
62
+ def image_path_or_url
63
+ return unless @image_selector
64
+
65
+ node = @node.at(@image_selector)
66
+ return unless node
67
+
68
+ if node.name == 'img'
69
+ node['src']
70
+ else
71
+ node.content
72
+ end
73
+ end
74
+
75
+ def image_url
76
+ return unless image_path_or_url
77
+
78
+ ::URI.join(
79
+ @url,
80
+ image_path_or_url
81
+ ).to_s
82
+ end
83
+
47
84
  # @return [String]
48
85
  def link
49
86
  ::URI.join(
@@ -90,7 +127,7 @@ module Weneedfeed
90
127
  node = time_node
91
128
  return unless node
92
129
 
93
- node['datetime'] || node.inner_html
130
+ node['datetime'] || node.inner_text
94
131
  end
95
132
  end
96
133
  end
@@ -2,13 +2,18 @@
2
2
 
3
3
  module Weneedfeed
4
4
  class Page
5
+ # @return [String, nil]
6
+ attr_reader :description
7
+
5
8
  # @return [String]
6
9
  attr_reader :title
7
10
 
8
11
  # @return [String]
9
12
  attr_reader :url
10
13
 
14
+ # @param [String, nil] description
11
15
  # @param [String, nil] item_description_selector
16
+ # @param [String, nil] item_image_selector
12
17
  # @param [String] item_link_selector
13
18
  # @param [String, nil] item_time_selector
14
19
  # @param [String] item_title_selector
@@ -17,7 +22,9 @@ module Weneedfeed
17
22
  # @param [String] title
18
23
  # @param [String] url
19
24
  def initialize(
25
+ description:,
20
26
  item_description_selector:,
27
+ item_image_selector:,
21
28
  item_link_selector:,
22
29
  item_time_selector:,
23
30
  item_title_selector:,
@@ -26,7 +33,9 @@ module Weneedfeed
26
33
  title:,
27
34
  url:
28
35
  )
36
+ @description = description
29
37
  @item_description_selector = item_description_selector
38
+ @item_image_selector = item_image_selector
30
39
  @item_link_selector = item_link_selector
31
40
  @item_time_selector = item_time_selector
32
41
  @item_title_selector = item_title_selector
@@ -41,6 +50,7 @@ module Weneedfeed
41
50
  @node.search(@item_selector).map do |node|
42
51
  ::Weneedfeed::Item.new(
43
52
  description_selector: @item_description_selector,
53
+ image_selector: @item_image_selector,
44
54
  link_selector: @item_link_selector,
45
55
  node: node,
46
56
  time_selector: @item_time_selector,
@@ -2,8 +2,10 @@
2
2
 
3
3
  module Weneedfeed
4
4
  PageSchema = Struct.new(
5
+ :description,
5
6
  :id,
6
7
  :item_description_selector,
8
+ :item_image_selector,
7
9
  :item_link_selector,
8
10
  :item_time_selector,
9
11
  :item_title_selector,
@@ -24,8 +24,10 @@ module Weneedfeed
24
24
  def page_schemata
25
25
  @raw['pages'].map do |hash|
26
26
  ::Weneedfeed::PageSchema.new(
27
+ description: hash['description'],
27
28
  id: hash['id'],
28
29
  item_description_selector: hash['item_description_selector'],
30
+ item_image_selector: hash['item_image_selector'],
29
31
  item_link_selector: hash['item_link_selector'],
30
32
  item_time_selector: hash['item_time_selector'],
31
33
  item_title_selector: hash['item_title_selector'],
@@ -14,7 +14,9 @@ module Weneedfeed
14
14
  end
15
15
  end
16
16
 
17
+ # @param [String, nil] description
17
18
  # @param [String, nil] item_description_selector
19
+ # @param [String, nil] item_image_selector
18
20
  # @param [String] item_link_selector
19
21
  # @param [String, nil] item_time_selector
20
22
  # @param [String] item_title_selector
@@ -22,7 +24,9 @@ module Weneedfeed
22
24
  # @param [String] title
23
25
  # @param [String] url
24
26
  def initialize(
27
+ description:,
25
28
  item_description_selector:,
29
+ item_image_selector:,
26
30
  item_link_selector:,
27
31
  item_time_selector:,
28
32
  item_title_selector:,
@@ -30,7 +34,9 @@ module Weneedfeed
30
34
  title:,
31
35
  url:
32
36
  )
37
+ @description = description
33
38
  @item_description_selector = item_description_selector
39
+ @item_image_selector = item_image_selector
34
40
  @item_link_selector = item_link_selector
35
41
  @item_time_selector = item_time_selector
36
42
  @item_title_selector = item_title_selector
@@ -42,8 +48,10 @@ module Weneedfeed
42
48
  # @return [Weneedfeed::Page]
43
49
  def call
44
50
  ::Weneedfeed::Page.new(
51
+ description: @description,
45
52
  node: parsed_body,
46
53
  item_description_selector: @item_description_selector,
54
+ item_image_selector: @item_image_selector,
47
55
  item_selector: @item_selector,
48
56
  item_link_selector: @item_link_selector,
49
57
  item_time_selector: @item_time_selector,
@@ -57,12 +65,16 @@ module Weneedfeed
57
65
 
58
66
  # @return [Nokogiri::Node]
59
67
  def parsed_body
60
- ::Nokogiri::XML.parse(response.body)
68
+ if response.headers['Content-Type']&.include?('application/json')
69
+ ::Nokogiri::XML.parse(response.body)
70
+ else
71
+ ::Nokogiri::HTML.parse(response.body)
72
+ end
61
73
  end
62
74
 
63
75
  # @return [Faraday::Response]
64
76
  def response
65
- self.class.faraday_connection.get(@url)
77
+ @response ||= self.class.faraday_connection.get(@url)
66
78
  end
67
79
  end
68
80
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.7.1'
4
+ VERSION = '0.9.2'
5
5
  end
@@ -4,9 +4,9 @@
4
4
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
5
5
  <channel>
6
6
  <title><![CDATA[<%= @page.title %>]]></title>
7
- <link><%= "#{request.base_url}#{top_page_path}" %></link>
7
+ <link><%= "#{@page.url}" %></link>
8
8
  <atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
9
- <description><![CDATA[Recent content on <%= @page.title %>]]></description>
9
+ <description><![CDATA[<%= @page.description %>]]></description>
10
10
  <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
11
11
  <% items.each do |item| %>
12
12
  <item>
@@ -18,6 +18,9 @@
18
18
  <description><![CDATA[<%= item.description %>]]></description>
19
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
20
20
  <guid isPermaLink="true"><%= item.link %></guid>
21
+ <% if item.image_url %>
22
+ <enclosure url=<%= item.image_url.encode(xml: :attr) %> length="0" type="<%= item.image_mime_type %>"/>
23
+ <% end %>
21
24
  </item>
22
25
  <% end %>
23
26
  </channel>
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.add_runtime_dependency 'faraday'
31
31
  spec.add_runtime_dependency 'faraday_middleware'
32
32
  spec.add_runtime_dependency 'hibana', '>= 0.2'
33
+ spec.add_runtime_dependency 'mimemagic'
33
34
  spec.add_runtime_dependency 'nokogiri'
34
35
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
35
36
  spec.add_runtime_dependency 'thor'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-24 00:00:00.000000000 Z
11
+ date: 2020-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: mimemagic
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: nokogiri
85
99
  requirement: !ruby/object:Gem::Requirement