weneedfeed 0.7.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 95dd3b68968422630f470e196bc5d72dc3994955d3e1ec98ce8696f1a4484227
4
- data.tar.gz: 60341f1fd574dbe7ed491f1ee304d521401ce0d3cc4ecce21477abbcd076d2b4
3
+ metadata.gz: 42d4185b6423aab2b4120f63152efd3a1f21b22b78caed8c9e8f797f37432853
4
+ data.tar.gz: fccbdc509d5e945d1c69db3b8dd5ab266e097e10b8ca19f753037656c411eff8
5
5
  SHA512:
6
- metadata.gz: 8f9efbb13b594e7f3921f5a221d59e5c3d72ba02f59e4d6f05bc3611a1977a0f1f7b0e216714021f6a47abace051b46da0ea5d0df69021b91f61ff496589110f
7
- data.tar.gz: 00e8744d224a3dca697a46769632071656a3c6e2ff0a9da018ac4d0d5aa5bf6265dbf636c6b24adc2e074d239b38c509c9b1ff207d938fce3359e716d46dbd91
6
+ metadata.gz: d39391a66a6d38b8c264b2bdda57b1da9d88c9b41acabd81976adde18822e37c2e4e64c1b915ca4e2cf9105e12f2299e34657350237a2cbd026a94f801105189
7
+ data.tar.gz: a16bb4796e0254bbd251d6de8885118a385e54b43de2c8851a6122e389ef9bdc22cd8b08c30aa5d0e7f676f8c57c692bb9e8f898f6b88cd3b06a4190785ffd10
@@ -7,14 +7,24 @@ on:
7
7
  - master
8
8
 
9
9
  jobs:
10
- build:
11
- runs-on: ubuntu-18.04
10
+ rspec:
11
+ runs-on: ubuntu-20.04
12
12
  steps:
13
13
  - uses: actions/checkout@v2
14
+ - uses: ruby/setup-ruby@v1
15
+ with:
16
+ bundler-cache: true
17
+ ruby-version: 2.7.2
18
+ - run: bundle exec rspec --force-color
19
+ rubocop:
20
+ runs-on: ubuntu-20.04
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ with:
24
+ ref: ${{ github.event.pull_request.head.sha }}
14
25
  - uses: ruby/setup-ruby@v1
15
26
  with:
16
27
  bundler-cache: true
17
28
  ruby-version: 2.7.2
18
29
  - uses: r7kamura/rubocop-problem-matchers-action@v1
19
30
  - run: bundle exec rubocop --parallel
20
- - run: bundle exec rspec --force-color
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.9.2
11
+
12
+ ### Fixed
13
+
14
+ - Fix item image URL on JSON source.
15
+ - Fix item image MIME type detection.
16
+
17
+ ## 0.9.1 - 2020-12-03
18
+
19
+ ### Fixed
20
+
21
+ - Fix escaping bug when escaping HTML and generating XML.
22
+
23
+ ## 0.9.0 - 2020-12-03
24
+
25
+ ### Added
26
+
27
+ - Add item_image_selector.
28
+
29
+ ## 0.8.0 - 2020-11-25
30
+
31
+ ### Added
32
+
33
+ - Add page description for writing channel description.
34
+
35
+ ### Changed
36
+
37
+ - Change channel link content from feeds index URL to page URL.
38
+
39
+ ## 0.7.2 - 2020-11-25
40
+
41
+ ### Fixed
42
+
43
+ - Fix time parse error caused by HTML entities handling.
44
+
10
45
  ## 0.7.1 - 2020-11-25
11
46
 
12
47
  ### Fixed
@@ -1,12 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.7.1)
4
+ weneedfeed (0.9.2)
5
5
  activesupport
6
6
  builder
7
7
  faraday
8
8
  faraday_middleware
9
9
  hibana (>= 0.2)
10
+ mimemagic
10
11
  nokogiri
11
12
  rack-capture (>= 0.4.0)
12
13
  thor
@@ -44,6 +45,7 @@ GEM
44
45
  tilt
45
46
  i18n (1.8.5)
46
47
  concurrent-ruby (~> 1.0)
48
+ mimemagic (0.3.5)
47
49
  mini_portile2 (2.4.0)
48
50
  minitest (5.14.2)
49
51
  multipart-post (2.1.1)
@@ -106,7 +108,7 @@ GEM
106
108
  addressable (>= 2.3.6)
107
109
  crack (>= 0.3.2)
108
110
  hashdiff (>= 0.4.0, < 2.0.0)
109
- zeitwerk (2.4.1)
111
+ zeitwerk (2.4.2)
110
112
 
111
113
  PLATFORMS
112
114
  ruby
data/README.md CHANGED
@@ -27,24 +27,28 @@ gem install weneedfeed
27
27
 
28
28
  ## Schema
29
29
 
30
- You need to write a schema file named with `weneedfeed.yml` to use this gem.
30
+ Weneedfeed requires a `weneedfeed.yml` file that describes URLs and selectors.
31
31
 
32
32
  ### Example
33
33
 
34
34
  ```yaml
35
35
  pages:
36
36
  - id: example1
37
- title: Example feed with CSS Selector
37
+ title: Example 1
38
+ description: Example feed with CSS Selector
38
39
  url: http://example.com/1
39
40
  item_selector: li
40
41
  item_description_selector: p:nth-child(3)
42
+ item_image_selector: img
41
43
  item_link_selector: a
42
44
  item_time_selector: time
43
45
  item_title_selector: p:nth-child(2)
44
46
  - id: example2
45
- title: Example feed with XPath
47
+ title: Example 2
48
+ description: Example feed with XPath
46
49
  url: http://example.com/2
47
50
  item_selector: //li
51
+ item_image_selector: .//img
48
52
  item_description_selector: .//p[3]
49
53
  item_link_selector: .//a
50
54
  item_time_selector: .//time
@@ -56,7 +60,7 @@ pages:
56
60
  Feed ID.
57
61
 
58
62
  - required
59
- - Used for feed URL.
63
+ - Used for feed URL and `<link>` element in `<channel>` element.
60
64
 
61
65
  ### `title`
62
66
 
@@ -65,6 +69,13 @@ Feed title.
65
69
  - required
66
70
  - Used for RSS `<title>` element in `<channel>` element.
67
71
 
72
+ ### `description`
73
+
74
+ Feed description.
75
+
76
+ - optional
77
+ - Used for RSS `<description>` element in `<channel>` element.
78
+
68
79
  ### `url`
69
80
 
70
81
  HTML source URL.
@@ -100,6 +111,13 @@ CSS or XPath selector to find element with description information in each item.
100
111
  - optional
101
112
  - Used for `<description>` in `<item>`.
102
113
 
114
+ ### `item_image_selector`
115
+
116
+ CSS or XPath selector to find `<img>` element in each item.
117
+
118
+ - optional
119
+ - Used for `<enclosure>` in `<item>`.
120
+
103
121
  ### `item_time_selector`
104
122
 
105
123
  CSS or XPath selector to find element with datetime information in each item.
@@ -11,7 +11,9 @@ module Weneedfeed
11
11
  end
12
12
 
13
13
  scraping = ::Weneedfeed::Scraping.new(
14
+ description: page_schema.description,
14
15
  item_description_selector: page_schema.item_description_selector,
16
+ item_image_selector: page_schema.item_image_selector,
15
17
  item_link_selector: page_schema.item_link_selector,
16
18
  item_time_selector: page_schema.item_time_selector,
17
19
  item_title_selector: page_schema.item_title_selector,
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'mimemagic'
4
+ require 'uri'
5
+
3
6
  module Weneedfeed
4
7
  class Item
5
8
  class << self
@@ -16,6 +19,7 @@ module Weneedfeed
16
19
  end
17
20
 
18
21
  # @param [String, nil] description_selector
22
+ # @param [String, nil] image_selector
19
23
  # @param [String, nil] link_selector
20
24
  # @param [Nokogiri::Node] node
21
25
  # @param [String] time_selector
@@ -23,6 +27,7 @@ module Weneedfeed
23
27
  # @param [String] url
24
28
  def initialize(
25
29
  description_selector:,
30
+ image_selector:,
26
31
  link_selector:,
27
32
  node:,
28
33
  time_selector:,
@@ -30,6 +35,7 @@ module Weneedfeed
30
35
  url:
31
36
  )
32
37
  @description_selector = description_selector
38
+ @image_selector = image_selector
33
39
  @link_selector = link_selector
34
40
  @node = node
35
41
  @time_selector = time_selector
@@ -44,6 +50,37 @@ module Weneedfeed
44
50
  @node.at(@description_selector)&.inner_html
45
51
  end
46
52
 
53
+ # @return [String, nil]
54
+ def image_mime_type
55
+ return unless image_url
56
+
57
+ uri = ::URI.parse(image_url)
58
+ ::MimeMagic.by_path(uri.path)&.type
59
+ end
60
+
61
+ # @return [String, nil]
62
+ def image_path_or_url
63
+ return unless @image_selector
64
+
65
+ node = @node.at(@image_selector)
66
+ return unless node
67
+
68
+ if node.name == 'img'
69
+ node['src']
70
+ else
71
+ node.content
72
+ end
73
+ end
74
+
75
+ def image_url
76
+ return unless image_path_or_url
77
+
78
+ ::URI.join(
79
+ @url,
80
+ image_path_or_url
81
+ ).to_s
82
+ end
83
+
47
84
  # @return [String]
48
85
  def link
49
86
  ::URI.join(
@@ -90,7 +127,7 @@ module Weneedfeed
90
127
  node = time_node
91
128
  return unless node
92
129
 
93
- node['datetime'] || node.inner_html
130
+ node['datetime'] || node.inner_text
94
131
  end
95
132
  end
96
133
  end
@@ -2,13 +2,18 @@
2
2
 
3
3
  module Weneedfeed
4
4
  class Page
5
+ # @return [String, nil]
6
+ attr_reader :description
7
+
5
8
  # @return [String]
6
9
  attr_reader :title
7
10
 
8
11
  # @return [String]
9
12
  attr_reader :url
10
13
 
14
+ # @param [String, nil] description
11
15
  # @param [String, nil] item_description_selector
16
+ # @param [String, nil] item_image_selector
12
17
  # @param [String] item_link_selector
13
18
  # @param [String, nil] item_time_selector
14
19
  # @param [String] item_title_selector
@@ -17,7 +22,9 @@ module Weneedfeed
17
22
  # @param [String] title
18
23
  # @param [String] url
19
24
  def initialize(
25
+ description:,
20
26
  item_description_selector:,
27
+ item_image_selector:,
21
28
  item_link_selector:,
22
29
  item_time_selector:,
23
30
  item_title_selector:,
@@ -26,7 +33,9 @@ module Weneedfeed
26
33
  title:,
27
34
  url:
28
35
  )
36
+ @description = description
29
37
  @item_description_selector = item_description_selector
38
+ @item_image_selector = item_image_selector
30
39
  @item_link_selector = item_link_selector
31
40
  @item_time_selector = item_time_selector
32
41
  @item_title_selector = item_title_selector
@@ -41,6 +50,7 @@ module Weneedfeed
41
50
  @node.search(@item_selector).map do |node|
42
51
  ::Weneedfeed::Item.new(
43
52
  description_selector: @item_description_selector,
53
+ image_selector: @item_image_selector,
44
54
  link_selector: @item_link_selector,
45
55
  node: node,
46
56
  time_selector: @item_time_selector,
@@ -2,8 +2,10 @@
2
2
 
3
3
  module Weneedfeed
4
4
  PageSchema = Struct.new(
5
+ :description,
5
6
  :id,
6
7
  :item_description_selector,
8
+ :item_image_selector,
7
9
  :item_link_selector,
8
10
  :item_time_selector,
9
11
  :item_title_selector,
@@ -24,8 +24,10 @@ module Weneedfeed
24
24
  def page_schemata
25
25
  @raw['pages'].map do |hash|
26
26
  ::Weneedfeed::PageSchema.new(
27
+ description: hash['description'],
27
28
  id: hash['id'],
28
29
  item_description_selector: hash['item_description_selector'],
30
+ item_image_selector: hash['item_image_selector'],
29
31
  item_link_selector: hash['item_link_selector'],
30
32
  item_time_selector: hash['item_time_selector'],
31
33
  item_title_selector: hash['item_title_selector'],
@@ -14,7 +14,9 @@ module Weneedfeed
14
14
  end
15
15
  end
16
16
 
17
+ # @param [String, nil] description
17
18
  # @param [String, nil] item_description_selector
19
+ # @param [String, nil] item_image_selector
18
20
  # @param [String] item_link_selector
19
21
  # @param [String, nil] item_time_selector
20
22
  # @param [String] item_title_selector
@@ -22,7 +24,9 @@ module Weneedfeed
22
24
  # @param [String] title
23
25
  # @param [String] url
24
26
  def initialize(
27
+ description:,
25
28
  item_description_selector:,
29
+ item_image_selector:,
26
30
  item_link_selector:,
27
31
  item_time_selector:,
28
32
  item_title_selector:,
@@ -30,7 +34,9 @@ module Weneedfeed
30
34
  title:,
31
35
  url:
32
36
  )
37
+ @description = description
33
38
  @item_description_selector = item_description_selector
39
+ @item_image_selector = item_image_selector
34
40
  @item_link_selector = item_link_selector
35
41
  @item_time_selector = item_time_selector
36
42
  @item_title_selector = item_title_selector
@@ -42,8 +48,10 @@ module Weneedfeed
42
48
  # @return [Weneedfeed::Page]
43
49
  def call
44
50
  ::Weneedfeed::Page.new(
51
+ description: @description,
45
52
  node: parsed_body,
46
53
  item_description_selector: @item_description_selector,
54
+ item_image_selector: @item_image_selector,
47
55
  item_selector: @item_selector,
48
56
  item_link_selector: @item_link_selector,
49
57
  item_time_selector: @item_time_selector,
@@ -57,12 +65,16 @@ module Weneedfeed
57
65
 
58
66
  # @return [Nokogiri::Node]
59
67
  def parsed_body
60
- ::Nokogiri::XML.parse(response.body)
68
+ if response.headers['Content-Type']&.include?('application/json')
69
+ ::Nokogiri::XML.parse(response.body)
70
+ else
71
+ ::Nokogiri::HTML.parse(response.body)
72
+ end
61
73
  end
62
74
 
63
75
  # @return [Faraday::Response]
64
76
  def response
65
- self.class.faraday_connection.get(@url)
77
+ @response ||= self.class.faraday_connection.get(@url)
66
78
  end
67
79
  end
68
80
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.7.1'
4
+ VERSION = '0.9.2'
5
5
  end
@@ -4,9 +4,9 @@
4
4
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
5
5
  <channel>
6
6
  <title><![CDATA[<%= @page.title %>]]></title>
7
- <link><%= "#{request.base_url}#{top_page_path}" %></link>
7
+ <link><%= "#{@page.url}" %></link>
8
8
  <atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
9
- <description><![CDATA[Recent content on <%= @page.title %>]]></description>
9
+ <description><![CDATA[<%= @page.description %>]]></description>
10
10
  <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
11
11
  <% items.each do |item| %>
12
12
  <item>
@@ -18,6 +18,9 @@
18
18
  <description><![CDATA[<%= item.description %>]]></description>
19
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
20
20
  <guid isPermaLink="true"><%= item.link %></guid>
21
+ <% if item.image_url %>
22
+ <enclosure url=<%= item.image_url.encode(xml: :attr) %> length="0" type="<%= item.image_mime_type %>"/>
23
+ <% end %>
21
24
  </item>
22
25
  <% end %>
23
26
  </channel>
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.add_runtime_dependency 'faraday'
31
31
  spec.add_runtime_dependency 'faraday_middleware'
32
32
  spec.add_runtime_dependency 'hibana', '>= 0.2'
33
+ spec.add_runtime_dependency 'mimemagic'
33
34
  spec.add_runtime_dependency 'nokogiri'
34
35
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
35
36
  spec.add_runtime_dependency 'thor'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-24 00:00:00.000000000 Z
11
+ date: 2020-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: mimemagic
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: nokogiri
85
99
  requirement: !ruby/object:Gem::Requirement