weneedfeed 0.7.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +13 -3
- data/CHANGELOG.md +35 -0
- data/Gemfile.lock +4 -2
- data/README.md +22 -4
- data/lib/weneedfeed/controllers/show_feed.rb +2 -0
- data/lib/weneedfeed/item.rb +38 -1
- data/lib/weneedfeed/page.rb +10 -0
- data/lib/weneedfeed/page_schema.rb +2 -0
- data/lib/weneedfeed/schema.rb +2 -0
- data/lib/weneedfeed/scraping.rb +14 -2
- data/lib/weneedfeed/version.rb +1 -1
- data/templates/show_feed.xml.erb +5 -2
- data/weneedfeed.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 42d4185b6423aab2b4120f63152efd3a1f21b22b78caed8c9e8f797f37432853
|
4
|
+
data.tar.gz: fccbdc509d5e945d1c69db3b8dd5ab266e097e10b8ca19f753037656c411eff8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d39391a66a6d38b8c264b2bdda57b1da9d88c9b41acabd81976adde18822e37c2e4e64c1b915ca4e2cf9105e12f2299e34657350237a2cbd026a94f801105189
|
7
|
+
data.tar.gz: a16bb4796e0254bbd251d6de8885118a385e54b43de2c8851a6122e389ef9bdc22cd8b08c30aa5d0e7f676f8c57c692bb9e8f898f6b88cd3b06a4190785ffd10
|
data/.github/workflows/test.yml
CHANGED
@@ -7,14 +7,24 @@ on:
|
|
7
7
|
- master
|
8
8
|
|
9
9
|
jobs:
|
10
|
-
|
11
|
-
runs-on: ubuntu-
|
10
|
+
rspec:
|
11
|
+
runs-on: ubuntu-20.04
|
12
12
|
steps:
|
13
13
|
- uses: actions/checkout@v2
|
14
|
+
- uses: ruby/setup-ruby@v1
|
15
|
+
with:
|
16
|
+
bundler-cache: true
|
17
|
+
ruby-version: 2.7.2
|
18
|
+
- run: bundle exec rspec --force-color
|
19
|
+
rubocop:
|
20
|
+
runs-on: ubuntu-20.04
|
21
|
+
steps:
|
22
|
+
- uses: actions/checkout@v2
|
23
|
+
with:
|
24
|
+
ref: ${{ github.event.pull_request.head.sha }}
|
14
25
|
- uses: ruby/setup-ruby@v1
|
15
26
|
with:
|
16
27
|
bundler-cache: true
|
17
28
|
ruby-version: 2.7.2
|
18
29
|
- uses: r7kamura/rubocop-problem-matchers-action@v1
|
19
30
|
- run: bundle exec rubocop --parallel
|
20
|
-
- run: bundle exec rspec --force-color
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## Unreleased
|
9
9
|
|
10
|
+
## 0.9.2
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
- Fix item image URL on JSON source.
|
15
|
+
- Fix item image MIME type detection.
|
16
|
+
|
17
|
+
## 0.9.1 - 2020-12-03
|
18
|
+
|
19
|
+
### Fixed
|
20
|
+
|
21
|
+
- Fix escape bug in escaping HTML and generating XML.
|
22
|
+
|
23
|
+
## 0.9.0 - 2020-12-03
|
24
|
+
|
25
|
+
### Added
|
26
|
+
|
27
|
+
- Add item_image_selector.
|
28
|
+
|
29
|
+
## 0.8.0 - 2020-11-25
|
30
|
+
|
31
|
+
### Added
|
32
|
+
|
33
|
+
- Add page description for writing channel description.
|
34
|
+
|
35
|
+
### Changed
|
36
|
+
|
37
|
+
- Change channel link content from feeds index URL to page URL.
|
38
|
+
|
39
|
+
## 0.7.2 - 2020-11-25
|
40
|
+
|
41
|
+
### Fixed
|
42
|
+
|
43
|
+
- Fix time parse error by HTML entities handling.
|
44
|
+
|
10
45
|
## 0.7.1 - 2020-11-25
|
11
46
|
|
12
47
|
### Fixed
|
data/Gemfile.lock
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
weneedfeed (0.7.1)
|
4
|
+
weneedfeed (0.9.2)
|
5
5
|
activesupport
|
6
6
|
builder
|
7
7
|
faraday
|
8
8
|
faraday_middleware
|
9
9
|
hibana (>= 0.2)
|
10
|
+
mimemagic
|
10
11
|
nokogiri
|
11
12
|
rack-capture (>= 0.4.0)
|
12
13
|
thor
|
@@ -44,6 +45,7 @@ GEM
|
|
44
45
|
tilt
|
45
46
|
i18n (1.8.5)
|
46
47
|
concurrent-ruby (~> 1.0)
|
48
|
+
mimemagic (0.3.5)
|
47
49
|
mini_portile2 (2.4.0)
|
48
50
|
minitest (5.14.2)
|
49
51
|
multipart-post (2.1.1)
|
@@ -106,7 +108,7 @@ GEM
|
|
106
108
|
addressable (>= 2.3.6)
|
107
109
|
crack (>= 0.3.2)
|
108
110
|
hashdiff (>= 0.4.0, < 2.0.0)
|
109
|
-
zeitwerk (2.4.
|
111
|
+
zeitwerk (2.4.2)
|
110
112
|
|
111
113
|
PLATFORMS
|
112
114
|
ruby
|
data/README.md
CHANGED
@@ -27,24 +27,28 @@ gem install weneedfeed
|
|
27
27
|
|
28
28
|
## Schema
|
29
29
|
|
30
|
-
|
30
|
+
Weneedfeed requires `weneedfeed.yml` that describes URLs and selectors.
|
31
31
|
|
32
32
|
### Example
|
33
33
|
|
34
34
|
```yaml
|
35
35
|
pages:
|
36
36
|
- id: example1
|
37
|
-
title: Example
|
37
|
+
title: Example 1
|
38
|
+
description: Example feed with CSS Selector
|
38
39
|
url: http://example.com/1
|
39
40
|
item_selector: li
|
40
41
|
item_description_selector: p:nth-child(3)
|
42
|
+
item_image_selector: img
|
41
43
|
item_link_selector: a
|
42
44
|
item_time_selector: time
|
43
45
|
item_title_selector: p:nth-child(2)
|
44
46
|
- id: example2
|
45
|
-
title: Example
|
47
|
+
title: Example 2
|
48
|
+
description: Example feed with XPath
|
46
49
|
url: http://example.com/2
|
47
50
|
item_selector: //li
|
51
|
+
item_image_selector: .//img
|
48
52
|
item_description_selector: .//p[3]
|
49
53
|
item_link_selector: .//a
|
50
54
|
item_time_selector: .//time
|
@@ -56,7 +60,7 @@ pages:
|
|
56
60
|
Feed ID.
|
57
61
|
|
58
62
|
- required
|
59
|
-
- Used for feed URL.
|
63
|
+
- Used for feed URL and `<link>` element in `<channel>` element.
|
60
64
|
|
61
65
|
### `title`
|
62
66
|
|
@@ -65,6 +69,13 @@ Feed title.
|
|
65
69
|
- required
|
66
70
|
- Used for RSS `<title>` element in `<channel>` element.
|
67
71
|
|
72
|
+
### `description`
|
73
|
+
|
74
|
+
Feed description.
|
75
|
+
|
76
|
+
- optional
|
77
|
+
- Used for RSS `<description>` element in `<channel>` element.
|
78
|
+
|
68
79
|
### `url`
|
69
80
|
|
70
81
|
HTML source URL.
|
@@ -100,6 +111,13 @@ CSS or XPath selector to find element with description information in each item.
|
|
100
111
|
- optional
|
101
112
|
- Used for `<description>` in `<item>`.
|
102
113
|
|
114
|
+
### `item_image_selector`
|
115
|
+
|
116
|
+
CSS or XPath selector to find `<img>`element in each item.
|
117
|
+
|
118
|
+
- optional
|
119
|
+
- Used for `<enclosure>` in `<item>`.
|
120
|
+
|
103
121
|
### `item_time_selector`
|
104
122
|
|
105
123
|
CSS or XPath selector to find element with datetime information in each item.
|
data/lib/weneedfeed/controllers/show_feed.rb
CHANGED
@@ -11,7 +11,9 @@ module Weneedfeed
|
|
11
11
|
end
|
12
12
|
|
13
13
|
scraping = ::Weneedfeed::Scraping.new(
|
14
|
+
description: page_schema.description,
|
14
15
|
item_description_selector: page_schema.item_description_selector,
|
16
|
+
item_image_selector: page_schema.item_image_selector,
|
15
17
|
item_link_selector: page_schema.item_link_selector,
|
16
18
|
item_time_selector: page_schema.item_time_selector,
|
17
19
|
item_title_selector: page_schema.item_title_selector,
|
data/lib/weneedfeed/item.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'mimemagic'
|
4
|
+
require 'uri'
|
5
|
+
|
3
6
|
module Weneedfeed
|
4
7
|
class Item
|
5
8
|
class << self
|
@@ -16,6 +19,7 @@ module Weneedfeed
|
|
16
19
|
end
|
17
20
|
|
18
21
|
# @param [String, nil] description_selector
|
22
|
+
# @param [String, nil] image_selector
|
19
23
|
# @param [String, nil] link_selector
|
20
24
|
# @param [Nokogiri::Node] node
|
21
25
|
# @param [String] time_selector
|
@@ -23,6 +27,7 @@ module Weneedfeed
|
|
23
27
|
# @param [String] url
|
24
28
|
def initialize(
|
25
29
|
description_selector:,
|
30
|
+
image_selector:,
|
26
31
|
link_selector:,
|
27
32
|
node:,
|
28
33
|
time_selector:,
|
@@ -30,6 +35,7 @@ module Weneedfeed
|
|
30
35
|
url:
|
31
36
|
)
|
32
37
|
@description_selector = description_selector
|
38
|
+
@image_selector = image_selector
|
33
39
|
@link_selector = link_selector
|
34
40
|
@node = node
|
35
41
|
@time_selector = time_selector
|
@@ -44,6 +50,37 @@ module Weneedfeed
|
|
44
50
|
@node.at(@description_selector)&.inner_html
|
45
51
|
end
|
46
52
|
|
53
|
+
# @return [String, nil]
|
54
|
+
def image_mime_type
|
55
|
+
return unless image_url
|
56
|
+
|
57
|
+
uri = ::URI.parse(image_url)
|
58
|
+
::MimeMagic.by_path(uri.path)&.type
|
59
|
+
end
|
60
|
+
|
61
|
+
# @return [String, nil]
|
62
|
+
def image_path_or_url
|
63
|
+
return unless @image_selector
|
64
|
+
|
65
|
+
node = @node.at(@image_selector)
|
66
|
+
return unless node
|
67
|
+
|
68
|
+
if node.name == 'img'
|
69
|
+
node['src']
|
70
|
+
else
|
71
|
+
node.content
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def image_url
|
76
|
+
return unless image_path_or_url
|
77
|
+
|
78
|
+
::URI.join(
|
79
|
+
@url,
|
80
|
+
image_path_or_url
|
81
|
+
).to_s
|
82
|
+
end
|
83
|
+
|
47
84
|
# @return [String]
|
48
85
|
def link
|
49
86
|
::URI.join(
|
@@ -90,7 +127,7 @@ module Weneedfeed
|
|
90
127
|
node = time_node
|
91
128
|
return unless node
|
92
129
|
|
93
|
-
node['datetime'] || node.
|
130
|
+
node['datetime'] || node.inner_text
|
94
131
|
end
|
95
132
|
end
|
96
133
|
end
|
data/lib/weneedfeed/page.rb
CHANGED
@@ -2,13 +2,18 @@
|
|
2
2
|
|
3
3
|
module Weneedfeed
|
4
4
|
class Page
|
5
|
+
# @return [String, nil]
|
6
|
+
attr_reader :description
|
7
|
+
|
5
8
|
# @return [String]
|
6
9
|
attr_reader :title
|
7
10
|
|
8
11
|
# @return [String]
|
9
12
|
attr_reader :url
|
10
13
|
|
14
|
+
# @param [String, nil] description
|
11
15
|
# @param [String, nil] item_description_selector
|
16
|
+
# @param [String, nil] item_image_selector
|
12
17
|
# @param [String] item_link_selector
|
13
18
|
# @param [String, nil] item_time_selector
|
14
19
|
# @param [String] item_title_selector
|
@@ -17,7 +22,9 @@ module Weneedfeed
|
|
17
22
|
# @param [String] title
|
18
23
|
# @param [String] url
|
19
24
|
def initialize(
|
25
|
+
description:,
|
20
26
|
item_description_selector:,
|
27
|
+
item_image_selector:,
|
21
28
|
item_link_selector:,
|
22
29
|
item_time_selector:,
|
23
30
|
item_title_selector:,
|
@@ -26,7 +33,9 @@ module Weneedfeed
|
|
26
33
|
title:,
|
27
34
|
url:
|
28
35
|
)
|
36
|
+
@description = description
|
29
37
|
@item_description_selector = item_description_selector
|
38
|
+
@item_image_selector = item_image_selector
|
30
39
|
@item_link_selector = item_link_selector
|
31
40
|
@item_time_selector = item_time_selector
|
32
41
|
@item_title_selector = item_title_selector
|
@@ -41,6 +50,7 @@ module Weneedfeed
|
|
41
50
|
@node.search(@item_selector).map do |node|
|
42
51
|
::Weneedfeed::Item.new(
|
43
52
|
description_selector: @item_description_selector,
|
53
|
+
image_selector: @item_image_selector,
|
44
54
|
link_selector: @item_link_selector,
|
45
55
|
node: node,
|
46
56
|
time_selector: @item_time_selector,
|
data/lib/weneedfeed/schema.rb
CHANGED
@@ -24,8 +24,10 @@ module Weneedfeed
|
|
24
24
|
def page_schemata
|
25
25
|
@raw['pages'].map do |hash|
|
26
26
|
::Weneedfeed::PageSchema.new(
|
27
|
+
description: hash['description'],
|
27
28
|
id: hash['id'],
|
28
29
|
item_description_selector: hash['item_description_selector'],
|
30
|
+
item_image_selector: hash['item_image_selector'],
|
29
31
|
item_link_selector: hash['item_link_selector'],
|
30
32
|
item_time_selector: hash['item_time_selector'],
|
31
33
|
item_title_selector: hash['item_title_selector'],
|
data/lib/weneedfeed/scraping.rb
CHANGED
@@ -14,7 +14,9 @@ module Weneedfeed
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
# @param [String, nil] description
|
17
18
|
# @param [String, nil] item_description_selector
|
19
|
+
# @param [String, nil] item_image_selector
|
18
20
|
# @param [String] item_link_selector
|
19
21
|
# @param [String, nil] item_time_selector
|
20
22
|
# @param [String] item_title_selector
|
@@ -22,7 +24,9 @@ module Weneedfeed
|
|
22
24
|
# @param [String] title
|
23
25
|
# @param [String] url
|
24
26
|
def initialize(
|
27
|
+
description:,
|
25
28
|
item_description_selector:,
|
29
|
+
item_image_selector:,
|
26
30
|
item_link_selector:,
|
27
31
|
item_time_selector:,
|
28
32
|
item_title_selector:,
|
@@ -30,7 +34,9 @@ module Weneedfeed
|
|
30
34
|
title:,
|
31
35
|
url:
|
32
36
|
)
|
37
|
+
@description = description
|
33
38
|
@item_description_selector = item_description_selector
|
39
|
+
@item_image_selector = item_image_selector
|
34
40
|
@item_link_selector = item_link_selector
|
35
41
|
@item_time_selector = item_time_selector
|
36
42
|
@item_title_selector = item_title_selector
|
@@ -42,8 +48,10 @@ module Weneedfeed
|
|
42
48
|
# @return [Weneedfeed::Page]
|
43
49
|
def call
|
44
50
|
::Weneedfeed::Page.new(
|
51
|
+
description: @description,
|
45
52
|
node: parsed_body,
|
46
53
|
item_description_selector: @item_description_selector,
|
54
|
+
item_image_selector: @item_image_selector,
|
47
55
|
item_selector: @item_selector,
|
48
56
|
item_link_selector: @item_link_selector,
|
49
57
|
item_time_selector: @item_time_selector,
|
@@ -57,12 +65,16 @@ module Weneedfeed
|
|
57
65
|
|
58
66
|
# @return [Nokogiri::Node]
|
59
67
|
def parsed_body
|
60
|
-
|
68
|
+
if response.headers['Content-Type']&.include?('application/json')
|
69
|
+
::Nokogiri::XML.parse(response.body)
|
70
|
+
else
|
71
|
+
::Nokogiri::HTML.parse(response.body)
|
72
|
+
end
|
61
73
|
end
|
62
74
|
|
63
75
|
# @return [Faraday::Response]
|
64
76
|
def response
|
65
|
-
self.class.faraday_connection.get(@url)
|
77
|
+
@response ||= self.class.faraday_connection.get(@url)
|
66
78
|
end
|
67
79
|
end
|
68
80
|
end
|
data/lib/weneedfeed/version.rb
CHANGED
data/templates/show_feed.xml.erb
CHANGED
@@ -4,9 +4,9 @@
|
|
4
4
|
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
5
5
|
<channel>
|
6
6
|
<title><![CDATA[<%= @page.title %>]]></title>
|
7
|
-
<link><%= "#{
|
7
|
+
<link><%= "#{@page.url}" %></link>
|
8
8
|
<atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
|
9
|
-
<description><![CDATA[
|
9
|
+
<description><![CDATA[<%= @page.description %>]]></description>
|
10
10
|
<lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
|
11
11
|
<% items.each do |item| %>
|
12
12
|
<item>
|
@@ -18,6 +18,9 @@
|
|
18
18
|
<description><![CDATA[<%= item.description %>]]></description>
|
19
19
|
<content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
|
20
20
|
<guid isPermaLink="true"><%= item.link %></guid>
|
21
|
+
<% if item.image_url %>
|
22
|
+
<enclosure url=<%= item.image_url.encode(xml: :attr) %> length="0" type="<%= item.image_mime_type %>"/>
|
23
|
+
<% end %>
|
21
24
|
</item>
|
22
25
|
<% end %>
|
23
26
|
</channel>
|
data/weneedfeed.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.add_runtime_dependency 'faraday'
|
31
31
|
spec.add_runtime_dependency 'faraday_middleware'
|
32
32
|
spec.add_runtime_dependency 'hibana', '>= 0.2'
|
33
|
+
spec.add_runtime_dependency 'mimemagic'
|
33
34
|
spec.add_runtime_dependency 'nokogiri'
|
34
35
|
spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
|
35
36
|
spec.add_runtime_dependency 'thor'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weneedfeed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.1
|
4
|
+
version: 0.9.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryo Nakamura
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0.2'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: mimemagic
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: nokogiri
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|