weneedfeed 0.7.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +13 -3
- data/CHANGELOG.md +34 -0
- data/Gemfile.lock +4 -2
- data/README.md +22 -4
- data/lib/weneedfeed/controllers/show_feed.rb +2 -0
- data/lib/weneedfeed/item.rb +31 -3
- data/lib/weneedfeed/page.rb +10 -0
- data/lib/weneedfeed/page_schema.rb +2 -0
- data/lib/weneedfeed/schema.rb +2 -0
- data/lib/weneedfeed/scraping.rb +14 -2
- data/lib/weneedfeed/version.rb +1 -1
- data/templates/show_feed.xml.erb +5 -2
- data/weneedfeed.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7aeb8844141a877a1c8e17c1aa1549d030a9384d1fbc01219a75a45891823830
|
4
|
+
data.tar.gz: e707d7fdec8299c0b5cf76c0628ab58a5e8b1c1aec443fe2cc00524671df68b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3db194daf276a2abff61e6c7a92cc137404b6bfb640df7db77a770316e4f10f6cc57b72172bcdda94eceb1b4db726bfc0547719452d097dd33d320394e7f6ea
|
7
|
+
data.tar.gz: 14cfe2f49aa515f396d4ae598bb07eaeab91cec57d1f5201e68d04770bb6d762b86e346700030a0f7fdd2b9fd57b81e64ff3982c03bdbb34b02e73f34a83f03f
|
data/.github/workflows/test.yml
CHANGED
@@ -7,14 +7,24 @@ on:
|
|
7
7
|
- master
|
8
8
|
|
9
9
|
jobs:
|
10
|
-
|
11
|
-
runs-on: ubuntu-
|
10
|
+
rspec:
|
11
|
+
runs-on: ubuntu-20.04
|
12
12
|
steps:
|
13
13
|
- uses: actions/checkout@v2
|
14
|
+
- uses: ruby/setup-ruby@v1
|
15
|
+
with:
|
16
|
+
bundler-cache: true
|
17
|
+
ruby-version: 2.7.2
|
18
|
+
- run: bundle exec rspec --force-color
|
19
|
+
rubocop:
|
20
|
+
runs-on: ubuntu-20.04
|
21
|
+
steps:
|
22
|
+
- uses: actions/checkout@v2
|
23
|
+
with:
|
24
|
+
ref: ${{ github.event.pull_request.head.sha }}
|
14
25
|
- uses: ruby/setup-ruby@v1
|
15
26
|
with:
|
16
27
|
bundler-cache: true
|
17
28
|
ruby-version: 2.7.2
|
18
29
|
- uses: r7kamura/rubocop-problem-matchers-action@v1
|
19
30
|
- run: bundle exec rubocop --parallel
|
20
|
-
- run: bundle exec rspec --force-color
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## Unreleased
|
9
9
|
|
10
|
+
## 0.9.1 - 2020-12-03
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
- Fix escape bug in escaping HTML and generating XML.
|
15
|
+
|
16
|
+
## 0.9.0 - 2020-12-03
|
17
|
+
|
18
|
+
### Added
|
19
|
+
|
20
|
+
- Add item_image_selector.
|
21
|
+
|
22
|
+
## 0.8.0 - 2020-11-25
|
23
|
+
|
24
|
+
### Added
|
25
|
+
|
26
|
+
- Add page description for writing channel description.
|
27
|
+
|
28
|
+
### Changed
|
29
|
+
|
30
|
+
- Change channel link content from feeds index URL to page URL.
|
31
|
+
|
32
|
+
## 0.7.2 - 2020-11-25
|
33
|
+
|
34
|
+
### Fixed
|
35
|
+
|
36
|
+
- Fix time parse error by HTML entities handling.
|
37
|
+
|
38
|
+
## 0.7.1 - 2020-11-25
|
39
|
+
|
40
|
+
### Fixed
|
41
|
+
|
42
|
+
- Fix error when no title element found.
|
43
|
+
|
10
44
|
## 0.7.0 - 2020-11-23
|
11
45
|
|
12
46
|
### Added
|
data/Gemfile.lock
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
weneedfeed (0.7.0)
|
4
|
+
weneedfeed (0.9.1)
|
5
5
|
activesupport
|
6
6
|
builder
|
7
7
|
faraday
|
8
8
|
faraday_middleware
|
9
9
|
hibana (>= 0.2)
|
10
|
+
mimemagic
|
10
11
|
nokogiri
|
11
12
|
rack-capture (>= 0.4.0)
|
12
13
|
thor
|
@@ -44,6 +45,7 @@ GEM
|
|
44
45
|
tilt
|
45
46
|
i18n (1.8.5)
|
46
47
|
concurrent-ruby (~> 1.0)
|
48
|
+
mimemagic (0.3.5)
|
47
49
|
mini_portile2 (2.4.0)
|
48
50
|
minitest (5.14.2)
|
49
51
|
multipart-post (2.1.1)
|
@@ -106,7 +108,7 @@ GEM
|
|
106
108
|
addressable (>= 2.3.6)
|
107
109
|
crack (>= 0.3.2)
|
108
110
|
hashdiff (>= 0.4.0, < 2.0.0)
|
109
|
-
zeitwerk (2.4.
|
111
|
+
zeitwerk (2.4.2)
|
110
112
|
|
111
113
|
PLATFORMS
|
112
114
|
ruby
|
data/README.md
CHANGED
@@ -27,24 +27,28 @@ gem install weneedfeed
|
|
27
27
|
|
28
28
|
## Schema
|
29
29
|
|
30
|
-
|
30
|
+
Weneedfeed requires `weneedfeed.yml` that describes URLs and selectors.
|
31
31
|
|
32
32
|
### Example
|
33
33
|
|
34
34
|
```yaml
|
35
35
|
pages:
|
36
36
|
- id: example1
|
37
|
-
title: Example
|
37
|
+
title: Example 1
|
38
|
+
description: Example feed with CSS Selector
|
38
39
|
url: http://example.com/1
|
39
40
|
item_selector: li
|
40
41
|
item_description_selector: p:nth-child(3)
|
42
|
+
item_image_selector: img
|
41
43
|
item_link_selector: a
|
42
44
|
item_time_selector: time
|
43
45
|
item_title_selector: p:nth-child(2)
|
44
46
|
- id: example2
|
45
|
-
title: Example
|
47
|
+
title: Example 2
|
48
|
+
description: Example feed with XPath
|
46
49
|
url: http://example.com/2
|
47
50
|
item_selector: //li
|
51
|
+
item_image_selector: .//img
|
48
52
|
item_description_selector: .//p[3]
|
49
53
|
item_link_selector: .//a
|
50
54
|
item_time_selector: .//time
|
@@ -56,7 +60,7 @@ pages:
|
|
56
60
|
Feed ID.
|
57
61
|
|
58
62
|
- required
|
59
|
-
- Used for feed URL.
|
63
|
+
- Used for feed URL and `<link>` element in `<channel>` element.
|
60
64
|
|
61
65
|
### `title`
|
62
66
|
|
@@ -65,6 +69,13 @@ Feed title.
|
|
65
69
|
- required
|
66
70
|
- Used for RSS `<title>` element in `<channel>` element.
|
67
71
|
|
72
|
+
### `description`
|
73
|
+
|
74
|
+
Feed description.
|
75
|
+
|
76
|
+
- optional
|
77
|
+
- Used for RSS `<description>` element in `<channel>` element.
|
78
|
+
|
68
79
|
### `url`
|
69
80
|
|
70
81
|
HTML source URL.
|
@@ -100,6 +111,13 @@ CSS or XPath selector to find element with description information in each item.
|
|
100
111
|
- optional
|
101
112
|
- Used for `<description>` in `<item>`.
|
102
113
|
|
114
|
+
### `item_image_selector`
|
115
|
+
|
116
|
+
CSS or XPath selector to find `<img>` element in each item.
|
117
|
+
|
118
|
+
- optional
|
119
|
+
- Used for `<enclosure>` in `<item>`.
|
120
|
+
|
103
121
|
### `item_time_selector`
|
104
122
|
|
105
123
|
CSS or XPath selector to find element with datetime information in each item.
|
@@ -11,7 +11,9 @@ module Weneedfeed
|
|
11
11
|
end
|
12
12
|
|
13
13
|
scraping = ::Weneedfeed::Scraping.new(
|
14
|
+
description: page_schema.description,
|
14
15
|
item_description_selector: page_schema.item_description_selector,
|
16
|
+
item_image_selector: page_schema.item_image_selector,
|
15
17
|
item_link_selector: page_schema.item_link_selector,
|
16
18
|
item_time_selector: page_schema.item_time_selector,
|
17
19
|
item_title_selector: page_schema.item_title_selector,
|
data/lib/weneedfeed/item.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'mimemagic'
|
4
|
+
|
3
5
|
module Weneedfeed
|
4
6
|
class Item
|
5
7
|
class << self
|
@@ -16,6 +18,7 @@ module Weneedfeed
|
|
16
18
|
end
|
17
19
|
|
18
20
|
# @param [String, nil] description_selector
|
21
|
+
# @param [String, nil] image_selector
|
19
22
|
# @param [String, nil] link_selector
|
20
23
|
# @param [Nokogiri::Node] node
|
21
24
|
# @param [String] time_selector
|
@@ -23,6 +26,7 @@ module Weneedfeed
|
|
23
26
|
# @param [String] url
|
24
27
|
def initialize(
|
25
28
|
description_selector:,
|
29
|
+
image_selector:,
|
26
30
|
link_selector:,
|
27
31
|
node:,
|
28
32
|
time_selector:,
|
@@ -30,6 +34,7 @@ module Weneedfeed
|
|
30
34
|
url:
|
31
35
|
)
|
32
36
|
@description_selector = description_selector
|
37
|
+
@image_selector = image_selector
|
33
38
|
@link_selector = link_selector
|
34
39
|
@node = node
|
35
40
|
@time_selector = time_selector
|
@@ -44,6 +49,29 @@ module Weneedfeed
|
|
44
49
|
@node.at(@description_selector)&.inner_html
|
45
50
|
end
|
46
51
|
|
52
|
+
# @return [String, nil]
|
53
|
+
def image_mime_type
|
54
|
+
return unless image_url
|
55
|
+
|
56
|
+
::MimeMagic.by_path(image_url)&.type
|
57
|
+
end
|
58
|
+
|
59
|
+
# @return [String, nil]
|
60
|
+
def image_path_or_url
|
61
|
+
return unless @image_selector
|
62
|
+
|
63
|
+
@node.at(@image_selector)&.[]('src')
|
64
|
+
end
|
65
|
+
|
66
|
+
def image_url
|
67
|
+
return unless image_path_or_url
|
68
|
+
|
69
|
+
::URI.join(
|
70
|
+
@url,
|
71
|
+
image_path_or_url
|
72
|
+
).to_s
|
73
|
+
end
|
74
|
+
|
47
75
|
# @return [String]
|
48
76
|
def link
|
49
77
|
::URI.join(
|
@@ -73,9 +101,9 @@ module Weneedfeed
|
|
73
101
|
self.class.parse_time(string)
|
74
102
|
end
|
75
103
|
|
76
|
-
# @return [String]
|
104
|
+
# @return [String, nil]
|
77
105
|
def title
|
78
|
-
@node.at(@title_selector)
|
106
|
+
@node.at(@title_selector)&.inner_text
|
79
107
|
end
|
80
108
|
|
81
109
|
private
|
@@ -90,7 +118,7 @@ module Weneedfeed
|
|
90
118
|
node = time_node
|
91
119
|
return unless node
|
92
120
|
|
93
|
-
node['datetime'] || node.
|
121
|
+
node['datetime'] || node.inner_text
|
94
122
|
end
|
95
123
|
end
|
96
124
|
end
|
data/lib/weneedfeed/page.rb
CHANGED
@@ -2,13 +2,18 @@
|
|
2
2
|
|
3
3
|
module Weneedfeed
|
4
4
|
class Page
|
5
|
+
# @return [String, nil]
|
6
|
+
attr_reader :description
|
7
|
+
|
5
8
|
# @return [String]
|
6
9
|
attr_reader :title
|
7
10
|
|
8
11
|
# @return [String]
|
9
12
|
attr_reader :url
|
10
13
|
|
14
|
+
# @param [String, nil] description
|
11
15
|
# @param [String, nil] item_description_selector
|
16
|
+
# @param [String, nil] item_image_selector
|
12
17
|
# @param [String] item_link_selector
|
13
18
|
# @param [String, nil] item_time_selector
|
14
19
|
# @param [String] item_title_selector
|
@@ -17,7 +22,9 @@ module Weneedfeed
|
|
17
22
|
# @param [String] title
|
18
23
|
# @param [String] url
|
19
24
|
def initialize(
|
25
|
+
description:,
|
20
26
|
item_description_selector:,
|
27
|
+
item_image_selector:,
|
21
28
|
item_link_selector:,
|
22
29
|
item_time_selector:,
|
23
30
|
item_title_selector:,
|
@@ -26,7 +33,9 @@ module Weneedfeed
|
|
26
33
|
title:,
|
27
34
|
url:
|
28
35
|
)
|
36
|
+
@description = description
|
29
37
|
@item_description_selector = item_description_selector
|
38
|
+
@item_image_selector = item_image_selector
|
30
39
|
@item_link_selector = item_link_selector
|
31
40
|
@item_time_selector = item_time_selector
|
32
41
|
@item_title_selector = item_title_selector
|
@@ -41,6 +50,7 @@ module Weneedfeed
|
|
41
50
|
@node.search(@item_selector).map do |node|
|
42
51
|
::Weneedfeed::Item.new(
|
43
52
|
description_selector: @item_description_selector,
|
53
|
+
image_selector: @item_image_selector,
|
44
54
|
link_selector: @item_link_selector,
|
45
55
|
node: node,
|
46
56
|
time_selector: @item_time_selector,
|
data/lib/weneedfeed/schema.rb
CHANGED
@@ -24,8 +24,10 @@ module Weneedfeed
|
|
24
24
|
def page_schemata
|
25
25
|
@raw['pages'].map do |hash|
|
26
26
|
::Weneedfeed::PageSchema.new(
|
27
|
+
description: hash['description'],
|
27
28
|
id: hash['id'],
|
28
29
|
item_description_selector: hash['item_description_selector'],
|
30
|
+
item_image_selector: hash['item_image_selector'],
|
29
31
|
item_link_selector: hash['item_link_selector'],
|
30
32
|
item_time_selector: hash['item_time_selector'],
|
31
33
|
item_title_selector: hash['item_title_selector'],
|
data/lib/weneedfeed/scraping.rb
CHANGED
@@ -14,7 +14,9 @@ module Weneedfeed
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
# @param [String, nil] description
|
17
18
|
# @param [String, nil] item_description_selector
|
19
|
+
# @param [String, nil] item_image_selector
|
18
20
|
# @param [String] item_link_selector
|
19
21
|
# @param [String, nil] item_time_selector
|
20
22
|
# @param [String] item_title_selector
|
@@ -22,7 +24,9 @@ module Weneedfeed
|
|
22
24
|
# @param [String] title
|
23
25
|
# @param [String] url
|
24
26
|
def initialize(
|
27
|
+
description:,
|
25
28
|
item_description_selector:,
|
29
|
+
item_image_selector:,
|
26
30
|
item_link_selector:,
|
27
31
|
item_time_selector:,
|
28
32
|
item_title_selector:,
|
@@ -30,7 +34,9 @@ module Weneedfeed
|
|
30
34
|
title:,
|
31
35
|
url:
|
32
36
|
)
|
37
|
+
@description = description
|
33
38
|
@item_description_selector = item_description_selector
|
39
|
+
@item_image_selector = item_image_selector
|
34
40
|
@item_link_selector = item_link_selector
|
35
41
|
@item_time_selector = item_time_selector
|
36
42
|
@item_title_selector = item_title_selector
|
@@ -42,8 +48,10 @@ module Weneedfeed
|
|
42
48
|
# @return [Weneedfeed::Page]
|
43
49
|
def call
|
44
50
|
::Weneedfeed::Page.new(
|
51
|
+
description: @description,
|
45
52
|
node: parsed_body,
|
46
53
|
item_description_selector: @item_description_selector,
|
54
|
+
item_image_selector: @item_image_selector,
|
47
55
|
item_selector: @item_selector,
|
48
56
|
item_link_selector: @item_link_selector,
|
49
57
|
item_time_selector: @item_time_selector,
|
@@ -57,12 +65,16 @@ module Weneedfeed
|
|
57
65
|
|
58
66
|
# @return [Nokogiri::Node]
|
59
67
|
def parsed_body
|
60
|
-
|
68
|
+
if response.headers['Content-Type']&.include?('application/json')
|
69
|
+
::Nokogiri::XML.parse(response.body)
|
70
|
+
else
|
71
|
+
::Nokogiri::HTML.parse(response.body)
|
72
|
+
end
|
61
73
|
end
|
62
74
|
|
63
75
|
# @return [Faraday::Response]
|
64
76
|
def response
|
65
|
-
self.class.faraday_connection.get(@url)
|
77
|
+
@response ||= self.class.faraday_connection.get(@url)
|
66
78
|
end
|
67
79
|
end
|
68
80
|
end
|
data/lib/weneedfeed/version.rb
CHANGED
data/templates/show_feed.xml.erb
CHANGED
@@ -4,9 +4,9 @@
|
|
4
4
|
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
5
5
|
<channel>
|
6
6
|
<title><![CDATA[<%= @page.title %>]]></title>
|
7
|
-
<link><%= "#{
|
7
|
+
<link><%= "#{@page.url}" %></link>
|
8
8
|
<atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
|
9
|
-
<description><![CDATA[
|
9
|
+
<description><![CDATA[<%= @page.description %>]]></description>
|
10
10
|
<lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
|
11
11
|
<% items.each do |item| %>
|
12
12
|
<item>
|
@@ -18,6 +18,9 @@
|
|
18
18
|
<description><![CDATA[<%= item.description %>]]></description>
|
19
19
|
<content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
|
20
20
|
<guid isPermaLink="true"><%= item.link %></guid>
|
21
|
+
<% if item.image_url %>
|
22
|
+
<enclosure url=<%= item.image_url.encode(xml: :attr) %> length="0" type="<%= item.image_mime_type %>"/>
|
23
|
+
<% end %>
|
21
24
|
</item>
|
22
25
|
<% end %>
|
23
26
|
</channel>
|
data/weneedfeed.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.add_runtime_dependency 'faraday'
|
31
31
|
spec.add_runtime_dependency 'faraday_middleware'
|
32
32
|
spec.add_runtime_dependency 'hibana', '>= 0.2'
|
33
|
+
spec.add_runtime_dependency 'mimemagic'
|
33
34
|
spec.add_runtime_dependency 'nokogiri'
|
34
35
|
spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
|
35
36
|
spec.add_runtime_dependency 'thor'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weneedfeed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryo Nakamura
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0.2'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: mimemagic
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: nokogiri
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|