weneedfeed 0.3.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +6 -4
- data/.rspec +1 -0
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +55 -0
- data/Gemfile.lock +13 -17
- data/README.md +108 -22
- data/lib/weneedfeed.rb +3 -0
- data/lib/weneedfeed/application.rb +4 -4
- data/lib/weneedfeed/command.rb +2 -2
- data/lib/weneedfeed/controllers.rb +1 -0
- data/lib/weneedfeed/controllers/base.rb +16 -0
- data/lib/weneedfeed/controllers/show_feed.rb +10 -19
- data/lib/weneedfeed/controllers/show_top_page.rb +3 -6
- data/lib/weneedfeed/helpers.rb +7 -0
- data/lib/weneedfeed/helpers/parameters.rb +14 -0
- data/lib/weneedfeed/item.rb +34 -19
- data/lib/weneedfeed/page.rb +20 -20
- data/lib/weneedfeed/page_schema.rb +15 -0
- data/lib/weneedfeed/schema.rb +38 -0
- data/lib/weneedfeed/scraping.rb +20 -20
- data/lib/weneedfeed/version.rb +1 -1
- data/lib/weneedfeed/views/show_feed.rb +14 -2
- data/lib/weneedfeed/views/show_top_page.rb +6 -6
- data/templates/show_feed.xml.erb +4 -2
- data/templates/show_top_page.html.erb +2 -2
- data/weneedfeed.gemspec +2 -2
- metadata +10 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c580b642b78a2c26bf06d840fad6a745d498630779c670784deba389d9b28f1a
|
4
|
+
data.tar.gz: e29a86399e141b1c650a064cf6c45066149d93dee722b70fa657366519015f7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3293b7dbba14a24988a77bb2070adfef0a4c4f343197d5633076c497747d6f66d9642b89e37ec11f9d2c51a0f91b18c8dfd614cf053339adfc3c7b4390702774
|
7
|
+
data.tar.gz: f9549a610dec4b77e9e6f86829890bc31928f086652480cbe2c501b1c9fda479a1a0d549ec6efb3a40d1f8ea4fcdb4cc4ccc198403c57ab0a2dbf3c9010ea27f
|
data/.github/workflows/test.yml
CHANGED
@@ -5,16 +5,18 @@ on:
|
|
5
5
|
push:
|
6
6
|
branches:
|
7
7
|
- master
|
8
|
+
|
8
9
|
jobs:
|
9
10
|
build:
|
10
11
|
runs-on: ubuntu-18.04
|
11
12
|
steps:
|
12
13
|
- uses: actions/checkout@v2
|
13
|
-
- uses:
|
14
|
+
- uses: ruby/setup-ruby@v1
|
14
15
|
with:
|
15
|
-
|
16
|
-
|
17
|
-
-
|
16
|
+
bundler-cache: true
|
17
|
+
ruby-version: 2.7.2
|
18
|
+
- uses: r7kamura/rubocop-problem-matchers-action@v1
|
19
|
+
- run: bundle exec rubocop --parallel
|
18
20
|
- run: bundle exec rspec --force-color
|
19
21
|
|
20
22
|
|
data/.rspec
CHANGED
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,61 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## Unreleased
|
9
9
|
|
10
|
+
## 0.6.1 - 2020-11-15
|
11
|
+
|
12
|
+
### Changed
|
13
|
+
|
14
|
+
- Require hibana 0.2 or later.
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
- Fix installation error caused by hanami-router version restriction.
|
19
|
+
|
20
|
+
### Removed
|
21
|
+
|
22
|
+
- Remove direct dependency on hanami-router.
|
23
|
+
|
24
|
+
## 0.6.0 - 2020-11-15
|
25
|
+
|
26
|
+
### Changed
|
27
|
+
|
28
|
+
- Change pages schema from Hash to Array.
|
29
|
+
- Require hanami-router 2.0.0.alpha3 or later versions.
|
30
|
+
- Ignore pubDate when item.time is not found.
|
31
|
+
- Make item_description_selector and item_time_selector optional.
|
32
|
+
|
33
|
+
## 0.5.0 - 2020-11-14
|
34
|
+
|
35
|
+
### Added
|
36
|
+
|
37
|
+
- Add datetime attribute support on time element.
|
38
|
+
|
39
|
+
### Changed
|
40
|
+
|
41
|
+
- Change schema format about selectors.
|
42
|
+
|
43
|
+
## 0.4.1 - 2020-11-08
|
44
|
+
|
45
|
+
### Fixed
|
46
|
+
|
47
|
+
- Fix error when item description is not found.
|
48
|
+
|
49
|
+
## 0.4.0 - 2020-11-08
|
50
|
+
|
51
|
+
### Added
|
52
|
+
|
53
|
+
- Add CSS selector support.
|
54
|
+
|
55
|
+
### Changed
|
56
|
+
|
57
|
+
- Change schema key from `xpath` to `selectors`.
|
58
|
+
- Change item_link_selector target from href attribute to a element.
|
59
|
+
- Change some argument names from `_xpath` to `_selector`.
|
60
|
+
- Change `weneedfeed build` description.
|
61
|
+
- Change `weneedfeed server` description.
|
62
|
+
- Change channel link from feed URL to top page URL.
|
63
|
+
- Sort top page feeds by their titles.
|
64
|
+
|
10
65
|
## 0.3.0 - 2020-11-08
|
11
66
|
|
12
67
|
### Added
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
weneedfeed (0.3.0)
|
4
|
+
weneedfeed (0.6.1)
|
5
5
|
faraday
|
6
|
-
hibana
|
6
|
+
hibana (>= 0.2)
|
7
7
|
nokogiri
|
8
8
|
rack-capture (>= 0.4.0)
|
9
9
|
thor
|
@@ -14,29 +14,28 @@ GEM
|
|
14
14
|
addressable (2.7.0)
|
15
15
|
public_suffix (>= 2.0.2, < 5.0)
|
16
16
|
ast (2.4.1)
|
17
|
-
concurrent-ruby (1.1.7)
|
18
17
|
crack (0.4.4)
|
19
18
|
diff-lcs (1.4.4)
|
20
19
|
faraday (1.1.0)
|
21
20
|
multipart-post (>= 1.2, < 3)
|
22
21
|
ruby2_keywords
|
23
|
-
hanami-router (
|
24
|
-
|
25
|
-
|
22
|
+
hanami-router (2.0.0.alpha3)
|
23
|
+
mustermann (~> 1.0)
|
24
|
+
mustermann-contrib (~> 1.0)
|
26
25
|
rack (~> 2.0)
|
27
|
-
|
28
|
-
concurrent-ruby (~> 1.0)
|
29
|
-
transproc (~> 1.0)
|
26
|
+
hansi (0.2.0)
|
30
27
|
hashdiff (1.0.1)
|
31
|
-
hibana (0.
|
32
|
-
hanami-router
|
28
|
+
hibana (0.2.0)
|
29
|
+
hanami-router (>= 2.0.0.alpha3)
|
33
30
|
rack
|
34
31
|
tilt
|
35
|
-
http_router (0.11.2)
|
36
|
-
rack (>= 1.0.0)
|
37
|
-
url_mount (~> 0.2.1)
|
38
32
|
mini_portile2 (2.4.0)
|
39
33
|
multipart-post (2.1.1)
|
34
|
+
mustermann (1.1.1)
|
35
|
+
ruby2_keywords (~> 0.0.1)
|
36
|
+
mustermann-contrib (1.1.1)
|
37
|
+
hansi (~> 0.2.0)
|
38
|
+
mustermann (= 1.1.1)
|
40
39
|
nokogiri (1.10.10)
|
41
40
|
mini_portile2 (~> 2.4.0)
|
42
41
|
parallel (1.19.2)
|
@@ -80,10 +79,7 @@ GEM
|
|
80
79
|
ruby2_keywords (0.0.2)
|
81
80
|
thor (1.0.1)
|
82
81
|
tilt (2.0.10)
|
83
|
-
transproc (1.1.1)
|
84
82
|
unicode-display_width (1.7.0)
|
85
|
-
url_mount (0.2.1)
|
86
|
-
rack
|
87
83
|
webmock (3.9.3)
|
88
84
|
addressable (>= 2.3.6)
|
89
85
|
crack (>= 0.3.2)
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[![](https://badge.fury.io/rb/weneedfeed.svg)](https://rubygems.org/gems/weneedfeed)
|
4
4
|
[![](https://github.com/r7kamura/weneedfeed/workflows/test/badge.svg)](https://github.com/r7kamura/weneedfeed/actions?query=workflow%3Atest)
|
5
5
|
|
6
|
-
Generate feeds from URL and
|
6
|
+
Generate feeds from URL and selectors.
|
7
7
|
|
8
8
|
## Installation
|
9
9
|
|
@@ -25,38 +25,124 @@ Or install it yourself as:
|
|
25
25
|
gem install weneedfeed
|
26
26
|
```
|
27
27
|
|
28
|
-
##
|
28
|
+
## Schema
|
29
|
+
|
30
|
+
You need to write a schema file named `weneedfeed.yml` to use this gem.
|
29
31
|
|
30
|
-
|
32
|
+
### Example
|
31
33
|
|
32
34
|
```yaml
|
33
35
|
pages:
|
34
|
-
|
35
|
-
title:
|
36
|
-
url: http://example.com/
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
36
|
+
- id: example1
|
37
|
+
title: Example feed with CSS Selector
|
38
|
+
url: http://example.com/1
|
39
|
+
item_selector: li
|
40
|
+
item_description_selector: p:nth-child(3)
|
41
|
+
item_link_selector: a
|
42
|
+
item_time_selector: time
|
43
|
+
item_title_selector: p:nth-child(2)
|
44
|
+
- id: example2
|
45
|
+
title: Example feed with XPath
|
46
|
+
url: http://example.com/2
|
47
|
+
item_selector: //li
|
48
|
+
item_description_selector: .//p[3]
|
49
|
+
item_link_selector: .//a
|
50
|
+
item_time_selector: .//time
|
51
|
+
item_title_selector: .//p[2]
|
43
52
|
```
|
44
53
|
|
45
|
-
|
54
|
+
### `id`
|
46
55
|
|
47
|
-
|
48
|
-
|
56
|
+
Feed ID.
|
57
|
+
|
58
|
+
- required
|
59
|
+
- Used for feed URL.
|
60
|
+
|
61
|
+
### `title`
|
62
|
+
|
63
|
+
Feed title.
|
64
|
+
|
65
|
+
- required
|
66
|
+
- Used for RSS `<title>` element in `<channel>` element.
|
67
|
+
|
68
|
+
### `url`
|
69
|
+
|
70
|
+
HTML source URL.
|
71
|
+
|
72
|
+
- required
|
73
|
+
- Used to fetch HTML page for building feed.
|
74
|
+
|
75
|
+
### `item_selector`
|
76
|
+
|
77
|
+
CSS or XPath selector to search each item.
|
78
|
+
|
79
|
+
- required
|
80
|
+
- Equivalent unit to RSS `<item>` element.
|
81
|
+
|
82
|
+
### `item_link_selector`
|
83
|
+
|
84
|
+
CSS or XPath selector to find `<a>` element in each item.
|
85
|
+
|
86
|
+
- required
|
87
|
+
- Used for `<link>` in `<item>`.
|
88
|
+
|
89
|
+
### `item_title_selector`
|
90
|
+
|
91
|
+
CSS or XPath selector to find element with title information in each item.
|
92
|
+
|
93
|
+
- required
|
94
|
+
- Used for `<title>` in `<item>`.
|
95
|
+
|
96
|
+
### `item_description_selector`
|
97
|
+
|
98
|
+
CSS or XPath selector to find element with description information in each item.
|
99
|
+
|
100
|
+
- optional
|
101
|
+
- Used for `<description>` in `<item>`.
|
102
|
+
|
103
|
+
### `item_time_selector`
|
104
|
+
|
105
|
+
CSS or XPath selector to find element with datetime information in each item.
|
106
|
+
|
107
|
+
- optional
|
108
|
+
- Used for `<pubDate>` in `<item>`. Its `datetime` attribute or its inner HTML is used to calculate datetime.
|
109
|
+
|
110
|
+
## Usage
|
111
|
+
|
112
|
+
### Build
|
113
|
+
|
114
|
+
Run `weneedfeed build` to build static files.
|
115
|
+
|
116
|
+
```
|
117
|
+
Usage:
|
118
|
+
weneedfeed build --base-url=BASE_URL
|
119
|
+
|
120
|
+
Options:
|
121
|
+
--base-url=BASE_URL # Base URL where to locate built files. (e.g. `"https://user.github.io/repo"`)
|
122
|
+
[--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
|
123
|
+
# Default: weneedfeed.yml
|
124
|
+
|
125
|
+
Build static files.
|
126
|
+
```
|
127
|
+
|
128
|
+
### Server
|
129
|
+
|
130
|
+
Run `weneedfeed server` to run HTTP server.
|
131
|
+
|
132
|
+
```
|
133
|
+
Usage:
|
134
|
+
weneedfeed server
|
135
|
+
|
136
|
+
Options:
|
137
|
+
[--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
|
138
|
+
# Default: weneedfeed.yml
|
49
139
|
|
50
|
-
|
51
|
-
base_url: 'https://user.github.io/repo',
|
52
|
-
schema_path: 'schema.yml'
|
53
|
-
)
|
140
|
+
Run HTTP server.
|
54
141
|
```
|
55
142
|
|
56
|
-
|
143
|
+
## GitHub Actions Integration
|
57
144
|
|
58
|
-
-
|
59
|
-
- output/feeds/example.xml
|
145
|
+
Use [weneedfeed-action](https://github.com/r7kamura/weneedfeed-action) for invoking weneedfeed on GitHub Actions.
|
60
146
|
|
61
147
|
## Development
|
62
148
|
|
data/lib/weneedfeed.rb
CHANGED
@@ -7,8 +7,11 @@ module Weneedfeed
|
|
7
7
|
autoload :Capture, 'weneedfeed/capture'
|
8
8
|
autoload :Command, 'weneedfeed/command'
|
9
9
|
autoload :Controllers, 'weneedfeed/controllers'
|
10
|
+
autoload :Helpers, 'weneedfeed/helpers'
|
10
11
|
autoload :Item, 'weneedfeed/item'
|
11
12
|
autoload :Page, 'weneedfeed/page'
|
13
|
+
autoload :PageSchema, 'weneedfeed/page_schema'
|
14
|
+
autoload :Schema, 'weneedfeed/schema'
|
12
15
|
autoload :Scraping, 'weneedfeed/scraping'
|
13
16
|
autoload :Views, 'weneedfeed/views'
|
14
17
|
end
|
@@ -6,12 +6,12 @@ module Weneedfeed
|
|
6
6
|
class Application < ::Hibana::Application
|
7
7
|
route do
|
8
8
|
get '/', to: ::Weneedfeed::Controllers::ShowTopPage, as: :top_page
|
9
|
-
get '/feeds/:
|
9
|
+
get '/feeds/:page_id.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
|
10
10
|
end
|
11
11
|
|
12
12
|
# @param [Hash] schema
|
13
13
|
def initialize(schema:)
|
14
|
-
@schema = schema
|
14
|
+
@schema = ::Weneedfeed::Schema.new(schema)
|
15
15
|
super()
|
16
16
|
end
|
17
17
|
|
@@ -23,8 +23,8 @@ module Weneedfeed
|
|
23
23
|
|
24
24
|
# @return [Array<String>]
|
25
25
|
def paths
|
26
|
-
['/'] + @schema
|
27
|
-
"/feeds/#{
|
26
|
+
['/'] + @schema.page_ids.map do |page_id|
|
27
|
+
"/feeds/#{page_id}.xml"
|
28
28
|
end
|
29
29
|
end
|
30
30
|
end
|
data/lib/weneedfeed/command.rb
CHANGED
@@ -16,7 +16,7 @@ module Weneedfeed
|
|
16
16
|
|
17
17
|
desc(
|
18
18
|
'build',
|
19
|
-
'Build static files
|
19
|
+
'Build static files.'
|
20
20
|
)
|
21
21
|
|
22
22
|
method_option(
|
@@ -44,7 +44,7 @@ module Weneedfeed
|
|
44
44
|
|
45
45
|
desc(
|
46
46
|
'server',
|
47
|
-
'Run HTTP server'
|
47
|
+
'Run HTTP server.'
|
48
48
|
)
|
49
49
|
|
50
50
|
method_option(
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
|
4
|
+
module Controllers
|
5
|
+
class Base < ::Hibana::Controller
|
6
|
+
include ::Weneedfeed::Helpers::Parameters
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
# @return [Weneedfeed::Schema]
|
11
|
+
def schema
|
12
|
+
request.env['weneedfeed.schema']
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -2,31 +2,22 @@
|
|
2
2
|
|
3
3
|
module Weneedfeed
|
4
4
|
module Controllers
|
5
|
-
class ShowFeed < ::
|
5
|
+
class ShowFeed < ::Weneedfeed::Controllers::Base
|
6
6
|
def call
|
7
|
-
|
8
|
-
|
9
|
-
'router.params',
|
10
|
-
:page_name
|
11
|
-
)
|
12
|
-
properties = env.dig(
|
13
|
-
'weneedfeed.schema',
|
14
|
-
'pages',
|
15
|
-
page_name
|
16
|
-
)
|
17
|
-
unless properties
|
7
|
+
page_schema = schema.find_page_schema(path_parameters[:page_id])
|
8
|
+
unless page_schema
|
18
9
|
response.status = 404
|
19
10
|
return
|
20
11
|
end
|
21
12
|
|
22
13
|
scraping = ::Weneedfeed::Scraping.new(
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
title:
|
29
|
-
url:
|
14
|
+
item_description_selector: page_schema.item_description_selector,
|
15
|
+
item_link_selector: page_schema.item_link_selector,
|
16
|
+
item_time_selector: page_schema.item_time_selector,
|
17
|
+
item_title_selector: page_schema.item_title_selector,
|
18
|
+
item_selector: page_schema.item_selector,
|
19
|
+
title: page_schema.title,
|
20
|
+
url: page_schema.url,
|
30
21
|
)
|
31
22
|
page = scraping.call
|
32
23
|
|
@@ -2,16 +2,13 @@
|
|
2
2
|
|
3
3
|
module Weneedfeed
|
4
4
|
module Controllers
|
5
|
-
class ShowTopPage < ::
|
5
|
+
class ShowTopPage < ::Weneedfeed::Controllers::Base
|
6
6
|
def call
|
7
|
-
|
8
|
-
'weneedfeed.schema',
|
9
|
-
'pages'
|
10
|
-
)
|
7
|
+
page_schemata = schema.page_schemata.sort_by(&:title)
|
11
8
|
response.content_type = 'text/html'
|
12
9
|
response.write(
|
13
10
|
::Weneedfeed::Views::ShowTopPage.new(
|
14
|
-
|
11
|
+
page_schemata: page_schemata,
|
15
12
|
partial_template_path: ::File.expand_path(
|
16
13
|
'templates/show_top_page.html.erb',
|
17
14
|
"#{__dir__}/../../.."
|
data/lib/weneedfeed/item.rb
CHANGED
@@ -15,56 +15,71 @@ module Weneedfeed
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
-
# @param [String]
|
19
|
-
# @param [String]
|
18
|
+
# @param [String, nil] description_selector
|
19
|
+
# @param [String] link_selector
|
20
20
|
# @param [Nokogiri::Node] node
|
21
|
-
# @param [String]
|
22
|
-
# @param [String]
|
21
|
+
# @param [String, nil] time_selector
|
22
|
+
# @param [String] title_selector
|
23
23
|
# @param [String] url
|
24
24
|
def initialize(
|
25
|
-
|
26
|
-
|
25
|
+
description_selector:,
|
26
|
+
link_selector:,
|
27
27
|
node:,
|
28
|
-
|
29
|
-
|
28
|
+
time_selector:,
|
29
|
+
title_selector:,
|
30
30
|
url:
|
31
31
|
)
|
32
|
-
@
|
33
|
-
@
|
32
|
+
@description_selector = description_selector
|
33
|
+
@link_selector = link_selector
|
34
34
|
@node = node
|
35
|
-
@
|
36
|
-
@
|
35
|
+
@time_selector = time_selector
|
36
|
+
@title_selector = title_selector
|
37
37
|
@url = url
|
38
38
|
end
|
39
39
|
|
40
40
|
# @return [String, nil]
|
41
41
|
def description
|
42
|
-
@
|
42
|
+
return unless @description_selector
|
43
|
+
|
44
|
+
@node.at(@description_selector)&.inner_html
|
43
45
|
end
|
44
46
|
|
45
47
|
# @return [String]
|
46
48
|
def link
|
47
49
|
::URI.join(
|
48
50
|
@url,
|
49
|
-
@node.
|
51
|
+
@node.at(@link_selector)['href']
|
50
52
|
).to_s
|
51
53
|
end
|
52
54
|
|
53
55
|
# @return [Time, nil]
|
54
56
|
def time
|
55
|
-
|
57
|
+
return unless @time_selector
|
58
|
+
|
59
|
+
string = time_string
|
60
|
+
return unless string
|
61
|
+
|
62
|
+
self.class.parse_time(string)
|
56
63
|
end
|
57
64
|
|
58
|
-
# @return [String
|
65
|
+
# @return [String]
|
59
66
|
def title
|
60
|
-
@node.
|
67
|
+
@node.at(@title_selector).inner_text
|
61
68
|
end
|
62
69
|
|
63
70
|
private
|
64
71
|
|
65
|
-
# @return [
|
72
|
+
# @return [Nokogiri::Node, nil]
|
73
|
+
def time_node
|
74
|
+
@node.at(@time_selector)
|
75
|
+
end
|
76
|
+
|
77
|
+
# @return [String, nil]
|
66
78
|
def time_string
|
67
|
-
|
79
|
+
node = time_node
|
80
|
+
return unless node
|
81
|
+
|
82
|
+
node['datetime'] || node.inner_html
|
68
83
|
end
|
69
84
|
end
|
70
85
|
end
|
data/lib/weneedfeed/page.rb
CHANGED
@@ -8,29 +8,29 @@ module Weneedfeed
|
|
8
8
|
# @return [String]
|
9
9
|
attr_reader :url
|
10
10
|
|
11
|
-
# @param [String]
|
12
|
-
# @param [String]
|
13
|
-
# @param [String]
|
14
|
-
# @param [String]
|
15
|
-
# @param [String]
|
11
|
+
# @param [String, nil] item_description_selector
|
12
|
+
# @param [String] item_link_selector
|
13
|
+
# @param [String, nil] item_time_selector
|
14
|
+
# @param [String] item_title_selector
|
15
|
+
# @param [String] item_selector
|
16
16
|
# @param [Nokogiri::Node] node
|
17
17
|
# @param [String] title
|
18
18
|
# @param [String] url
|
19
19
|
def initialize(
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
20
|
+
item_description_selector:,
|
21
|
+
item_link_selector:,
|
22
|
+
item_time_selector:,
|
23
|
+
item_title_selector:,
|
24
|
+
item_selector:,
|
25
25
|
node:,
|
26
26
|
title:,
|
27
27
|
url:
|
28
28
|
)
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
32
|
-
@
|
33
|
-
@
|
29
|
+
@item_description_selector = item_description_selector
|
30
|
+
@item_link_selector = item_link_selector
|
31
|
+
@item_time_selector = item_time_selector
|
32
|
+
@item_title_selector = item_title_selector
|
33
|
+
@item_selector = item_selector
|
34
34
|
@node = node
|
35
35
|
@title = title
|
36
36
|
@url = url
|
@@ -38,13 +38,13 @@ module Weneedfeed
|
|
38
38
|
|
39
39
|
# @return [Array<Weneedfeed::Item>]
|
40
40
|
def items
|
41
|
-
@node.
|
41
|
+
@node.search(@item_selector).map do |node|
|
42
42
|
::Weneedfeed::Item.new(
|
43
|
-
|
44
|
-
|
43
|
+
description_selector: @item_description_selector,
|
44
|
+
link_selector: @item_link_selector,
|
45
45
|
node: node,
|
46
|
-
|
47
|
-
|
46
|
+
time_selector: @item_time_selector,
|
47
|
+
title_selector: @item_title_selector,
|
48
48
|
url: @url
|
49
49
|
)
|
50
50
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
|
4
|
+
PageSchema = Struct.new(
|
5
|
+
:id,
|
6
|
+
:item_description_selector,
|
7
|
+
:item_link_selector,
|
8
|
+
:item_time_selector,
|
9
|
+
:item_title_selector,
|
10
|
+
:item_selector,
|
11
|
+
:title,
|
12
|
+
:url,
|
13
|
+
keyword_init: true,
|
14
|
+
)
|
15
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
|
4
|
+
class Schema
|
5
|
+
# @param [Hash] raw
|
6
|
+
def initialize(raw)
|
7
|
+
@raw = raw
|
8
|
+
end
|
9
|
+
|
10
|
+
# @param [String, nil] page_schema_id
|
11
|
+
# @return [Weneedfeed::PageSchema, nil]
|
12
|
+
def find_page_schema(page_schema_id)
|
13
|
+
page_schemata.find do |page_schema|
|
14
|
+
page_schema.id == page_schema_id
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# @return [Array<String>]
|
19
|
+
def page_ids
|
20
|
+
page_schemata.map(&:id)
|
21
|
+
end
|
22
|
+
|
23
|
+
# @return [Array<Weneedfeed::PageSchema>]
|
24
|
+
def page_schemata
|
25
|
+
@raw['pages'].map do |hash|
|
26
|
+
::Weneedfeed::PageSchema.new(
|
27
|
+
id: hash['id'],
|
28
|
+
item_link_selector: hash['item_link_selector'],
|
29
|
+
item_time_selector: hash['item_time_selector'],
|
30
|
+
item_title_selector: hash['item_title_selector'],
|
31
|
+
item_selector: hash['item_selector'],
|
32
|
+
title: hash['title'],
|
33
|
+
url: hash['url'],
|
34
|
+
)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/weneedfeed/scraping.rb
CHANGED
@@ -5,27 +5,27 @@ require 'nokogiri'
|
|
5
5
|
|
6
6
|
module Weneedfeed
|
7
7
|
class Scraping
|
8
|
-
# @param [String]
|
9
|
-
# @param [String]
|
10
|
-
# @param [String]
|
11
|
-
# @param [String]
|
12
|
-
# @param [String]
|
8
|
+
# @param [String, nil] item_description_selector
|
9
|
+
# @param [String] item_link_selector
|
10
|
+
# @param [String, nil] item_time_selector
|
11
|
+
# @param [String] item_title_selector
|
12
|
+
# @param [String] item_selector
|
13
13
|
# @param [String] title
|
14
14
|
# @param [String] url
|
15
15
|
def initialize(
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
item_description_selector:,
|
17
|
+
item_link_selector:,
|
18
|
+
item_time_selector:,
|
19
|
+
item_title_selector:,
|
20
|
+
item_selector:,
|
21
21
|
title:,
|
22
22
|
url:
|
23
23
|
)
|
24
|
-
@
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
24
|
+
@item_description_selector = item_description_selector
|
25
|
+
@item_link_selector = item_link_selector
|
26
|
+
@item_time_selector = item_time_selector
|
27
|
+
@item_title_selector = item_title_selector
|
28
|
+
@item_selector = item_selector
|
29
29
|
@title = title
|
30
30
|
@url = url
|
31
31
|
end
|
@@ -34,11 +34,11 @@ module Weneedfeed
|
|
34
34
|
def call
|
35
35
|
::Weneedfeed::Page.new(
|
36
36
|
node: parsed_body,
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
item_description_selector: @item_description_selector,
|
38
|
+
item_selector: @item_selector,
|
39
|
+
item_link_selector: @item_link_selector,
|
40
|
+
item_time_selector: @item_time_selector,
|
41
|
+
item_title_selector: @item_title_selector,
|
42
42
|
title: @title,
|
43
43
|
url: @url
|
44
44
|
)
|
data/lib/weneedfeed/version.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
module Weneedfeed
|
4
4
|
module Views
|
5
5
|
class ShowFeed < ::Hibana::View
|
6
|
+
include ::Weneedfeed::Helpers::Parameters
|
7
|
+
|
6
8
|
# @param [Weneedfeed::Page] page
|
7
9
|
def initialize(page:, **argv)
|
8
10
|
super(**argv)
|
@@ -11,6 +13,11 @@ module Weneedfeed
|
|
11
13
|
|
12
14
|
private
|
13
15
|
|
16
|
+
# @return [String]
|
17
|
+
def top_page_path
|
18
|
+
request.path.delete_suffix(router.path(:feed, page_id: page_id))
|
19
|
+
end
|
20
|
+
|
14
21
|
# @return [Enumerable<Weneedfeed::Item>]
|
15
22
|
def items
|
16
23
|
@page.items.sort_by do |item|
|
@@ -19,8 +26,13 @@ module Weneedfeed
|
|
19
26
|
end
|
20
27
|
|
21
28
|
# @return [String]
|
22
|
-
def
|
23
|
-
|
29
|
+
def page_id
|
30
|
+
path_parameters[:page_id]
|
31
|
+
end
|
32
|
+
|
33
|
+
# @return [Hanami::Router]
|
34
|
+
def router
|
35
|
+
::Weneedfeed::Application.router
|
24
36
|
end
|
25
37
|
end
|
26
38
|
end
|
@@ -3,10 +3,10 @@
|
|
3
3
|
module Weneedfeed
|
4
4
|
module Views
|
5
5
|
class ShowTopPage < ::Hibana::View
|
6
|
-
# @param [Array<
|
7
|
-
def initialize(
|
6
|
+
# @param [Array<Weneedfeed::PageSchema>] page_schemata
|
7
|
+
def initialize(page_schemata:, **argv)
|
8
8
|
super(**argv)
|
9
|
-
@
|
9
|
+
@page_schemata = page_schemata
|
10
10
|
end
|
11
11
|
|
12
12
|
private
|
@@ -16,10 +16,10 @@ module Weneedfeed
|
|
16
16
|
request.path.delete_suffix(router.path(:top_page))
|
17
17
|
end
|
18
18
|
|
19
|
-
# @param [String]
|
19
|
+
# @param [String] page_id
|
20
20
|
# @return [String]
|
21
|
-
def feed_path(
|
22
|
-
"#{base_path}#{router.path(:feed,
|
21
|
+
def feed_path(page_id:)
|
22
|
+
"#{base_path}#{router.path(:feed, page_id: page_id)}"
|
23
23
|
end
|
24
24
|
|
25
25
|
# @return [Hanami::Router]
|
data/templates/show_feed.xml.erb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
5
5
|
<channel>
|
6
6
|
<title><![CDATA[<%= @page.title %>]]></title>
|
7
|
-
<link><%= "#{request.base_url}#{
|
7
|
+
<link><%= "#{request.base_url}#{top_page_path}" %></link>
|
8
8
|
<atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
|
9
9
|
<description><![CDATA[Recent content on <%= @page.title %>]]></description>
|
10
10
|
<lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
|
@@ -12,7 +12,9 @@
|
|
12
12
|
<item>
|
13
13
|
<title><![CDATA[<%= item.title %>]]></title>
|
14
14
|
<link><%= item.link %></link>
|
15
|
-
|
15
|
+
<% if item.time %>
|
16
|
+
<pubDate><%= item.time.rfc822 %></pubDate>
|
17
|
+
<% end %>
|
16
18
|
<description><![CDATA[<%= item.description %>]]></description>
|
17
19
|
<content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
|
18
20
|
<guid isPermaLink="true"><%= item.link %></guid>
|
@@ -7,9 +7,9 @@
|
|
7
7
|
</head>
|
8
8
|
<body>
|
9
9
|
<ul>
|
10
|
-
<% @
|
10
|
+
<% @page_schemata.each do |page_schema| %>
|
11
11
|
<li>
|
12
|
-
<a href="<%= feed_path(
|
12
|
+
<a href="<%= feed_path(page_id: page_schema.id) %>"><%= page_schema.title %></a>
|
13
13
|
</li>
|
14
14
|
<% end %>
|
15
15
|
</ul>
|
data/weneedfeed.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ['Ryo Nakamura']
|
9
9
|
spec.email = ['r7kamura@gmail.com']
|
10
10
|
|
11
|
-
spec.summary = 'Generate feeds from URL and
|
11
|
+
spec.summary = 'Generate feeds from URL and selectors.'
|
12
12
|
spec.homepage = 'https://github.com/r7kamura/weneedfeed'
|
13
13
|
spec.license = 'MIT'
|
14
14
|
spec.required_ruby_version = Gem::Requirement.new('>= 2.5.0')
|
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.require_paths = ['lib']
|
27
27
|
|
28
28
|
spec.add_runtime_dependency 'faraday'
|
29
|
-
spec.add_runtime_dependency 'hibana'
|
29
|
+
spec.add_runtime_dependency 'hibana', '>= 0.2'
|
30
30
|
spec.add_runtime_dependency 'nokogiri'
|
31
31
|
spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
|
32
32
|
spec.add_runtime_dependency 'thor'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weneedfeed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryo Nakamura
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-11-
|
11
|
+
date: 2020-11-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
33
|
+
version: '0.2'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
40
|
+
version: '0.2'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: nokogiri
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,10 +106,15 @@ files:
|
|
106
106
|
- lib/weneedfeed/capture.rb
|
107
107
|
- lib/weneedfeed/command.rb
|
108
108
|
- lib/weneedfeed/controllers.rb
|
109
|
+
- lib/weneedfeed/controllers/base.rb
|
109
110
|
- lib/weneedfeed/controllers/show_feed.rb
|
110
111
|
- lib/weneedfeed/controllers/show_top_page.rb
|
112
|
+
- lib/weneedfeed/helpers.rb
|
113
|
+
- lib/weneedfeed/helpers/parameters.rb
|
111
114
|
- lib/weneedfeed/item.rb
|
112
115
|
- lib/weneedfeed/page.rb
|
116
|
+
- lib/weneedfeed/page_schema.rb
|
117
|
+
- lib/weneedfeed/schema.rb
|
113
118
|
- lib/weneedfeed/scraping.rb
|
114
119
|
- lib/weneedfeed/version.rb
|
115
120
|
- lib/weneedfeed/views.rb
|
@@ -142,5 +147,5 @@ requirements: []
|
|
142
147
|
rubygems_version: 3.1.2
|
143
148
|
signing_key:
|
144
149
|
specification_version: 4
|
145
|
-
summary: Generate feeds from URL and
|
150
|
+
summary: Generate feeds from URL and selectors.
|
146
151
|
test_files: []
|