weneedfeed 0.5.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +0 -2
- data/.rubocop.yml +19 -0
- data/CHANGELOG.md +45 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +40 -20
- data/README.md +42 -14
- data/lib/weneedfeed.rb +4 -0
- data/lib/weneedfeed/application.rb +4 -4
- data/lib/weneedfeed/controllers.rb +1 -0
- data/lib/weneedfeed/controllers/base.rb +16 -0
- data/lib/weneedfeed/controllers/show_feed.rb +10 -19
- data/lib/weneedfeed/controllers/show_top_page.rb +3 -8
- data/lib/weneedfeed/faraday_response_middleware.rb +23 -0
- data/lib/weneedfeed/helpers.rb +7 -0
- data/lib/weneedfeed/helpers/parameters.rb +14 -0
- data/lib/weneedfeed/item.rb +26 -8
- data/lib/weneedfeed/page.rb +2 -2
- data/lib/weneedfeed/page_schema.rb +15 -0
- data/lib/weneedfeed/schema.rb +39 -0
- data/lib/weneedfeed/scraping.rb +13 -4
- data/lib/weneedfeed/version.rb +1 -1
- data/lib/weneedfeed/views/show_feed.rb +5 -3
- data/lib/weneedfeed/views/show_top_page.rb +6 -6
- data/templates/show_feed.xml.erb +3 -1
- data/templates/show_top_page.html.erb +2 -2
- data/weneedfeed.gemspec +4 -1
- metadata +51 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2731eafeff7f8a49cd49467cbcdf2dc9b7d156d475d6a6409d9e73ee6fbfd305
|
|
4
|
+
data.tar.gz: 65f0569ec54413e9a0b0f44612782804177cb06dbdc9bed0d2054f0a68655cb8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 544c38f0a268f66386432b2895133a84c39b68bfcd48e6922fe14f4f543a4260283e8df10c10c788ca999d75cef6d37621dca0e571e33489be6a3ed453e52388
|
|
7
|
+
data.tar.gz: bab5629580e689faf7271e0119cc73dce75d4348e844f3a7bcf8e419609a452d5aa55148b5dc9537e25f9be57f5ce6bfb549e7764d19725ef88637452193a35f
|
data/.github/workflows/test.yml
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
+
require:
|
|
2
|
+
- rubocop-rspec
|
|
3
|
+
|
|
1
4
|
AllCops:
|
|
5
|
+
NewCops: enable
|
|
2
6
|
TargetRubyVersion: 2.5
|
|
3
7
|
|
|
4
8
|
Lint/SuppressedException:
|
|
@@ -7,5 +11,20 @@ Lint/SuppressedException:
|
|
|
7
11
|
Metrics:
|
|
8
12
|
Enabled: false
|
|
9
13
|
|
|
14
|
+
RSpec/ImplicitSubject:
|
|
15
|
+
Enabled: false
|
|
16
|
+
|
|
17
|
+
RSpec/MultipleExpectations:
|
|
18
|
+
Enabled: false
|
|
19
|
+
|
|
20
|
+
RSpec/NamedSubject:
|
|
21
|
+
Enabled: false
|
|
22
|
+
|
|
10
23
|
Style/Documentation:
|
|
11
24
|
Enabled: false
|
|
25
|
+
|
|
26
|
+
Style/TrailingCommaInArguments:
|
|
27
|
+
Enabled: false
|
|
28
|
+
|
|
29
|
+
Style/TrailingCommaInHashLiteral:
|
|
30
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## Unreleased
|
|
9
9
|
|
|
10
|
+
## 0.7.2 - 2020-11-25
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- Fix time parse error by handling HTML entities.
|
|
15
|
+
|
|
16
|
+
## 0.7.1 - 2020-11-25
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- Fix error when no title element found.
|
|
21
|
+
|
|
22
|
+
## 0.7.0 - 2020-11-23
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
|
|
26
|
+
- Convert JSON response into XML.
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- Fix bug that item_description_selector is not used.
|
|
31
|
+
|
|
32
|
+
## 0.6.1 - 2020-11-15
|
|
33
|
+
|
|
34
|
+
### Changed
|
|
35
|
+
|
|
36
|
+
- Require hibana 0.2 or later.
|
|
37
|
+
|
|
38
|
+
### Fixed
|
|
39
|
+
|
|
40
|
+
- Fix installation error caused by hanami-router version restriction.
|
|
41
|
+
|
|
42
|
+
### Removed
|
|
43
|
+
|
|
44
|
+
- Remove direct dependency on hanami-router.
|
|
45
|
+
|
|
46
|
+
## 0.6.0 - 2020-11-15
|
|
47
|
+
|
|
48
|
+
### Changed
|
|
49
|
+
|
|
50
|
+
- Change pages schema from Hash to Array.
|
|
51
|
+
- Require hanami-router 2.0.0.alpha3 or later versions.
|
|
52
|
+
- Ignore pubDate when item.time is not found.
|
|
53
|
+
- Make item_description_selector and item_time_selector optional.
|
|
54
|
+
|
|
10
55
|
## 0.5.0 - 2020-11-14
|
|
11
56
|
|
|
12
57
|
### Added
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
weneedfeed (0.
|
|
4
|
+
weneedfeed (0.7.2)
|
|
5
|
+
activesupport
|
|
6
|
+
builder
|
|
5
7
|
faraday
|
|
6
|
-
|
|
8
|
+
faraday_middleware
|
|
9
|
+
hibana (>= 0.2)
|
|
7
10
|
nokogiri
|
|
8
11
|
rack-capture (>= 0.4.0)
|
|
9
12
|
thor
|
|
@@ -11,35 +14,47 @@ PATH
|
|
|
11
14
|
GEM
|
|
12
15
|
remote: https://rubygems.org/
|
|
13
16
|
specs:
|
|
17
|
+
activesupport (6.0.3.4)
|
|
18
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
19
|
+
i18n (>= 0.7, < 2)
|
|
20
|
+
minitest (~> 5.1)
|
|
21
|
+
tzinfo (~> 1.1)
|
|
22
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
|
14
23
|
addressable (2.7.0)
|
|
15
24
|
public_suffix (>= 2.0.2, < 5.0)
|
|
16
25
|
ast (2.4.1)
|
|
26
|
+
builder (3.2.4)
|
|
17
27
|
concurrent-ruby (1.1.7)
|
|
18
28
|
crack (0.4.4)
|
|
19
29
|
diff-lcs (1.4.4)
|
|
20
30
|
faraday (1.1.0)
|
|
21
31
|
multipart-post (>= 1.2, < 3)
|
|
22
32
|
ruby2_keywords
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
33
|
+
faraday_middleware (1.0.0)
|
|
34
|
+
faraday (~> 1.0)
|
|
35
|
+
hanami-router (2.0.0.alpha3)
|
|
36
|
+
mustermann (~> 1.0)
|
|
37
|
+
mustermann-contrib (~> 1.0)
|
|
26
38
|
rack (~> 2.0)
|
|
27
|
-
|
|
28
|
-
concurrent-ruby (~> 1.0)
|
|
29
|
-
transproc (~> 1.0)
|
|
39
|
+
hansi (0.2.0)
|
|
30
40
|
hashdiff (1.0.1)
|
|
31
|
-
hibana (0.
|
|
32
|
-
hanami-router
|
|
41
|
+
hibana (0.2.0)
|
|
42
|
+
hanami-router (>= 2.0.0.alpha3)
|
|
33
43
|
rack
|
|
34
44
|
tilt
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
url_mount (~> 0.2.1)
|
|
45
|
+
i18n (1.8.5)
|
|
46
|
+
concurrent-ruby (~> 1.0)
|
|
38
47
|
mini_portile2 (2.4.0)
|
|
48
|
+
minitest (5.14.2)
|
|
39
49
|
multipart-post (2.1.1)
|
|
50
|
+
mustermann (1.1.1)
|
|
51
|
+
ruby2_keywords (~> 0.0.1)
|
|
52
|
+
mustermann-contrib (1.1.1)
|
|
53
|
+
hansi (~> 0.2.0)
|
|
54
|
+
mustermann (= 1.1.1)
|
|
40
55
|
nokogiri (1.10.10)
|
|
41
56
|
mini_portile2 (~> 2.4.0)
|
|
42
|
-
parallel (1.
|
|
57
|
+
parallel (1.20.1)
|
|
43
58
|
parser (2.7.2.0)
|
|
44
59
|
ast (~> 2.4.1)
|
|
45
60
|
public_suffix (4.0.6)
|
|
@@ -65,29 +80,33 @@ GEM
|
|
|
65
80
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
66
81
|
rspec-support (~> 3.9.0)
|
|
67
82
|
rspec-support (3.9.4)
|
|
68
|
-
rubocop (1.
|
|
83
|
+
rubocop (1.4.0)
|
|
69
84
|
parallel (~> 1.10)
|
|
70
85
|
parser (>= 2.7.1.5)
|
|
71
86
|
rainbow (>= 2.2.2, < 4.0)
|
|
72
87
|
regexp_parser (>= 1.8)
|
|
73
88
|
rexml
|
|
74
|
-
rubocop-ast (>=
|
|
89
|
+
rubocop-ast (>= 1.1.1)
|
|
75
90
|
ruby-progressbar (~> 1.7)
|
|
76
91
|
unicode-display_width (>= 1.4.0, < 2.0)
|
|
77
|
-
rubocop-ast (1.1.
|
|
92
|
+
rubocop-ast (1.1.1)
|
|
78
93
|
parser (>= 2.7.1.5)
|
|
94
|
+
rubocop-rspec (2.0.0)
|
|
95
|
+
rubocop (~> 1.0)
|
|
96
|
+
rubocop-ast (>= 1.1.0)
|
|
79
97
|
ruby-progressbar (1.10.1)
|
|
80
98
|
ruby2_keywords (0.0.2)
|
|
81
99
|
thor (1.0.1)
|
|
100
|
+
thread_safe (0.3.6)
|
|
82
101
|
tilt (2.0.10)
|
|
83
|
-
|
|
102
|
+
tzinfo (1.2.8)
|
|
103
|
+
thread_safe (~> 0.1)
|
|
84
104
|
unicode-display_width (1.7.0)
|
|
85
|
-
url_mount (0.2.1)
|
|
86
|
-
rack
|
|
87
105
|
webmock (3.9.3)
|
|
88
106
|
addressable (>= 2.3.6)
|
|
89
107
|
crack (>= 0.3.2)
|
|
90
108
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
109
|
+
zeitwerk (2.4.1)
|
|
91
110
|
|
|
92
111
|
PLATFORMS
|
|
93
112
|
ruby
|
|
@@ -97,6 +116,7 @@ DEPENDENCIES
|
|
|
97
116
|
rake (~> 12.0)
|
|
98
117
|
rspec
|
|
99
118
|
rubocop
|
|
119
|
+
rubocop-rspec
|
|
100
120
|
webmock
|
|
101
121
|
weneedfeed!
|
|
102
122
|
|
data/README.md
CHANGED
|
@@ -33,51 +33,79 @@ You need to write a schema file named with `weneedfeed.yml` to use this gem.
|
|
|
33
33
|
|
|
34
34
|
```yaml
|
|
35
35
|
pages:
|
|
36
|
-
example1
|
|
37
|
-
title:
|
|
36
|
+
- id: example1
|
|
37
|
+
title: Example feed with CSS Selector
|
|
38
38
|
url: http://example.com/1
|
|
39
39
|
item_selector: li
|
|
40
40
|
item_description_selector: p:nth-child(3)
|
|
41
41
|
item_link_selector: a
|
|
42
42
|
item_time_selector: time
|
|
43
43
|
item_title_selector: p:nth-child(2)
|
|
44
|
-
example2
|
|
45
|
-
title:
|
|
44
|
+
- id: example2
|
|
45
|
+
title: Example feed with XPath
|
|
46
46
|
url: http://example.com/2
|
|
47
47
|
item_selector: //li
|
|
48
48
|
item_description_selector: .//p[3]
|
|
49
|
-
item_link_selector: .//a
|
|
49
|
+
item_link_selector: .//a
|
|
50
50
|
item_time_selector: .//time
|
|
51
51
|
item_title_selector: .//p[2]
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
+
### `id`
|
|
55
|
+
|
|
56
|
+
Feed ID.
|
|
57
|
+
|
|
58
|
+
- required
|
|
59
|
+
- Used for feed URL.
|
|
60
|
+
|
|
54
61
|
### `title`
|
|
55
62
|
|
|
56
|
-
Feed title
|
|
63
|
+
Feed title.
|
|
64
|
+
|
|
65
|
+
- required
|
|
66
|
+
- Used for RSS `<title>` element in `<channel>` element.
|
|
57
67
|
|
|
58
68
|
### `url`
|
|
59
69
|
|
|
60
|
-
|
|
70
|
+
HTML source URL.
|
|
71
|
+
|
|
72
|
+
- required
|
|
73
|
+
- Used to fetch HTML page for building feed.
|
|
61
74
|
|
|
62
75
|
### `item_selector`
|
|
63
76
|
|
|
64
|
-
CSS or XPath selector to search each item
|
|
77
|
+
CSS or XPath selector to search each item.
|
|
65
78
|
|
|
66
|
-
|
|
79
|
+
- required
|
|
80
|
+
- Equivalent unit to RSS `<item>` element.
|
|
67
81
|
|
|
68
|
-
|
|
82
|
+
### `item_link_selector`
|
|
69
83
|
|
|
70
|
-
|
|
84
|
+
CSS or XPath selector to find `<a>` element in each item.
|
|
71
85
|
|
|
72
|
-
|
|
86
|
+
- required
|
|
87
|
+
- Used for `<link>` in `<item>`.
|
|
73
88
|
|
|
74
89
|
### `item_title_selector`
|
|
75
90
|
|
|
76
|
-
CSS or XPath selector to find element with title information in each item
|
|
91
|
+
CSS or XPath selector to find element with title information in each item.
|
|
92
|
+
|
|
93
|
+
- required
|
|
94
|
+
- Used for `<title>` in `<item>`.
|
|
77
95
|
|
|
78
96
|
### `item_description_selector`
|
|
79
97
|
|
|
80
|
-
CSS or XPath selector to find element with description information in each item
|
|
98
|
+
CSS or XPath selector to find element with description information in each item.
|
|
99
|
+
|
|
100
|
+
- optional
|
|
101
|
+
- Used for `<description>` in `<item>`.
|
|
102
|
+
|
|
103
|
+
### `item_time_selector`
|
|
104
|
+
|
|
105
|
+
CSS or XPath selector to find element with datetime information in each item.
|
|
106
|
+
|
|
107
|
+
- optional
|
|
108
|
+
- Used for `<pubDate>` in `<item>`. Its `datetime` attribute, or its inner text if that attribute is absent, is used to determine the datetime.
|
|
81
109
|
|
|
82
110
|
## Usage
|
|
83
111
|
|
data/lib/weneedfeed.rb
CHANGED
|
@@ -7,8 +7,12 @@ module Weneedfeed
|
|
|
7
7
|
autoload :Capture, 'weneedfeed/capture'
|
|
8
8
|
autoload :Command, 'weneedfeed/command'
|
|
9
9
|
autoload :Controllers, 'weneedfeed/controllers'
|
|
10
|
+
autoload :FaradayResponseMiddleware, 'weneedfeed/faraday_response_middleware'
|
|
11
|
+
autoload :Helpers, 'weneedfeed/helpers'
|
|
10
12
|
autoload :Item, 'weneedfeed/item'
|
|
11
13
|
autoload :Page, 'weneedfeed/page'
|
|
14
|
+
autoload :PageSchema, 'weneedfeed/page_schema'
|
|
15
|
+
autoload :Schema, 'weneedfeed/schema'
|
|
12
16
|
autoload :Scraping, 'weneedfeed/scraping'
|
|
13
17
|
autoload :Views, 'weneedfeed/views'
|
|
14
18
|
end
|
|
@@ -6,12 +6,12 @@ module Weneedfeed
|
|
|
6
6
|
class Application < ::Hibana::Application
|
|
7
7
|
route do
|
|
8
8
|
get '/', to: ::Weneedfeed::Controllers::ShowTopPage, as: :top_page
|
|
9
|
-
get '/feeds/:
|
|
9
|
+
get '/feeds/:page_id.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
# @param [Hash] schema
|
|
13
13
|
def initialize(schema:)
|
|
14
|
-
@schema = schema
|
|
14
|
+
@schema = ::Weneedfeed::Schema.new(schema)
|
|
15
15
|
super()
|
|
16
16
|
end
|
|
17
17
|
|
|
@@ -23,8 +23,8 @@ module Weneedfeed
|
|
|
23
23
|
|
|
24
24
|
# @return [Array<String>]
|
|
25
25
|
def paths
|
|
26
|
-
['/'] + @schema
|
|
27
|
-
"/feeds/#{
|
|
26
|
+
['/'] + @schema.page_ids.map do |page_id|
|
|
27
|
+
"/feeds/#{page_id}.xml"
|
|
28
28
|
end
|
|
29
29
|
end
|
|
30
30
|
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Weneedfeed
|
|
4
|
+
module Controllers
|
|
5
|
+
class Base < ::Hibana::Controller
|
|
6
|
+
include ::Weneedfeed::Helpers::Parameters
|
|
7
|
+
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
# @return [Weneedfeed::Schema]
|
|
11
|
+
def schema
|
|
12
|
+
request.env['weneedfeed.schema']
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -2,31 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
module Weneedfeed
|
|
4
4
|
module Controllers
|
|
5
|
-
class ShowFeed < ::
|
|
5
|
+
class ShowFeed < ::Weneedfeed::Controllers::Base
|
|
6
6
|
def call
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
'router.params',
|
|
10
|
-
:page_name
|
|
11
|
-
)
|
|
12
|
-
properties = env.dig(
|
|
13
|
-
'weneedfeed.schema',
|
|
14
|
-
'pages',
|
|
15
|
-
page_name
|
|
16
|
-
)
|
|
17
|
-
unless properties
|
|
7
|
+
page_schema = schema.find_page_schema(path_parameters[:page_id])
|
|
8
|
+
unless page_schema
|
|
18
9
|
response.status = 404
|
|
19
10
|
return
|
|
20
11
|
end
|
|
21
12
|
|
|
22
13
|
scraping = ::Weneedfeed::Scraping.new(
|
|
23
|
-
item_description_selector:
|
|
24
|
-
item_link_selector:
|
|
25
|
-
item_time_selector:
|
|
26
|
-
item_title_selector:
|
|
27
|
-
item_selector:
|
|
28
|
-
title:
|
|
29
|
-
url:
|
|
14
|
+
item_description_selector: page_schema.item_description_selector,
|
|
15
|
+
item_link_selector: page_schema.item_link_selector,
|
|
16
|
+
item_time_selector: page_schema.item_time_selector,
|
|
17
|
+
item_title_selector: page_schema.item_title_selector,
|
|
18
|
+
item_selector: page_schema.item_selector,
|
|
19
|
+
title: page_schema.title,
|
|
20
|
+
url: page_schema.url,
|
|
30
21
|
)
|
|
31
22
|
page = scraping.call
|
|
32
23
|
|
|
@@ -2,18 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
module Weneedfeed
|
|
4
4
|
module Controllers
|
|
5
|
-
class ShowTopPage < ::
|
|
5
|
+
class ShowTopPage < ::Weneedfeed::Controllers::Base
|
|
6
6
|
def call
|
|
7
|
-
|
|
8
|
-
'weneedfeed.schema',
|
|
9
|
-
'pages'
|
|
10
|
-
).sort_by do |_key, value|
|
|
11
|
-
value['title']
|
|
12
|
-
end
|
|
7
|
+
page_schemata = schema.page_schemata.sort_by(&:title)
|
|
13
8
|
response.content_type = 'text/html'
|
|
14
9
|
response.write(
|
|
15
10
|
::Weneedfeed::Views::ShowTopPage.new(
|
|
16
|
-
|
|
11
|
+
page_schemata: page_schemata,
|
|
17
12
|
partial_template_path: ::File.expand_path(
|
|
18
13
|
'templates/show_top_page.html.erb',
|
|
19
14
|
"#{__dir__}/../../.."
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/core_ext/array/conversions'
|
|
4
|
+
require 'active_support/core_ext/hash/conversions'
|
|
5
|
+
require 'faraday'
|
|
6
|
+
require 'faraday_middleware/response_middleware'
|
|
7
|
+
require 'json'
|
|
8
|
+
|
|
9
|
+
module Weneedfeed
|
|
10
|
+
class FaradayResponseMiddleware < ::FaradayMiddleware::ResponseMiddleware
|
|
11
|
+
define_parser do |body, options|
|
|
12
|
+
options ||= {}
|
|
13
|
+
options = { dasherize: false }.merge(options)
|
|
14
|
+
object = ::JSON.parse(body)
|
|
15
|
+
object.to_xml(options)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# @note Overriding.
|
|
19
|
+
def parse_response?(env)
|
|
20
|
+
env.response.headers['Content-Type'].to_s.include?('application/json')
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
data/lib/weneedfeed/item.rb
CHANGED
|
@@ -15,8 +15,8 @@ module Weneedfeed
|
|
|
15
15
|
end
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
# @param [String] description_selector
|
|
19
|
-
# @param [String] link_selector
|
|
18
|
+
# @param [String, nil] description_selector
|
|
19
|
+
# @param [String, nil] link_selector
|
|
20
20
|
# @param [Nokogiri::Node] node
|
|
21
21
|
# @param [String, nil] time_selector
|
|
22
22
|
# @param [String] title_selector
|
|
@@ -39,6 +39,8 @@ module Weneedfeed
|
|
|
39
39
|
|
|
40
40
|
# @return [String, nil]
|
|
41
41
|
def description
|
|
42
|
+
return unless @description_selector
|
|
43
|
+
|
|
42
44
|
@node.at(@description_selector)&.inner_html
|
|
43
45
|
end
|
|
44
46
|
|
|
@@ -46,18 +48,34 @@ module Weneedfeed
|
|
|
46
48
|
def link
|
|
47
49
|
::URI.join(
|
|
48
50
|
@url,
|
|
49
|
-
|
|
51
|
+
link_path_or_url
|
|
50
52
|
).to_s
|
|
51
53
|
end
|
|
52
54
|
|
|
55
|
+
# @return [Nokogiri::Node, nil]
|
|
56
|
+
def link_node
|
|
57
|
+
@node.at(@link_selector)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# @return [String, nil]
|
|
61
|
+
def link_path_or_url
|
|
62
|
+
node = link_node
|
|
63
|
+
node['href'] || node.inner_text
|
|
64
|
+
end
|
|
65
|
+
|
|
53
66
|
# @return [Time, nil]
|
|
54
67
|
def time
|
|
55
|
-
|
|
68
|
+
return unless @time_selector
|
|
69
|
+
|
|
70
|
+
string = time_string
|
|
71
|
+
return unless string
|
|
72
|
+
|
|
73
|
+
self.class.parse_time(string)
|
|
56
74
|
end
|
|
57
75
|
|
|
58
|
-
# @return [String]
|
|
76
|
+
# @return [String, nil]
|
|
59
77
|
def title
|
|
60
|
-
@node.at(@title_selector)
|
|
78
|
+
@node.at(@title_selector)&.inner_text
|
|
61
79
|
end
|
|
62
80
|
|
|
63
81
|
private
|
|
@@ -67,12 +85,12 @@ module Weneedfeed
|
|
|
67
85
|
@node.at(@time_selector)
|
|
68
86
|
end
|
|
69
87
|
|
|
70
|
-
# @return [String]
|
|
88
|
+
# @return [String, nil]
|
|
71
89
|
def time_string
|
|
72
90
|
node = time_node
|
|
73
91
|
return unless node
|
|
74
92
|
|
|
75
|
-
node['datetime'] || node.
|
|
93
|
+
node['datetime'] || node.inner_text
|
|
76
94
|
end
|
|
77
95
|
end
|
|
78
96
|
end
|
data/lib/weneedfeed/page.rb
CHANGED
|
@@ -8,9 +8,9 @@ module Weneedfeed
|
|
|
8
8
|
# @return [String]
|
|
9
9
|
attr_reader :url
|
|
10
10
|
|
|
11
|
-
# @param [String] item_description_selector
|
|
11
|
+
# @param [String, nil] item_description_selector
|
|
12
12
|
# @param [String] item_link_selector
|
|
13
|
-
# @param [String] item_time_selector
|
|
13
|
+
# @param [String, nil] item_time_selector
|
|
14
14
|
# @param [String] item_title_selector
|
|
15
15
|
# @param [String] item_selector
|
|
16
16
|
# @param [Nokogiri::Node] node
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Weneedfeed
|
|
4
|
+
PageSchema = Struct.new(
|
|
5
|
+
:id,
|
|
6
|
+
:item_description_selector,
|
|
7
|
+
:item_link_selector,
|
|
8
|
+
:item_time_selector,
|
|
9
|
+
:item_title_selector,
|
|
10
|
+
:item_selector,
|
|
11
|
+
:title,
|
|
12
|
+
:url,
|
|
13
|
+
keyword_init: true,
|
|
14
|
+
)
|
|
15
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Weneedfeed
|
|
4
|
+
class Schema
|
|
5
|
+
# @param [Hash] raw
|
|
6
|
+
def initialize(raw)
|
|
7
|
+
@raw = raw
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
# @param [String, nil] page_schema_id
|
|
11
|
+
# @return [Weneedfeed::PageSchema, nil]
|
|
12
|
+
def find_page_schema(page_schema_id)
|
|
13
|
+
page_schemata.find do |page_schema|
|
|
14
|
+
page_schema.id == page_schema_id
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# @return [Array<String>]
|
|
19
|
+
def page_ids
|
|
20
|
+
page_schemata.map(&:id)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# @return [Array<Weneedfeed::PageSchema>]
|
|
24
|
+
def page_schemata
|
|
25
|
+
@raw['pages'].map do |hash|
|
|
26
|
+
::Weneedfeed::PageSchema.new(
|
|
27
|
+
id: hash['id'],
|
|
28
|
+
item_description_selector: hash['item_description_selector'],
|
|
29
|
+
item_link_selector: hash['item_link_selector'],
|
|
30
|
+
item_time_selector: hash['item_time_selector'],
|
|
31
|
+
item_title_selector: hash['item_title_selector'],
|
|
32
|
+
item_selector: hash['item_selector'],
|
|
33
|
+
title: hash['title'],
|
|
34
|
+
url: hash['url'],
|
|
35
|
+
)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
data/lib/weneedfeed/scraping.rb
CHANGED
|
@@ -5,9 +5,18 @@ require 'nokogiri'
|
|
|
5
5
|
|
|
6
6
|
module Weneedfeed
|
|
7
7
|
class Scraping
|
|
8
|
-
|
|
8
|
+
class << self
|
|
9
|
+
# @return [Faraday::Connection]
|
|
10
|
+
def faraday_connection
|
|
11
|
+
@faraday_connection ||= ::Faraday.new do |connection|
|
|
12
|
+
connection.use ::Weneedfeed::FaradayResponseMiddleware
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# @param [String, nil] item_description_selector
|
|
9
18
|
# @param [String] item_link_selector
|
|
10
|
-
# @param [String] item_time_selector
|
|
19
|
+
# @param [String, nil] item_time_selector
|
|
11
20
|
# @param [String] item_title_selector
|
|
12
21
|
# @param [String] item_selector
|
|
13
22
|
# @param [String] title
|
|
@@ -48,12 +57,12 @@ module Weneedfeed
|
|
|
48
57
|
|
|
49
58
|
# @return [Nokogiri::Node]
|
|
50
59
|
def parsed_body
|
|
51
|
-
::Nokogiri::
|
|
60
|
+
::Nokogiri::XML.parse(response.body)
|
|
52
61
|
end
|
|
53
62
|
|
|
54
63
|
# @return [Faraday::Response]
|
|
55
64
|
def response
|
|
56
|
-
|
|
65
|
+
self.class.faraday_connection.get(@url)
|
|
57
66
|
end
|
|
58
67
|
end
|
|
59
68
|
end
|
data/lib/weneedfeed/version.rb
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Weneedfeed
|
|
4
4
|
module Views
|
|
5
5
|
class ShowFeed < ::Hibana::View
|
|
6
|
+
include ::Weneedfeed::Helpers::Parameters
|
|
7
|
+
|
|
6
8
|
# @param [Weneedfeed::Page] page
|
|
7
9
|
def initialize(page:, **argv)
|
|
8
10
|
super(**argv)
|
|
@@ -13,7 +15,7 @@ module Weneedfeed
|
|
|
13
15
|
|
|
14
16
|
# @return [String]
|
|
15
17
|
def top_page_path
|
|
16
|
-
request.path.delete_suffix(router.path(:feed,
|
|
18
|
+
request.path.delete_suffix(router.path(:feed, page_id: page_id))
|
|
17
19
|
end
|
|
18
20
|
|
|
19
21
|
# @return [Enumerable<Weneedfeed::Item>]
|
|
@@ -24,8 +26,8 @@ module Weneedfeed
|
|
|
24
26
|
end
|
|
25
27
|
|
|
26
28
|
# @return [String]
|
|
27
|
-
def
|
|
28
|
-
|
|
29
|
+
def page_id
|
|
30
|
+
path_parameters[:page_id]
|
|
29
31
|
end
|
|
30
32
|
|
|
31
33
|
# @return [Hanami::Router]
|
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
module Weneedfeed
|
|
4
4
|
module Views
|
|
5
5
|
class ShowTopPage < ::Hibana::View
|
|
6
|
-
# @param [Array<
|
|
7
|
-
def initialize(
|
|
6
|
+
# @param [Array<Weneedfeed::PageSchema>] page_schemata
|
|
7
|
+
def initialize(page_schemata:, **argv)
|
|
8
8
|
super(**argv)
|
|
9
|
-
@
|
|
9
|
+
@page_schemata = page_schemata
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
private
|
|
@@ -16,10 +16,10 @@ module Weneedfeed
|
|
|
16
16
|
request.path.delete_suffix(router.path(:top_page))
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
# @param [String]
|
|
19
|
+
# @param [String] page_id
|
|
20
20
|
# @return [String]
|
|
21
|
-
def feed_path(
|
|
22
|
-
"#{base_path}#{router.path(:feed,
|
|
21
|
+
def feed_path(page_id:)
|
|
22
|
+
"#{base_path}#{router.path(:feed, page_id: page_id)}"
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
# @return [Hanami::Router]
|
data/templates/show_feed.xml.erb
CHANGED
|
@@ -12,7 +12,9 @@
|
|
|
12
12
|
<item>
|
|
13
13
|
<title><![CDATA[<%= item.title %>]]></title>
|
|
14
14
|
<link><%= item.link %></link>
|
|
15
|
-
|
|
15
|
+
<% if item.time %>
|
|
16
|
+
<pubDate><%= item.time.rfc822 %></pubDate>
|
|
17
|
+
<% end %>
|
|
16
18
|
<description><![CDATA[<%= item.description %>]]></description>
|
|
17
19
|
<content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
|
|
18
20
|
<guid isPermaLink="true"><%= item.link %></guid>
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
</head>
|
|
8
8
|
<body>
|
|
9
9
|
<ul>
|
|
10
|
-
<% @
|
|
10
|
+
<% @page_schemata.each do |page_schema| %>
|
|
11
11
|
<li>
|
|
12
|
-
<a href="<%= feed_path(
|
|
12
|
+
<a href="<%= feed_path(page_id: page_schema.id) %>"><%= page_schema.title %></a>
|
|
13
13
|
</li>
|
|
14
14
|
<% end %>
|
|
15
15
|
</ul>
|
data/weneedfeed.gemspec
CHANGED
|
@@ -25,8 +25,11 @@ Gem::Specification.new do |spec|
|
|
|
25
25
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
26
26
|
spec.require_paths = ['lib']
|
|
27
27
|
|
|
28
|
+
spec.add_runtime_dependency 'activesupport'
|
|
29
|
+
spec.add_runtime_dependency 'builder'
|
|
28
30
|
spec.add_runtime_dependency 'faraday'
|
|
29
|
-
spec.add_runtime_dependency '
|
|
31
|
+
spec.add_runtime_dependency 'faraday_middleware'
|
|
32
|
+
spec.add_runtime_dependency 'hibana', '>= 0.2'
|
|
30
33
|
spec.add_runtime_dependency 'nokogiri'
|
|
31
34
|
spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
|
|
32
35
|
spec.add_runtime_dependency 'thor'
|
metadata
CHANGED
|
@@ -1,15 +1,43 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: weneedfeed
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.7.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ryo Nakamura
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-11-
|
|
11
|
+
date: 2020-11-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activesupport
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: builder
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
13
41
|
- !ruby/object:Gem::Dependency
|
|
14
42
|
name: faraday
|
|
15
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -25,7 +53,7 @@ dependencies:
|
|
|
25
53
|
- !ruby/object:Gem::Version
|
|
26
54
|
version: '0'
|
|
27
55
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
56
|
+
name: faraday_middleware
|
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
|
30
58
|
requirements:
|
|
31
59
|
- - ">="
|
|
@@ -38,6 +66,20 @@ dependencies:
|
|
|
38
66
|
- - ">="
|
|
39
67
|
- !ruby/object:Gem::Version
|
|
40
68
|
version: '0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: hibana
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0.2'
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0.2'
|
|
41
83
|
- !ruby/object:Gem::Dependency
|
|
42
84
|
name: nokogiri
|
|
43
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -106,10 +148,16 @@ files:
|
|
|
106
148
|
- lib/weneedfeed/capture.rb
|
|
107
149
|
- lib/weneedfeed/command.rb
|
|
108
150
|
- lib/weneedfeed/controllers.rb
|
|
151
|
+
- lib/weneedfeed/controllers/base.rb
|
|
109
152
|
- lib/weneedfeed/controllers/show_feed.rb
|
|
110
153
|
- lib/weneedfeed/controllers/show_top_page.rb
|
|
154
|
+
- lib/weneedfeed/faraday_response_middleware.rb
|
|
155
|
+
- lib/weneedfeed/helpers.rb
|
|
156
|
+
- lib/weneedfeed/helpers/parameters.rb
|
|
111
157
|
- lib/weneedfeed/item.rb
|
|
112
158
|
- lib/weneedfeed/page.rb
|
|
159
|
+
- lib/weneedfeed/page_schema.rb
|
|
160
|
+
- lib/weneedfeed/schema.rb
|
|
113
161
|
- lib/weneedfeed/scraping.rb
|
|
114
162
|
- lib/weneedfeed/version.rb
|
|
115
163
|
- lib/weneedfeed/views.rb
|