weneedfeed 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +20 -0
- data/.gitignore +12 -0
- data/.rspec +1 -0
- data/.rubocop.yml +8 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +102 -0
- data/LICENSE.txt +21 -0
- data/README.md +80 -0
- data/Rakefile +7 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/weneedfeed.rb +13 -0
- data/lib/weneedfeed/application.rb +31 -0
- data/lib/weneedfeed/capture.rb +81 -0
- data/lib/weneedfeed/controllers.rb +8 -0
- data/lib/weneedfeed/controllers/show_feed.rb +47 -0
- data/lib/weneedfeed/controllers/show_top_page.rb +25 -0
- data/lib/weneedfeed/item.rb +70 -0
- data/lib/weneedfeed/page.rb +53 -0
- data/lib/weneedfeed/scraping.rb +59 -0
- data/lib/weneedfeed/version.rb +5 -0
- data/lib/weneedfeed/views.rb +8 -0
- data/lib/weneedfeed/views/show_feed.rb +27 -0
- data/lib/weneedfeed/views/show_top_page.rb +13 -0
- data/templates/show_feed.xml.erb +21 -0
- data/templates/show_top_page.html.erb +17 -0
- data/weneedfeed.gemspec +30 -0
- metadata +128 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7fb7f94b87cc29f50d6a50a9c4f4f289d175bba47e4514eccd35b373a0438ff6
|
4
|
+
data.tar.gz: 56071e66614605dd824597dc7149260d47b8beee95b2338190c20cbd1fb30bbf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 25bc5a5e40fb87b3f2516aa949819b0ec258f2bb6ee8d9ab8a1ea8f1672a428c002d4448ad86028e51140db7c4d9462c95ca2678b9e421423a8b17d54454294b
|
7
|
+
data.tar.gz: 874ab7d89dfeb3f238aaf2ed3aff6146dac0e4f5a8410a45feb6967219a6062f67e0b38b12bc6369ac597a46f5cf1503db1d213dbdeb1d5f6a6de9ee45e3b41d
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Runs RuboCop and the RSpec suite for every pull request and for every push
# to master.
name: test

on:
  pull_request:
  push:
    branches:
      - master

jobs:
  build:
    # The ubuntu-18.04 hosted image has been retired, so jobs targeting it
    # are never picked up; use a currently supported image.
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      # actions/setup-ruby is archived; ruby/setup-ruby is its maintained
      # successor and takes the same `ruby-version` input.
      - uses: ruby/setup-ruby@v1
        with:
          ruby-version: '2.7.2'
      - run: bundle install --jobs=$(($(nproc) - 1)) --retry=3
      - run: bundle exec rubocop --color --parallel
      - run: bundle exec rspec --force-color
|
19
|
+
|
20
|
+
|
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--require spec_helper
|
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
weneedfeed (0.1.0)
|
5
|
+
faraday
|
6
|
+
hibana
|
7
|
+
nokogiri
|
8
|
+
rack-capture (>= 0.4.0)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
addressable (2.7.0)
|
14
|
+
public_suffix (>= 2.0.2, < 5.0)
|
15
|
+
ast (2.4.1)
|
16
|
+
concurrent-ruby (1.1.7)
|
17
|
+
crack (0.4.4)
|
18
|
+
diff-lcs (1.4.4)
|
19
|
+
faraday (1.1.0)
|
20
|
+
multipart-post (>= 1.2, < 3)
|
21
|
+
ruby2_keywords
|
22
|
+
hanami-router (1.3.2)
|
23
|
+
hanami-utils (~> 1.3)
|
24
|
+
http_router (= 0.11.2)
|
25
|
+
rack (~> 2.0)
|
26
|
+
hanami-utils (1.3.6)
|
27
|
+
concurrent-ruby (~> 1.0)
|
28
|
+
transproc (~> 1.0)
|
29
|
+
hashdiff (1.0.1)
|
30
|
+
hibana (0.1.1)
|
31
|
+
hanami-router
|
32
|
+
rack
|
33
|
+
tilt
|
34
|
+
http_router (0.11.2)
|
35
|
+
rack (>= 1.0.0)
|
36
|
+
url_mount (~> 0.2.1)
|
37
|
+
mini_portile2 (2.4.0)
|
38
|
+
multipart-post (2.1.1)
|
39
|
+
nokogiri (1.10.10)
|
40
|
+
mini_portile2 (~> 2.4.0)
|
41
|
+
parallel (1.19.2)
|
42
|
+
parser (2.7.2.0)
|
43
|
+
ast (~> 2.4.1)
|
44
|
+
public_suffix (4.0.6)
|
45
|
+
rack (2.2.3)
|
46
|
+
rack-capture (0.4.0)
|
47
|
+
rack
|
48
|
+
rack-test (1.1.0)
|
49
|
+
rack (>= 1.0, < 3)
|
50
|
+
rainbow (3.0.0)
|
51
|
+
rake (12.3.3)
|
52
|
+
regexp_parser (1.8.2)
|
53
|
+
rexml (3.2.4)
|
54
|
+
rspec (3.9.0)
|
55
|
+
rspec-core (~> 3.9.0)
|
56
|
+
rspec-expectations (~> 3.9.0)
|
57
|
+
rspec-mocks (~> 3.9.0)
|
58
|
+
rspec-core (3.9.3)
|
59
|
+
rspec-support (~> 3.9.3)
|
60
|
+
rspec-expectations (3.9.3)
|
61
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
62
|
+
rspec-support (~> 3.9.0)
|
63
|
+
rspec-mocks (3.9.1)
|
64
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
65
|
+
rspec-support (~> 3.9.0)
|
66
|
+
rspec-support (3.9.4)
|
67
|
+
rubocop (1.0.0)
|
68
|
+
parallel (~> 1.10)
|
69
|
+
parser (>= 2.7.1.5)
|
70
|
+
rainbow (>= 2.2.2, < 4.0)
|
71
|
+
regexp_parser (>= 1.8)
|
72
|
+
rexml
|
73
|
+
rubocop-ast (>= 0.6.0)
|
74
|
+
ruby-progressbar (~> 1.7)
|
75
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
76
|
+
rubocop-ast (1.1.0)
|
77
|
+
parser (>= 2.7.1.5)
|
78
|
+
ruby-progressbar (1.10.1)
|
79
|
+
ruby2_keywords (0.0.2)
|
80
|
+
tilt (2.0.10)
|
81
|
+
transproc (1.1.1)
|
82
|
+
unicode-display_width (1.7.0)
|
83
|
+
url_mount (0.2.1)
|
84
|
+
rack
|
85
|
+
webmock (3.9.3)
|
86
|
+
addressable (>= 2.3.6)
|
87
|
+
crack (>= 0.3.2)
|
88
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
89
|
+
|
90
|
+
PLATFORMS
|
91
|
+
ruby
|
92
|
+
|
93
|
+
DEPENDENCIES
|
94
|
+
rack-test
|
95
|
+
rake (~> 12.0)
|
96
|
+
rspec
|
97
|
+
rubocop
|
98
|
+
webmock
|
99
|
+
weneedfeed!
|
100
|
+
|
101
|
+
BUNDLED WITH
|
102
|
+
2.1.4
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 Ryo Nakamura
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# Weneedfeed
|
2
|
+
|
3
|
+
[![](https://badge.fury.io/rb/weneedfeed.svg)](https://rubygems.org/gems/weneedfeed)
|
4
|
+
[![](https://github.com/r7kamura/weneedfeed/workflows/test/badge.svg)](https://github.com/r7kamura/weneedfeed/actions?query=workflow%3Atest)
|
5
|
+
|
6
|
+
Generate feeds from URL and XPath.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'weneedfeed'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
```sh
|
19
|
+
bundle install
```
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
```sh
|
24
|
+
gem install weneedfeed
|
25
|
+
```
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
Write schema:
|
30
|
+
|
31
|
+
```yaml
|
32
|
+
pages:
|
33
|
+
example:
|
34
|
+
title: example name
|
35
|
+
url: http://example.com/
|
36
|
+
xpath:
|
37
|
+
item: //li
|
38
|
+
item_description: .//p[3]
|
39
|
+
item_link: .//a/@href
|
40
|
+
item_time: .//time/@datetime
|
41
|
+
item_title: .//p[2]
|
42
|
+
```
|
43
|
+
|
44
|
+
And then call `Weneedfeed::Capture`:
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
require 'weneedfeed'
|
48
|
+
|
49
|
+
Weneedfeed::Capture.call(
|
50
|
+
base_url: 'https://user.github.io/repo',
|
51
|
+
schema_path: 'schema.yml'
|
52
|
+
)
|
53
|
+
```
|
54
|
+
|
55
|
+
These files will be generated:
|
56
|
+
|
57
|
+
- output/index.html
|
58
|
+
- output/feeds/example.xml
|
59
|
+
|
60
|
+
## Development
|
61
|
+
|
62
|
+
### Setup
|
63
|
+
|
64
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
65
|
+
|
66
|
+
### Local installation
|
67
|
+
|
68
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
69
|
+
|
70
|
+
### Release
|
71
|
+
|
72
|
+
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
73
|
+
|
74
|
+
## Contributing
|
75
|
+
|
76
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/r7kamura/weneedfeed.
|
77
|
+
|
78
|
+
## License
|
79
|
+
|
80
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Opens an IRB session with the gem and its bundled dependencies loaded.
require 'bundler/setup'
require 'weneedfeed'
require 'irb'

# Fixtures or other initialization code can be added here to make
# experimenting with the gem easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and replace the IRB call
# below with:
#   require "pry"
#   Pry.start

IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/weneedfeed.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'weneedfeed/version'
|
4
|
+
|
5
|
+
# Top-level namespace of the gem. Each component is registered with
# `autoload`, so its file is required the first time the constant is used.
module Weneedfeed
  {
    Application: 'weneedfeed/application',
    Capture: 'weneedfeed/capture',
    Controllers: 'weneedfeed/controllers',
    Item: 'weneedfeed/item',
    Page: 'weneedfeed/page',
    Scraping: 'weneedfeed/scraping',
    Views: 'weneedfeed/views'
  }.each do |constant_name, feature|
    autoload constant_name, feature
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'hibana'
|
4
|
+
|
5
|
+
module Weneedfeed
  # Rack application with two routes: the top page and one feed per page name.
  class Application < ::Hibana::Application
    route do
      get '/', to: ::Weneedfeed::Controllers::ShowTopPage
      get '/feeds/:page_name.xml', to: ::Weneedfeed::Controllers::ShowFeed
    end

    # @param [Hash] schema Parsed schema; its 'pages' entry is read by #paths.
    def initialize(schema:)
      @schema = schema
      super()
    end

    # Stores the schema in the Rack env under 'weneedfeed.schema' (the key the
    # controllers read) and then delegates to the parent implementation.
    # @param [Hash] env
    def call(env)
      env['weneedfeed.schema'] = @schema
      super
    end

    # Lists every path this application can serve: the top page followed by
    # one feed path per key of the schema's 'pages' mapping.
    # @return [Array<String>]
    def paths
      feed_paths = @schema['pages'].each_key.map { |name| "/feeds/#{name}.xml" }
      ['/', *feed_paths]
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rack/capture'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
# URI is referenced below; require it explicitly instead of relying on another
# library having loaded it first.
require 'uri'

module Weneedfeed
  # Passes every path of the application to Rack::Capture, one URL at a time.
  class Capture
    class << self
      # Convenience entry point: builds an instance and runs it.
      # @param [String] base_url
      # @param [String] schema_path
      def call(
        base_url:,
        schema_path:
      )
        new(
          base_url: base_url,
          schema_path: schema_path
        ).call
      end
    end

    # @param [String] base_url URL the generated site will be served from,
    #   e.g. 'https://user.github.io/repo'. Trailing slashes are ignored.
    # @param [String] schema_path Path to the YAML schema file.
    def initialize(
      base_url:,
      schema_path:
    )
      # Without this, 'https://host/repo/' would yield the script name
      # '/repo/' and URLs such as 'https://host/repo//feeds/x.xml'.
      @base_url = base_url.to_s.sub(%r{/+\z}, '')
      @schema_path = schema_path
    end

    # Captures each URL returned by #urls.
    def call
      urls.each do |url|
        ::Rack::Capture.call(
          app: app,
          script_name: script_name,
          url: url
        )
      end
    end

    private

    # @return [Weneedfeed::Application]
    def app
      @app ||= ::Weneedfeed::Application.new(
        schema: schema
      )
    end

    # @return [URI]
    def base_uri
      @base_uri ||= ::URI.parse(@base_url)
    end

    # Loads the schema once and reuses it.
    # NOTE(review): YAML.load_file is not a safe loader on older Psych
    # versions; only point schema_path at files you trust.
    # @return [Hash]
    def schema
      @schema ||= ::YAML.load_file(@schema_path)
    end

    # Path component of the base URL, or '' when the site lives at the root.
    # @return [String]
    def script_name
      case base_uri.path
      when '', '/'
        ''
      else
        base_uri.path
      end
    end

    # Absolute URL for every path the application serves.
    # @return [Array<String>]
    def urls
      app.paths.map do |path|
        [
          @base_url,
          path
        ].join
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
  module Controllers
    # Looks up the page named in the route parameters, scrapes it and writes
    # the rendered feed; sets status 404 when the schema has no such page.
    class ShowFeed < ::Hibana::Controller
      def call
        rack_env = request.env
        name = rack_env.dig('router.params', :page_name)
        page_schema = rack_env.dig('weneedfeed.schema', 'pages', name)
        unless page_schema
          response.status = 404
          return
        end

        xpaths = page_schema['xpath']
        page = ::Weneedfeed::Scraping.new(
          item_description_xpath: xpaths['item_description'],
          item_link_xpath: xpaths['item_link'],
          item_time_xpath: xpaths['item_time'],
          item_title_xpath: xpaths['item_title'],
          item_xpath: xpaths['item'],
          title: page_schema['title'],
          url: page_schema['url']
        ).call

        response.content_type = 'application/xml; charset=utf-8'
        # The template is resolved relative to this file, three levels up.
        template_path = ::File.expand_path(
          'templates/show_feed.xml.erb',
          "#{__dir__}/../../.."
        )
        view = ::Weneedfeed::Views::ShowFeed.new(
          page: page,
          partial_template_path: template_path,
          request: request
        )
        response.write(view.to_s)
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
  module Controllers
    # Writes the HTML top page rendered from the schema's 'pages' entry.
    class ShowTopPage < ::Hibana::Controller
      def call
        schema_pages = request.env.dig('weneedfeed.schema', 'pages')
        response.content_type = 'text/html'

        # The template is resolved relative to this file, three levels up.
        template_path = ::File.expand_path(
          'templates/show_top_page.html.erb',
          "#{__dir__}/../../.."
        )
        view = ::Weneedfeed::Views::ShowTopPage.new(
          pages: schema_pages,
          partial_template_path: template_path,
          request: request
        )
        response.write(view.to_s)
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Time.parse and Time.strptime are defined by the 'time' standard library, and
# URI.join by 'uri'; require both here instead of relying on load order.
require 'time'
require 'uri'

module Weneedfeed
  # One feed entry. Every attribute is read from `node` with the XPath
  # expression given for it.
  class Item
    class << self
      # Tries Time.parse first, then the '%Y年%m月%d日' date format.
      # @param [String] string
      # @return [Time, nil] nil when neither format matches
      def parse_time(string)
        ::Time.parse(string)
      rescue ArgumentError, RangeError
        begin
          ::Time.strptime(string, '%Y年%m月%d日')
        rescue ArgumentError
          # An unparsable time is reported as nil rather than raised.
          nil
        end
      end
    end

    # @param [String] description_xpath
    # @param [String] link_xpath
    # @param [Nokogiri::Node] node
    # @param [String] time_xpath
    # @param [String] title_xpath
    # @param [String] url URL that the item link is joined onto
    def initialize(
      description_xpath:,
      link_xpath:,
      node:,
      time_xpath:,
      title_xpath:,
      url:
    )
      @description_xpath = description_xpath
      @link_xpath = link_xpath
      @node = node
      @time_xpath = time_xpath
      @title_xpath = title_xpath
      @url = url
    end

    # @return [String] inner HTML of the description nodes
    def description
      @node.xpath(@description_xpath).inner_html
    end

    # @return [String] the scraped link joined onto the page URL
    def link
      ::URI.join(
        @url,
        @node.xpath(@link_xpath).inner_html
      ).to_s
    end

    # @return [Time, nil] nil when the scraped time cannot be parsed
    def time
      self.class.parse_time(time_string)
    end

    # @return [String] text content of the title nodes
    def title
      @node.xpath(@title_xpath).inner_text
    end

    private

    # @return [String]
    def time_string
      @node.xpath(@time_xpath).inner_html
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
  # A parsed document plus the XPath expressions used to split it into items.
  class Page
    # @return [String]
    attr_reader :title, :url

    # @param [String] item_description_xpath
    # @param [String] item_link_xpath
    # @param [String] item_time_xpath
    # @param [String] item_title_xpath
    # @param [String] item_xpath
    # @param [Nokogiri::Node] node
    # @param [String] title
    # @param [String] url
    def initialize(
      item_description_xpath:,
      item_link_xpath:,
      item_time_xpath:,
      item_title_xpath:,
      item_xpath:,
      node:,
      title:,
      url:
    )
      @node = node
      @title = title
      @url = url
      @item_xpath = item_xpath
      @item_description_xpath = item_description_xpath
      @item_link_xpath = item_link_xpath
      @item_time_xpath = item_time_xpath
      @item_title_xpath = item_title_xpath
    end

    # Builds one item for each node matched by the item XPath.
    # @return [Array<Weneedfeed::Item>]
    def items
      shared_options = {
        description_xpath: @item_description_xpath,
        link_xpath: @item_link_xpath,
        time_xpath: @item_time_xpath,
        title_xpath: @item_title_xpath,
        url: @url
      }
      @node.xpath(@item_xpath).map do |item_node|
        ::Weneedfeed::Item.new(node: item_node, **shared_options)
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'faraday'
|
4
|
+
require 'nokogiri'
|
5
|
+
|
6
|
+
module Weneedfeed
  # Fetches a URL and wraps the parsed HTML in a Weneedfeed::Page.
  class Scraping
    # @param [String] item_description_xpath
    # @param [String] item_link_xpath
    # @param [String] item_time_xpath
    # @param [String] item_title_xpath
    # @param [String] item_xpath
    # @param [String] title
    # @param [String] url
    def initialize(
      item_description_xpath:,
      item_link_xpath:,
      item_time_xpath:,
      item_title_xpath:,
      item_xpath:,
      title:,
      url:
    )
      @title = title
      @url = url
      @item_xpath = item_xpath
      @item_description_xpath = item_description_xpath
      @item_link_xpath = item_link_xpath
      @item_time_xpath = item_time_xpath
      @item_title_xpath = item_title_xpath
    end

    # @return [Weneedfeed::Page]
    def call
      ::Weneedfeed::Page.new(
        node: parsed_body,
        title: @title,
        url: @url,
        item_xpath: @item_xpath,
        item_description_xpath: @item_description_xpath,
        item_link_xpath: @item_link_xpath,
        item_time_xpath: @item_time_xpath,
        item_title_xpath: @item_title_xpath
      )
    end

    private

    # Performs the HTTP GET and parses the response body as HTML.
    # @return [Nokogiri::Node]
    def parsed_body
      http_response = ::Faraday.get(@url)
      ::Nokogiri::HTML.parse(http_response.body)
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Weneedfeed
  module Views
    # View for the feed template; supplies the item list and the page name.
    class ShowFeed < ::Hibana::View
      # @param [Weneedfeed::Page] page
      def initialize(page:, **options)
        super(**options)
        @page = page
      end

      private

      # The ten newest items, newest first. Items without a parsable time are
      # ordered as if they were published now.
      # @return [Enumerable<Weneedfeed::Item>]
      def items
        newest_first = @page.items.sort_by { |item| -(item.time || ::Time.now).to_i }
        newest_first.first(10)
      end

      # @return [String]
      def page_name
        router_params = request.env['router.params']
        router_params[:page_name]
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
<%# RSS 2.0 feed for one page. Uses `request`, `page_name`, `items` and `@page` from the view. %>
<rss version="2.0"
  xmlns:atom="http://www.w3.org/2005/Atom"
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <description></description>
    <link><%= "#{request.base_url}#{request.script_name}" %></link>
    <%# The self link has to match the route "/feeds/:page_name.xml" below the script name. %>
    <atom:link href="<%= "#{request.base_url}#{request.script_name}/feeds/#{page_name}.xml" %>" rel="self"/>
    <%# CDATA keeps characters such as "&" in the title from breaking the XML, as for item titles. %>
    <title><![CDATA[<%= @page.title %>]]></title>
    <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
    <% items.each do |item| %>
      <% published_at = item.time %>
      <item>
        <title><![CDATA[<%= item.title %>]]></title>
        <link><%= item.link %></link>
        <%# item.time is nil when the scraped time cannot be parsed; omit pubDate instead of raising. %>
        <% if published_at %>
          <pubDate><%= published_at.rfc822 %></pubDate>
        <% end %>
        <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
        <guid isPermaLink="true"><%= item.link %></guid>
      </item>
    <% end %>
  </channel>
</rss>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html>
<%# Index page: one link per entry of @pages (page name => page properties). %>
<html>
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width">
    <title>Weneedfeed</title>
  </head>
  <body>
    <ul>
      <% @pages.each do |page_name, hash| %>
        <li>
          <%# Feeds are routed as "/feeds/:page_name.xml"; the script name keeps the link valid when the site is served from a sub-path. %>
          <a href="<%= request.script_name %>/feeds/<%= page_name %>.xml"><%= hash['title'] %></a>
        </li>
      <% end %>
    </ul>
  </body>
</html>
|
data/weneedfeed.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'lib/weneedfeed/version'

# Gem specification for weneedfeed.
Gem::Specification.new do |gem|
  gem.name = 'weneedfeed'
  gem.version = Weneedfeed::VERSION
  gem.authors = ['Ryo Nakamura']
  gem.email = ['r7kamura@gmail.com']

  gem.summary = 'Generate feeds from URL and XPath.'
  gem.homepage = 'https://github.com/r7kamura/weneedfeed'
  gem.license = 'MIT'
  gem.required_ruby_version = Gem::Requirement.new('>= 2.4.0')

  gem.metadata.update(
    'homepage_uri' => gem.homepage,
    'source_code_uri' => gem.homepage
  )

  # Package every file tracked by git except those under test/, spec/ or
  # features/. `git ls-files -z` separates the names with NUL bytes.
  gem.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").grep_v(%r{^(test|spec|features)/})
  end
  gem.require_paths = ['lib']

  %w[faraday hibana nokogiri].each do |dependency|
    gem.add_runtime_dependency dependency
  end
  gem.add_runtime_dependency 'rack-capture', '>= 0.4.0'
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: weneedfeed
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ryo Nakamura
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hibana
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rack-capture
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.4.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.4.0
|
69
|
+
description:
|
70
|
+
email:
|
71
|
+
- r7kamura@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".github/workflows/test.yml"
|
77
|
+
- ".gitignore"
|
78
|
+
- ".rspec"
|
79
|
+
- ".rubocop.yml"
|
80
|
+
- Gemfile
|
81
|
+
- Gemfile.lock
|
82
|
+
- LICENSE.txt
|
83
|
+
- README.md
|
84
|
+
- Rakefile
|
85
|
+
- bin/console
|
86
|
+
- bin/setup
|
87
|
+
- lib/weneedfeed.rb
|
88
|
+
- lib/weneedfeed/application.rb
|
89
|
+
- lib/weneedfeed/capture.rb
|
90
|
+
- lib/weneedfeed/controllers.rb
|
91
|
+
- lib/weneedfeed/controllers/show_feed.rb
|
92
|
+
- lib/weneedfeed/controllers/show_top_page.rb
|
93
|
+
- lib/weneedfeed/item.rb
|
94
|
+
- lib/weneedfeed/page.rb
|
95
|
+
- lib/weneedfeed/scraping.rb
|
96
|
+
- lib/weneedfeed/version.rb
|
97
|
+
- lib/weneedfeed/views.rb
|
98
|
+
- lib/weneedfeed/views/show_feed.rb
|
99
|
+
- lib/weneedfeed/views/show_top_page.rb
|
100
|
+
- templates/show_feed.xml.erb
|
101
|
+
- templates/show_top_page.html.erb
|
102
|
+
- weneedfeed.gemspec
|
103
|
+
homepage: https://github.com/r7kamura/weneedfeed
|
104
|
+
licenses:
|
105
|
+
- MIT
|
106
|
+
metadata:
|
107
|
+
homepage_uri: https://github.com/r7kamura/weneedfeed
|
108
|
+
source_code_uri: https://github.com/r7kamura/weneedfeed
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 2.4.0
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubygems_version: 3.1.2
|
125
|
+
signing_key:
|
126
|
+
specification_version: 4
|
127
|
+
summary: Generate feeds from URL and XPath.
|
128
|
+
test_files: []
|