web_stat 0.2.9 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/Dockerfile +25 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +35 -31
- data/README.md +4 -0
- data/docker-compose.yml +18 -0
- data/docker/exec +8 -0
- data/docker/start +13 -0
- data/lib/helpers/web_drive_helper.rb +30 -0
- data/lib/web_stat.rb +6 -2
- data/lib/web_stat/config/web_stat.yml +2 -1
- data/lib/web_stat/fetch.rb +28 -12
- data/lib/web_stat/final_redirect_url.rb +54 -0
- data/lib/web_stat/tag.rb +1 -1
- data/lib/web_stat/version.rb +2 -2
- data/spec/spec_helper.rb +8 -1
- data/spec/web_stat/fetch_spec.rb +1 -0
- data/web_stat.gemspec +3 -1
- metadata +38 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: eff1d2b80ff6bf4b61f82a100319a6b37ee13cb8a2ad96a11fd336c8a7164398
|
|
4
|
+
data.tar.gz: f4bf36491dbe6ae32b1cbf718ceb1fe7e91adce60848f4296d6c1319ea5c4eea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f38fe9d4f0ad495107c5ac42d2d240b42e810ae4f7ad0db5738b705f7e03b24846875cbc2139a334ebfc7d33ff1df953dec768f69f13a0932cd01a2c6f221753
|
|
7
|
+
data.tar.gz: 5f30259af5a84e2eb43645766aac49d151367a82c9e5643a0ddf3a5f15dbcbba59884def134cfaa19eaca1a932e77c28d65bfd389cecdd0262812cad808ff5e3
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.7.
|
|
1
|
+
2.7.1
|
data/Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Define base image, you can use --build-arg
|
|
2
|
+
ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.35.2_nodev14.3.0_rubyv2.7.1_sasscv2.3.0_ffiv1.13.1_chromedriver"
|
|
3
|
+
FROM $base_image
|
|
4
|
+
|
|
5
|
+
# Set locale
|
|
6
|
+
ENV LANG "C.UTF-8"
|
|
7
|
+
ENV NOKOGIRI_USE_SYSTEM_LIBRARIES "YES"
|
|
8
|
+
|
|
9
|
+
# Set correct environment variables.
|
|
10
|
+
RUN mkdir -p /var/www/docker
|
|
11
|
+
WORKDIR /var/www/docker
|
|
12
|
+
|
|
13
|
+
# Set up application
|
|
14
|
+
COPY . .
|
|
15
|
+
|
|
16
|
+
# Init gems
|
|
17
|
+
RUN echo "gem: --no-rdoc --no-ri" > ~/.gemrc
|
|
18
|
+
RUN . /etc/profile.d/rvm.sh && \
|
|
19
|
+
bundle config --global with 'development test' && \
|
|
20
|
+
bundle config --global system true && \
|
|
21
|
+
bundle config --global jobs 10 && \
|
|
22
|
+
bundle config --global build.nokogiri --use-system-libraries && \
|
|
23
|
+
bundle install
|
|
24
|
+
|
|
25
|
+
CMD ["bash"]
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,13 +1,7 @@
|
|
|
1
|
-
GIT
|
|
2
|
-
remote: git@github.com:yubele/final_redirect_url
|
|
3
|
-
revision: 45df878ec9495ebbfa06dc0a60cc5043c2519e16
|
|
4
|
-
specs:
|
|
5
|
-
final_redirect_url (0.1.1)
|
|
6
|
-
|
|
7
1
|
PATH
|
|
8
2
|
remote: .
|
|
9
3
|
specs:
|
|
10
|
-
web_stat (0.2.9)
|
|
4
|
+
web_stat (0.3.2)
|
|
11
5
|
bundler (>= 2.0.2)
|
|
12
6
|
cld (>= 0.8.0)
|
|
13
7
|
mechanize (>= 2.7)
|
|
@@ -15,25 +9,28 @@ PATH
|
|
|
15
9
|
nokogiri (>= 1.10.4)
|
|
16
10
|
ruby-readability (>= 0.7)
|
|
17
11
|
sanitize (>= 5.0.0)
|
|
12
|
+
selenium-webdriver (= 3.142.7)
|
|
18
13
|
|
|
19
14
|
GEM
|
|
20
15
|
remote: https://rubygems.org/
|
|
21
16
|
specs:
|
|
22
17
|
addressable (2.7.0)
|
|
23
18
|
public_suffix (>= 2.0.2, < 5.0)
|
|
19
|
+
byebug (11.1.3)
|
|
20
|
+
childprocess (3.0.0)
|
|
24
21
|
cld (0.8.0)
|
|
25
22
|
ffi
|
|
26
|
-
coderay (1.1.
|
|
27
|
-
connection_pool (2.2.
|
|
23
|
+
coderay (1.1.3)
|
|
24
|
+
connection_pool (2.2.3)
|
|
28
25
|
crack (0.4.3)
|
|
29
26
|
safe_yaml (~> 1.0.0)
|
|
30
|
-
crass (1.0.
|
|
27
|
+
crass (1.0.6)
|
|
31
28
|
diff-lcs (1.3)
|
|
32
29
|
domain_name (0.5.20190701)
|
|
33
30
|
unf (>= 0.0.5, < 1.0.0)
|
|
34
|
-
ffi (1.
|
|
31
|
+
ffi (1.13.1)
|
|
35
32
|
guess_html_encoding (0.0.11)
|
|
36
|
-
hashdiff (1.0.
|
|
33
|
+
hashdiff (1.0.1)
|
|
37
34
|
http-cookie (1.0.3)
|
|
38
35
|
domain_name (~> 0.5)
|
|
39
36
|
mechanize (2.7.6)
|
|
@@ -45,51 +42,58 @@ GEM
|
|
|
45
42
|
nokogiri (~> 1.6)
|
|
46
43
|
ntlm-http (~> 0.1, >= 0.1.1)
|
|
47
44
|
webrobots (>= 0.0.9, < 0.2)
|
|
48
|
-
method_source (0.
|
|
45
|
+
method_source (1.0.0)
|
|
49
46
|
mime-types (3.3.1)
|
|
50
47
|
mime-types-data (~> 3.2015)
|
|
51
|
-
mime-types-data (3.
|
|
48
|
+
mime-types-data (3.2020.0512)
|
|
52
49
|
mini_portile2 (2.4.0)
|
|
53
|
-
natto (1.
|
|
50
|
+
natto (1.2.0)
|
|
54
51
|
ffi (>= 1.9.0)
|
|
55
52
|
net-http-digest_auth (1.4.1)
|
|
56
|
-
net-http-persistent (
|
|
53
|
+
net-http-persistent (4.0.0)
|
|
57
54
|
connection_pool (~> 2.2)
|
|
58
|
-
nokogiri (1.10.
|
|
55
|
+
nokogiri (1.10.9)
|
|
59
56
|
mini_portile2 (~> 2.4.0)
|
|
60
57
|
nokogumbo (2.0.2)
|
|
61
58
|
nokogiri (~> 1.8, >= 1.8.4)
|
|
62
59
|
ntlm-http (0.1.1)
|
|
63
|
-
pry (0.
|
|
64
|
-
coderay (~> 1.1
|
|
65
|
-
method_source (~>
|
|
66
|
-
|
|
60
|
+
pry (0.13.1)
|
|
61
|
+
coderay (~> 1.1)
|
|
62
|
+
method_source (~> 1.0)
|
|
63
|
+
pry-byebug (3.9.0)
|
|
64
|
+
byebug (~> 11.0)
|
|
65
|
+
pry (~> 0.13.0)
|
|
66
|
+
public_suffix (4.0.5)
|
|
67
67
|
rake (13.0.1)
|
|
68
68
|
rspec (3.9.0)
|
|
69
69
|
rspec-core (~> 3.9.0)
|
|
70
70
|
rspec-expectations (~> 3.9.0)
|
|
71
71
|
rspec-mocks (~> 3.9.0)
|
|
72
|
-
rspec-core (3.9.
|
|
73
|
-
rspec-support (~> 3.9.
|
|
74
|
-
rspec-expectations (3.9.
|
|
72
|
+
rspec-core (3.9.2)
|
|
73
|
+
rspec-support (~> 3.9.3)
|
|
74
|
+
rspec-expectations (3.9.2)
|
|
75
75
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
76
76
|
rspec-support (~> 3.9.0)
|
|
77
77
|
rspec-mocks (3.9.1)
|
|
78
78
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
79
79
|
rspec-support (~> 3.9.0)
|
|
80
|
-
rspec-support (3.9.
|
|
80
|
+
rspec-support (3.9.3)
|
|
81
81
|
ruby-readability (0.7.0)
|
|
82
82
|
guess_html_encoding (>= 0.0.4)
|
|
83
83
|
nokogiri (>= 1.6.0)
|
|
84
|
+
rubyzip (2.3.0)
|
|
84
85
|
safe_yaml (1.0.5)
|
|
85
|
-
sanitize (5.
|
|
86
|
+
sanitize (5.2.0)
|
|
86
87
|
crass (~> 1.0.2)
|
|
87
88
|
nokogiri (>= 1.8.0)
|
|
88
89
|
nokogumbo (~> 2.0)
|
|
90
|
+
selenium-webdriver (3.142.7)
|
|
91
|
+
childprocess (>= 0.5, < 4.0)
|
|
92
|
+
rubyzip (>= 1.2.2)
|
|
89
93
|
unf (0.1.4)
|
|
90
94
|
unf_ext
|
|
91
|
-
unf_ext (0.0.7.
|
|
92
|
-
webmock (3.
|
|
95
|
+
unf_ext (0.0.7.7)
|
|
96
|
+
webmock (3.8.3)
|
|
93
97
|
addressable (>= 2.3.6)
|
|
94
98
|
crack (>= 0.3.2)
|
|
95
99
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
@@ -99,12 +103,12 @@ PLATFORMS
|
|
|
99
103
|
ruby
|
|
100
104
|
|
|
101
105
|
DEPENDENCIES
|
|
102
|
-
|
|
103
|
-
pry (
|
|
106
|
+
pry (>= 0.13.1)
|
|
107
|
+
pry-byebug (= 3.9.0)
|
|
104
108
|
rake (>= 10.0)
|
|
105
109
|
rspec (>= 3.0)
|
|
106
110
|
web_stat!
|
|
107
111
|
webmock (>= 3.6.0)
|
|
108
112
|
|
|
109
113
|
BUNDLED WITH
|
|
110
|
-
2.1.
|
|
114
|
+
2.1.4
|
data/README.md
CHANGED
data/docker-compose.yml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
version: "3.8"
|
|
2
|
+
networks:
|
|
3
|
+
app-tier:
|
|
4
|
+
driver: bridge
|
|
5
|
+
services:
|
|
6
|
+
web_stat:
|
|
7
|
+
tty: true
|
|
8
|
+
stdin_open: true
|
|
9
|
+
container_name: web_stat
|
|
10
|
+
build:
|
|
11
|
+
context: .
|
|
12
|
+
dockerfile: Dockerfile
|
|
13
|
+
volumes:
|
|
14
|
+
- ./:/var/www/docker:cached
|
|
15
|
+
working_dir: /var/www/docker
|
|
16
|
+
command: bash
|
|
17
|
+
networks:
|
|
18
|
+
- app-tier
|
data/docker/exec
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Check to support tty.
|
|
3
|
+
if [ "$(tty>/dev/null;echo $?)" != "0" ];then
|
|
4
|
+
DOCKERCOMPOSE_EXEC="docker-compose exec -T"
|
|
5
|
+
else
|
|
6
|
+
DOCKERCOMPOSE_EXEC="docker-compose exec"
|
|
7
|
+
fi
|
|
8
|
+
$DOCKERCOMPOSE_EXEC web_stat /bin/bash -c ". /etc/profile.d/rvm.sh && . /root/.nvm/nvm.sh && $*"
|
data/docker/start
ADDED
data/lib/helpers/web_drive_helper.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module WebStat
|
|
2
|
+
class WebDriverHelper
|
|
3
|
+
class << self
|
|
4
|
+
# Get last url
|
|
5
|
+
# @param [String] url
|
|
6
|
+
# @param [Integer] delay
|
|
7
|
+
def get_last_url(url, delay=nil)
|
|
8
|
+
Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
|
|
9
|
+
Selenium::WebDriver.logger.level = :info
|
|
10
|
+
options = Selenium::WebDriver::Chrome::Options.new(args: [
|
|
11
|
+
'headless',
|
|
12
|
+
'no-sandbox',
|
|
13
|
+
'disable-gpu',
|
|
14
|
+
'start-maximized',
|
|
15
|
+
'window-size=1920,1080'
|
|
16
|
+
])
|
|
17
|
+
driver = Selenium::WebDriver.for(:chrome, options: options)
|
|
18
|
+
driver.manage.timeouts.implicit_wait = 10
|
|
19
|
+
Selenium::WebDriver::Wait.new(timeout: 10)
|
|
20
|
+
driver.get(url)
|
|
21
|
+
if delay.is_a?(Integer)
|
|
22
|
+
sleep delay
|
|
23
|
+
end
|
|
24
|
+
last_url = driver.current_url
|
|
25
|
+
driver.quit
|
|
26
|
+
last_url
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
data/lib/web_stat.rb
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
require "bundler"
|
|
2
2
|
|
|
3
|
+
require 'cld'
|
|
3
4
|
require 'uri'
|
|
4
5
|
require 'digest'
|
|
6
|
+
require 'logger'
|
|
5
7
|
require 'sanitize'
|
|
6
8
|
require 'nokogiri'
|
|
7
9
|
require 'open-uri'
|
|
10
|
+
require 'net/http'
|
|
8
11
|
require 'ruby-readability'
|
|
9
|
-
require '
|
|
10
|
-
require 'cld'
|
|
12
|
+
require 'selenium-webdriver'
|
|
11
13
|
|
|
14
|
+
require "helpers/web_drive_helper"
|
|
15
|
+
require "web_stat/final_redirect_url"
|
|
12
16
|
require "web_stat/categorize"
|
|
13
17
|
require "web_stat/configure"
|
|
14
18
|
require "web_stat/errors"
|
data/lib/web_stat/fetch.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module WebStat
|
|
2
2
|
class Fetch
|
|
3
|
-
attr_accessor :url, :html, :nokogiri, :userdic
|
|
3
|
+
attr_accessor :url, :html, :nokogiri, :userdic, :status
|
|
4
4
|
|
|
5
5
|
# Get title
|
|
6
6
|
# @return [String] title
|
|
@@ -13,9 +13,12 @@ module WebStat
|
|
|
13
13
|
rescue
|
|
14
14
|
title = @nokogiri.title
|
|
15
15
|
end
|
|
16
|
-
title.
|
|
16
|
+
if title.nil?
|
|
17
|
+
"No Title"
|
|
18
|
+
else
|
|
19
|
+
title.strip
|
|
20
|
+
end
|
|
17
21
|
end
|
|
18
|
-
|
|
19
22
|
# Get name of domain
|
|
20
23
|
def site_name
|
|
21
24
|
begin
|
|
@@ -23,9 +26,12 @@ module WebStat
|
|
|
23
26
|
rescue
|
|
24
27
|
site_name = @nokogiri.title
|
|
25
28
|
end
|
|
26
|
-
site_name.
|
|
29
|
+
if site_name.nil?
|
|
30
|
+
"No Sitename"
|
|
31
|
+
else
|
|
32
|
+
site_name.strip
|
|
33
|
+
end
|
|
27
34
|
end
|
|
28
|
-
[]
|
|
29
35
|
# Get main section
|
|
30
36
|
def content
|
|
31
37
|
Sanitize.clean(Readability::Document.new(@nokogiri.at('body')).content)
|
|
@@ -67,16 +73,24 @@ module WebStat
|
|
|
67
73
|
|
|
68
74
|
# Get url
|
|
69
75
|
# @param [String] url
|
|
76
|
+
# @param [String] body
|
|
70
77
|
def get_url(url)
|
|
71
78
|
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
|
72
79
|
# Enable to read Robots.txt
|
|
73
80
|
agent.robots = true
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
document.
|
|
77
|
-
|
|
78
|
-
|
|
81
|
+
begin
|
|
82
|
+
document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
|
83
|
+
if document.class == Mechanize::File
|
|
84
|
+
body = document.body
|
|
85
|
+
else
|
|
86
|
+
body = document.body.encode('UTF-8', document.encoding)
|
|
87
|
+
end
|
|
88
|
+
@status = document.code
|
|
89
|
+
rescue Mechanize::ResponseCodeError => e
|
|
90
|
+
body = e.page.body
|
|
91
|
+
@status = e.page.code
|
|
79
92
|
end
|
|
93
|
+
body
|
|
80
94
|
end
|
|
81
95
|
|
|
82
96
|
# Get the informations of @url
|
|
@@ -96,6 +110,7 @@ module WebStat
|
|
|
96
110
|
site_name: site_name,
|
|
97
111
|
content: clean_content,
|
|
98
112
|
language_code: language_code,
|
|
113
|
+
status: @status,
|
|
99
114
|
url: @url,
|
|
100
115
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
|
101
116
|
tags: tag.nouns
|
|
@@ -107,8 +122,9 @@ module WebStat
|
|
|
107
122
|
# Get original url
|
|
108
123
|
# @param [String] url
|
|
109
124
|
def original_url(url)
|
|
110
|
-
|
|
111
|
-
|
|
125
|
+
last_url = WebStat::FinalRedirectUrl.final_redirect_url(url)
|
|
126
|
+
unless last_url.nil? || last_url.scrub('').empty?
|
|
127
|
+
last_url
|
|
112
128
|
else
|
|
113
129
|
url
|
|
114
130
|
end
|
|
data/lib/web_stat/final_redirect_url.rb
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# ref) https://github.com/indyarocks/final_redirect_url
|
|
2
|
+
# customize
|
|
3
|
+
# Changed
|
|
4
|
+
module WebStat
|
|
5
|
+
class FinalRedirectUrl
|
|
6
|
+
class << self
|
|
7
|
+
def final_redirect_url(url, options={})
|
|
8
|
+
final_url = ''
|
|
9
|
+
if is_valid_url?(url)
|
|
10
|
+
begin
|
|
11
|
+
redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
|
|
12
|
+
response_uri = get_final_redirect_url(url, redirect_lookup_depth)
|
|
13
|
+
final_url = url_string_from_uri(response_uri)
|
|
14
|
+
rescue Exception => ex
|
|
15
|
+
# nothing
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
final_url
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
private
|
|
22
|
+
def is_valid_url?(url)
|
|
23
|
+
url.to_s.match? URI::regexp(['http', 'https'])
|
|
24
|
+
end
|
|
25
|
+
def get_final_redirect_url(url, limit = 10)
|
|
26
|
+
return url if limit <= 0
|
|
27
|
+
uri = URI.parse(url)
|
|
28
|
+
response = ::Net::HTTP.get_response(uri)
|
|
29
|
+
if response.class == Net::HTTPOK
|
|
30
|
+
if WebStat::Configure.get["use_chromedirver"]
|
|
31
|
+
return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
|
|
32
|
+
else
|
|
33
|
+
return URI.parse(uri)
|
|
34
|
+
end
|
|
35
|
+
else
|
|
36
|
+
redirect_location = response['location']
|
|
37
|
+
location_uri = URI.parse(redirect_location)
|
|
38
|
+
if location_uri.host.nil?
|
|
39
|
+
redirect_location = uri.scheme + '://' + uri.host + redirect_location
|
|
40
|
+
end
|
|
41
|
+
warn "redirected to #{redirect_location}"
|
|
42
|
+
get_final_redirect_url(redirect_location, limit - 1)
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
def url_string_from_uri(uri)
|
|
46
|
+
url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
|
|
47
|
+
if uri.fragment
|
|
48
|
+
url_str = url_str + "##{uri.fragment}"
|
|
49
|
+
end
|
|
50
|
+
url_str
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
data/lib/web_stat/tag.rb
CHANGED
data/lib/web_stat/version.rb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
module WebStat
|
|
2
|
-
VERSION = "0.2.9"
|
|
3
|
-
end
|
|
2
|
+
VERSION = "0.3.2"
|
|
3
|
+
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
require 'rspec/expectations'
|
|
2
2
|
require "bundler/setup"
|
|
3
3
|
require 'pry'
|
|
4
|
+
require 'pry-byebug'
|
|
4
5
|
require "web_stat"
|
|
5
6
|
|
|
6
7
|
require 'webmock'
|
|
7
8
|
include WebMock::API
|
|
8
9
|
WebMock.enable!
|
|
9
10
|
|
|
11
|
+
WebMock.disable_net_connect!({
|
|
12
|
+
allow_localhost: true,
|
|
13
|
+
allow: 'chromedriver.storage.googleapis.com'
|
|
14
|
+
})
|
|
15
|
+
|
|
10
16
|
RSpec.configure do |config|
|
|
11
17
|
# Enable flags like --only-failures and --next-failure
|
|
12
18
|
config.example_status_persistence_file_path = ".rspec_status"
|
|
@@ -59,9 +65,10 @@ end
|
|
|
59
65
|
|
|
60
66
|
# Set webmock
|
|
61
67
|
WebStatTestHelper.scheme_and_files.each do |url|
|
|
68
|
+
status = [200, 404, 503].sample
|
|
62
69
|
WebMock.stub_request(:get, url)
|
|
63
70
|
.to_return(
|
|
64
|
-
status:
|
|
71
|
+
status: status,
|
|
65
72
|
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
|
|
66
73
|
headers: {content_type: 'application/html; charset=utf-8'})
|
|
67
74
|
end
|
data/spec/web_stat/fetch_spec.rb
CHANGED
|
@@ -83,6 +83,7 @@ RSpec.describe WebStat::Fetch do
|
|
|
83
83
|
expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
|
|
84
84
|
expect(web_stat[:site_name]).to eq "newsdict.blog"
|
|
85
85
|
expect(web_stat[:content]).not_to eq nil
|
|
86
|
+
expect(web_stat[:status]).to eq("200").or eq("404").or eq("503")
|
|
86
87
|
expect(Sanitize.clean(web_stat[:content]).length).to eq web_stat[:content].length
|
|
87
88
|
expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
|
88
89
|
end
|
data/web_stat.gemspec
CHANGED
|
@@ -27,9 +27,11 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_runtime_dependency "natto", ">= 1.1.2"
|
|
28
28
|
spec.add_runtime_dependency "sanitize", ">= 5.0.0"
|
|
29
29
|
spec.add_runtime_dependency "cld", ">= 0.8.0"
|
|
30
|
+
spec.add_runtime_dependency "selenium-webdriver", "= 3.142.7"
|
|
30
31
|
|
|
31
32
|
spec.add_development_dependency "rake", ">= 10.0"
|
|
32
33
|
spec.add_development_dependency "rspec", ">= 3.0"
|
|
33
|
-
spec.add_development_dependency "pry", ">= 0.
|
|
34
|
+
spec.add_development_dependency "pry", ">= 0.13.1"
|
|
34
35
|
spec.add_development_dependency "webmock", ">= 3.6.0"
|
|
36
|
+
spec.add_development_dependency "pry-byebug", "3.9.0"
|
|
35
37
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: web_stat
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.9
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yusuke abe
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-06-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -108,6 +108,20 @@ dependencies:
|
|
|
108
108
|
- - ">="
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: 0.8.0
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: selenium-webdriver
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - '='
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 3.142.7
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - '='
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 3.142.7
|
|
111
125
|
- !ruby/object:Gem::Dependency
|
|
112
126
|
name: rake
|
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -142,14 +156,14 @@ dependencies:
|
|
|
142
156
|
requirements:
|
|
143
157
|
- - ">="
|
|
144
158
|
- !ruby/object:Gem::Version
|
|
145
|
-
version: 0.
|
|
159
|
+
version: 0.13.1
|
|
146
160
|
type: :development
|
|
147
161
|
prerelease: false
|
|
148
162
|
version_requirements: !ruby/object:Gem::Requirement
|
|
149
163
|
requirements:
|
|
150
164
|
- - ">="
|
|
151
165
|
- !ruby/object:Gem::Version
|
|
152
|
-
version: 0.
|
|
166
|
+
version: 0.13.1
|
|
153
167
|
- !ruby/object:Gem::Dependency
|
|
154
168
|
name: webmock
|
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -164,6 +178,20 @@ dependencies:
|
|
|
164
178
|
- - ">="
|
|
165
179
|
- !ruby/object:Gem::Version
|
|
166
180
|
version: 3.6.0
|
|
181
|
+
- !ruby/object:Gem::Dependency
|
|
182
|
+
name: pry-byebug
|
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
|
184
|
+
requirements:
|
|
185
|
+
- - '='
|
|
186
|
+
- !ruby/object:Gem::Version
|
|
187
|
+
version: 3.9.0
|
|
188
|
+
type: :development
|
|
189
|
+
prerelease: false
|
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
191
|
+
requirements:
|
|
192
|
+
- - '='
|
|
193
|
+
- !ruby/object:Gem::Version
|
|
194
|
+
version: 3.9.0
|
|
167
195
|
description: Fetch the web pages and stat.
|
|
168
196
|
email:
|
|
169
197
|
- yube@newsdict.jp
|
|
@@ -177,12 +205,17 @@ files:
|
|
|
177
205
|
- ".ruby-version"
|
|
178
206
|
- ".travis.yml"
|
|
179
207
|
- CODE_OF_CONDUCT.md
|
|
208
|
+
- Dockerfile
|
|
180
209
|
- Gemfile
|
|
181
210
|
- Gemfile.lock
|
|
182
211
|
- LICENSE.txt
|
|
183
212
|
- README.md
|
|
184
213
|
- Rakefile
|
|
185
214
|
- bin/fetch_as_html
|
|
215
|
+
- docker-compose.yml
|
|
216
|
+
- docker/exec
|
|
217
|
+
- docker/start
|
|
218
|
+
- lib/helpers/web_drive_helper.rb
|
|
186
219
|
- lib/web_stat.rb
|
|
187
220
|
- lib/web_stat/categorize.rb
|
|
188
221
|
- lib/web_stat/config/web_stat.yml
|
|
@@ -191,6 +224,7 @@ files:
|
|
|
191
224
|
- lib/web_stat/fetch.rb
|
|
192
225
|
- lib/web_stat/fetch/fetch_as_html.rb
|
|
193
226
|
- lib/web_stat/fetch/fetch_as_web.rb
|
|
227
|
+
- lib/web_stat/final_redirect_url.rb
|
|
194
228
|
- lib/web_stat/tag.rb
|
|
195
229
|
- lib/web_stat/tasks/install.rake
|
|
196
230
|
- lib/web_stat/version.rb
|