web_stat 0.2.11 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/Dockerfile +25 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +20 -16
- data/docker-compose.yml +18 -0
- data/docker/exec +8 -0
- data/docker/start +13 -0
- data/lib/helpers/web_drive_helper.rb +28 -0
- data/lib/web_stat.rb +6 -2
- data/lib/web_stat/fetch.rb +0 -2
- data/lib/web_stat/final_redirect_url.rb +50 -0
- data/lib/web_stat/tag.rb +1 -1
- data/lib/web_stat/version.rb +1 -1
- data/spec/spec_helper.rb +6 -0
- data/web_stat.gemspec +3 -1
- metadata +39 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f62b7ce2720dcd1851a3c0620af8da6e00313e1b5de918e8b1e23ceeeacee89
|
4
|
+
data.tar.gz: 1a52579415bf2a4f96efd39931ef41849751f693658f5cfbcd88202c680a446c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51b5d4fbc2e0969913cec3c1ae4b745cca8a790b4f9638d9a920d844840500424c69f18e7d3f58d2d870b8391e0817f06781699b302f36b2a68d81467608a867
|
7
|
+
data.tar.gz: 2d4ba4bfac9c5d5c76fcc48ba0202b7dbf223b4f42b7d6a649cce22c0742d6b7605c3386c93be2cb29487308714fc94fd2bf1063faf78bdcd5b7b6c5ab060d8b
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.7.
|
1
|
+
2.7.1
|
data/Dockerfile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Define base image, you can use --build-arg
|
2
|
+
ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.35.2_nodev14.3.0_rubyv2.7.1_sasscv2.3.0_ffiv1.13.1_chromedriver"
|
3
|
+
FROM $base_image
|
4
|
+
|
5
|
+
# Set locale
|
6
|
+
ENV LANG "C.UTF-8"
|
7
|
+
ENV NOKOGIRI_USE_SYSTEM_LIBRARIES "YES"
|
8
|
+
|
9
|
+
# Set correct environment variables.
|
10
|
+
RUN mkdir -p /var/www/docker
|
11
|
+
WORKDIR /var/www/docker
|
12
|
+
|
13
|
+
# Set up application
|
14
|
+
COPY . .
|
15
|
+
|
16
|
+
# Init gems
|
17
|
+
RUN echo "gem: --no-rdoc --no-ri" > ~/.gemrc
|
18
|
+
RUN . /etc/profile.d/rvm.sh && \
|
19
|
+
bundle config --global with 'development test' && \
|
20
|
+
bundle config --global system true && \
|
21
|
+
bundle config --global jobs 10 && \
|
22
|
+
bundle config --global build.nokogiri --use-system-libraries && \
|
23
|
+
bundle install
|
24
|
+
|
25
|
+
CMD ["bash"]
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,13 +1,7 @@
|
|
1
|
-
GIT
|
2
|
-
remote: git@github.com:yubele/final_redirect_url
|
3
|
-
revision: 45df878ec9495ebbfa06dc0a60cc5043c2519e16
|
4
|
-
specs:
|
5
|
-
final_redirect_url (0.1.1)
|
6
|
-
|
7
1
|
PATH
|
8
2
|
remote: .
|
9
3
|
specs:
|
10
|
-
web_stat (0.
|
4
|
+
web_stat (0.3.0)
|
11
5
|
bundler (>= 2.0.2)
|
12
6
|
cld (>= 0.8.0)
|
13
7
|
mechanize (>= 2.7)
|
@@ -15,23 +9,26 @@ PATH
|
|
15
9
|
nokogiri (>= 1.10.4)
|
16
10
|
ruby-readability (>= 0.7)
|
17
11
|
sanitize (>= 5.0.0)
|
12
|
+
selenium-webdriver (= 3.142.7)
|
18
13
|
|
19
14
|
GEM
|
20
15
|
remote: https://rubygems.org/
|
21
16
|
specs:
|
22
17
|
addressable (2.7.0)
|
23
18
|
public_suffix (>= 2.0.2, < 5.0)
|
19
|
+
byebug (11.1.3)
|
20
|
+
childprocess (3.0.0)
|
24
21
|
cld (0.8.0)
|
25
22
|
ffi
|
26
|
-
coderay (1.1.
|
27
|
-
connection_pool (2.2.
|
23
|
+
coderay (1.1.3)
|
24
|
+
connection_pool (2.2.3)
|
28
25
|
crack (0.4.3)
|
29
26
|
safe_yaml (~> 1.0.0)
|
30
27
|
crass (1.0.6)
|
31
28
|
diff-lcs (1.3)
|
32
29
|
domain_name (0.5.20190701)
|
33
30
|
unf (>= 0.0.5, < 1.0.0)
|
34
|
-
ffi (1.
|
31
|
+
ffi (1.13.1)
|
35
32
|
guess_html_encoding (0.0.11)
|
36
33
|
hashdiff (1.0.1)
|
37
34
|
http-cookie (1.0.3)
|
@@ -48,7 +45,7 @@ GEM
|
|
48
45
|
method_source (1.0.0)
|
49
46
|
mime-types (3.3.1)
|
50
47
|
mime-types-data (~> 3.2015)
|
51
|
-
mime-types-data (3.2020.
|
48
|
+
mime-types-data (3.2020.0512)
|
52
49
|
mini_portile2 (2.4.0)
|
53
50
|
natto (1.2.0)
|
54
51
|
ffi (>= 1.9.0)
|
@@ -63,7 +60,10 @@ GEM
|
|
63
60
|
pry (0.13.1)
|
64
61
|
coderay (~> 1.1)
|
65
62
|
method_source (~> 1.0)
|
66
|
-
|
63
|
+
pry-byebug (3.9.0)
|
64
|
+
byebug (~> 11.0)
|
65
|
+
pry (~> 0.13.0)
|
66
|
+
public_suffix (4.0.5)
|
67
67
|
rake (13.0.1)
|
68
68
|
rspec (3.9.0)
|
69
69
|
rspec-core (~> 3.9.0)
|
@@ -71,7 +71,7 @@ GEM
|
|
71
71
|
rspec-mocks (~> 3.9.0)
|
72
72
|
rspec-core (3.9.2)
|
73
73
|
rspec-support (~> 3.9.3)
|
74
|
-
rspec-expectations (3.9.
|
74
|
+
rspec-expectations (3.9.2)
|
75
75
|
diff-lcs (>= 1.2.0, < 2.0)
|
76
76
|
rspec-support (~> 3.9.0)
|
77
77
|
rspec-mocks (3.9.1)
|
@@ -81,11 +81,15 @@ GEM
|
|
81
81
|
ruby-readability (0.7.0)
|
82
82
|
guess_html_encoding (>= 0.0.4)
|
83
83
|
nokogiri (>= 1.6.0)
|
84
|
+
rubyzip (2.3.0)
|
84
85
|
safe_yaml (1.0.5)
|
85
|
-
sanitize (5.
|
86
|
+
sanitize (5.2.0)
|
86
87
|
crass (~> 1.0.2)
|
87
88
|
nokogiri (>= 1.8.0)
|
88
89
|
nokogumbo (~> 2.0)
|
90
|
+
selenium-webdriver (3.142.7)
|
91
|
+
childprocess (>= 0.5, < 4.0)
|
92
|
+
rubyzip (>= 1.2.2)
|
89
93
|
unf (0.1.4)
|
90
94
|
unf_ext
|
91
95
|
unf_ext (0.0.7.7)
|
@@ -99,8 +103,8 @@ PLATFORMS
|
|
99
103
|
ruby
|
100
104
|
|
101
105
|
DEPENDENCIES
|
102
|
-
|
103
|
-
pry (
|
106
|
+
pry (>= 0.13.1)
|
107
|
+
pry-byebug (= 3.9.0)
|
104
108
|
rake (>= 10.0)
|
105
109
|
rspec (>= 3.0)
|
106
110
|
web_stat!
|
data/docker-compose.yml
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
version: "3.8"
|
2
|
+
networks:
|
3
|
+
app-tier:
|
4
|
+
driver: bridge
|
5
|
+
services:
|
6
|
+
web_stat:
|
7
|
+
tty: true
|
8
|
+
stdin_open: true
|
9
|
+
container_name: web_stat
|
10
|
+
build:
|
11
|
+
context: .
|
12
|
+
dockerfile: Dockerfile
|
13
|
+
volumes:
|
14
|
+
- ./:/var/www/docker:cached
|
15
|
+
working_dir: /var/www/docker
|
16
|
+
command: bash
|
17
|
+
networks:
|
18
|
+
- app-tier
|
data/docker/exec
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
# Check to support tty.
|
3
|
+
if [ "$(tty>/dev/null;echo $?)" != "0" ];then
|
4
|
+
DOCKERCOMPOSE_EXEC="docker-compose exec -T"
|
5
|
+
else
|
6
|
+
DOCKERCOMPOSE_EXEC="docker-compose exec"
|
7
|
+
fi
|
8
|
+
$DOCKERCOMPOSE_EXEC web_stat /bin/bash -c ". /etc/profile.d/rvm.sh && . /root/.nvm/nvm.sh && $*"
|
data/docker/start
ADDED
data/lib/helpers/web_drive_helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
class WebDriverHelper
|
2
|
+
class << self
|
3
|
+
# Get last url
|
4
|
+
# @param [String] url
|
5
|
+
# @param [Integer] delay
|
6
|
+
def get_last_url(url, delay=nil)
|
7
|
+
Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
|
8
|
+
Selenium::WebDriver.logger.level = :info
|
9
|
+
options = Selenium::WebDriver::Chrome::Options.new(args: [
|
10
|
+
'headless',
|
11
|
+
'no-sandbox',
|
12
|
+
'disable-gpu',
|
13
|
+
'start-maximized',
|
14
|
+
'window-size=1920,1080'
|
15
|
+
])
|
16
|
+
driver = Selenium::WebDriver.for(:chrome, options: options)
|
17
|
+
driver.manage.timeouts.implicit_wait = 10
|
18
|
+
Selenium::WebDriver::Wait.new(timeout: 10)
|
19
|
+
driver.get(url)
|
20
|
+
if delay.is_a?(Integer)
|
21
|
+
sleep delay
|
22
|
+
end
|
23
|
+
last_url = driver.current_url
|
24
|
+
driver.quit
|
25
|
+
last_url
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/web_stat.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require "bundler"
|
2
2
|
|
3
|
+
require 'cld'
|
3
4
|
require 'uri'
|
4
5
|
require 'digest'
|
6
|
+
require 'logger'
|
5
7
|
require 'sanitize'
|
6
8
|
require 'nokogiri'
|
7
9
|
require 'open-uri'
|
10
|
+
require 'net/http'
|
8
11
|
require 'ruby-readability'
|
9
|
-
require '
|
10
|
-
require 'cld'
|
12
|
+
require 'selenium-webdriver'
|
11
13
|
|
14
|
+
require "helpers/web_drive_helper"
|
15
|
+
require "web_stat/final_redirect_url"
|
12
16
|
require "web_stat/categorize"
|
13
17
|
require "web_stat/configure"
|
14
18
|
require "web_stat/errors"
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -19,7 +19,6 @@ module WebStat
|
|
19
19
|
title.strip
|
20
20
|
end
|
21
21
|
end
|
22
|
-
|
23
22
|
# Get name of domain
|
24
23
|
def site_name
|
25
24
|
begin
|
@@ -33,7 +32,6 @@ module WebStat
|
|
33
32
|
site_name.strip
|
34
33
|
end
|
35
34
|
end
|
36
|
-
[]
|
37
35
|
# Get main section
|
38
36
|
def content
|
39
37
|
Sanitize.clean(Readability::Document.new(@nokogiri.at('body')).content)
|
data/lib/web_stat/final_redirect_url.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# ref) https://github.com/indyarocks/final_redirect_url
|
2
|
+
# customize
|
3
|
+
# Changed
|
4
|
+
|
5
|
+
module FinalRedirectUrl
|
6
|
+
|
7
|
+
def self.final_redirect_url(url, options={})
|
8
|
+
final_url = ''
|
9
|
+
if is_valid_url?(url)
|
10
|
+
begin
|
11
|
+
redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
|
12
|
+
response_uri = get_final_redirect_url(url, redirect_lookup_depth)
|
13
|
+
final_url = url_string_from_uri(response_uri)
|
14
|
+
rescue Exception => ex
|
15
|
+
# nothing
|
16
|
+
end
|
17
|
+
end
|
18
|
+
final_url
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
def self.is_valid_url?(url)
|
23
|
+
url.to_s.match? URI::regexp(['http', 'https'])
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.get_final_redirect_url(url, limit = 10)
|
27
|
+
return url if limit <= 0
|
28
|
+
uri = URI.parse(url)
|
29
|
+
response = ::Net::HTTP.get_response(uri)
|
30
|
+
if response.class == Net::HTTPOK
|
31
|
+
return URI.parse(WebDriverHelper.get_last_url(uri))
|
32
|
+
else
|
33
|
+
redirect_location = response['location']
|
34
|
+
location_uri = URI.parse(redirect_location)
|
35
|
+
if location_uri.host.nil?
|
36
|
+
redirect_location = uri.scheme + '://' + uri.host + redirect_location
|
37
|
+
end
|
38
|
+
warn "redirected to #{redirect_location}"
|
39
|
+
get_final_redirect_url(redirect_location, limit - 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.url_string_from_uri(uri)
|
44
|
+
url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
|
45
|
+
if uri.fragment
|
46
|
+
url_str = url_str + "##{uri.fragment}"
|
47
|
+
end
|
48
|
+
url_str
|
49
|
+
end
|
50
|
+
end
|
data/lib/web_stat/tag.rb
CHANGED
data/lib/web_stat/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
1
|
require 'rspec/expectations'
|
2
2
|
require "bundler/setup"
|
3
3
|
require 'pry'
|
4
|
+
require 'pry-byebug'
|
4
5
|
require "web_stat"
|
5
6
|
|
6
7
|
require 'webmock'
|
7
8
|
include WebMock::API
|
8
9
|
WebMock.enable!
|
9
10
|
|
11
|
+
WebMock.disable_net_connect!({
|
12
|
+
allow_localhost: true,
|
13
|
+
allow: 'chromedriver.storage.googleapis.com'
|
14
|
+
})
|
15
|
+
|
10
16
|
RSpec.configure do |config|
|
11
17
|
# Enable flags like --only-failures and --next-failure
|
12
18
|
config.example_status_persistence_file_path = ".rspec_status"
|
data/web_stat.gemspec
CHANGED
@@ -27,9 +27,11 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_runtime_dependency "natto", ">= 1.1.2"
|
28
28
|
spec.add_runtime_dependency "sanitize", ">= 5.0.0"
|
29
29
|
spec.add_runtime_dependency "cld", ">= 0.8.0"
|
30
|
+
spec.add_runtime_dependency "selenium-webdriver", "= 3.142.7"
|
30
31
|
|
31
32
|
spec.add_development_dependency "rake", ">= 10.0"
|
32
33
|
spec.add_development_dependency "rspec", ">= 3.0"
|
33
|
-
spec.add_development_dependency "pry", ">= 0.
|
34
|
+
spec.add_development_dependency "pry", ">= 0.13.1"
|
34
35
|
spec.add_development_dependency "webmock", ">= 3.6.0"
|
36
|
+
spec.add_development_dependency "pry-byebug", "3.9.0"
|
35
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 0.8.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: selenium-webdriver
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 3.142.7
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 3.142.7
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: rake
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +156,14 @@ dependencies:
|
|
142
156
|
requirements:
|
143
157
|
- - ">="
|
144
158
|
- !ruby/object:Gem::Version
|
145
|
-
version: 0.
|
159
|
+
version: 0.13.1
|
146
160
|
type: :development
|
147
161
|
prerelease: false
|
148
162
|
version_requirements: !ruby/object:Gem::Requirement
|
149
163
|
requirements:
|
150
164
|
- - ">="
|
151
165
|
- !ruby/object:Gem::Version
|
152
|
-
version: 0.
|
166
|
+
version: 0.13.1
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: webmock
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +178,20 @@ dependencies:
|
|
164
178
|
- - ">="
|
165
179
|
- !ruby/object:Gem::Version
|
166
180
|
version: 3.6.0
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: pry-byebug
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - '='
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 3.9.0
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - '='
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: 3.9.0
|
167
195
|
description: Fetch the web pages and stat.
|
168
196
|
email:
|
169
197
|
- yube@newsdict.jp
|
@@ -177,12 +205,17 @@ files:
|
|
177
205
|
- ".ruby-version"
|
178
206
|
- ".travis.yml"
|
179
207
|
- CODE_OF_CONDUCT.md
|
208
|
+
- Dockerfile
|
180
209
|
- Gemfile
|
181
210
|
- Gemfile.lock
|
182
211
|
- LICENSE.txt
|
183
212
|
- README.md
|
184
213
|
- Rakefile
|
185
214
|
- bin/fetch_as_html
|
215
|
+
- docker-compose.yml
|
216
|
+
- docker/exec
|
217
|
+
- docker/start
|
218
|
+
- lib/helpers/web_drive_helper.rb
|
186
219
|
- lib/web_stat.rb
|
187
220
|
- lib/web_stat/categorize.rb
|
188
221
|
- lib/web_stat/config/web_stat.yml
|
@@ -191,6 +224,7 @@ files:
|
|
191
224
|
- lib/web_stat/fetch.rb
|
192
225
|
- lib/web_stat/fetch/fetch_as_html.rb
|
193
226
|
- lib/web_stat/fetch/fetch_as_web.rb
|
227
|
+
- lib/web_stat/final_redirect_url.rb
|
194
228
|
- lib/web_stat/tag.rb
|
195
229
|
- lib/web_stat/tasks/install.rake
|
196
230
|
- lib/web_stat/version.rb
|
@@ -224,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
224
258
|
- !ruby/object:Gem::Version
|
225
259
|
version: '0'
|
226
260
|
requirements: []
|
227
|
-
rubygems_version: 3.
|
261
|
+
rubygems_version: 3.1.2
|
228
262
|
signing_key:
|
229
263
|
specification_version: 4
|
230
264
|
summary: Get the status of the web pages.
|