web_stat 0.2.11 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/Dockerfile +25 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +20 -16
- data/docker-compose.yml +18 -0
- data/docker/exec +8 -0
- data/docker/start +13 -0
- data/lib/helpers/web_drive_helper.rb +28 -0
- data/lib/web_stat.rb +6 -2
- data/lib/web_stat/fetch.rb +0 -2
- data/lib/web_stat/final_redirect_url.rb +50 -0
- data/lib/web_stat/tag.rb +1 -1
- data/lib/web_stat/version.rb +1 -1
- data/spec/spec_helper.rb +6 -0
- data/web_stat.gemspec +3 -1
- metadata +39 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f62b7ce2720dcd1851a3c0620af8da6e00313e1b5de918e8b1e23ceeeacee89
|
4
|
+
data.tar.gz: 1a52579415bf2a4f96efd39931ef41849751f693658f5cfbcd88202c680a446c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51b5d4fbc2e0969913cec3c1ae4b745cca8a790b4f9638d9a920d844840500424c69f18e7d3f58d2d870b8391e0817f06781699b302f36b2a68d81467608a867
|
7
|
+
data.tar.gz: 2d4ba4bfac9c5d5c76fcc48ba0202b7dbf223b4f42b7d6a649cce22c0742d6b7605c3386c93be2cb29487308714fc94fd2bf1063faf78bdcd5b7b6c5ab060d8b
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.7.
|
1
|
+
2.7.1
|
data/Dockerfile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Define base image, you can use --build-arg
|
2
|
+
ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.35.2_nodev14.3.0_rubyv2.7.1_sasscv2.3.0_ffiv1.13.1_chromedriver"
|
3
|
+
FROM $base_image
|
4
|
+
|
5
|
+
# Set locale
|
6
|
+
ENV LANG "C.UTF-8"
|
7
|
+
ENV NOKOGIRI_USE_SYSTEM_LIBRARIES "YES"
|
8
|
+
|
9
|
+
# Set correct environment variables.
|
10
|
+
RUN mkdir -p /var/www/docker
|
11
|
+
WORKDIR /var/www/docker
|
12
|
+
|
13
|
+
# Set up application
|
14
|
+
COPY . .
|
15
|
+
|
16
|
+
# Init gems
|
17
|
+
RUN echo "gem: --no-rdoc --no-ri" > ~/.gemrc
|
18
|
+
RUN . /etc/profile.d/rvm.sh && \
|
19
|
+
bundle config --global with 'development test' && \
|
20
|
+
bundle config --global system true && \
|
21
|
+
bundle config --global jobs 10 && \
|
22
|
+
bundle config --global build.nokogiri --use-system-libraries && \
|
23
|
+
bundle install
|
24
|
+
|
25
|
+
CMD ["bash"]
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,13 +1,7 @@
|
|
1
|
-
GIT
|
2
|
-
remote: git@github.com:yubele/final_redirect_url
|
3
|
-
revision: 45df878ec9495ebbfa06dc0a60cc5043c2519e16
|
4
|
-
specs:
|
5
|
-
final_redirect_url (0.1.1)
|
6
|
-
|
7
1
|
PATH
|
8
2
|
remote: .
|
9
3
|
specs:
|
10
|
-
web_stat (0.2.11)
|
4
|
+
web_stat (0.3.0)
|
11
5
|
bundler (>= 2.0.2)
|
12
6
|
cld (>= 0.8.0)
|
13
7
|
mechanize (>= 2.7)
|
@@ -15,23 +9,26 @@ PATH
|
|
15
9
|
nokogiri (>= 1.10.4)
|
16
10
|
ruby-readability (>= 0.7)
|
17
11
|
sanitize (>= 5.0.0)
|
12
|
+
selenium-webdriver (= 3.142.7)
|
18
13
|
|
19
14
|
GEM
|
20
15
|
remote: https://rubygems.org/
|
21
16
|
specs:
|
22
17
|
addressable (2.7.0)
|
23
18
|
public_suffix (>= 2.0.2, < 5.0)
|
19
|
+
byebug (11.1.3)
|
20
|
+
childprocess (3.0.0)
|
24
21
|
cld (0.8.0)
|
25
22
|
ffi
|
26
|
-
coderay (1.1.
|
27
|
-
connection_pool (2.2.
|
23
|
+
coderay (1.1.3)
|
24
|
+
connection_pool (2.2.3)
|
28
25
|
crack (0.4.3)
|
29
26
|
safe_yaml (~> 1.0.0)
|
30
27
|
crass (1.0.6)
|
31
28
|
diff-lcs (1.3)
|
32
29
|
domain_name (0.5.20190701)
|
33
30
|
unf (>= 0.0.5, < 1.0.0)
|
34
|
-
ffi (1.
|
31
|
+
ffi (1.13.1)
|
35
32
|
guess_html_encoding (0.0.11)
|
36
33
|
hashdiff (1.0.1)
|
37
34
|
http-cookie (1.0.3)
|
@@ -48,7 +45,7 @@ GEM
|
|
48
45
|
method_source (1.0.0)
|
49
46
|
mime-types (3.3.1)
|
50
47
|
mime-types-data (~> 3.2015)
|
51
|
-
mime-types-data (3.2020.
|
48
|
+
mime-types-data (3.2020.0512)
|
52
49
|
mini_portile2 (2.4.0)
|
53
50
|
natto (1.2.0)
|
54
51
|
ffi (>= 1.9.0)
|
@@ -63,7 +60,10 @@ GEM
|
|
63
60
|
pry (0.13.1)
|
64
61
|
coderay (~> 1.1)
|
65
62
|
method_source (~> 1.0)
|
66
|
-
|
63
|
+
pry-byebug (3.9.0)
|
64
|
+
byebug (~> 11.0)
|
65
|
+
pry (~> 0.13.0)
|
66
|
+
public_suffix (4.0.5)
|
67
67
|
rake (13.0.1)
|
68
68
|
rspec (3.9.0)
|
69
69
|
rspec-core (~> 3.9.0)
|
@@ -71,7 +71,7 @@ GEM
|
|
71
71
|
rspec-mocks (~> 3.9.0)
|
72
72
|
rspec-core (3.9.2)
|
73
73
|
rspec-support (~> 3.9.3)
|
74
|
-
rspec-expectations (3.9.
|
74
|
+
rspec-expectations (3.9.2)
|
75
75
|
diff-lcs (>= 1.2.0, < 2.0)
|
76
76
|
rspec-support (~> 3.9.0)
|
77
77
|
rspec-mocks (3.9.1)
|
@@ -81,11 +81,15 @@ GEM
|
|
81
81
|
ruby-readability (0.7.0)
|
82
82
|
guess_html_encoding (>= 0.0.4)
|
83
83
|
nokogiri (>= 1.6.0)
|
84
|
+
rubyzip (2.3.0)
|
84
85
|
safe_yaml (1.0.5)
|
85
|
-
sanitize (5.
|
86
|
+
sanitize (5.2.0)
|
86
87
|
crass (~> 1.0.2)
|
87
88
|
nokogiri (>= 1.8.0)
|
88
89
|
nokogumbo (~> 2.0)
|
90
|
+
selenium-webdriver (3.142.7)
|
91
|
+
childprocess (>= 0.5, < 4.0)
|
92
|
+
rubyzip (>= 1.2.2)
|
89
93
|
unf (0.1.4)
|
90
94
|
unf_ext
|
91
95
|
unf_ext (0.0.7.7)
|
@@ -99,8 +103,8 @@ PLATFORMS
|
|
99
103
|
ruby
|
100
104
|
|
101
105
|
DEPENDENCIES
|
102
|
-
|
103
|
-
pry (
|
106
|
+
pry (>= 0.13.1)
|
107
|
+
pry-byebug (= 3.9.0)
|
104
108
|
rake (>= 10.0)
|
105
109
|
rspec (>= 3.0)
|
106
110
|
web_stat!
|
data/docker-compose.yml
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
version: "3.8"
|
2
|
+
networks:
|
3
|
+
app-tier:
|
4
|
+
driver: bridge
|
5
|
+
services:
|
6
|
+
web_stat:
|
7
|
+
tty: true
|
8
|
+
stdin_open: true
|
9
|
+
container_name: web_stat
|
10
|
+
build:
|
11
|
+
context: .
|
12
|
+
dockerfile: Dockerfile
|
13
|
+
volumes:
|
14
|
+
- ./:/var/www/docker:cached
|
15
|
+
working_dir: /var/www/docker
|
16
|
+
command: bash
|
17
|
+
networks:
|
18
|
+
- app-tier
|
data/docker/exec
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
# Check to support tty.
|
3
|
+
if [ "$(tty>/dev/null;echo $?)" != "0" ];then
|
4
|
+
DOCKERCOMPOSE_EXEC="docker-compose exec -T"
|
5
|
+
else
|
6
|
+
DOCKERCOMPOSE_EXEC="docker-compose exec"
|
7
|
+
fi
|
8
|
+
$DOCKERCOMPOSE_EXEC web_stat /bin/bash -c ". /etc/profile.d/rvm.sh && . /root/.nvm/nvm.sh && $*"
|
data/docker/start
ADDED
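@@ -0,0 +1,13 @@
[diff body of data/docker/start was not captured in this extract]
data/lib/helpers/web_drive_helper.rb
ADDED
@@ -0,0 +1,28 @@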
|
|
1
|
+
class WebDriverHelper
|
2
|
+
class << self
|
3
|
+
# Get last url
|
4
|
+
# @param [String] url
|
5
|
+
# @param [Integer] delay
|
6
|
+
def get_last_url(url, delay=nil)
|
7
|
+
Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
|
8
|
+
Selenium::WebDriver.logger.level = :info
|
9
|
+
options = Selenium::WebDriver::Chrome::Options.new(args: [
|
10
|
+
'headless',
|
11
|
+
'no-sandbox',
|
12
|
+
'disable-gpu',
|
13
|
+
'start-maximized',
|
14
|
+
'window-size=1920,1080'
|
15
|
+
])
|
16
|
+
driver = Selenium::WebDriver.for(:chrome, options: options)
|
17
|
+
driver.manage.timeouts.implicit_wait = 10
|
18
|
+
Selenium::WebDriver::Wait.new(timeout: 10)
|
19
|
+
driver.get(url)
|
20
|
+
if delay.is_a?(Integer)
|
21
|
+
sleep delay
|
22
|
+
end
|
23
|
+
last_url = driver.current_url
|
24
|
+
driver.quit
|
25
|
+
last_url
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/web_stat.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require "bundler"
|
2
2
|
|
3
|
+
require 'cld'
|
3
4
|
require 'uri'
|
4
5
|
require 'digest'
|
6
|
+
require 'logger'
|
5
7
|
require 'sanitize'
|
6
8
|
require 'nokogiri'
|
7
9
|
require 'open-uri'
|
10
|
+
require 'net/http'
|
8
11
|
require 'ruby-readability'
|
9
|
-
require '
|
10
|
-
require 'cld'
|
12
|
+
require 'selenium-webdriver'
|
11
13
|
|
14
|
+
require "helpers/web_drive_helper"
|
15
|
+
require "web_stat/final_redirect_url"
|
12
16
|
require "web_stat/categorize"
|
13
17
|
require "web_stat/configure"
|
14
18
|
require "web_stat/errors"
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -19,7 +19,6 @@ module WebStat
|
|
19
19
|
title.strip
|
20
20
|
end
|
21
21
|
end
|
22
|
-
|
23
22
|
# Get name of domain
|
24
23
|
def site_name
|
25
24
|
begin
|
@@ -33,7 +32,6 @@ module WebStat
|
|
33
32
|
site_name.strip
|
34
33
|
end
|
35
34
|
end
|
36
|
-
[]
|
37
35
|
# Get main section
|
38
36
|
def content
|
39
37
|
Sanitize.clean(Readability::Document.new(@nokogiri.at('body')).content)
|
data/lib/web_stat/final_redirect_url.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# ref) https://github.com/indyarocks/final_redirect_url
|
2
|
+
# customize
|
3
|
+
# Changed
|
4
|
+
|
5
|
+
module FinalRedirectUrl
|
6
|
+
|
7
|
+
def self.final_redirect_url(url, options={})
|
8
|
+
final_url = ''
|
9
|
+
if is_valid_url?(url)
|
10
|
+
begin
|
11
|
+
redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
|
12
|
+
response_uri = get_final_redirect_url(url, redirect_lookup_depth)
|
13
|
+
final_url = url_string_from_uri(response_uri)
|
14
|
+
rescue Exception => ex
|
15
|
+
# nothing
|
16
|
+
end
|
17
|
+
end
|
18
|
+
final_url
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
def self.is_valid_url?(url)
|
23
|
+
url.to_s.match? URI::regexp(['http', 'https'])
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.get_final_redirect_url(url, limit = 10)
|
27
|
+
return url if limit <= 0
|
28
|
+
uri = URI.parse(url)
|
29
|
+
response = ::Net::HTTP.get_response(uri)
|
30
|
+
if response.class == Net::HTTPOK
|
31
|
+
return URI.parse(WebDriverHelper.get_last_url(uri))
|
32
|
+
else
|
33
|
+
redirect_location = response['location']
|
34
|
+
location_uri = URI.parse(redirect_location)
|
35
|
+
if location_uri.host.nil?
|
36
|
+
redirect_location = uri.scheme + '://' + uri.host + redirect_location
|
37
|
+
end
|
38
|
+
warn "redirected to #{redirect_location}"
|
39
|
+
get_final_redirect_url(redirect_location, limit - 1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.url_string_from_uri(uri)
|
44
|
+
url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
|
45
|
+
if uri.fragment
|
46
|
+
url_str = url_str + "##{uri.fragment}"
|
47
|
+
end
|
48
|
+
url_str
|
49
|
+
end
|
50
|
+
end
|
data/lib/web_stat/tag.rb
CHANGED
data/lib/web_stat/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
1
|
require 'rspec/expectations'
|
2
2
|
require "bundler/setup"
|
3
3
|
require 'pry'
|
4
|
+
require 'pry-byebug'
|
4
5
|
require "web_stat"
|
5
6
|
|
6
7
|
require 'webmock'
|
7
8
|
include WebMock::API
|
8
9
|
WebMock.enable!
|
9
10
|
|
11
|
+
WebMock.disable_net_connect!({
|
12
|
+
allow_localhost: true,
|
13
|
+
allow: 'chromedriver.storage.googleapis.com'
|
14
|
+
})
|
15
|
+
|
10
16
|
RSpec.configure do |config|
|
11
17
|
# Enable flags like --only-failures and --next-failure
|
12
18
|
config.example_status_persistence_file_path = ".rspec_status"
|
data/web_stat.gemspec
CHANGED
@@ -27,9 +27,11 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_runtime_dependency "natto", ">= 1.1.2"
|
28
28
|
spec.add_runtime_dependency "sanitize", ">= 5.0.0"
|
29
29
|
spec.add_runtime_dependency "cld", ">= 0.8.0"
|
30
|
+
spec.add_runtime_dependency "selenium-webdriver", "= 3.142.7"
|
30
31
|
|
31
32
|
spec.add_development_dependency "rake", ">= 10.0"
|
32
33
|
spec.add_development_dependency "rspec", ">= 3.0"
|
33
|
-
spec.add_development_dependency "pry", ">= 0.
|
34
|
+
spec.add_development_dependency "pry", ">= 0.13.1"
|
34
35
|
spec.add_development_dependency "webmock", ">= 3.6.0"
|
36
|
+
spec.add_development_dependency "pry-byebug", "3.9.0"
|
35
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.11
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 0.8.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: selenium-webdriver
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 3.142.7
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 3.142.7
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: rake
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +156,14 @@ dependencies:
|
|
142
156
|
requirements:
|
143
157
|
- - ">="
|
144
158
|
- !ruby/object:Gem::Version
|
145
|
-
version: 0.
|
159
|
+
version: 0.13.1
|
146
160
|
type: :development
|
147
161
|
prerelease: false
|
148
162
|
version_requirements: !ruby/object:Gem::Requirement
|
149
163
|
requirements:
|
150
164
|
- - ">="
|
151
165
|
- !ruby/object:Gem::Version
|
152
|
-
version: 0.
|
166
|
+
version: 0.13.1
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: webmock
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +178,20 @@ dependencies:
|
|
164
178
|
- - ">="
|
165
179
|
- !ruby/object:Gem::Version
|
166
180
|
version: 3.6.0
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: pry-byebug
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - '='
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: 3.9.0
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - '='
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: 3.9.0
|
167
195
|
description: Fetch the web pages and stat.
|
168
196
|
email:
|
169
197
|
- yube@newsdict.jp
|
@@ -177,12 +205,17 @@ files:
|
|
177
205
|
- ".ruby-version"
|
178
206
|
- ".travis.yml"
|
179
207
|
- CODE_OF_CONDUCT.md
|
208
|
+
- Dockerfile
|
180
209
|
- Gemfile
|
181
210
|
- Gemfile.lock
|
182
211
|
- LICENSE.txt
|
183
212
|
- README.md
|
184
213
|
- Rakefile
|
185
214
|
- bin/fetch_as_html
|
215
|
+
- docker-compose.yml
|
216
|
+
- docker/exec
|
217
|
+
- docker/start
|
218
|
+
- lib/helpers/web_drive_helper.rb
|
186
219
|
- lib/web_stat.rb
|
187
220
|
- lib/web_stat/categorize.rb
|
188
221
|
- lib/web_stat/config/web_stat.yml
|
@@ -191,6 +224,7 @@ files:
|
|
191
224
|
- lib/web_stat/fetch.rb
|
192
225
|
- lib/web_stat/fetch/fetch_as_html.rb
|
193
226
|
- lib/web_stat/fetch/fetch_as_web.rb
|
227
|
+
- lib/web_stat/final_redirect_url.rb
|
194
228
|
- lib/web_stat/tag.rb
|
195
229
|
- lib/web_stat/tasks/install.rake
|
196
230
|
- lib/web_stat/version.rb
|
@@ -224,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
224
258
|
- !ruby/object:Gem::Version
|
225
259
|
version: '0'
|
226
260
|
requirements: []
|
227
|
-
rubygems_version: 3.
|
261
|
+
rubygems_version: 3.1.2
|
228
262
|
signing_key:
|
229
263
|
specification_version: 4
|
230
264
|
summary: Get the status of the web pages.
|