web_stat 0.3.8 → 0.3.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +4 -6
- data/lib/helpers/web_drive_helper.rb +21 -5
- data/lib/web_stat/configure.rb +1 -1
- data/lib/web_stat/fetch.rb +7 -3
- data/lib/web_stat/final_redirect_url.rb +1 -1
- data/lib/web_stat/version.rb +1 -1
- data/spec/spec_helper.rb +1 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6021c6bacb394385b1e5634449f4a3adfb27a5c2d799f9d0874084889a3ac66c
|
4
|
+
data.tar.gz: d35180f95d0937b69df93dc5ba051edb8eeb79b52bd6df0e8770c849bc33c511
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 812aa33c7a4d8642b1239d05b8a0f2fa6bc938379f48e998732d71dcdd40c5c15d04b2795dde6b7e9cee0f439a12ad5b7259bc58fe741076106108bbc6c16d90
|
7
|
+
data.tar.gz: 3bf21e030a43868ccee4c45fe1e8993c6375b85f05d74ec3b11d68b97fe10a09194e186cfd942c3fe03f9ad2ce009e5ed15edefb06d21830180f9b04bd2fd8ea
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
web_stat (0.3.
|
4
|
+
web_stat (0.3.12)
|
5
5
|
bundler (>= 2.0.2)
|
6
6
|
cld (>= 0.8.0)
|
7
7
|
mechanize (>= 2.7)
|
@@ -83,7 +83,7 @@ GEM
|
|
83
83
|
nokogiri (>= 1.6.0)
|
84
84
|
rubyzip (2.3.0)
|
85
85
|
safe_yaml (1.0.5)
|
86
|
-
sanitize (5.2.
|
86
|
+
sanitize (5.2.0)
|
87
87
|
crass (~> 1.0.2)
|
88
88
|
nokogiri (>= 1.8.0)
|
89
89
|
nokogumbo (~> 2.0)
|
data/README.md
CHANGED
@@ -55,12 +55,10 @@ And then execute:
|
|
55
55
|
|
56
56
|
### spec
|
57
57
|
|
58
|
-
$
|
59
|
-
|
60
|
-
or
|
61
|
-
|
62
|
-
$ bundle exec rspec
|
58
|
+
$ docker/start -d
|
59
|
+
$ docker/exec ENV=development bundle exec rspec
|
63
60
|
|
64
61
|
Test a file
|
65
62
|
|
66
|
-
$
|
63
|
+
$ docker/start -d
|
64
|
+
$ docker/exec ENV=development bundle exec rspec spec/web_stat/fetch_spec.rb
|
data/lib/helpers/web_drive_helper.rb
CHANGED
@@ -5,14 +5,29 @@ module WebStat
|
|
5
5
|
# @param [String] url
|
6
6
|
# @param [Integer] delay
|
7
7
|
def get_last_url(url, delay=nil)
|
8
|
+
driver = get_driver(url, delay)
|
9
|
+
last_url = driver.current_url
|
10
|
+
driver.quit
|
11
|
+
last_url
|
12
|
+
end
|
13
|
+
# Get source of html
|
14
|
+
# @param [String] url
|
15
|
+
# @param [Integer] delay
|
16
|
+
def get_source(url, delay=nil)
|
17
|
+
driver = get_driver(url, delay)
|
18
|
+
source = driver.page_source
|
19
|
+
driver.quit
|
20
|
+
source
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
def get_driver(url, delay=nil)
|
8
25
|
Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
|
9
26
|
Selenium::WebDriver.logger.level = :info
|
10
27
|
options = Selenium::WebDriver::Chrome::Options.new(args: [
|
11
28
|
'headless',
|
12
29
|
'no-sandbox',
|
13
|
-
'disable-gpu'
|
14
|
-
'start-maximized',
|
15
|
-
'window-size=1920,1080'
|
30
|
+
'disable-gpu'
|
16
31
|
])
|
17
32
|
driver = Selenium::WebDriver.for(:chrome, options: options)
|
18
33
|
driver.manage.timeouts.implicit_wait = 10
|
@@ -21,9 +36,10 @@ module WebStat
|
|
21
36
|
if delay.is_a?(Integer)
|
22
37
|
sleep delay
|
23
38
|
end
|
24
|
-
|
39
|
+
driver
|
40
|
+
rescue => e
|
25
41
|
driver.quit
|
26
|
-
|
42
|
+
raise e
|
27
43
|
end
|
28
44
|
end
|
29
45
|
end
|
data/lib/web_stat/configure.rb
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
@@ -47,8 +47,12 @@ module WebStat
|
|
47
47
|
break
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
51
|
-
|
50
|
+
readability_content = Readability::Document.new(@nokogiri.at('body')).content
|
51
|
+
if (path.nil? || path.empty?) && readability_content.xpath('//img').first
|
52
|
+
path = ::Nokogiri::HTML(readability_content).xpath('//img').first.attr('src')
|
53
|
+
end
|
54
|
+
if (path.nil? || path.empty?) && @nokogiri.xpath('//img').first
|
55
|
+
path = @nokogiri.xpath('//img').first.attr('src')
|
52
56
|
end
|
53
57
|
if ! path.nil? && path.match(/^\//)
|
54
58
|
"#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
@@ -86,7 +90,7 @@ module WebStat
|
|
86
90
|
raise Mechanize::RobotsDisallowedError.new(url)
|
87
91
|
end
|
88
92
|
if WebStat::Configure.get["use_chromedirver"]
|
89
|
-
|
93
|
+
body = WebStat::WebDriverHelper.get_source(url)
|
90
94
|
@status = 200
|
91
95
|
else
|
92
96
|
document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
data/lib/web_stat/final_redirect_url.rb
CHANGED
@@ -11,7 +11,7 @@ module WebStat
|
|
11
11
|
redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
|
12
12
|
response_uri = get_final_redirect_url(url, redirect_lookup_depth)
|
13
13
|
final_url = url_string_from_uri(response_uri)
|
14
|
-
rescue
|
14
|
+
rescue => e
|
15
15
|
# nothing
|
16
16
|
end
|
17
17
|
end
|
data/lib/web_stat/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -7,12 +7,7 @@ require "web_stat"
|
|
7
7
|
require 'webmock'
|
8
8
|
include WebMock::API
|
9
9
|
WebMock.enable!
|
10
|
-
|
11
|
-
WebMock.disable_net_connect!({
|
12
|
-
allow_localhost: true,
|
13
|
-
allow: 'chromedriver.storage.googleapis.com'
|
14
|
-
})
|
15
|
-
|
10
|
+
|
16
11
|
RSpec.configure do |config|
|
17
12
|
# Enable flags like --only-failures and --next-failure
|
18
13
|
config.example_status_persistence_file_path = ".rspec_status"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|