web_stat 0.3.8 → 0.3.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bbf065f5482ee7bf68e14e41af1e48b855f4219b9f44a7e16440834c92f404d5
4
- data.tar.gz: a16fc8b8a66cb534bb8ce61c6da28212faff44f2df6324772261f9b6847cfc7f
3
+ metadata.gz: 6021c6bacb394385b1e5634449f4a3adfb27a5c2d799f9d0874084889a3ac66c
4
+ data.tar.gz: d35180f95d0937b69df93dc5ba051edb8eeb79b52bd6df0e8770c849bc33c511
5
5
  SHA512:
6
- metadata.gz: 83b8b7d71a1047ec2614a9bfbc3c6305806cf7a616ba6b2568382b72ef91f5b0947e7aefb3d926b33c4970d291d2a34c99a86ba0f0e320a3e7fde9dd2b5bc154
7
- data.tar.gz: aa4af6063d7dd81c7c78c0100ff5354d7c54f28d7b2fac7393656cb5bde932b7260c95d1fe08fc560784e85ead7bbf49134c4d56cb108cef522cef753e477032
6
+ metadata.gz: 812aa33c7a4d8642b1239d05b8a0f2fa6bc938379f48e998732d71dcdd40c5c15d04b2795dde6b7e9cee0f439a12ad5b7259bc58fe741076106108bbc6c16d90
7
+ data.tar.gz: 3bf21e030a43868ccee4c45fe1e8993c6375b85f05d74ec3b11d68b97fe10a09194e186cfd942c3fe03f9ad2ce009e5ed15edefb06d21830180f9b04bd2fd8ea
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.7)
4
+ web_stat (0.3.12)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -83,7 +83,7 @@ GEM
83
83
  nokogiri (>= 1.6.0)
84
84
  rubyzip (2.3.0)
85
85
  safe_yaml (1.0.5)
86
- sanitize (5.2.1)
86
+ sanitize (5.2.0)
87
87
  crass (~> 1.0.2)
88
88
  nokogiri (>= 1.8.0)
89
89
  nokogumbo (~> 2.0)
data/README.md CHANGED
@@ -55,12 +55,10 @@ And then execute:
55
55
 
56
56
  ### spec
57
57
 
58
- $ bundle exec rake spec
59
-
60
- or
61
-
62
- $ bundle exec rspec
58
+ $ docker/start -d
59
+ $ docker/exec ENV=development bundle exec rspec
63
60
 
64
61
  Test a file
65
62
 
66
- $ bundle exec rspec spec/web_stat/fetch_spec.rb
63
+ $ docker/start -d
64
+ $ docker/exec ENV=development bundle exec rspec spec/web_stat/fetch_spec.rb
@@ -5,14 +5,29 @@ module WebStat
5
5
  # @param [String] url
6
6
  # @param [Integer] delay
7
7
  def get_last_url(url, delay=nil)
8
+ driver = get_driver(url, delay)
9
+ last_url = driver.current_url
10
+ driver.quit
11
+ last_url
12
+ end
13
+ # Get source of html
14
+ # @param [String] url
15
+ # @param [Integer] delay
16
+ def get_source(url, delay=nil)
17
+ driver = get_driver(url, delay)
18
+ source = driver.page_source
19
+ driver.quit
20
+ source
21
+ end
22
+
23
+ private
24
+ def get_driver(url, delay=nil)
8
25
  Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
9
26
  Selenium::WebDriver.logger.level = :info
10
27
  options = Selenium::WebDriver::Chrome::Options.new(args: [
11
28
  'headless',
12
29
  'no-sandbox',
13
- 'disable-gpu',
14
- 'start-maximized',
15
- 'window-size=1920,1080'
30
+ 'disable-gpu'
16
31
  ])
17
32
  driver = Selenium::WebDriver.for(:chrome, options: options)
18
33
  driver.manage.timeouts.implicit_wait = 10
@@ -21,9 +36,10 @@ module WebStat
21
36
  if delay.is_a?(Integer)
22
37
  sleep delay
23
38
  end
24
- last_url = driver.current_url
39
+ driver
40
+ rescue => e
25
41
  driver.quit
26
- last_url
42
+ raise e
27
43
  end
28
44
  end
29
45
  end
@@ -9,7 +9,7 @@ module WebStat
9
9
  if defined? Rails
10
10
  YAML.load_file(get_configure_path)[Rails.env]
11
11
  else
12
- YAML.load_file(get_configure_path)["production"]
12
+ YAML.load_file(get_configure_path)[ENV["ENV"] || "production"]
13
13
  end
14
14
  end
15
15
 
@@ -47,8 +47,12 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
- if path.nil? || path.empty? || @nokogiri.at('body').xpath('//img').first
51
- path = @nokogiri.at('body').xpath('//img').first.attr('src')
50
+ readability_content = Readability::Document.new(@nokogiri.at('body')).content
51
+ if (path.nil? || path.empty?) && readability_content.xpath('//img').first
52
+ path = ::Nokogiri::HTML(readability_content).xpath('//img').first.attr('src')
53
+ end
54
+ if (path.nil? || path.empty?) && @nokogiri.xpath('//img').first
55
+ path = @nokogiri.xpath('//img').first.attr('src')
52
56
  end
53
57
  if ! path.nil? && path.match(/^\//)
54
58
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
@@ -86,7 +90,7 @@ module WebStat
86
90
  raise Mechanize::RobotsDisallowedError.new(url)
87
91
  end
88
92
  if WebStat::Configure.get["use_chromedirver"]
89
- document = WebStat::WebDriverHelper.get_last_url(url)
93
+ body = WebStat::WebDriverHelper.get_source(url)
90
94
  @status = 200
91
95
  else
92
96
  document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
@@ -11,7 +11,7 @@ module WebStat
11
11
  redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
12
  response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
13
  final_url = url_string_from_uri(response_uri)
14
- rescue Exception => ex
14
+ rescue => e
15
15
  # nothing
16
16
  end
17
17
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.8"
2
+ VERSION = "0.3.13"
3
3
  end
@@ -7,12 +7,7 @@ require "web_stat"
7
7
  require 'webmock'
8
8
  include WebMock::API
9
9
  WebMock.enable!
10
-
11
- WebMock.disable_net_connect!({
12
- allow_localhost: true,
13
- allow: 'chromedriver.storage.googleapis.com'
14
- })
15
-
10
+
16
11
  RSpec.configure do |config|
17
12
  # Enable flags like --only-failures and --next-failure
18
13
  config.example_status_persistence_file_path = ".rspec_status"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-17 00:00:00.000000000 Z
11
+ date: 2020-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler