web_stat 0.3.8 → 0.3.13

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bbf065f5482ee7bf68e14e41af1e48b855f4219b9f44a7e16440834c92f404d5
4
- data.tar.gz: a16fc8b8a66cb534bb8ce61c6da28212faff44f2df6324772261f9b6847cfc7f
3
+ metadata.gz: 6021c6bacb394385b1e5634449f4a3adfb27a5c2d799f9d0874084889a3ac66c
4
+ data.tar.gz: d35180f95d0937b69df93dc5ba051edb8eeb79b52bd6df0e8770c849bc33c511
5
5
  SHA512:
6
- metadata.gz: 83b8b7d71a1047ec2614a9bfbc3c6305806cf7a616ba6b2568382b72ef91f5b0947e7aefb3d926b33c4970d291d2a34c99a86ba0f0e320a3e7fde9dd2b5bc154
7
- data.tar.gz: aa4af6063d7dd81c7c78c0100ff5354d7c54f28d7b2fac7393656cb5bde932b7260c95d1fe08fc560784e85ead7bbf49134c4d56cb108cef522cef753e477032
6
+ metadata.gz: 812aa33c7a4d8642b1239d05b8a0f2fa6bc938379f48e998732d71dcdd40c5c15d04b2795dde6b7e9cee0f439a12ad5b7259bc58fe741076106108bbc6c16d90
7
+ data.tar.gz: 3bf21e030a43868ccee4c45fe1e8993c6375b85f05d74ec3b11d68b97fe10a09194e186cfd942c3fe03f9ad2ce009e5ed15edefb06d21830180f9b04bd2fd8ea
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.7)
4
+ web_stat (0.3.12)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -83,7 +83,7 @@ GEM
83
83
  nokogiri (>= 1.6.0)
84
84
  rubyzip (2.3.0)
85
85
  safe_yaml (1.0.5)
86
- sanitize (5.2.1)
86
+ sanitize (5.2.0)
87
87
  crass (~> 1.0.2)
88
88
  nokogiri (>= 1.8.0)
89
89
  nokogumbo (~> 2.0)
data/README.md CHANGED
@@ -55,12 +55,10 @@ And then execute:
55
55
 
56
56
  ### spec
57
57
 
58
- $ bundle exec rake spec
59
-
60
- or
61
-
62
- $ bundle exec rspec
58
+ $ docker/start -d
59
+ $ docker/exec ENV=development bundle exec rspec
63
60
 
64
61
  Test a file
65
62
 
66
- $ bundle exec rspec spec/web_stat/fetch_spec.rb
63
+ $ docker/start -d
64
+ $ docker/exec ENV=development bundle exec rspec spec/web_stat/fetch_spec.rb
@@ -5,14 +5,29 @@ module WebStat
5
5
  # @param [String] url
6
6
  # @param [Integer] delay
7
7
  def get_last_url(url, delay=nil)
8
+ driver = get_driver(url, delay)
9
+ last_url = driver.current_url
10
+ driver.quit
11
+ last_url
12
+ end
13
+ # Get source of html
14
+ # @param [String] url
15
+ # @param [Integer] delay
16
+ def get_source(url, delay=nil)
17
+ driver = get_driver(url, delay)
18
+ source = driver.page_source
19
+ driver.quit
20
+ source
21
+ end
22
+
23
+ private
24
+ def get_driver(url, delay=nil)
8
25
  Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
9
26
  Selenium::WebDriver.logger.level = :info
10
27
  options = Selenium::WebDriver::Chrome::Options.new(args: [
11
28
  'headless',
12
29
  'no-sandbox',
13
- 'disable-gpu',
14
- 'start-maximized',
15
- 'window-size=1920,1080'
30
+ 'disable-gpu'
16
31
  ])
17
32
  driver = Selenium::WebDriver.for(:chrome, options: options)
18
33
  driver.manage.timeouts.implicit_wait = 10
@@ -21,9 +36,10 @@ module WebStat
21
36
  if delay.is_a?(Integer)
22
37
  sleep delay
23
38
  end
24
- last_url = driver.current_url
39
+ driver
40
+ rescue => e
25
41
  driver.quit
26
- last_url
42
+ raise e
27
43
  end
28
44
  end
29
45
  end
@@ -9,7 +9,7 @@ module WebStat
9
9
  if defined? Rails
10
10
  YAML.load_file(get_configure_path)[Rails.env]
11
11
  else
12
- YAML.load_file(get_configure_path)["production"]
12
+ YAML.load_file(get_configure_path)[ENV["ENV"] || "production"]
13
13
  end
14
14
  end
15
15
 
@@ -47,8 +47,12 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
- if path.nil? || path.empty? || @nokogiri.at('body').xpath('//img').first
51
- path = @nokogiri.at('body').xpath('//img').first.attr('src')
50
+ readability_content = Readability::Document.new(@nokogiri.at('body')).content
51
+ if (path.nil? || path.empty?) && readability_content.xpath('//img').first
52
+ path = ::Nokogiri::HTML(readability_content).xpath('//img').first.attr('src')
53
+ end
54
+ if (path.nil? || path.empty?) && @nokogiri.xpath('//img').first
55
+ path = @nokogiri.xpath('//img').first.attr('src')
52
56
  end
53
57
  if ! path.nil? && path.match(/^\//)
54
58
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
@@ -86,7 +90,7 @@ module WebStat
86
90
  raise Mechanize::RobotsDisallowedError.new(url)
87
91
  end
88
92
  if WebStat::Configure.get["use_chromedirver"]
89
- document = WebStat::WebDriverHelper.get_last_url(url)
93
+ body = WebStat::WebDriverHelper.get_source(url)
90
94
  @status = 200
91
95
  else
92
96
  document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
@@ -11,7 +11,7 @@ module WebStat
11
11
  redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
12
  response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
13
  final_url = url_string_from_uri(response_uri)
14
- rescue Exception => ex
14
+ rescue => e
15
15
  # nothing
16
16
  end
17
17
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.8"
2
+ VERSION = "0.3.13"
3
3
  end
@@ -7,12 +7,7 @@ require "web_stat"
7
7
  require 'webmock'
8
8
  include WebMock::API
9
9
  WebMock.enable!
10
-
11
- WebMock.disable_net_connect!({
12
- allow_localhost: true,
13
- allow: 'chromedriver.storage.googleapis.com'
14
- })
15
-
10
+
16
11
  RSpec.configure do |config|
17
12
  # Enable flags like --only-failures and --next-failure
18
13
  config.example_status_persistence_file_path = ".rspec_status"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-17 00:00:00.000000000 Z
11
+ date: 2020-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler