web_stat 0.3.9 → 0.3.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ff258b9a50e224dfb465ad698871b2dc95dc2aaf028f5e966b0b920ca127f5f6
4
- data.tar.gz: 0fe37dff66de85ac77137f95e94ef2e3a2e42ae1931b3b138017481c3cda9734
3
+ metadata.gz: 549db2077648ce028b556a72126335f05336155f86c4b18d47856f219c71fff0
4
+ data.tar.gz: be8e0cee272fc20013659346608bf9a50a69dc48e777a46214c29f5c1865232d
5
5
  SHA512:
6
- metadata.gz: c2158946ad78a192e902e6b74b9fd72ecfc001afe2c3b531c2e8cea9d7b37239d280174066329e3f908d843c0173ddc03181bf8af529870326f29bd19bf5ee06
7
- data.tar.gz: eea91e8addd528e53989c94385c9d339895d138fea4fa911383b6c8157ade28a9c1c75566f2f40ca40d0f5c3b73267188b3695a573bc155a14150b22e3bebb47
6
+ metadata.gz: c78fa085f475c7cdf0747b4c777e357503ce41929bbe5462ccda7b28a6cf4f20a5394deb7853ee11570a4b3338573e392f8697e61484649fe06beadc54aa38a8
7
+ data.tar.gz: 941f0de20548a37899ac7610bd95b381ac18ca71761ccfb7f7788c299dd41ff521ae7eea283df64ee624044f919d2951f396ec38c3c4a15ded378ee0acbd0a20
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.7)
4
+ web_stat (0.3.12)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -83,7 +83,7 @@ GEM
83
83
  nokogiri (>= 1.6.0)
84
84
  rubyzip (2.3.0)
85
85
  safe_yaml (1.0.5)
86
- sanitize (5.2.1)
86
+ sanitize (5.2.0)
87
87
  crass (~> 1.0.2)
88
88
  nokogiri (>= 1.8.0)
89
89
  nokogumbo (~> 2.0)
data/README.md CHANGED
@@ -55,12 +55,10 @@ And then execute:
55
55
 
56
56
  ### spec
57
57
 
58
- $ bundle exec rake spec
59
-
60
- or
61
-
62
- $ bundle exec rspec
58
+ $ docker/start -d
59
+ $ docker/exec ENV=development bundle exec rspec
63
60
 
64
61
  Test a file
65
62
 
66
- $ bundle exec rspec spec/web_stat/fetch_spec.rb
63
+ $ docker/start -d
64
+ $ docker/exec ENV=development bundle exec rspec spec/web_stat/fetch_spec.rb
@@ -5,6 +5,23 @@ module WebStat
5
5
  # @param [String] url
6
6
  # @param [Integer] delay
7
7
  def get_last_url(url, delay=nil)
8
+ driver = get_driver(url, delay)
9
+ last_url = driver.current_url
10
+ driver.quit
11
+ last_url
12
+ end
13
+ # Get source of html
14
+ # @param [String] url
15
+ # @param [Integer] delay
16
+ def get_source(url, delay=nil)
17
+ driver = get_driver(url, delay)
18
+ source = driver.page_source
19
+ driver.quit
20
+ source
21
+ end
22
+
23
+ private
24
+ def get_driver(url, delay=nil)
8
25
  Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
9
26
  Selenium::WebDriver.logger.level = :info
10
27
  options = Selenium::WebDriver::Chrome::Options.new(args: [
@@ -19,9 +36,10 @@ module WebStat
19
36
  if delay.is_a?(Integer)
20
37
  sleep delay
21
38
  end
22
- last_url = driver.current_url
39
+ driver
40
+ rescue => e
23
41
  driver.quit
24
- last_url
42
+ raise e
25
43
  end
26
44
  end
27
45
  end
@@ -9,7 +9,7 @@ module WebStat
9
9
  if defined? Rails
10
10
  YAML.load_file(get_configure_path)[Rails.env]
11
11
  else
12
- YAML.load_file(get_configure_path)["production"]
12
+ YAML.load_file(get_configure_path)[ENV["ENV"] || "production"]
13
13
  end
14
14
  end
15
15
 
@@ -47,8 +47,12 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
- if path.nil? || path.empty? || @nokogiri.at('body').xpath('//img').first
51
- path = @nokogiri.at('body').xpath('//img').first.attr('src')
50
+ readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body')).content)
51
+ if (path.nil? || path.empty?) && readability_content.xpath('//img').first
52
+ path = readability_content.xpath('//img').first.attr('src')
53
+ end
54
+ if (path.nil? || path.empty?) && @nokogiri.xpath('//img').first
55
+ path = @nokogiri.xpath('//img').first.attr('src')
52
56
  end
53
57
  if ! path.nil? && path.match(/^\//)
54
58
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
@@ -86,7 +90,7 @@ module WebStat
86
90
  raise Mechanize::RobotsDisallowedError.new(url)
87
91
  end
88
92
  if WebStat::Configure.get["use_chromedirver"]
89
- document = WebStat::WebDriverHelper.get_last_url(url)
93
+ body = WebStat::WebDriverHelper.get_source(url)
90
94
  @status = 200
91
95
  else
92
96
  document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
@@ -11,7 +11,7 @@ module WebStat
11
11
  redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
12
  response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
13
  final_url = url_string_from_uri(response_uri)
14
- rescue Exception => ex
14
+ rescue => e
15
15
  # nothing
16
16
  end
17
17
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.9"
2
+ VERSION = "0.3.14"
3
3
  end
@@ -7,12 +7,7 @@ require "web_stat"
7
7
  require 'webmock'
8
8
  include WebMock::API
9
9
  WebMock.enable!
10
-
11
- WebMock.disable_net_connect!({
12
- allow_localhost: true,
13
- allow: 'chromedriver.storage.googleapis.com'
14
- })
15
-
10
+
16
11
  RSpec.configure do |config|
17
12
  # Enable flags like --only-failures and --next-failure
18
13
  config.example_status_persistence_file_path = ".rspec_status"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.3.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-18 00:00:00.000000000 Z
11
+ date: 2020-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler