web_stat 0.3.7 → 0.3.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5304507f8ce01a6a98717756927366005db0eceb89b3d22f29e4283c6eedb4d7
4
- data.tar.gz: f561221644f98831867c2d853201a0312dcb75674ca4ae247b319b4f2a1ba085
3
+ metadata.gz: 2af80415014e94830b1e323f44e2e71732196bdfc535627295f1f6b7b2f44285
4
+ data.tar.gz: 85fc9facafff40063c434824ebc64fcd847f347bdfa0c928ee8c35b2b3e0c8b5
5
5
  SHA512:
6
- metadata.gz: bec01871973e80dc46b9246a264791213b3bdd4cb20693fc0d51cc28c2d9bba28b40af65322dc2b0c950e9bb5aedd288ff8fd771adfbd5b2aee1564b98888cb0
7
- data.tar.gz: 5cfce031ad779f347fae362ccff2deadb3711e4f9fcc06cafcd232b86e8c18aff63e40c430a330cfdf00d398aa5257457c5dc9f3bee492478359d3801e217f08
6
+ metadata.gz: 30870501431d61d5d90bdcfcf4f6ba339918b810fda48bd9a77abe183e341575bc9c7397aadb7494da1591d5d647fea7b17512ed24cea61471d4e112a0c17bd8
7
+ data.tar.gz: 8340739ddae12fd6a670d51e7fcd7e3987294bcb32219cde6e8dbcee2a067f064b2b632d102636471d0b7fd0ff27ca4f55c477eca94d88b7194a3cf17b1869c7
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.6)
4
+ web_stat (0.3.11)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
data/README.md CHANGED
@@ -55,12 +55,10 @@ And then execute:
55
55
 
56
56
  ### spec
57
57
 
58
- $ bundle exec rake spec
59
-
60
- or
61
-
62
- $ bundle exec rspec
58
+ $ docker/start -d
59
+ $ docker/exec ENV=development bundle exec rspec
63
60
 
64
61
  Test a file
65
62
 
66
- $ bundle exec rspec spec/web_stat/fetch_spec.rb
63
+ $ docker/start -d
64
+ $ docker/exec ENV=development bundle exec rspec spec/web_stat/fetch_spec.rb
@@ -5,14 +5,29 @@ module WebStat
5
5
  # @param [String] url
6
6
  # @param [Integer] delay
7
7
  def get_last_url(url, delay=nil)
8
+ driver = get_driver(url, delay)
9
+ last_url = driver.current_url
10
+ driver.quit
11
+ last_url
12
+ end
13
+ # Get source of html
14
+ # @param [String] url
15
+ # @param [Integer] delay
16
+ def get_source(url, delay=nil)
17
+ driver = get_driver(url, delay)
18
+ source = driver.page_source
19
+ driver.quit
20
+ source
21
+ end
22
+
23
+ private
24
+ def get_driver(url, delay=nil)
8
25
  Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
9
26
  Selenium::WebDriver.logger.level = :info
10
27
  options = Selenium::WebDriver::Chrome::Options.new(args: [
11
28
  'headless',
12
29
  'no-sandbox',
13
- 'disable-gpu',
14
- 'start-maximized',
15
- 'window-size=1920,1080'
30
+ 'disable-gpu'
16
31
  ])
17
32
  driver = Selenium::WebDriver.for(:chrome, options: options)
18
33
  driver.manage.timeouts.implicit_wait = 10
@@ -21,9 +36,10 @@ module WebStat
21
36
  if delay.is_a?(Integer)
22
37
  sleep delay
23
38
  end
24
- last_url = driver.current_url
39
+ driver
40
+ rescue => e
25
41
  driver.quit
26
- last_url
42
+ raise e
27
43
  end
28
44
  end
29
45
  end
@@ -9,7 +9,7 @@ module WebStat
9
9
  if defined? Rails
10
10
  YAML.load_file(get_configure_path)[Rails.env]
11
11
  else
12
- YAML.load_file(get_configure_path)["production"]
12
+ YAML.load_file(get_configure_path)[ENV["ENV"] || "production"]
13
13
  end
14
14
  end
15
15
 
@@ -47,8 +47,8 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
- if path.nil? || path.empty?
51
- path = @nokogiri.at('body').xpath('//img').first.attr('src')
50
+ if (path.nil? || path.empty?) && @nokogiri.xpath('//img')
51
+ path = @nokogiri.xpath('//img').attr('src')
52
52
  end
53
53
  if ! path.nil? && path.match(/^\//)
54
54
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
@@ -78,17 +78,25 @@ module WebStat
78
78
  # @param [String] url
79
79
  # @param [String] body
80
80
  def get_url(url)
81
- agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
81
+ mech = Mechanize.new { |_mech| _mech.user_agent = WebStat::Configure.get["user_agent"] }
82
82
  # Enable to read Robots.txt
83
- agent.robots = true
83
+ mech.robots = true
84
84
  begin
85
- document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
86
- if document.class == Mechanize::File
87
- body = document.body
85
+ if mech.agent.robots_disallowed?(url)
86
+ raise Mechanize::RobotsDisallowedError.new(url)
87
+ end
88
+ if WebStat::Configure.get["use_chromedirver"]
89
+ body = WebStat::WebDriverHelper.get_source(url)
90
+ @status = 200
88
91
  else
89
- body = document.body.encode('UTF-8', document.encoding)
92
+ document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
93
+ if document.class == Mechanize::File
94
+ body = document.body
95
+ else
96
+ body = document.body.encode('UTF-8', document.encoding)
97
+ end
98
+ @status = document.code
90
99
  end
91
- @status = document.code
92
100
  rescue Mechanize::ResponseCodeError => e
93
101
  body = e.page.body
94
102
  @status = e.page.code
@@ -11,7 +11,7 @@ module WebStat
11
11
  redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
12
  response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
13
  final_url = url_string_from_uri(response_uri)
14
- rescue Exception => ex
14
+ rescue => e
15
15
  # nothing
16
16
  end
17
17
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.7"
2
+ VERSION = "0.3.12"
3
3
  end
@@ -7,12 +7,7 @@ require "web_stat"
7
7
  require 'webmock'
8
8
  include WebMock::API
9
9
  WebMock.enable!
10
-
11
- WebMock.disable_net_connect!({
12
- allow_localhost: true,
13
- allow: 'chromedriver.storage.googleapis.com'
14
- })
15
-
10
+
16
11
  RSpec.configure do |config|
17
12
  # Enable flags like --only-failures and --next-failure
18
13
  config.example_status_persistence_file_path = ".rspec_status"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-14 00:00:00.000000000 Z
11
+ date: 2020-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler