web_stat 0.3.0 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f62b7ce2720dcd1851a3c0620af8da6e00313e1b5de918e8b1e23ceeeacee89
4
- data.tar.gz: 1a52579415bf2a4f96efd39931ef41849751f693658f5cfbcd88202c680a446c
3
+ metadata.gz: 3b35413482a93f316bbd3d99a037f0e97102544a4e04604b5add670ac1a1500a
4
+ data.tar.gz: 4517b7754b2096901b005c26497fa2addad572b02f08a7aed385f6dc7de2e55a
5
5
  SHA512:
6
- metadata.gz: 51b5d4fbc2e0969913cec3c1ae4b745cca8a790b4f9638d9a920d844840500424c69f18e7d3f58d2d870b8391e0817f06781699b302f36b2a68d81467608a867
7
- data.tar.gz: 2d4ba4bfac9c5d5c76fcc48ba0202b7dbf223b4f42b7d6a649cce22c0742d6b7605c3386c93be2cb29487308714fc94fd2bf1063faf78bdcd5b7b6c5ab060d8b
6
+ metadata.gz: a54c666953b0c51e1e5ea8d230069bf608e6284629740070f77f3816468553037852c0cbc6c39c35cd53000435fbced4acee6a7b0ca855f2c241e0fb769d32da
7
+ data.tar.gz: 3fef9c1c48f272e27c877a4a588ce45f841ad316429bb78fc3aa6de5748d6471b9e5aaed2ee076af0d3fa0779b5696c452c1b0fc2f77644c7914b171377def8e
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.0)
4
+ web_stat (0.3.5)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -1,28 +1,30 @@
1
- class WebDriverHelper
2
- class << self
3
- # Get last url
4
- # @param [String] url
5
- # @param [Integer] delay
6
- def get_last_url(url, delay=nil)
7
- Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
8
- Selenium::WebDriver.logger.level = :info
9
- options = Selenium::WebDriver::Chrome::Options.new(args: [
10
- 'headless',
11
- 'no-sandbox',
12
- 'disable-gpu',
13
- 'start-maximized',
14
- 'window-size=1920,1080'
15
- ])
16
- driver = Selenium::WebDriver.for(:chrome, options: options)
17
- driver.manage.timeouts.implicit_wait = 10
18
- Selenium::WebDriver::Wait.new(timeout: 10)
19
- driver.get(url)
20
- if delay.is_a?(Integer)
21
- sleep delay
1
+ module WebStat
2
+ class WebDriverHelper
3
+ class << self
4
+ # Get last url
5
+ # @param [String] url
6
+ # @param [Integer] delay
7
+ def get_last_url(url, delay=nil)
8
+ Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
9
+ Selenium::WebDriver.logger.level = :info
10
+ options = Selenium::WebDriver::Chrome::Options.new(args: [
11
+ 'headless',
12
+ 'no-sandbox',
13
+ 'disable-gpu',
14
+ 'start-maximized',
15
+ 'window-size=1920,1080'
16
+ ])
17
+ driver = Selenium::WebDriver.for(:chrome, options: options)
18
+ driver.manage.timeouts.implicit_wait = 10
19
+ Selenium::WebDriver::Wait.new(timeout: 10)
20
+ driver.get(url)
21
+ if delay.is_a?(Integer)
22
+ sleep delay
23
+ end
24
+ last_url = driver.current_url
25
+ driver.quit
26
+ last_url
22
27
  end
23
- last_url = driver.current_url
24
- driver.quit
25
- last_url
26
28
  end
27
29
  end
28
30
  end
@@ -1,14 +1,21 @@
1
- # Minimum number of characters to detect meta title
2
- min_length_of_meta_title: 10
3
- # Split regular expression for titles
4
- regex_to_sprit_title: '\||-|:|||:|〜|\~| '
5
- # User Agent
6
- user_agent: "web_stat gem agent"
7
- # Eyecatch image xpaths
8
- eyecatch_image_xpaths:
9
- - '/html/head/meta[@property="twitter:image"]/@content'
10
- - '/html/head/meta[@property="og:image"]/@content'
11
- - '//img[@class="attachment-post-thumbnail"]/@src'
12
- - '//div[@id="content"]//img/@src'
13
- - '//img/@src'
14
- userdic: ""
1
+ development: &development
2
+ # Minimum number of characters to detect meta title
3
+ min_length_of_meta_title: 10
4
+ # Split regular expression for titles
5
+ regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
6
+ # User Agent
7
+ user_agent: "web_stat gem agent"
8
+ # Eyecatch image xpaths
9
+ eyecatch_image_xpaths:
10
+ - '/html/head/meta[@property="twitter:image"]/@content'
11
+ - '/html/head/meta[@property="og:image"]/@content'
12
+ - '//img[@class="attachment-post-thumbnail"]/@src'
13
+ - '//div[@id="content"]//img/@src'
14
+ - '//img/@src'
15
+ userdic: ""
16
+ use_chromedirver: false
17
+ test:
18
+ <<: *development
19
+ production:
20
+ <<: *development
21
+ use_chromedirver: true
@@ -3,31 +3,37 @@ module WebStat
3
3
  class Configure
4
4
  DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
5
5
 
6
- # Get yaml
7
- def self.get
8
- YAML.load_file(self.get_configure_path)
9
- end
10
-
11
- # Get configure path
12
- def self.get_configure_path
13
- if File.exists?(self.get_custom_configure_path)
14
- self.get_custom_configure_path
15
- else
16
- self.get_default_configure_path
6
+ class << self
7
+ # Get yaml
8
+ def get
9
+ if defined? Rails
10
+ YAML.load_file(get_configure_path)[Rails.env]
11
+ else
12
+ YAML.load_file(get_configure_path)["production"]
13
+ end
17
14
  end
18
- end
19
-
20
- # Get default configure path
21
- def self.get_default_configure_path
22
- File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
23
- end
24
-
25
- # Get custom configure path
26
- def self.get_custom_configure_path
27
- if defined? Rails
28
- File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
29
- else
30
- File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
15
+
16
+ # Get configure path
17
+ def get_configure_path
18
+ if File.exists?(get_custom_configure_path)
19
+ get_custom_configure_path
20
+ else
21
+ get_default_configure_path
22
+ end
23
+ end
24
+
25
+ # Get default configure path
26
+ def get_default_configure_path
27
+ File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
28
+ end
29
+
30
+ # Get custom configure path
31
+ def get_custom_configure_path
32
+ if defined? Rails
33
+ File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
34
+ else
35
+ File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
36
+ end
31
37
  end
32
38
  end
33
39
  end
@@ -57,7 +57,7 @@ module WebStat
57
57
  # Get local path to save url
58
58
  # @param [String] url
59
59
  def save_local_path(url)
60
- return nil if url.nil?
60
+ return nil if url.nil? || ! url.match(%{^http})
61
61
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
62
62
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
63
63
  image = agent.get(url)
@@ -122,7 +122,7 @@ module WebStat
122
122
  # Get original url
123
123
  # @param [String] url
124
124
  def original_url(url)
125
- last_url = FinalRedirectUrl.final_redirect_url(url)
125
+ last_url = WebStat::FinalRedirectUrl.final_redirect_url(url)
126
126
  unless last_url.nil? || last_url.scrub('').empty?
127
127
  last_url
128
128
  else
@@ -1,50 +1,54 @@
1
1
  # ref) https://github.com/indyarocks/final_redirect_url
2
2
  # customize
3
3
  # Changed
4
-
5
- module FinalRedirectUrl
6
-
7
- def self.final_redirect_url(url, options={})
8
- final_url = ''
9
- if is_valid_url?(url)
10
- begin
11
- redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
- response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
- final_url = url_string_from_uri(response_uri)
14
- rescue Exception => ex
15
- # nothing
4
+ module WebStat
5
+ class FinalRedirectUrl
6
+ class << self
7
+ def final_redirect_url(url, options={})
8
+ final_url = ''
9
+ if is_valid_url?(url)
10
+ begin
11
+ redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
+ response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
+ final_url = url_string_from_uri(response_uri)
14
+ rescue Exception => ex
15
+ # nothing
16
+ end
17
+ end
18
+ final_url
16
19
  end
17
- end
18
- final_url
19
- end
20
-
21
- private
22
- def self.is_valid_url?(url)
23
- url.to_s.match? URI::regexp(['http', 'https'])
24
- end
25
-
26
- def self.get_final_redirect_url(url, limit = 10)
27
- return url if limit <= 0
28
- uri = URI.parse(url)
29
- response = ::Net::HTTP.get_response(uri)
30
- if response.class == Net::HTTPOK
31
- return URI.parse(WebDriverHelper.get_last_url(uri))
32
- else
33
- redirect_location = response['location']
34
- location_uri = URI.parse(redirect_location)
35
- if location_uri.host.nil?
36
- redirect_location = uri.scheme + '://' + uri.host + redirect_location
20
+
21
+ private
22
+ def is_valid_url?(url)
23
+ url.to_s.match? URI::regexp(['http', 'https'])
24
+ end
25
+ def get_final_redirect_url(url, limit = 10)
26
+ return url if limit <= 0
27
+ uri = URI.parse(url)
28
+ response = ::Net::HTTP.get_response(uri)
29
+ if response.class == Net::HTTPOK
30
+ if WebStat::Configure.get["use_chromedirver"]
31
+ return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
32
+ else
33
+ return URI.parse(uri)
34
+ end
35
+ else
36
+ redirect_location = response['location']
37
+ location_uri = URI.parse(redirect_location)
38
+ if location_uri.host.nil?
39
+ redirect_location = uri.scheme + '://' + uri.host + redirect_location
40
+ end
41
+ warn "redirected to #{redirect_location}"
42
+ get_final_redirect_url(redirect_location, limit - 1)
43
+ end
44
+ end
45
+ def url_string_from_uri(uri)
46
+ url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
47
+ if uri.fragment
48
+ url_str = url_str + "##{uri.fragment}"
49
+ end
50
+ url_str
37
51
  end
38
- warn "redirected to #{redirect_location}"
39
- get_final_redirect_url(redirect_location, limit - 1)
40
- end
41
- end
42
-
43
- def self.url_string_from_uri(uri)
44
- url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
45
- if uri.fragment
46
- url_str = url_str + "##{uri.fragment}"
47
52
  end
48
- url_str
49
53
  end
50
- end
54
+ end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-11 00:00:00.000000000 Z
11
+ date: 2020-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler