web_stat 0.3.0 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0f62b7ce2720dcd1851a3c0620af8da6e00313e1b5de918e8b1e23ceeeacee89
4
- data.tar.gz: 1a52579415bf2a4f96efd39931ef41849751f693658f5cfbcd88202c680a446c
3
+ metadata.gz: 3b35413482a93f316bbd3d99a037f0e97102544a4e04604b5add670ac1a1500a
4
+ data.tar.gz: 4517b7754b2096901b005c26497fa2addad572b02f08a7aed385f6dc7de2e55a
5
5
  SHA512:
6
- metadata.gz: 51b5d4fbc2e0969913cec3c1ae4b745cca8a790b4f9638d9a920d844840500424c69f18e7d3f58d2d870b8391e0817f06781699b302f36b2a68d81467608a867
7
- data.tar.gz: 2d4ba4bfac9c5d5c76fcc48ba0202b7dbf223b4f42b7d6a649cce22c0742d6b7605c3386c93be2cb29487308714fc94fd2bf1063faf78bdcd5b7b6c5ab060d8b
6
+ metadata.gz: a54c666953b0c51e1e5ea8d230069bf608e6284629740070f77f3816468553037852c0cbc6c39c35cd53000435fbced4acee6a7b0ca855f2c241e0fb769d32da
7
+ data.tar.gz: 3fef9c1c48f272e27c877a4a588ce45f841ad316429bb78fc3aa6de5748d6471b9e5aaed2ee076af0d3fa0779b5696c452c1b0fc2f77644c7914b171377def8e
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.0)
4
+ web_stat (0.3.5)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -1,28 +1,30 @@
1
- class WebDriverHelper
2
- class << self
3
- # Get last url
4
- # @param [String] url
5
- # @param [Integer] delay
6
- def get_last_url(url, delay=nil)
7
- Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
8
- Selenium::WebDriver.logger.level = :info
9
- options = Selenium::WebDriver::Chrome::Options.new(args: [
10
- 'headless',
11
- 'no-sandbox',
12
- 'disable-gpu',
13
- 'start-maximized',
14
- 'window-size=1920,1080'
15
- ])
16
- driver = Selenium::WebDriver.for(:chrome, options: options)
17
- driver.manage.timeouts.implicit_wait = 10
18
- Selenium::WebDriver::Wait.new(timeout: 10)
19
- driver.get(url)
20
- if delay.is_a?(Integer)
21
- sleep delay
1
+ module WebStat
2
+ class WebDriverHelper
3
+ class << self
4
+ # Get last url
5
+ # @param [String] url
6
+ # @param [Integer] delay
7
+ def get_last_url(url, delay=nil)
8
+ Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
9
+ Selenium::WebDriver.logger.level = :info
10
+ options = Selenium::WebDriver::Chrome::Options.new(args: [
11
+ 'headless',
12
+ 'no-sandbox',
13
+ 'disable-gpu',
14
+ 'start-maximized',
15
+ 'window-size=1920,1080'
16
+ ])
17
+ driver = Selenium::WebDriver.for(:chrome, options: options)
18
+ driver.manage.timeouts.implicit_wait = 10
19
+ Selenium::WebDriver::Wait.new(timeout: 10)
20
+ driver.get(url)
21
+ if delay.is_a?(Integer)
22
+ sleep delay
23
+ end
24
+ last_url = driver.current_url
25
+ driver.quit
26
+ last_url
22
27
  end
23
- last_url = driver.current_url
24
- driver.quit
25
- last_url
26
28
  end
27
29
  end
28
30
  end
@@ -1,14 +1,21 @@
1
- # Minimum number of characters to detect meta title
2
- min_length_of_meta_title: 10
3
- # Split regular expression for titles
4
- regex_to_sprit_title: '\||-|:|||:|〜|\~| '
5
- # User Agent
6
- user_agent: "web_stat gem agent"
7
- # Eyecatch image xpaths
8
- eyecatch_image_xpaths:
9
- - '/html/head/meta[@property="twitter:image"]/@content'
10
- - '/html/head/meta[@property="og:image"]/@content'
11
- - '//img[@class="attachment-post-thumbnail"]/@src'
12
- - '//div[@id="content"]//img/@src'
13
- - '//img/@src'
14
- userdic: ""
1
+ development: &development
2
+ # Minimum number of characters to detect meta title
3
+ min_length_of_meta_title: 10
4
+ # Split regular expression for titles
5
+ regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
6
+ # User Agent
7
+ user_agent: "web_stat gem agent"
8
+ # Eyecatch image xpaths
9
+ eyecatch_image_xpaths:
10
+ - '/html/head/meta[@property="twitter:image"]/@content'
11
+ - '/html/head/meta[@property="og:image"]/@content'
12
+ - '//img[@class="attachment-post-thumbnail"]/@src'
13
+ - '//div[@id="content"]//img/@src'
14
+ - '//img/@src'
15
+ userdic: ""
16
+ use_chromedirver: false
17
+ test:
18
+ <<: *development
19
+ production:
20
+ <<: *development
21
+ use_chromedirver: true
@@ -3,31 +3,37 @@ module WebStat
3
3
  class Configure
4
4
  DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
5
5
 
6
- # Get yaml
7
- def self.get
8
- YAML.load_file(self.get_configure_path)
9
- end
10
-
11
- # Get configure path
12
- def self.get_configure_path
13
- if File.exists?(self.get_custom_configure_path)
14
- self.get_custom_configure_path
15
- else
16
- self.get_default_configure_path
6
+ class << self
7
+ # Get yaml
8
+ def get
9
+ if defined? Rails
10
+ YAML.load_file(get_configure_path)[Rails.env]
11
+ else
12
+ YAML.load_file(get_configure_path)["production"]
13
+ end
17
14
  end
18
- end
19
-
20
- # Get default configure path
21
- def self.get_default_configure_path
22
- File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
23
- end
24
-
25
- # Get custom configure path
26
- def self.get_custom_configure_path
27
- if defined? Rails
28
- File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
29
- else
30
- File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
15
+
16
+ # Get configure path
17
+ def get_configure_path
18
+ if File.exists?(get_custom_configure_path)
19
+ get_custom_configure_path
20
+ else
21
+ get_default_configure_path
22
+ end
23
+ end
24
+
25
+ # Get default configure path
26
+ def get_default_configure_path
27
+ File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
28
+ end
29
+
30
+ # Get custom configure path
31
+ def get_custom_configure_path
32
+ if defined? Rails
33
+ File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
34
+ else
35
+ File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
36
+ end
31
37
  end
32
38
  end
33
39
  end
@@ -57,7 +57,7 @@ module WebStat
57
57
  # Get local path to save url
58
58
  # @param [String] url
59
59
  def save_local_path(url)
60
- return nil if url.nil?
60
+ return nil if url.nil? || ! url.match(%{^http})
61
61
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
62
62
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
63
63
  image = agent.get(url)
@@ -122,7 +122,7 @@ module WebStat
122
122
  # Get original url
123
123
  # @param [String] url
124
124
  def original_url(url)
125
- last_url = FinalRedirectUrl.final_redirect_url(url)
125
+ last_url = WebStat::FinalRedirectUrl.final_redirect_url(url)
126
126
  unless last_url.nil? || last_url.scrub('').empty?
127
127
  last_url
128
128
  else
@@ -1,50 +1,54 @@
1
1
  # ref) https://github.com/indyarocks/final_redirect_url
2
2
  # customize
3
3
  # Changed
4
-
5
- module FinalRedirectUrl
6
-
7
- def self.final_redirect_url(url, options={})
8
- final_url = ''
9
- if is_valid_url?(url)
10
- begin
11
- redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
- response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
- final_url = url_string_from_uri(response_uri)
14
- rescue Exception => ex
15
- # nothing
4
+ module WebStat
5
+ class FinalRedirectUrl
6
+ class << self
7
+ def final_redirect_url(url, options={})
8
+ final_url = ''
9
+ if is_valid_url?(url)
10
+ begin
11
+ redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
+ response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
+ final_url = url_string_from_uri(response_uri)
14
+ rescue Exception => ex
15
+ # nothing
16
+ end
17
+ end
18
+ final_url
16
19
  end
17
- end
18
- final_url
19
- end
20
-
21
- private
22
- def self.is_valid_url?(url)
23
- url.to_s.match? URI::regexp(['http', 'https'])
24
- end
25
-
26
- def self.get_final_redirect_url(url, limit = 10)
27
- return url if limit <= 0
28
- uri = URI.parse(url)
29
- response = ::Net::HTTP.get_response(uri)
30
- if response.class == Net::HTTPOK
31
- return URI.parse(WebDriverHelper.get_last_url(uri))
32
- else
33
- redirect_location = response['location']
34
- location_uri = URI.parse(redirect_location)
35
- if location_uri.host.nil?
36
- redirect_location = uri.scheme + '://' + uri.host + redirect_location
20
+
21
+ private
22
+ def is_valid_url?(url)
23
+ url.to_s.match? URI::regexp(['http', 'https'])
24
+ end
25
+ def get_final_redirect_url(url, limit = 10)
26
+ return url if limit <= 0
27
+ uri = URI.parse(url)
28
+ response = ::Net::HTTP.get_response(uri)
29
+ if response.class == Net::HTTPOK
30
+ if WebStat::Configure.get["use_chromedirver"]
31
+ return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
32
+ else
33
+ return URI.parse(uri)
34
+ end
35
+ else
36
+ redirect_location = response['location']
37
+ location_uri = URI.parse(redirect_location)
38
+ if location_uri.host.nil?
39
+ redirect_location = uri.scheme + '://' + uri.host + redirect_location
40
+ end
41
+ warn "redirected to #{redirect_location}"
42
+ get_final_redirect_url(redirect_location, limit - 1)
43
+ end
44
+ end
45
+ def url_string_from_uri(uri)
46
+ url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
47
+ if uri.fragment
48
+ url_str = url_str + "##{uri.fragment}"
49
+ end
50
+ url_str
37
51
  end
38
- warn "redirected to #{redirect_location}"
39
- get_final_redirect_url(redirect_location, limit - 1)
40
- end
41
- end
42
-
43
- def self.url_string_from_uri(uri)
44
- url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
45
- if uri.fragment
46
- url_str = url_str + "##{uri.fragment}"
47
52
  end
48
- url_str
49
53
  end
50
- end
54
+ end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-11 00:00:00.000000000 Z
11
+ date: 2020-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler