web_stat 0.3.1 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf6e7c6f1461adb76d15e934bbcb4866350dbef5ee45dc36ab5bac44cac88ce5
4
- data.tar.gz: 3f788539423a476ba805b5cc023320fbd9e16b5ef3cffe476f8c7c9a9c6bcedf
3
+ metadata.gz: 2ed82b635d751fd0e40eeeef77348ebfc02636f1a9f2d86ca0018c572c4b1f2d
4
+ data.tar.gz: 5034dcc4ba993f6d084228eb3e7e12fe03b92e8ff9e0119d3db8f7ad81521f4a
5
5
  SHA512:
6
- metadata.gz: 77787d2811e56d7e7db5d78c378204ff83fb44252a38c41e5843e8abf98e5c06dc5e5763f5e877202557294ea480c065f541f05ae9bd944e28433d9f46be5520
7
- data.tar.gz: ed612a3a5bb29330c08876736e834a166f658db6f4ca75c7d93c120a60ddeb25695f98416efaeb5d42693b2278b74ca22ab83b77235135b962ce0f9ed956fd6a
6
+ metadata.gz: d1a20f17519854baf5cea62ff55236b0ea63f9ab61b3c97df2f276ad7e64694d7d6387824957bee717c50a042734b37dcd26a6761591fa3d6550de8266454ac6
7
+ data.tar.gz: 1669e18b93b126bc3d382eba1e25de554fc23751f0d51635ec42c89fa8c05392d61901bb4fa510e27b1048133e96dba153e213cab3c3de90a2af1461cca88bf2
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.0)
4
+ web_stat (0.3.5)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -1,14 +1,21 @@
1
- # Minimum number of characters to detect meta title
2
- min_length_of_meta_title: 10
3
- # Split regular expression for titles
4
- regex_to_sprit_title: '\||-|:|||:|〜|\~| '
5
- # User Agent
6
- user_agent: "web_stat gem agent"
7
- # Eyecatch image xpaths
8
- eyecatch_image_xpaths:
9
- - '/html/head/meta[@property="twitter:image"]/@content'
10
- - '/html/head/meta[@property="og:image"]/@content'
11
- - '//img[@class="attachment-post-thumbnail"]/@src'
12
- - '//div[@id="content"]//img/@src'
13
- - '//img/@src'
14
- userdic: ""
1
+ development: &development
2
+ # Minimum number of characters to detect meta title
3
+ min_length_of_meta_title: 10
4
+ # Split regular expression for titles
5
+ regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
6
+ # User Agent
7
+ user_agent: "web_stat gem agent"
8
+ # Eyecatch image xpaths
9
+ eyecatch_image_xpaths:
10
+ - '/html/head/meta[@property="twitter:image"]/@content'
11
+ - '/html/head/meta[@property="og:image"]/@content'
12
+ - '//img[@class="attachment-post-thumbnail"]/@src'
13
+ - '//div[@id="content"]//img/@src'
14
+ - '//img/@src'
15
+ userdic: ""
16
+ use_chromedirver: false
17
+ test:
18
+ <<: *development
19
+ production:
20
+ <<: *development
21
+ use_chromedirver: true
@@ -3,31 +3,37 @@ module WebStat
3
3
  class Configure
4
4
  DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
5
5
 
6
- # Get yaml
7
- def self.get
8
- YAML.load_file(self.get_configure_path)
9
- end
10
-
11
- # Get configure path
12
- def self.get_configure_path
13
- if File.exists?(self.get_custom_configure_path)
14
- self.get_custom_configure_path
15
- else
16
- self.get_default_configure_path
6
+ class << self
7
+ # Get yaml
8
+ def get
9
+ if defined? Rails
10
+ YAML.load_file(get_configure_path)[Rails.env]
11
+ else
12
+ YAML.load_file(get_configure_path)["production"]
13
+ end
17
14
  end
18
- end
19
-
20
- # Get default configure path
21
- def self.get_default_configure_path
22
- File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
23
- end
24
-
25
- # Get custom configure path
26
- def self.get_custom_configure_path
27
- if defined? Rails
28
- File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
29
- else
30
- File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
15
+
16
+ # Get configure path
17
+ def get_configure_path
18
+ if File.exists?(get_custom_configure_path)
19
+ get_custom_configure_path
20
+ else
21
+ get_default_configure_path
22
+ end
23
+ end
24
+
25
+ # Get default configure path
26
+ def get_default_configure_path
27
+ File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
28
+ end
29
+
30
+ # Get custom configure path
31
+ def get_custom_configure_path
32
+ if defined? Rails
33
+ File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
34
+ else
35
+ File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
36
+ end
31
37
  end
32
38
  end
33
39
  end
@@ -47,6 +47,12 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
+ if path.nil?
51
+ path = @nokogiri.at('body').xpath('//img').first.attr('src')
52
+ end
53
+ if path.nil?
54
+ Readability::Document.new(@nokogiri.at('body')).content
55
+ end
50
56
  if ! path.nil? && path.match(/^\//)
51
57
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
52
58
  else
@@ -57,7 +63,7 @@ module WebStat
57
63
  # Get local path to save url
58
64
  # @param [String] url
59
65
  def save_local_path(url)
60
- return nil if url.nil?
66
+ return nil if url.nil? || ! url.match(%{^http})
61
67
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
62
68
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
63
69
  image = agent.get(url)
@@ -96,7 +102,7 @@ module WebStat
96
102
  # Get the informations of @url
97
103
  # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
98
104
  def stat(userdics: nil)
99
- clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
105
+ clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s").gsub(URI.regexp, "")
100
106
  language_code = CLD.detect_language(clean_content)[:code]
101
107
  if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
102
108
  tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
@@ -27,7 +27,11 @@ module WebStat
27
27
  uri = URI.parse(url)
28
28
  response = ::Net::HTTP.get_response(uri)
29
29
  if response.class == Net::HTTPOK
30
- return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
30
+ if WebStat::Configure.get["use_chromedirver"]
31
+ return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
32
+ else
33
+ return URI.parse(uri)
34
+ end
31
35
  else
32
36
  redirect_location = response['location']
33
37
  location_uri = URI.parse(redirect_location)
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-12 00:00:00.000000000 Z
11
+ date: 2020-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler