web_stat 0.3.1 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf6e7c6f1461adb76d15e934bbcb4866350dbef5ee45dc36ab5bac44cac88ce5
4
- data.tar.gz: 3f788539423a476ba805b5cc023320fbd9e16b5ef3cffe476f8c7c9a9c6bcedf
3
+ metadata.gz: 2ed82b635d751fd0e40eeeef77348ebfc02636f1a9f2d86ca0018c572c4b1f2d
4
+ data.tar.gz: 5034dcc4ba993f6d084228eb3e7e12fe03b92e8ff9e0119d3db8f7ad81521f4a
5
5
  SHA512:
6
- metadata.gz: 77787d2811e56d7e7db5d78c378204ff83fb44252a38c41e5843e8abf98e5c06dc5e5763f5e877202557294ea480c065f541f05ae9bd944e28433d9f46be5520
7
- data.tar.gz: ed612a3a5bb29330c08876736e834a166f658db6f4ca75c7d93c120a60ddeb25695f98416efaeb5d42693b2278b74ca22ab83b77235135b962ce0f9ed956fd6a
6
+ metadata.gz: d1a20f17519854baf5cea62ff55236b0ea63f9ab61b3c97df2f276ad7e64694d7d6387824957bee717c50a042734b37dcd26a6761591fa3d6550de8266454ac6
7
+ data.tar.gz: 1669e18b93b126bc3d382eba1e25de554fc23751f0d51635ec42c89fa8c05392d61901bb4fa510e27b1048133e96dba153e213cab3c3de90a2af1461cca88bf2
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.3.0)
4
+ web_stat (0.3.5)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
7
  mechanize (>= 2.7)
@@ -1,14 +1,21 @@
1
- # Minimum number of characters to detect meta title
2
- min_length_of_meta_title: 10
3
- # Split regular expression for titles
4
- regex_to_sprit_title: '\||-|:|||:|〜|\~| '
5
- # User Agent
6
- user_agent: "web_stat gem agent"
7
- # Eyecatch image xpaths
8
- eyecatch_image_xpaths:
9
- - '/html/head/meta[@property="twitter:image"]/@content'
10
- - '/html/head/meta[@property="og:image"]/@content'
11
- - '//img[@class="attachment-post-thumbnail"]/@src'
12
- - '//div[@id="content"]//img/@src'
13
- - '//img/@src'
14
- userdic: ""
1
+ development: &development
2
+ # Minimum number of characters to detect meta title
3
+ min_length_of_meta_title: 10
4
+ # Split regular expression for titles
5
+ regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
6
+ # User Agent
7
+ user_agent: "web_stat gem agent"
8
+ # Eyecatch image xpaths
9
+ eyecatch_image_xpaths:
10
+ - '/html/head/meta[@property="twitter:image"]/@content'
11
+ - '/html/head/meta[@property="og:image"]/@content'
12
+ - '//img[@class="attachment-post-thumbnail"]/@src'
13
+ - '//div[@id="content"]//img/@src'
14
+ - '//img/@src'
15
+ userdic: ""
16
+ use_chromedirver: false
17
+ test:
18
+ <<: *development
19
+ production:
20
+ <<: *development
21
+ use_chromedirver: true
@@ -3,31 +3,37 @@ module WebStat
3
3
  class Configure
4
4
  DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
5
5
 
6
- # Get yaml
7
- def self.get
8
- YAML.load_file(self.get_configure_path)
9
- end
10
-
11
- # Get configure path
12
- def self.get_configure_path
13
- if File.exists?(self.get_custom_configure_path)
14
- self.get_custom_configure_path
15
- else
16
- self.get_default_configure_path
6
+ class << self
7
+ # Get yaml
8
+ def get
9
+ if defined? Rails
10
+ YAML.load_file(get_configure_path)[Rails.env]
11
+ else
12
+ YAML.load_file(get_configure_path)["production"]
13
+ end
17
14
  end
18
- end
19
-
20
- # Get default configure path
21
- def self.get_default_configure_path
22
- File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
23
- end
24
-
25
- # Get custom configure path
26
- def self.get_custom_configure_path
27
- if defined? Rails
28
- File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
29
- else
30
- File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
15
+
16
+ # Get configure path
17
+ def get_configure_path
18
+ if File.exists?(get_custom_configure_path)
19
+ get_custom_configure_path
20
+ else
21
+ get_default_configure_path
22
+ end
23
+ end
24
+
25
+ # Get default configure path
26
+ def get_default_configure_path
27
+ File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
28
+ end
29
+
30
+ # Get custom configure path
31
+ def get_custom_configure_path
32
+ if defined? Rails
33
+ File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
34
+ else
35
+ File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
36
+ end
31
37
  end
32
38
  end
33
39
  end
@@ -47,6 +47,12 @@ module WebStat
47
47
  break
48
48
  end
49
49
  end
50
+ if path.nil?
51
+ path = @nokogiri.at('body').xpath('//img').first.attr('src')
52
+ end
53
+ if path.nil?
54
+ Readability::Document.new(@nokogiri.at('body')).content
55
+ end
50
56
  if ! path.nil? && path.match(/^\//)
51
57
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
52
58
  else
@@ -57,7 +63,7 @@ module WebStat
57
63
  # Get local path to save url
58
64
  # @param [String] url
59
65
  def save_local_path(url)
60
- return nil if url.nil?
66
+ return nil if url.nil? || ! url.match(%{^http})
61
67
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
62
68
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
63
69
  image = agent.get(url)
@@ -96,7 +102,7 @@ module WebStat
96
102
  # Get the informations of @url
97
103
  # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
98
104
  def stat(userdics: nil)
99
- clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
105
+ clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s").gsub(URI.regexp, "")
100
106
  language_code = CLD.detect_language(clean_content)[:code]
101
107
  if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
102
108
  tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
@@ -27,7 +27,11 @@ module WebStat
27
27
  uri = URI.parse(url)
28
28
  response = ::Net::HTTP.get_response(uri)
29
29
  if response.class == Net::HTTPOK
30
- return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
30
+ if WebStat::Configure.get["use_chromedirver"]
31
+ return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
32
+ else
33
+ return URI.parse(uri)
34
+ end
31
35
  else
32
36
  redirect_location = response['location']
33
37
  location_uri = URI.parse(redirect_location)
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-12 00:00:00.000000000 Z
11
+ date: 2020-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler