web_stat 0.3.1 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/web_stat/config/web_stat.yml +21 -14
- data/lib/web_stat/configure.rb +30 -24
- data/lib/web_stat/fetch.rb +8 -2
- data/lib/web_stat/final_redirect_url.rb +5 -1
- data/lib/web_stat/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ed82b635d751fd0e40eeeef77348ebfc02636f1a9f2d86ca0018c572c4b1f2d
|
4
|
+
data.tar.gz: 5034dcc4ba993f6d084228eb3e7e12fe03b92e8ff9e0119d3db8f7ad81521f4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1a20f17519854baf5cea62ff55236b0ea63f9ab61b3c97df2f276ad7e64694d7d6387824957bee717c50a042734b37dcd26a6761591fa3d6550de8266454ac6
|
7
|
+
data.tar.gz: 1669e18b93b126bc3d382eba1e25de554fc23751f0d51635ec42c89fa8c05392d61901bb4fa510e27b1048133e96dba153e213cab3c3de90a2af1461cca88bf2
|
data/Gemfile.lock
CHANGED
data/lib/web_stat/config/web_stat.yml
CHANGED
@@ -1,14 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
1
|
+
development: &development
|
2
|
+
# Minimum number of characters to detect meta title
|
3
|
+
min_length_of_meta_title: 10
|
4
|
+
# Split regular expression for titles
|
5
|
+
regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
|
6
|
+
# User Agent
|
7
|
+
user_agent: "web_stat gem agent"
|
8
|
+
# Eyecatch image xpaths
|
9
|
+
eyecatch_image_xpaths:
|
10
|
+
- '/html/head/meta[@property="twitter:image"]/@content'
|
11
|
+
- '/html/head/meta[@property="og:image"]/@content'
|
12
|
+
- '//img[@class="attachment-post-thumbnail"]/@src'
|
13
|
+
- '//div[@id="content"]//img/@src'
|
14
|
+
- '//img/@src'
|
15
|
+
userdic: ""
|
16
|
+
use_chromedirver: false
|
17
|
+
test:
|
18
|
+
<<: *development
|
19
|
+
production:
|
20
|
+
<<: *development
|
21
|
+
use_chromedirver: true
|
data/lib/web_stat/configure.rb
CHANGED
@@ -3,31 +3,37 @@ module WebStat
|
|
3
3
|
class Configure
|
4
4
|
DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
self.get_custom_configure_path
|
15
|
-
else
|
16
|
-
self.get_default_configure_path
|
6
|
+
class << self
|
7
|
+
# Get yaml
|
8
|
+
def get
|
9
|
+
if defined? Rails
|
10
|
+
YAML.load_file(get_configure_path)[Rails.env]
|
11
|
+
else
|
12
|
+
YAML.load_file(get_configure_path)["production"]
|
13
|
+
end
|
17
14
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
File.join(
|
15
|
+
|
16
|
+
# Get configure path
|
17
|
+
def get_configure_path
|
18
|
+
if File.exists?(get_custom_configure_path)
|
19
|
+
get_custom_configure_path
|
20
|
+
else
|
21
|
+
get_default_configure_path
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Get default configure path
|
26
|
+
def get_default_configure_path
|
27
|
+
File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Get custom configure path
|
31
|
+
def get_custom_configure_path
|
32
|
+
if defined? Rails
|
33
|
+
File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
|
34
|
+
else
|
35
|
+
File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
|
36
|
+
end
|
31
37
|
end
|
32
38
|
end
|
33
39
|
end
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -47,6 +47,12 @@ module WebStat
|
|
47
47
|
break
|
48
48
|
end
|
49
49
|
end
|
50
|
+
if path.nil?
|
51
|
+
path = @nokogiri.at('body').xpath('//img').first.attr('src')
|
52
|
+
end
|
53
|
+
if path.nil?
|
54
|
+
Readability::Document.new(@nokogiri.at('body')).content
|
55
|
+
end
|
50
56
|
if ! path.nil? && path.match(/^\//)
|
51
57
|
"#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
52
58
|
else
|
@@ -57,7 +63,7 @@ module WebStat
|
|
57
63
|
# Get local path to save url
|
58
64
|
# @param [String] url
|
59
65
|
def save_local_path(url)
|
60
|
-
return nil if url.nil?
|
66
|
+
return nil if url.nil? || ! url.match(%{^http})
|
61
67
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
62
68
|
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
63
69
|
image = agent.get(url)
|
@@ -96,7 +102,7 @@ module WebStat
|
|
96
102
|
# Get the informations of @url
|
97
103
|
# @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
|
98
104
|
def stat(userdics: nil)
|
99
|
-
clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
|
105
|
+
clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s").gsub(URI.regexp, "")
|
100
106
|
language_code = CLD.detect_language(clean_content)[:code]
|
101
107
|
if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
|
102
108
|
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
|
data/lib/web_stat/final_redirect_url.rb
CHANGED
@@ -27,7 +27,11 @@ module WebStat
|
|
27
27
|
uri = URI.parse(url)
|
28
28
|
response = ::Net::HTTP.get_response(uri)
|
29
29
|
if response.class == Net::HTTPOK
|
30
|
-
|
30
|
+
if WebStat::Configure.get["use_chromedirver"]
|
31
|
+
return URI.parse(WebStat::WebDriverHelper.get_last_url(uri))
|
32
|
+
else
|
33
|
+
return URI.parse(uri)
|
34
|
+
end
|
31
35
|
else
|
32
36
|
redirect_location = response['location']
|
33
37
|
location_uri = URI.parse(redirect_location)
|
data/lib/web_stat/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|