web_stat 0.3.2 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/web_stat/config/web_stat.yml +21 -15
- data/lib/web_stat/configure.rb +30 -24
- data/lib/web_stat/fetch.rb +5 -2
- data/lib/web_stat/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5304507f8ce01a6a98717756927366005db0eceb89b3d22f29e4283c6eedb4d7
|
4
|
+
data.tar.gz: f561221644f98831867c2d853201a0312dcb75674ca4ae247b319b4f2a1ba085
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bec01871973e80dc46b9246a264791213b3bdd4cb20693fc0d51cc28c2d9bba28b40af65322dc2b0c950e9bb5aedd288ff8fd771adfbd5b2aee1564b98888cb0
|
7
|
+
data.tar.gz: 5cfce031ad779f347fae362ccff2deadb3711e4f9fcc06cafcd232b86e8c18aff63e40c430a330cfdf00d398aa5257457c5dc9f3bee492478359d3801e217f08
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
development: &development
|
2
|
+
# Minimum number of characters to detect meta title
|
3
|
+
min_length_of_meta_title: 10
|
4
|
+
# Split regular expression for titles
|
5
|
+
regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
|
6
|
+
# User Agent
|
7
|
+
user_agent: "web_stat gem agent"
|
8
|
+
# Eyecatch image xpaths
|
9
|
+
eyecatch_image_xpaths:
|
10
|
+
- '/html/head/meta[@property="twitter:image"]/@content'
|
11
|
+
- '/html/head/meta[@property="og:image"]/@content'
|
12
|
+
- '//img[@class="attachment-post-thumbnail"]/@src'
|
13
|
+
- '//div[@id="content"]//img/@src'
|
14
|
+
- '//img/@src'
|
15
|
+
userdic: ""
|
16
|
+
use_chromedirver: false
|
17
|
+
test:
|
18
|
+
<<: *development
|
19
|
+
production:
|
20
|
+
<<: *development
|
21
|
+
use_chromedirver: true
|
data/lib/web_stat/configure.rb
CHANGED
@@ -3,31 +3,37 @@ module WebStat
|
|
3
3
|
class Configure
|
4
4
|
DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
self.get_custom_configure_path
|
15
|
-
else
|
16
|
-
self.get_default_configure_path
|
6
|
+
class << self
|
7
|
+
# Get yaml
|
8
|
+
def get
|
9
|
+
if defined? Rails
|
10
|
+
YAML.load_file(get_configure_path)[Rails.env]
|
11
|
+
else
|
12
|
+
YAML.load_file(get_configure_path)["production"]
|
13
|
+
end
|
17
14
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
File.join(
|
15
|
+
|
16
|
+
# Get configure path
|
17
|
+
def get_configure_path
|
18
|
+
if File.exists?(get_custom_configure_path)
|
19
|
+
get_custom_configure_path
|
20
|
+
else
|
21
|
+
get_default_configure_path
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Get default configure path
|
26
|
+
def get_default_configure_path
|
27
|
+
File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Get custom configure path
|
31
|
+
def get_custom_configure_path
|
32
|
+
if defined? Rails
|
33
|
+
File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
|
34
|
+
else
|
35
|
+
File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
|
36
|
+
end
|
31
37
|
end
|
32
38
|
end
|
33
39
|
end
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -47,6 +47,9 @@ module WebStat
|
|
47
47
|
break
|
48
48
|
end
|
49
49
|
end
|
50
|
+
if path.nil? || path.empty?
|
51
|
+
path = @nokogiri.at('body').xpath('//img').first.attr('src')
|
52
|
+
end
|
50
53
|
if ! path.nil? && path.match(/^\//)
|
51
54
|
"#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
52
55
|
else
|
@@ -57,7 +60,7 @@ module WebStat
|
|
57
60
|
# Get local path to save url
|
58
61
|
# @param [String] url
|
59
62
|
def save_local_path(url)
|
60
|
-
return nil if url.nil?
|
63
|
+
return nil if url.nil? || ! url.match(%{^http})
|
61
64
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
62
65
|
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
63
66
|
image = agent.get(url)
|
@@ -96,7 +99,7 @@ module WebStat
|
|
96
99
|
# Get the informations of @url
|
97
100
|
# @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
|
98
101
|
def stat(userdics: nil)
|
99
|
-
clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
|
102
|
+
clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s").gsub(URI.regexp, "")
|
100
103
|
language_code = CLD.detect_language(clean_content)[:code]
|
101
104
|
if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
|
102
105
|
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
|
data/lib/web_stat/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|