web_stat 0.3.2 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/web_stat/config/web_stat.yml +21 -15
- data/lib/web_stat/configure.rb +30 -24
- data/lib/web_stat/fetch.rb +5 -2
- data/lib/web_stat/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5304507f8ce01a6a98717756927366005db0eceb89b3d22f29e4283c6eedb4d7
|
4
|
+
data.tar.gz: f561221644f98831867c2d853201a0312dcb75674ca4ae247b319b4f2a1ba085
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bec01871973e80dc46b9246a264791213b3bdd4cb20693fc0d51cc28c2d9bba28b40af65322dc2b0c950e9bb5aedd288ff8fd771adfbd5b2aee1564b98888cb0
|
7
|
+
data.tar.gz: 5cfce031ad779f347fae362ccff2deadb3711e4f9fcc06cafcd232b86e8c18aff63e40c430a330cfdf00d398aa5257457c5dc9f3bee492478359d3801e217f08
|
data/Gemfile.lock
CHANGED
data/lib/web_stat/config/web_stat.yml
CHANGED
@@ -1,15 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
development: &development
|
2
|
+
# Minimum number of characters to detect meta title
|
3
|
+
min_length_of_meta_title: 10
|
4
|
+
# Split regular expression for titles
|
5
|
+
regex_to_sprit_title: '\||-|:|||:|〜|\~| – '
|
6
|
+
# User Agent
|
7
|
+
user_agent: "web_stat gem agent"
|
8
|
+
# Eyecatch image xpaths
|
9
|
+
eyecatch_image_xpaths:
|
10
|
+
- '/html/head/meta[@property="twitter:image"]/@content'
|
11
|
+
- '/html/head/meta[@property="og:image"]/@content'
|
12
|
+
- '//img[@class="attachment-post-thumbnail"]/@src'
|
13
|
+
- '//div[@id="content"]//img/@src'
|
14
|
+
- '//img/@src'
|
15
|
+
userdic: ""
|
16
|
+
use_chromedirver: false
|
17
|
+
test:
|
18
|
+
<<: *development
|
19
|
+
production:
|
20
|
+
<<: *development
|
21
|
+
use_chromedirver: true
|
data/lib/web_stat/configure.rb
CHANGED
@@ -3,31 +3,37 @@ module WebStat
|
|
3
3
|
class Configure
|
4
4
|
DEFAULT_CONFIG_FILE_PATH = 'config/web_stat.yml'
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
self.get_custom_configure_path
|
15
|
-
else
|
16
|
-
self.get_default_configure_path
|
6
|
+
class << self
|
7
|
+
# Get yaml
|
8
|
+
def get
|
9
|
+
if defined? Rails
|
10
|
+
YAML.load_file(get_configure_path)[Rails.env]
|
11
|
+
else
|
12
|
+
YAML.load_file(get_configure_path)["production"]
|
13
|
+
end
|
17
14
|
end
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
File.join(
|
15
|
+
|
16
|
+
# Get configure path
|
17
|
+
def get_configure_path
|
18
|
+
if File.exists?(get_custom_configure_path)
|
19
|
+
get_custom_configure_path
|
20
|
+
else
|
21
|
+
get_default_configure_path
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Get default configure path
|
26
|
+
def get_default_configure_path
|
27
|
+
File.join(File.expand_path("../", __FILE__), DEFAULT_CONFIG_FILE_PATH)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Get custom configure path
|
31
|
+
def get_custom_configure_path
|
32
|
+
if defined? Rails
|
33
|
+
File.join(Rails.root, DEFAULT_CONFIG_FILE_PATH)
|
34
|
+
else
|
35
|
+
File.join(Bundler.root, DEFAULT_CONFIG_FILE_PATH)
|
36
|
+
end
|
31
37
|
end
|
32
38
|
end
|
33
39
|
end
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -47,6 +47,9 @@ module WebStat
|
|
47
47
|
break
|
48
48
|
end
|
49
49
|
end
|
50
|
+
if path.nil? || path.empty?
|
51
|
+
path = @nokogiri.at('body').xpath('//img').first.attr('src')
|
52
|
+
end
|
50
53
|
if ! path.nil? && path.match(/^\//)
|
51
54
|
"#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
52
55
|
else
|
@@ -57,7 +60,7 @@ module WebStat
|
|
57
60
|
# Get local path to save url
|
58
61
|
# @param [String] url
|
59
62
|
def save_local_path(url)
|
60
|
-
return nil if url.nil?
|
63
|
+
return nil if url.nil? || ! url.match(%{^http})
|
61
64
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
62
65
|
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
63
66
|
image = agent.get(url)
|
@@ -96,7 +99,7 @@ module WebStat
|
|
96
99
|
# Get the informations of @url
|
97
100
|
# @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
|
98
101
|
def stat(userdics: nil)
|
99
|
-
clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
|
102
|
+
clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s").gsub(URI.regexp, "")
|
100
103
|
language_code = CLD.detect_language(clean_content)[:code]
|
101
104
|
if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
|
102
105
|
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
|
data/lib/web_stat/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|