web_stat 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/web_stat/fetch.rb +23 -7
- data/lib/web_stat/version.rb +1 -1
- data/lib/web_stat.rb +8 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32af8e84a325614bf546a76fdf603e592abbf4201a559857bc0964fe49226cc1
|
4
|
+
data.tar.gz: 8b63075dbd78d2e4b7368cb6a0a4aa2f8ae655cf7f85b526bd413a5fa210fbc0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45a0fe16c58679cbea01c56710324a461da3bd5c48caa1ca44cf981b2f5443b6ed06ce1db84577edac968e7651cd47ec998ef331dbf47c7306b243a1190fa76d
|
7
|
+
data.tar.gz: c391ce77bf97673c708821fa9067ba64e5b89fbf67d3490376341931f3b9ee6ecebf4a51e0abe69dcc827f7f35d7d6986af353ea2ab4f1d0945f4de2a7d34b2f
|
data/Gemfile.lock
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
@@ -53,9 +53,13 @@ module WebStat
|
|
53
53
|
def save_local_path(url)
|
54
54
|
return nil if url.nil?
|
55
55
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
57
|
+
image = agent.get(url)
|
58
|
+
File.open(tmp_file, "w+b") do |_file|
|
59
|
+
if image.class == Mechanize::File
|
60
|
+
_file.puts(image.body)
|
61
|
+
else
|
62
|
+
_file.puts(image.body_io.read)
|
59
63
|
end
|
60
64
|
end
|
61
65
|
tmp_file
|
@@ -68,18 +72,30 @@ module WebStat
|
|
68
72
|
# Enable to read Robots.txt
|
69
73
|
agent.robots = true
|
70
74
|
document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
71
|
-
document.
|
75
|
+
if document.class == Mechanize::File
|
76
|
+
document.body
|
77
|
+
else
|
78
|
+
document.body.encode('UTF-8', document.encoding)
|
79
|
+
end
|
72
80
|
end
|
73
81
|
|
74
82
|
# Get information about @url
|
75
|
-
|
83
|
+
# @param [Hash] userdics Dictionary path for each language code (String keys); "other" is used when the detected language has no entry. Example: {"ja" => "/***/**.dic", "other" => "/***/***.dic"}
|
84
|
+
def stat(userdics: nil)
|
76
85
|
clean_content = content.scrub('').gsub(/[\s ]/, "")
|
77
|
-
|
86
|
+
language_code = CLD.detect_language(clean_content)[:code]
|
87
|
+
if userdics && userdics.has_key?(language_code)
|
88
|
+
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
|
89
|
+
elsif userdics && userdics.has_key?("other")
|
90
|
+
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics["other"])
|
91
|
+
else
|
92
|
+
tag = WebStat::Tag.new("#{title} #{content}", userdic: WebStat::Configure.get["userdic"])
|
93
|
+
end
|
78
94
|
{
|
79
95
|
title: title,
|
80
96
|
site_name: site_name,
|
81
97
|
content: clean_content,
|
82
|
-
language_code:
|
98
|
+
language_code: language_code,
|
83
99
|
url: @url,
|
84
100
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
85
101
|
tags: tag.nouns
|
data/lib/web_stat/version.rb
CHANGED
data/lib/web_stat.rb
CHANGED
@@ -21,24 +21,26 @@ require "web_stat/fetch/fetch_as_web"
|
|
21
21
|
module WebStat
|
22
22
|
class << self
|
23
23
|
# Get web page's stat by url
|
24
|
-
|
24
|
+
# @param [Hash] userdics Dictionary path for each language code (String keys); "other" is used when the detected language has no entry. Example: {"ja" => "/***/**.dic", "other" => "/***/***.dic"}
|
25
|
+
def stat_by_web(url, userdics: nil)
|
25
26
|
web_stat = WebStat::FetchAsWeb.new(url)
|
26
|
-
web_stat.stat
|
27
|
+
web_stat.stat(userdics: userdics)
|
27
28
|
end
|
28
29
|
|
29
30
|
# Get web page's stat by url
|
30
31
|
# @param String url
|
31
|
-
def stat_by_url(url)
|
32
|
-
stat_by_web(url)
|
32
|
+
def stat_by_url(url, userdics: nil)
|
33
|
+
stat_by_web(url, userdics: userdics)
|
33
34
|
end
|
34
35
|
|
35
36
|
# Get web page's stat by html
|
36
37
|
# @param String html
|
37
38
|
# @param [String] url
|
38
|
-
|
39
|
+
# @param [Hash] userdics Dictionary path for each language code (String keys); "other" is used when the detected language has no entry. Example: {"ja" => "/***/**.dic", "other" => "/***/***.dic"}
|
40
|
+
def stat_by_html(html, url=nil, userdics: nil)
|
39
41
|
web_stat = WebStat::FetchAsHtml.new(html)
|
40
42
|
web_stat.url = url unless url.nil?
|
41
|
-
web_stat.stat
|
43
|
+
web_stat.stat(userdics: userdics)
|
42
44
|
end
|
43
45
|
end
|
44
46
|
end
|