web_stat 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/web_stat/fetch.rb +23 -7
- data/lib/web_stat/version.rb +1 -1
- data/lib/web_stat.rb +8 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32af8e84a325614bf546a76fdf603e592abbf4201a559857bc0964fe49226cc1
|
4
|
+
data.tar.gz: 8b63075dbd78d2e4b7368cb6a0a4aa2f8ae655cf7f85b526bd413a5fa210fbc0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45a0fe16c58679cbea01c56710324a461da3bd5c48caa1ca44cf981b2f5443b6ed06ce1db84577edac968e7651cd47ec998ef331dbf47c7306b243a1190fa76d
|
7
|
+
data.tar.gz: c391ce77bf97673c708821fa9067ba64e5b89fbf67d3490376341931f3b9ee6ecebf4a51e0abe69dcc827f7f35d7d6986af353ea2ab4f1d0945f4de2a7d34b2f
|
data/Gemfile.lock
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
@@ -53,9 +53,13 @@ module WebStat
|
|
53
53
|
def save_local_path(url)
|
54
54
|
return nil if url.nil?
|
55
55
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
57
|
+
image = agent.get(url)
|
58
|
+
File.open(tmp_file, "w+b") do |_file|
|
59
|
+
if image.class == Mechanize::File
|
60
|
+
_file.puts(image.body)
|
61
|
+
else
|
62
|
+
_file.puts(image.body_io.read)
|
59
63
|
end
|
60
64
|
end
|
61
65
|
tmp_file
|
@@ -68,18 +72,30 @@ module WebStat
|
|
68
72
|
# Enable to read Robots.txt
|
69
73
|
agent.robots = true
|
70
74
|
document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
71
|
-
document.
|
75
|
+
if document.class == Mechanize::File
|
76
|
+
document.body
|
77
|
+
else
|
78
|
+
document.body.encode('UTF-8', document.encoding)
|
79
|
+
end
|
72
80
|
end
|
73
81
|
|
74
82
|
# Get the information for @url
|
75
|
-
|
83
|
+
# @param userdics [Hash] Dictionary path for each language code, with "other" as the fallback key. Example: {"ja" => "/path/to/ja.dic", "other" => "/path/to/other.dic"}
|
84
|
+
def stat(userdics: nil)
|
76
85
|
clean_content = content.scrub('').gsub(/[\s ]/, "")
|
77
|
-
|
86
|
+
language_code = CLD.detect_language(clean_content)[:code]
|
87
|
+
if userdics && userdics.has_key?(language_code)
|
88
|
+
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
|
89
|
+
elsif userdics && userdics.has_key?("other")
|
90
|
+
tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics["other"])
|
91
|
+
else
|
92
|
+
tag = WebStat::Tag.new("#{title} #{content}", userdic: WebStat::Configure.get["userdic"])
|
93
|
+
end
|
78
94
|
{
|
79
95
|
title: title,
|
80
96
|
site_name: site_name,
|
81
97
|
content: clean_content,
|
82
|
-
language_code:
|
98
|
+
language_code: language_code,
|
83
99
|
url: @url,
|
84
100
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
85
101
|
tags: tag.nouns
|
data/lib/web_stat/version.rb
CHANGED
data/lib/web_stat.rb
CHANGED
@@ -21,24 +21,26 @@ require "web_stat/fetch/fetch_as_web"
|
|
21
21
|
module WebStat
|
22
22
|
class << self
|
23
23
|
# Get web page's stat by url
|
24
|
-
|
24
|
+
# @param userdics [Hash] Dictionary path for each language code, with "other" as the fallback key. Example: {"ja" => "/path/to/ja.dic", "other" => "/path/to/other.dic"}
|
25
|
+
def stat_by_web(url, userdics: nil)
|
25
26
|
web_stat = WebStat::FetchAsWeb.new(url)
|
26
|
-
web_stat.stat
|
27
|
+
web_stat.stat(userdics: userdics)
|
27
28
|
end
|
28
29
|
|
29
30
|
# Get web page's stat by url
|
30
31
|
# @param String url
|
31
|
-
def stat_by_url(url)
|
32
|
-
stat_by_web(url)
|
32
|
+
def stat_by_url(url, userdics: nil)
|
33
|
+
stat_by_web(url, userdics: userdics)
|
33
34
|
end
|
34
35
|
|
35
36
|
# Get web page's stat by html
|
36
37
|
# @param String html
|
37
38
|
# @param [String] url
|
38
|
-
|
39
|
+
# @param userdics [Hash] Dictionary path for each language code, with "other" as the fallback key. Example: {"ja" => "/path/to/ja.dic", "other" => "/path/to/other.dic"}
|
40
|
+
def stat_by_html(html, url=nil, userdics: nil)
|
39
41
|
web_stat = WebStat::FetchAsHtml.new(html)
|
40
42
|
web_stat.url = url unless url.nil?
|
41
|
-
web_stat.stat
|
43
|
+
web_stat.stat(userdics: userdics)
|
42
44
|
end
|
43
45
|
end
|
44
46
|
end
|