web_stat 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f508dae86c4163037aa8ca2dd299be2f98ad01a2b4e61cff8c20b7562cb91e4
4
- data.tar.gz: 2a4e70f10396243d5917decf75714302ded5959cebb517fca6d27bc3c45db5eb
3
+ metadata.gz: 32af8e84a325614bf546a76fdf603e592abbf4201a559857bc0964fe49226cc1
4
+ data.tar.gz: 8b63075dbd78d2e4b7368cb6a0a4aa2f8ae655cf7f85b526bd413a5fa210fbc0
5
5
  SHA512:
6
- metadata.gz: c6e4b4066d42003bb4e07ac9bfd4184e73dfea3a3d4e3d7a5b239c227f009a631e01cfb7b5d17c0240453d5f943b9e1f49c846d6d2167dd9ad2122372e709fe3
7
- data.tar.gz: 597cef76252fdcc77beefcc5d24e68625c93fa5293bed0d7a40f26847b0db6a0b981c83a8e6f3beafee93778c1ff2bf16899a29c6df1866bf6b929b5d29c2975
6
+ metadata.gz: 45a0fe16c58679cbea01c56710324a461da3bd5c48caa1ca44cf981b2f5443b6ed06ce1db84577edac968e7651cd47ec998ef331dbf47c7306b243a1190fa76d
7
+ data.tar.gz: c391ce77bf97673c708821fa9067ba64e5b89fbf67d3490376341931f3b9ee6ecebf4a51e0abe69dcc827f7f35d7d6986af353ea2ab4f1d0945f4de2a7d34b2f
data/Gemfile.lock CHANGED
@@ -7,7 +7,7 @@ GIT
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- web_stat (0.2.6)
10
+ web_stat (0.2.7)
11
11
  bundler (>= 2.0.2)
12
12
  cld (>= 0.8.0)
13
13
  mechanize (>= 2.7)
@@ -53,9 +53,13 @@ module WebStat
53
53
  def save_local_path(url)
54
54
  return nil if url.nil?
55
55
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
56
- URI.open(original_url(url)) do |remote_file|
57
- File.open(tmp_file, "w+b") do |_file|
58
- _file.puts(remote_file.read)
56
+ agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
57
+ image = agent.get(url)
58
+ File.open(tmp_file, "w+b") do |_file|
59
+ if image.class == Mechanize::File
60
+ _file.puts(image.body)
61
+ else
62
+ _file.puts(image.body_io.read)
59
63
  end
60
64
  end
61
65
  tmp_file
@@ -68,18 +72,30 @@ module WebStat
68
72
  # Enable to read Robots.txt
69
73
  agent.robots = true
70
74
  document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
71
- document.body.encode('UTF-8', document.encoding)
75
+ if document.class == Mechanize::File
76
+ document.body
77
+ else
78
+ document.body.encode('UTF-8', document.encoding)
79
+ end
72
80
  end
73
81
 
74
82
  # Get the information of @url
75
- def stat
83
+ # @param [Hash] userdics Dictionary path for each language code, keyed by String. Example: {"ja" => "/***/**.dic", "other" => "/***/***.dic"}
84
+ def stat(userdics: nil)
76
85
  clean_content = content.scrub('').gsub(/[\s ]/, "")
77
- tag = WebStat::Tag.new(content, userdic: WebStat::Configure.get["userdic"])
86
+ language_code = CLD.detect_language(clean_content)[:code]
87
+ if userdics && userdics.has_key?(language_code)
88
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
89
+ elsif userdics && userdics.has_key?("other")
90
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics["other"])
91
+ else
92
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: WebStat::Configure.get["userdic"])
93
+ end
78
94
  {
79
95
  title: title,
80
96
  site_name: site_name,
81
97
  content: clean_content,
82
- language_code: CLD.detect_language(clean_content)[:code],
98
+ language_code: language_code,
83
99
  url: @url,
84
100
  eyecatch_image_path: save_local_path(eyecatch_image_path),
85
101
  tags: tag.nouns
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.6"
2
+ VERSION = "0.2.7"
3
3
  end
data/lib/web_stat.rb CHANGED
@@ -21,24 +21,26 @@ require "web_stat/fetch/fetch_as_web"
21
21
  module WebStat
22
22
  class << self
23
23
  # Get web page's stat by url
24
- def stat_by_web(url)
24
+ # @param [Hash] userdics Dictionary path for each language code, keyed by String. Example: {"ja" => "/***/**.dic", "other" => "/***/***.dic"}
25
+ def stat_by_web(url, userdics: nil)
25
26
  web_stat = WebStat::FetchAsWeb.new(url)
26
- web_stat.stat
27
+ web_stat.stat(userdics: userdics)
27
28
  end
28
29
 
29
30
  # Get web page's stat by url
30
31
  # @param String url
31
- def stat_by_url(url)
32
- stat_by_web(url)
32
+ def stat_by_url(url, userdics: nil)
33
+ stat_by_web(url, userdics: userdics)
33
34
  end
34
35
 
35
36
  # Get web page's stat by html
36
37
  # @param String html
37
38
  # @param [String] url
38
- def stat_by_html(html, url=nil)
39
+ # @param [Hash] userdics Dictionary path for each language code, keyed by String. Example: {"ja" => "/***/**.dic", "other" => "/***/***.dic"}
40
+ def stat_by_html(html, url=nil, userdics: nil)
39
41
  web_stat = WebStat::FetchAsHtml.new(html)
40
42
  web_stat.url = url unless url.nil?
41
- web_stat.stat
43
+ web_stat.stat(userdics: userdics)
42
44
  end
43
45
  end
44
46
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe