web_stat 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f508dae86c4163037aa8ca2dd299be2f98ad01a2b4e61cff8c20b7562cb91e4
4
- data.tar.gz: 2a4e70f10396243d5917decf75714302ded5959cebb517fca6d27bc3c45db5eb
3
+ metadata.gz: 32af8e84a325614bf546a76fdf603e592abbf4201a559857bc0964fe49226cc1
4
+ data.tar.gz: 8b63075dbd78d2e4b7368cb6a0a4aa2f8ae655cf7f85b526bd413a5fa210fbc0
5
5
  SHA512:
6
- metadata.gz: c6e4b4066d42003bb4e07ac9bfd4184e73dfea3a3d4e3d7a5b239c227f009a631e01cfb7b5d17c0240453d5f943b9e1f49c846d6d2167dd9ad2122372e709fe3
7
- data.tar.gz: 597cef76252fdcc77beefcc5d24e68625c93fa5293bed0d7a40f26847b0db6a0b981c83a8e6f3beafee93778c1ff2bf16899a29c6df1866bf6b929b5d29c2975
6
+ metadata.gz: 45a0fe16c58679cbea01c56710324a461da3bd5c48caa1ca44cf981b2f5443b6ed06ce1db84577edac968e7651cd47ec998ef331dbf47c7306b243a1190fa76d
7
+ data.tar.gz: c391ce77bf97673c708821fa9067ba64e5b89fbf67d3490376341931f3b9ee6ecebf4a51e0abe69dcc827f7f35d7d6986af353ea2ab4f1d0945f4de2a7d34b2f
data/Gemfile.lock CHANGED
@@ -7,7 +7,7 @@ GIT
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- web_stat (0.2.6)
10
+ web_stat (0.2.7)
11
11
  bundler (>= 2.0.2)
12
12
  cld (>= 0.8.0)
13
13
  mechanize (>= 2.7)
@@ -53,9 +53,13 @@ module WebStat
53
53
  def save_local_path(url)
54
54
  return nil if url.nil?
55
55
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
56
- URI.open(original_url(url)) do |remote_file|
57
- File.open(tmp_file, "w+b") do |_file|
58
- _file.puts(remote_file.read)
56
+ agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
57
+ image = agent.get(url)
58
+ File.open(tmp_file, "w+b") do |_file|
59
+ if image.class == Mechanize::File
60
+ _file.puts(image.body)
61
+ else
62
+ _file.puts(image.body_io.read)
59
63
  end
60
64
  end
61
65
  tmp_file
@@ -68,18 +72,30 @@ module WebStat
68
72
  # Enable to read Robots.txt
69
73
  agent.robots = true
70
74
  document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
71
- document.body.encode('UTF-8', document.encoding)
75
+ if document.class == Mechanize::File
76
+ document.body
77
+ else
78
+ document.body.encode('UTF-8', document.encoding)
79
+ end
72
80
  end
73
81
 
74
82
  # Get the informations of @url
75
- def stat
83
+ # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
84
+ def stat(userdics: nil)
76
85
  clean_content = content.scrub('').gsub(/[\s ]/, "")
77
- tag = WebStat::Tag.new(content, userdic: WebStat::Configure.get["userdic"])
86
+ language_code = CLD.detect_language(clean_content)[:code]
87
+ if userdics && userdics.has_key?(language_code)
88
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
89
+ elsif userdics && userdics.has_key?("other")
90
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics["other"])
91
+ else
92
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: WebStat::Configure.get["userdic"])
93
+ end
78
94
  {
79
95
  title: title,
80
96
  site_name: site_name,
81
97
  content: clean_content,
82
- language_code: CLD.detect_language(clean_content)[:code],
98
+ language_code: language_code,
83
99
  url: @url,
84
100
  eyecatch_image_path: save_local_path(eyecatch_image_path),
85
101
  tags: tag.nouns
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.6"
2
+ VERSION = "0.2.7"
3
3
  end
data/lib/web_stat.rb CHANGED
@@ -21,24 +21,26 @@ require "web_stat/fetch/fetch_as_web"
21
21
  module WebStat
22
22
  class << self
23
23
  # Get web page's stat by url
24
- def stat_by_web(url)
24
+ # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
25
+ def stat_by_web(url, userdics: nil)
25
26
  web_stat = WebStat::FetchAsWeb.new(url)
26
- web_stat.stat
27
+ web_stat.stat(userdics: userdics)
27
28
  end
28
29
 
29
30
  # Get web page's stat by url
30
31
  # @param String url
31
- def stat_by_url(url)
32
- stat_by_web(url)
32
+ def stat_by_url(url, userdics: nil)
33
+ stat_by_web(url, userdics: userdics)
33
34
  end
34
35
 
35
36
  # Get web page's stat by html
36
37
  # @param String html
37
38
  # @param [String] url
38
- def stat_by_html(html, url=nil)
39
+ # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
40
+ def stat_by_html(html, url=nil, userdics: nil)
39
41
  web_stat = WebStat::FetchAsHtml.new(html)
40
42
  web_stat.url = url unless url.nil?
41
- web_stat.stat
43
+ web_stat.stat(userdics: userdics)
42
44
  end
43
45
  end
44
46
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe