web_stat 0.2.6 → 0.2.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f508dae86c4163037aa8ca2dd299be2f98ad01a2b4e61cff8c20b7562cb91e4
4
- data.tar.gz: 2a4e70f10396243d5917decf75714302ded5959cebb517fca6d27bc3c45db5eb
3
+ metadata.gz: bcfaeb202076ea30cae6205877d0b0ad9060eb84116b84ef7bd3580cc4349aac
4
+ data.tar.gz: 98e586440c8f3aed29e38a003f6d1afc96b26b2ae4c21e43d47e1dfaf6aaa16a
5
5
  SHA512:
6
- metadata.gz: c6e4b4066d42003bb4e07ac9bfd4184e73dfea3a3d4e3d7a5b239c227f009a631e01cfb7b5d17c0240453d5f943b9e1f49c846d6d2167dd9ad2122372e709fe3
7
- data.tar.gz: 597cef76252fdcc77beefcc5d24e68625c93fa5293bed0d7a40f26847b0db6a0b981c83a8e6f3beafee93778c1ff2bf16899a29c6df1866bf6b929b5d29c2975
6
+ metadata.gz: dfbe6264256f08550ebb42244d92bd81c976dd5dd72a6a4dabffdf6a1366a8f010e8a20527d0bbe7a8334cdb028e86ecae0c63d6cc4368741abe815f1fcb3092
7
+ data.tar.gz: 9098d904f26dfdfe14c87352cb47ca3f0333f5424ef26d2cd8232c088c2cf8d7dcbeb07012a9294a5c4e4949b51fe71e94d0995465400d87cf7c037fd09ba978
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- gem "final_redirect_url", :git => "git@github.com:yubele/final_redirect_url.git"
3
+ gem "final_redirect_url", :git => "git@github.com:yubele/final_redirect_url"
4
4
 
5
5
  # Specify your gem's dependencies in web_stat.gemspec
6
6
  gemspec
@@ -1,13 +1,13 @@
1
1
  GIT
2
- remote: git@github.com:yubele/final_redirect_url.git
3
- revision: a8e8fb256f044606313d9726278516a0f24449d4
2
+ remote: git@github.com:yubele/final_redirect_url
3
+ revision: 45df878ec9495ebbfa06dc0a60cc5043c2519e16
4
4
  specs:
5
- final_redirect_url (0.1.0)
5
+ final_redirect_url (0.1.1)
6
6
 
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- web_stat (0.2.6)
10
+ web_stat (0.2.11)
11
11
  bundler (>= 2.0.2)
12
12
  cld (>= 0.8.0)
13
13
  mechanize (>= 2.7)
@@ -27,13 +27,13 @@ GEM
27
27
  connection_pool (2.2.2)
28
28
  crack (0.4.3)
29
29
  safe_yaml (~> 1.0.0)
30
- crass (1.0.5)
30
+ crass (1.0.6)
31
31
  diff-lcs (1.3)
32
32
  domain_name (0.5.20190701)
33
33
  unf (>= 0.0.5, < 1.0.0)
34
- ffi (1.11.3)
34
+ ffi (1.12.2)
35
35
  guess_html_encoding (0.0.11)
36
- hashdiff (1.0.0)
36
+ hashdiff (1.0.1)
37
37
  http-cookie (1.0.3)
38
38
  domain_name (~> 0.5)
39
39
  mechanize (2.7.6)
@@ -45,39 +45,39 @@ GEM
45
45
  nokogiri (~> 1.6)
46
46
  ntlm-http (~> 0.1, >= 0.1.1)
47
47
  webrobots (>= 0.0.9, < 0.2)
48
- method_source (0.9.2)
48
+ method_source (1.0.0)
49
49
  mime-types (3.3.1)
50
50
  mime-types-data (~> 3.2015)
51
- mime-types-data (3.2019.1009)
51
+ mime-types-data (3.2020.0425)
52
52
  mini_portile2 (2.4.0)
53
- natto (1.1.2)
53
+ natto (1.2.0)
54
54
  ffi (>= 1.9.0)
55
55
  net-http-digest_auth (1.4.1)
56
- net-http-persistent (3.1.0)
56
+ net-http-persistent (4.0.0)
57
57
  connection_pool (~> 2.2)
58
- nokogiri (1.10.7)
58
+ nokogiri (1.10.9)
59
59
  mini_portile2 (~> 2.4.0)
60
60
  nokogumbo (2.0.2)
61
61
  nokogiri (~> 1.8, >= 1.8.4)
62
62
  ntlm-http (0.1.1)
63
- pry (0.12.2)
64
- coderay (~> 1.1.0)
65
- method_source (~> 0.9.0)
66
- public_suffix (4.0.2)
63
+ pry (0.13.1)
64
+ coderay (~> 1.1)
65
+ method_source (~> 1.0)
66
+ public_suffix (4.0.4)
67
67
  rake (13.0.1)
68
68
  rspec (3.9.0)
69
69
  rspec-core (~> 3.9.0)
70
70
  rspec-expectations (~> 3.9.0)
71
71
  rspec-mocks (~> 3.9.0)
72
- rspec-core (3.9.1)
73
- rspec-support (~> 3.9.1)
74
- rspec-expectations (3.9.0)
72
+ rspec-core (3.9.2)
73
+ rspec-support (~> 3.9.3)
74
+ rspec-expectations (3.9.1)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.9.0)
77
77
  rspec-mocks (3.9.1)
78
78
  diff-lcs (>= 1.2.0, < 2.0)
79
79
  rspec-support (~> 3.9.0)
80
- rspec-support (3.9.2)
80
+ rspec-support (3.9.3)
81
81
  ruby-readability (0.7.0)
82
82
  guess_html_encoding (>= 0.0.4)
83
83
  nokogiri (>= 1.6.0)
@@ -88,8 +88,8 @@ GEM
88
88
  nokogumbo (~> 2.0)
89
89
  unf (0.1.4)
90
90
  unf_ext
91
- unf_ext (0.0.7.6)
92
- webmock (3.7.6)
91
+ unf_ext (0.0.7.7)
92
+ webmock (3.8.3)
93
93
  addressable (>= 2.3.6)
94
94
  crack (>= 0.3.2)
95
95
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -107,4 +107,4 @@ DEPENDENCIES
107
107
  webmock (>= 3.6.0)
108
108
 
109
109
  BUNDLED WITH
110
- 2.1.2
110
+ 2.1.4
data/README.md CHANGED
@@ -1,3 +1,10 @@
1
+ # !!!!! Precautions when using with Rails !!!!!
2
+
3
+ Add this line to your Gemfile.
4
+ ```
5
+ gem "final_redirect_url", :git => "git@github.com:yubele/final_redirect_url"
6
+ ```
7
+
1
8
  # WebStat
2
9
 
3
10
  Fetch the web pages and stat.
@@ -18,6 +25,10 @@ Fetch the web pages and stat.
18
25
  - "rake", "~> 10.0"
19
26
  - "rspec", "~> 3.0"
20
27
 
28
+ ### Install mecab
29
+
30
+ $ sudo apt install mecab-ipadic-utf8 libmecab
31
+
21
32
  ## Installation
22
33
 
23
34
  Add this line to your application's Gemfile:
@@ -21,24 +21,26 @@ require "web_stat/fetch/fetch_as_web"
21
21
  module WebStat
22
22
  class << self
23
23
  # Get web page's stat by url
24
- def stat_by_web(url)
24
+ # @param [Hash] userdics User dictionary path for each language code (string keys). Example: {"ja" => "/path/to/ja.dic", "other" => "/path/to/other.dic"}
25
+ def stat_by_web(url, userdics: nil)
25
26
  web_stat = WebStat::FetchAsWeb.new(url)
26
- web_stat.stat
27
+ web_stat.stat(userdics: userdics)
27
28
  end
28
29
 
29
30
  # Get web page's stat by url
30
31
  # @param String url
31
- def stat_by_url(url)
32
- stat_by_web(url)
32
+ def stat_by_url(url, userdics: nil)
33
+ stat_by_web(url, userdics: userdics)
33
34
  end
34
35
 
35
36
  # Get web page's stat by html
36
37
  # @param String html
37
38
  # @param [String] url
38
- def stat_by_html(html, url=nil)
39
+ # @param [Hash] userdics User dictionary path for each language code (string keys). Example: {"ja" => "/path/to/ja.dic", "other" => "/path/to/other.dic"}
40
+ def stat_by_html(html, url=nil, userdics: nil)
39
41
  web_stat = WebStat::FetchAsHtml.new(html)
40
42
  web_stat.url = url unless url.nil?
41
- web_stat.stat
43
+ web_stat.stat(userdics: userdics)
42
44
  end
43
45
  end
44
46
  end
@@ -1,6 +1,6 @@
1
1
  module WebStat
2
2
  class Fetch
3
- attr_accessor :url, :html, :nokogiri, :userdic
3
+ attr_accessor :url, :html, :nokogiri, :userdic, :status
4
4
 
5
5
  # Get title
6
6
  # @return [String] title
@@ -13,7 +13,11 @@ module WebStat
13
13
  rescue
14
14
  title = @nokogiri.title
15
15
  end
16
- title.strip
16
+ if title.nil?
17
+ "No Title"
18
+ else
19
+ title.strip
20
+ end
17
21
  end
18
22
 
19
23
  # Get name of domain
@@ -23,7 +27,11 @@ module WebStat
23
27
  rescue
24
28
  site_name = @nokogiri.title
25
29
  end
26
- site_name.strip
30
+ if site_name.nil?
31
+ "No Sitename"
32
+ else
33
+ site_name.strip
34
+ end
27
35
  end
28
36
  []
29
37
  # Get main section
@@ -41,7 +49,7 @@ module WebStat
41
49
  break
42
50
  end
43
51
  end
44
- if path.match(/^\//)
52
+ if ! path.nil? && path.match(/^\//)
45
53
  "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
46
54
  else
47
55
  path
@@ -53,9 +61,13 @@ module WebStat
53
61
  def save_local_path(url)
54
62
  return nil if url.nil?
55
63
  tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
56
- URI.open(original_url(url)) do |remote_file|
57
- File.open(tmp_file, "w+b") do |_file|
58
- _file.puts(remote_file.read)
64
+ agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
65
+ image = agent.get(url)
66
+ File.open(tmp_file, "w+b") do |_file|
67
+ if image.class == Mechanize::File
68
+ _file.puts(image.body)
69
+ else
70
+ _file.puts(image.body_io.read)
59
71
  end
60
72
  end
61
73
  tmp_file
@@ -63,23 +75,44 @@ module WebStat
63
75
 
64
76
  # Get url
65
77
  # @param [String] url
78
+ # @return [String] body
66
79
  def get_url(url)
67
80
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
68
81
  # Enable to read Robots.txt
69
82
  agent.robots = true
70
- document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
71
- document.body.encode('UTF-8', document.encoding)
83
+ begin
84
+ document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
85
+ if document.class == Mechanize::File
86
+ body = document.body
87
+ else
88
+ body = document.body.encode('UTF-8', document.encoding)
89
+ end
90
+ @status = document.code
91
+ rescue Mechanize::ResponseCodeError => e
92
+ body = e.page.body
93
+ @status = e.page.code
94
+ end
95
+ body
72
96
  end
73
97
 
74
98
  # Get the information of @url
75
- def stat
76
- clean_content = content.scrub('').gsub(/[\s ]/, "")
77
- tag = WebStat::Tag.new(content, userdic: WebStat::Configure.get["userdic"])
99
+ # @param [Hash] userdics User dictionary path for each language code (string keys). Example: {"ja" => "/path/to/ja.dic", "other" => "/path/to/other.dic"}
100
+ def stat(userdics: nil)
101
+ clean_content = content.scrub('').gsub(/[\n\t\r ]/, "").gsub(/\s{2,}/, "\s")
102
+ language_code = CLD.detect_language(clean_content)[:code]
103
+ if userdics && userdics.has_key?(language_code) && File.exists?(userdics[language_code])
104
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics[language_code])
105
+ elsif userdics && userdics.has_key?("other") && File.exists?(userdics["other"])
106
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: userdics["other"])
107
+ else
108
+ tag = WebStat::Tag.new("#{title} #{content}", userdic: WebStat::Configure.get["userdic"])
109
+ end
78
110
  {
79
111
  title: title,
80
112
  site_name: site_name,
81
113
  content: clean_content,
82
- language_code: CLD.detect_language(clean_content)[:code],
114
+ language_code: language_code,
115
+ status: @status,
83
116
  url: @url,
84
117
  eyecatch_image_path: save_local_path(eyecatch_image_path),
85
118
  tags: tag.nouns
@@ -91,8 +124,9 @@ module WebStat
91
124
  # Get original url
92
125
  # @param [String] url
93
126
  def original_url(url)
94
- if url.match(/^http/)
95
- FinalRedirectUrl.final_redirect_url(url)
127
+ last_url = FinalRedirectUrl.final_redirect_url(url)
128
+ unless last_url.nil? || last_url.scrub('').empty?
129
+ last_url
96
130
  else
97
131
  url
98
132
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.6"
3
- end
2
+ VERSION = "0.2.11"
3
+ end
@@ -59,9 +59,10 @@ end
59
59
 
60
60
  # Set webmock
61
61
  WebStatTestHelper.scheme_and_files.each do |url|
62
+ status = [200, 404, 503].sample
62
63
  WebMock.stub_request(:get, url)
63
64
  .to_return(
64
- status: 200,
65
+ status: status,
65
66
  body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
66
67
  headers: {content_type: 'application/html; charset=utf-8'})
67
68
  end
@@ -83,6 +83,7 @@ RSpec.describe WebStat::Fetch do
83
83
  expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
84
84
  expect(web_stat[:site_name]).to eq "newsdict.blog"
85
85
  expect(web_stat[:content]).not_to eq nil
86
+ expect(web_stat[:status]).to eq("200").or eq("404").or eq("503")
86
87
  expect(Sanitize.clean(web_stat[:content]).length).to eq web_stat[:content].length
87
88
  expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
88
89
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-05 00:00:00.000000000 Z
11
+ date: 2020-06-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -224,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
224
  - !ruby/object:Gem::Version
225
225
  version: '0'
226
226
  requirements: []
227
- rubygems_version: 3.1.2
227
+ rubygems_version: 3.0.3
228
228
  signing_key:
229
229
  specification_version: 4
230
230
  summary: Get the status of the web pages.