web_stat 0.2.9 → 0.2.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6851adcf7a19adb66dbfc7f241cd2b659878ef954836e5656d42d3540a117fd8
4
- data.tar.gz: '013188fbf26bd0ccce741afc7500060e40a33650e89fd7c6a1dc4d2ad216fcf3'
3
+ metadata.gz: 5821645b40a23008360e4e9717ec9f420fdd5adc5a7b4fdfea6d08de111af27f
4
+ data.tar.gz: c78ddd43e9ad8691dd05481e2854e4280bfcda857cb3fa97624c947c6233af75
5
5
  SHA512:
6
- metadata.gz: a1bf10c3439fbe6e93b1368970725838f6b649854b5eac8e03bfd3e3b1e16f2e99b5a5bfe324980f51073e75e74a890b6a9e8d49735f28ca638d48eb36c0696f
7
- data.tar.gz: 5fc04389537b37d597200c50de8691010e46308063804a29bbbfd1ce2406c592f9d11678752e748606c45061b3d55e2b694ad590af1f486cfc166f646b6ec6c9
6
+ metadata.gz: 439ad9a5e969057996d802374739bb587648d6fbc27a9b2fe0da71fdc36345c87b2b483fe474f8d499769d107cab6c335cf41f5575363b2a25edc2c1f88f7cd2
7
+ data.tar.gz: 0e6942690a71294c2402111e21605715d3191bae076167febf2df7277d50da3c8ac2682ef319e570b7f3763ce9515cb7410243f901fb9d3d6a5f86bb67177537
data/Gemfile.lock CHANGED
@@ -7,7 +7,7 @@ GIT
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- web_stat (0.2.9)
10
+ web_stat (0.2.10)
11
11
  bundler (>= 2.0.2)
12
12
  cld (>= 0.8.0)
13
13
  mechanize (>= 2.7)
@@ -27,13 +27,13 @@ GEM
27
27
  connection_pool (2.2.2)
28
28
  crack (0.4.3)
29
29
  safe_yaml (~> 1.0.0)
30
- crass (1.0.5)
30
+ crass (1.0.6)
31
31
  diff-lcs (1.3)
32
32
  domain_name (0.5.20190701)
33
33
  unf (>= 0.0.5, < 1.0.0)
34
- ffi (1.11.3)
34
+ ffi (1.12.2)
35
35
  guess_html_encoding (0.0.11)
36
- hashdiff (1.0.0)
36
+ hashdiff (1.0.1)
37
37
  http-cookie (1.0.3)
38
38
  domain_name (~> 0.5)
39
39
  mechanize (2.7.6)
@@ -45,39 +45,39 @@ GEM
45
45
  nokogiri (~> 1.6)
46
46
  ntlm-http (~> 0.1, >= 0.1.1)
47
47
  webrobots (>= 0.0.9, < 0.2)
48
- method_source (0.9.2)
48
+ method_source (1.0.0)
49
49
  mime-types (3.3.1)
50
50
  mime-types-data (~> 3.2015)
51
- mime-types-data (3.2019.1009)
51
+ mime-types-data (3.2020.0425)
52
52
  mini_portile2 (2.4.0)
53
- natto (1.1.2)
53
+ natto (1.2.0)
54
54
  ffi (>= 1.9.0)
55
55
  net-http-digest_auth (1.4.1)
56
- net-http-persistent (3.1.0)
56
+ net-http-persistent (4.0.0)
57
57
  connection_pool (~> 2.2)
58
- nokogiri (1.10.7)
58
+ nokogiri (1.10.9)
59
59
  mini_portile2 (~> 2.4.0)
60
60
  nokogumbo (2.0.2)
61
61
  nokogiri (~> 1.8, >= 1.8.4)
62
62
  ntlm-http (0.1.1)
63
- pry (0.12.2)
64
- coderay (~> 1.1.0)
65
- method_source (~> 0.9.0)
66
- public_suffix (4.0.2)
63
+ pry (0.13.1)
64
+ coderay (~> 1.1)
65
+ method_source (~> 1.0)
66
+ public_suffix (4.0.4)
67
67
  rake (13.0.1)
68
68
  rspec (3.9.0)
69
69
  rspec-core (~> 3.9.0)
70
70
  rspec-expectations (~> 3.9.0)
71
71
  rspec-mocks (~> 3.9.0)
72
- rspec-core (3.9.1)
73
- rspec-support (~> 3.9.1)
74
- rspec-expectations (3.9.0)
72
+ rspec-core (3.9.2)
73
+ rspec-support (~> 3.9.3)
74
+ rspec-expectations (3.9.1)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.9.0)
77
77
  rspec-mocks (3.9.1)
78
78
  diff-lcs (>= 1.2.0, < 2.0)
79
79
  rspec-support (~> 3.9.0)
80
- rspec-support (3.9.2)
80
+ rspec-support (3.9.3)
81
81
  ruby-readability (0.7.0)
82
82
  guess_html_encoding (>= 0.0.4)
83
83
  nokogiri (>= 1.6.0)
@@ -88,8 +88,8 @@ GEM
88
88
  nokogumbo (~> 2.0)
89
89
  unf (0.1.4)
90
90
  unf_ext
91
- unf_ext (0.0.7.6)
92
- webmock (3.7.6)
91
+ unf_ext (0.0.7.7)
92
+ webmock (3.8.3)
93
93
  addressable (>= 2.3.6)
94
94
  crack (>= 0.3.2)
95
95
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -107,4 +107,4 @@ DEPENDENCIES
107
107
  webmock (>= 3.6.0)
108
108
 
109
109
  BUNDLED WITH
110
- 2.1.2
110
+ 2.1.4
data/README.md CHANGED
@@ -25,6 +25,10 @@ Fetch the web pages and stat.
25
25
  - "rake", "~> 10.0"
26
26
  - "rspec", "~> 3.0"
27
27
 
28
+ ### Install mecab
29
+
30
+ $ sudo apt install mecab-ipadic-utf8 libmecab-dev
31
+
28
32
  ## Installation
29
33
 
30
34
  Add this line to your application's Gemfile:
@@ -1,6 +1,6 @@
1
1
  module WebStat
2
2
  class Fetch
3
- attr_accessor :url, :html, :nokogiri, :userdic
3
+ attr_accessor :url, :html, :nokogiri, :userdic, :status
4
4
 
5
5
  # Get title
6
6
  # @return [String] title
@@ -67,16 +67,24 @@ module WebStat
67
67
 
68
68
  # Get url
69
69
  # @param [String] url
70
+ # @return [String] body
70
71
  def get_url(url)
71
72
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
72
73
  # Obey the target site's robots.txt
73
74
  agent.robots = true
74
- document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
75
- if document.class == Mechanize::File
76
- document.body
77
- else
78
- document.body.encode('UTF-8', document.encoding)
75
+ begin
76
+ document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
77
+ if document.class == Mechanize::File
78
+ body = document.body
79
+ else
80
+ body = document.body.encode('UTF-8', document.encoding)
81
+ end
82
+ @status = document.code
83
+ rescue Mechanize::ResponseCodeError => e
84
+ body = e.page.body
85
+ @status = e.page.code
79
86
  end
87
+ body
80
88
  end
81
89
 
82
90
  # Get the information of @url
@@ -96,6 +104,7 @@ module WebStat
96
104
  site_name: site_name,
97
105
  content: clean_content,
98
106
  language_code: language_code,
107
+ status: @status,
99
108
  url: @url,
100
109
  eyecatch_image_path: save_local_path(eyecatch_image_path),
101
110
  tags: tag.nouns
@@ -107,8 +116,9 @@ module WebStat
107
116
  # Get original url
108
117
  # @param [String] url
109
118
  def original_url(url)
110
- if url.match(/^http/)
111
- FinalRedirectUrl.final_redirect_url(url)
119
+ last_url = FinalRedirectUrl.final_redirect_url(url)
120
+ unless last_url.nil? || last_url.scrub('').empty?
121
+ last_url
112
122
  else
113
123
  url
114
124
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.9"
3
- end
2
+ VERSION = "0.2.10"
3
+ end
data/spec/spec_helper.rb CHANGED
@@ -59,9 +59,10 @@ end
59
59
 
60
60
  # Set webmock
61
61
  WebStatTestHelper.scheme_and_files.each do |url|
62
+ status = [200, 404, 503].sample
62
63
  WebMock.stub_request(:get, url)
63
64
  .to_return(
64
- status: 200,
65
+ status: status,
65
66
  body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
66
67
  headers: {content_type: 'application/html; charset=utf-8'})
67
68
  end
@@ -83,6 +83,7 @@ RSpec.describe WebStat::Fetch do
83
83
  expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
84
84
  expect(web_stat[:site_name]).to eq "newsdict.blog"
85
85
  expect(web_stat[:content]).not_to eq nil
86
+ expect(web_stat[:status]).to eq("200").or eq("404").or eq("503")
86
87
  expect(Sanitize.clean(web_stat[:content]).length).to eq web_stat[:content].length
87
88
  expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
88
89
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-08 00:00:00.000000000 Z
11
+ date: 2020-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -224,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
224
  - !ruby/object:Gem::Version
225
225
  version: '0'
226
226
  requirements: []
227
- rubygems_version: 3.1.2
227
+ rubygems_version: 3.0.3
228
228
  signing_key:
229
229
  specification_version: 4
230
230
  summary: Get the status of the web pages.