web_stat 0.2.9 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6851adcf7a19adb66dbfc7f241cd2b659878ef954836e5656d42d3540a117fd8
4
- data.tar.gz: '013188fbf26bd0ccce741afc7500060e40a33650e89fd7c6a1dc4d2ad216fcf3'
3
+ metadata.gz: 5821645b40a23008360e4e9717ec9f420fdd5adc5a7b4fdfea6d08de111af27f
4
+ data.tar.gz: c78ddd43e9ad8691dd05481e2854e4280bfcda857cb3fa97624c947c6233af75
5
5
  SHA512:
6
- metadata.gz: a1bf10c3439fbe6e93b1368970725838f6b649854b5eac8e03bfd3e3b1e16f2e99b5a5bfe324980f51073e75e74a890b6a9e8d49735f28ca638d48eb36c0696f
7
- data.tar.gz: 5fc04389537b37d597200c50de8691010e46308063804a29bbbfd1ce2406c592f9d11678752e748606c45061b3d55e2b694ad590af1f486cfc166f646b6ec6c9
6
+ metadata.gz: 439ad9a5e969057996d802374739bb587648d6fbc27a9b2fe0da71fdc36345c87b2b483fe474f8d499769d107cab6c335cf41f5575363b2a25edc2c1f88f7cd2
7
+ data.tar.gz: 0e6942690a71294c2402111e21605715d3191bae076167febf2df7277d50da3c8ac2682ef319e570b7f3763ce9515cb7410243f901fb9d3d6a5f86bb67177537
data/Gemfile.lock CHANGED
@@ -7,7 +7,7 @@ GIT
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- web_stat (0.2.9)
10
+ web_stat (0.2.10)
11
11
  bundler (>= 2.0.2)
12
12
  cld (>= 0.8.0)
13
13
  mechanize (>= 2.7)
@@ -27,13 +27,13 @@ GEM
27
27
  connection_pool (2.2.2)
28
28
  crack (0.4.3)
29
29
  safe_yaml (~> 1.0.0)
30
- crass (1.0.5)
30
+ crass (1.0.6)
31
31
  diff-lcs (1.3)
32
32
  domain_name (0.5.20190701)
33
33
  unf (>= 0.0.5, < 1.0.0)
34
- ffi (1.11.3)
34
+ ffi (1.12.2)
35
35
  guess_html_encoding (0.0.11)
36
- hashdiff (1.0.0)
36
+ hashdiff (1.0.1)
37
37
  http-cookie (1.0.3)
38
38
  domain_name (~> 0.5)
39
39
  mechanize (2.7.6)
@@ -45,39 +45,39 @@ GEM
45
45
  nokogiri (~> 1.6)
46
46
  ntlm-http (~> 0.1, >= 0.1.1)
47
47
  webrobots (>= 0.0.9, < 0.2)
48
- method_source (0.9.2)
48
+ method_source (1.0.0)
49
49
  mime-types (3.3.1)
50
50
  mime-types-data (~> 3.2015)
51
- mime-types-data (3.2019.1009)
51
+ mime-types-data (3.2020.0425)
52
52
  mini_portile2 (2.4.0)
53
- natto (1.1.2)
53
+ natto (1.2.0)
54
54
  ffi (>= 1.9.0)
55
55
  net-http-digest_auth (1.4.1)
56
- net-http-persistent (3.1.0)
56
+ net-http-persistent (4.0.0)
57
57
  connection_pool (~> 2.2)
58
- nokogiri (1.10.7)
58
+ nokogiri (1.10.9)
59
59
  mini_portile2 (~> 2.4.0)
60
60
  nokogumbo (2.0.2)
61
61
  nokogiri (~> 1.8, >= 1.8.4)
62
62
  ntlm-http (0.1.1)
63
- pry (0.12.2)
64
- coderay (~> 1.1.0)
65
- method_source (~> 0.9.0)
66
- public_suffix (4.0.2)
63
+ pry (0.13.1)
64
+ coderay (~> 1.1)
65
+ method_source (~> 1.0)
66
+ public_suffix (4.0.4)
67
67
  rake (13.0.1)
68
68
  rspec (3.9.0)
69
69
  rspec-core (~> 3.9.0)
70
70
  rspec-expectations (~> 3.9.0)
71
71
  rspec-mocks (~> 3.9.0)
72
- rspec-core (3.9.1)
73
- rspec-support (~> 3.9.1)
74
- rspec-expectations (3.9.0)
72
+ rspec-core (3.9.2)
73
+ rspec-support (~> 3.9.3)
74
+ rspec-expectations (3.9.1)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.9.0)
77
77
  rspec-mocks (3.9.1)
78
78
  diff-lcs (>= 1.2.0, < 2.0)
79
79
  rspec-support (~> 3.9.0)
80
- rspec-support (3.9.2)
80
+ rspec-support (3.9.3)
81
81
  ruby-readability (0.7.0)
82
82
  guess_html_encoding (>= 0.0.4)
83
83
  nokogiri (>= 1.6.0)
@@ -88,8 +88,8 @@ GEM
88
88
  nokogumbo (~> 2.0)
89
89
  unf (0.1.4)
90
90
  unf_ext
91
- unf_ext (0.0.7.6)
92
- webmock (3.7.6)
91
+ unf_ext (0.0.7.7)
92
+ webmock (3.8.3)
93
93
  addressable (>= 2.3.6)
94
94
  crack (>= 0.3.2)
95
95
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -107,4 +107,4 @@ DEPENDENCIES
107
107
  webmock (>= 3.6.0)
108
108
 
109
109
  BUNDLED WITH
110
- 2.1.2
110
+ 2.1.4
data/README.md CHANGED
@@ -25,6 +25,10 @@ Fetch the web pages and stat.
25
25
  - "rake", "~> 10.0"
26
26
  - "rspec", "~> 3.0"
27
27
 
28
+ ### Install mecab
29
+
30
+ $ sudo apt install mecab-ipadic-utf8 libmecab
31
+
28
32
  ## Installation
29
33
 
30
34
  Add this line to your application's Gemfile:
@@ -1,6 +1,6 @@
1
1
  module WebStat
2
2
  class Fetch
3
- attr_accessor :url, :html, :nokogiri, :userdic
3
+ attr_accessor :url, :html, :nokogiri, :userdic, :status
4
4
 
5
5
  # Get title
6
6
  # @return [String] title
@@ -67,16 +67,24 @@ module WebStat
67
67
 
68
68
  # Get url
69
69
  # @param [String] url
70
+ # @param [String] body
70
71
  def get_url(url)
71
72
  agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
72
73
  # Enable to read Robots.txt
73
74
  agent.robots = true
74
- document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
75
- if document.class == Mechanize::File
76
- document.body
77
- else
78
- document.body.encode('UTF-8', document.encoding)
75
+ begin
76
+ document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
77
+ if document.class == Mechanize::File
78
+ body = document.body
79
+ else
80
+ body = document.body.encode('UTF-8', document.encoding)
81
+ end
82
+ @status = document.code
83
+ rescue Mechanize::ResponseCodeError => e
84
+ body = e.page.body
85
+ @status = e.page.code
79
86
  end
87
+ body
80
88
  end
81
89
 
82
90
  # Get the informations of @url
@@ -96,6 +104,7 @@ module WebStat
96
104
  site_name: site_name,
97
105
  content: clean_content,
98
106
  language_code: language_code,
107
+ status: @status,
99
108
  url: @url,
100
109
  eyecatch_image_path: save_local_path(eyecatch_image_path),
101
110
  tags: tag.nouns
@@ -107,8 +116,9 @@ module WebStat
107
116
  # Get original url
108
117
  # @param [String] url
109
118
  def original_url(url)
110
- if url.match(/^http/)
111
- FinalRedirectUrl.final_redirect_url(url)
119
+ last_url = FinalRedirectUrl.final_redirect_url(url)
120
+ unless last_url.nil? || last_url.scrub('').empty?
121
+ last_url
112
122
  else
113
123
  url
114
124
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.9"
3
- end
2
+ VERSION = "0.2.10"
3
+ end
data/spec/spec_helper.rb CHANGED
@@ -59,9 +59,10 @@ end
59
59
 
60
60
  # Set webmock
61
61
  WebStatTestHelper.scheme_and_files.each do |url|
62
+ status = [200, 404, 503].sample
62
63
  WebMock.stub_request(:get, url)
63
64
  .to_return(
64
- status: 200,
65
+ status: status,
65
66
  body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
66
67
  headers: {content_type: 'application/html; charset=utf-8'})
67
68
  end
@@ -83,6 +83,7 @@ RSpec.describe WebStat::Fetch do
83
83
  expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
84
84
  expect(web_stat[:site_name]).to eq "newsdict.blog"
85
85
  expect(web_stat[:content]).not_to eq nil
86
+ expect(web_stat[:status]).to eq("200").or eq("404").or eq("503")
86
87
  expect(Sanitize.clean(web_stat[:content]).length).to eq web_stat[:content].length
87
88
  expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
88
89
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-08 00:00:00.000000000 Z
11
+ date: 2020-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -224,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
224
  - !ruby/object:Gem::Version
225
225
  version: '0'
226
226
  requirements: []
227
- rubygems_version: 3.1.2
227
+ rubygems_version: 3.0.3
228
228
  signing_key:
229
229
  specification_version: 4
230
230
  summary: Get the status of the web pages.