web_stat 0.2.9 → 0.2.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +20 -20
- data/README.md +4 -0
- data/lib/web_stat/fetch.rb +18 -8
- data/lib/web_stat/version.rb +2 -2
- data/spec/spec_helper.rb +2 -1
- data/spec/web_stat/fetch_spec.rb +1 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5821645b40a23008360e4e9717ec9f420fdd5adc5a7b4fdfea6d08de111af27f
|
4
|
+
data.tar.gz: c78ddd43e9ad8691dd05481e2854e4280bfcda857cb3fa97624c947c6233af75
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 439ad9a5e969057996d802374739bb587648d6fbc27a9b2fe0da71fdc36345c87b2b483fe474f8d499769d107cab6c335cf41f5575363b2a25edc2c1f88f7cd2
|
7
|
+
data.tar.gz: 0e6942690a71294c2402111e21605715d3191bae076167febf2df7277d50da3c8ac2682ef319e570b7f3763ce9515cb7410243f901fb9d3d6a5f86bb67177537
|
data/Gemfile.lock
CHANGED
@@ -7,7 +7,7 @@ GIT
|
|
7
7
|
PATH
|
8
8
|
remote: .
|
9
9
|
specs:
|
10
|
-
web_stat (0.2.9)
|
10
|
+
web_stat (0.2.10)
|
11
11
|
bundler (>= 2.0.2)
|
12
12
|
cld (>= 0.8.0)
|
13
13
|
mechanize (>= 2.7)
|
@@ -27,13 +27,13 @@ GEM
|
|
27
27
|
connection_pool (2.2.2)
|
28
28
|
crack (0.4.3)
|
29
29
|
safe_yaml (~> 1.0.0)
|
30
|
-
crass (1.0.
|
30
|
+
crass (1.0.6)
|
31
31
|
diff-lcs (1.3)
|
32
32
|
domain_name (0.5.20190701)
|
33
33
|
unf (>= 0.0.5, < 1.0.0)
|
34
|
-
ffi (1.
|
34
|
+
ffi (1.12.2)
|
35
35
|
guess_html_encoding (0.0.11)
|
36
|
-
hashdiff (1.0.
|
36
|
+
hashdiff (1.0.1)
|
37
37
|
http-cookie (1.0.3)
|
38
38
|
domain_name (~> 0.5)
|
39
39
|
mechanize (2.7.6)
|
@@ -45,39 +45,39 @@ GEM
|
|
45
45
|
nokogiri (~> 1.6)
|
46
46
|
ntlm-http (~> 0.1, >= 0.1.1)
|
47
47
|
webrobots (>= 0.0.9, < 0.2)
|
48
|
-
method_source (0.
|
48
|
+
method_source (1.0.0)
|
49
49
|
mime-types (3.3.1)
|
50
50
|
mime-types-data (~> 3.2015)
|
51
|
-
mime-types-data (3.
|
51
|
+
mime-types-data (3.2020.0425)
|
52
52
|
mini_portile2 (2.4.0)
|
53
|
-
natto (1.
|
53
|
+
natto (1.2.0)
|
54
54
|
ffi (>= 1.9.0)
|
55
55
|
net-http-digest_auth (1.4.1)
|
56
|
-
net-http-persistent (
|
56
|
+
net-http-persistent (4.0.0)
|
57
57
|
connection_pool (~> 2.2)
|
58
|
-
nokogiri (1.10.
|
58
|
+
nokogiri (1.10.9)
|
59
59
|
mini_portile2 (~> 2.4.0)
|
60
60
|
nokogumbo (2.0.2)
|
61
61
|
nokogiri (~> 1.8, >= 1.8.4)
|
62
62
|
ntlm-http (0.1.1)
|
63
|
-
pry (0.
|
64
|
-
coderay (~> 1.1
|
65
|
-
method_source (~>
|
66
|
-
public_suffix (4.0.
|
63
|
+
pry (0.13.1)
|
64
|
+
coderay (~> 1.1)
|
65
|
+
method_source (~> 1.0)
|
66
|
+
public_suffix (4.0.4)
|
67
67
|
rake (13.0.1)
|
68
68
|
rspec (3.9.0)
|
69
69
|
rspec-core (~> 3.9.0)
|
70
70
|
rspec-expectations (~> 3.9.0)
|
71
71
|
rspec-mocks (~> 3.9.0)
|
72
|
-
rspec-core (3.9.
|
73
|
-
rspec-support (~> 3.9.
|
74
|
-
rspec-expectations (3.9.
|
72
|
+
rspec-core (3.9.2)
|
73
|
+
rspec-support (~> 3.9.3)
|
74
|
+
rspec-expectations (3.9.1)
|
75
75
|
diff-lcs (>= 1.2.0, < 2.0)
|
76
76
|
rspec-support (~> 3.9.0)
|
77
77
|
rspec-mocks (3.9.1)
|
78
78
|
diff-lcs (>= 1.2.0, < 2.0)
|
79
79
|
rspec-support (~> 3.9.0)
|
80
|
-
rspec-support (3.9.
|
80
|
+
rspec-support (3.9.3)
|
81
81
|
ruby-readability (0.7.0)
|
82
82
|
guess_html_encoding (>= 0.0.4)
|
83
83
|
nokogiri (>= 1.6.0)
|
@@ -88,8 +88,8 @@ GEM
|
|
88
88
|
nokogumbo (~> 2.0)
|
89
89
|
unf (0.1.4)
|
90
90
|
unf_ext
|
91
|
-
unf_ext (0.0.7.
|
92
|
-
webmock (3.
|
91
|
+
unf_ext (0.0.7.7)
|
92
|
+
webmock (3.8.3)
|
93
93
|
addressable (>= 2.3.6)
|
94
94
|
crack (>= 0.3.2)
|
95
95
|
hashdiff (>= 0.4.0, < 2.0.0)
|
@@ -107,4 +107,4 @@ DEPENDENCIES
|
|
107
107
|
webmock (>= 3.6.0)
|
108
108
|
|
109
109
|
BUNDLED WITH
|
110
|
-
2.1.
|
110
|
+
2.1.4
|
data/README.md
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module WebStat
|
2
2
|
class Fetch
|
3
|
-
attr_accessor :url, :html, :nokogiri, :userdic
|
3
|
+
attr_accessor :url, :html, :nokogiri, :userdic, :status
|
4
4
|
|
5
5
|
# Get title
|
6
6
|
# @return [String] title
|
@@ -67,16 +67,24 @@ module WebStat
|
|
67
67
|
|
68
68
|
# Get url
|
69
69
|
# @param [String] url
|
70
|
+
# @param [String] body
|
70
71
|
def get_url(url)
|
71
72
|
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
72
73
|
# Enable to read Robots.txt
|
73
74
|
agent.robots = true
|
74
|
-
|
75
|
-
|
76
|
-
document.
|
77
|
-
|
78
|
-
|
75
|
+
begin
|
76
|
+
document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
77
|
+
if document.class == Mechanize::File
|
78
|
+
body = document.body
|
79
|
+
else
|
80
|
+
body = document.body.encode('UTF-8', document.encoding)
|
81
|
+
end
|
82
|
+
@status = document.code
|
83
|
+
rescue Mechanize::ResponseCodeError => e
|
84
|
+
body = e.page.body
|
85
|
+
@status = e.page.code
|
79
86
|
end
|
87
|
+
body
|
80
88
|
end
|
81
89
|
|
82
90
|
# Get the informations of @url
|
@@ -96,6 +104,7 @@ module WebStat
|
|
96
104
|
site_name: site_name,
|
97
105
|
content: clean_content,
|
98
106
|
language_code: language_code,
|
107
|
+
status: @status,
|
99
108
|
url: @url,
|
100
109
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
101
110
|
tags: tag.nouns
|
@@ -107,8 +116,9 @@ module WebStat
|
|
107
116
|
# Get original url
|
108
117
|
# @param [String] url
|
109
118
|
def original_url(url)
|
110
|
-
|
111
|
-
|
119
|
+
last_url = FinalRedirectUrl.final_redirect_url(url)
|
120
|
+
unless last_url.nil? || last_url.scrub('').empty?
|
121
|
+
last_url
|
112
122
|
else
|
113
123
|
url
|
114
124
|
end
|
data/lib/web_stat/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module WebStat
|
2
|
-
VERSION = "0.2.9"
|
3
|
-
end
|
2
|
+
VERSION = "0.2.10"
|
3
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -59,9 +59,10 @@ end
|
|
59
59
|
|
60
60
|
# Set webmock
|
61
61
|
WebStatTestHelper.scheme_and_files.each do |url|
|
62
|
+
status = [200, 404, 503].sample
|
62
63
|
WebMock.stub_request(:get, url)
|
63
64
|
.to_return(
|
64
|
-
status:
|
65
|
+
status: status,
|
65
66
|
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
|
66
67
|
headers: {content_type: 'application/html; charset=utf-8'})
|
67
68
|
end
|
data/spec/web_stat/fetch_spec.rb
CHANGED
@@ -83,6 +83,7 @@ RSpec.describe WebStat::Fetch do
|
|
83
83
|
expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
|
84
84
|
expect(web_stat[:site_name]).to eq "newsdict.blog"
|
85
85
|
expect(web_stat[:content]).not_to eq nil
|
86
|
+
expect(web_stat[:status]).to eq("200").or eq("404").or eq("503")
|
86
87
|
expect(Sanitize.clean(web_stat[:content]).length).to eq web_stat[:content].length
|
87
88
|
expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
88
89
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -224,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
224
224
|
- !ruby/object:Gem::Version
|
225
225
|
version: '0'
|
226
226
|
requirements: []
|
227
|
-
rubygems_version: 3.
|
227
|
+
rubygems_version: 3.0.3
|
228
228
|
signing_key:
|
229
229
|
specification_version: 4
|
230
230
|
summary: Get the status of the web pages.
|