web_stat 0.2.9 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +20 -20
- data/README.md +4 -0
- data/lib/web_stat/fetch.rb +18 -8
- data/lib/web_stat/version.rb +2 -2
- data/spec/spec_helper.rb +2 -1
- data/spec/web_stat/fetch_spec.rb +1 -0
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5821645b40a23008360e4e9717ec9f420fdd5adc5a7b4fdfea6d08de111af27f
|
|
4
|
+
data.tar.gz: c78ddd43e9ad8691dd05481e2854e4280bfcda857cb3fa97624c947c6233af75
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 439ad9a5e969057996d802374739bb587648d6fbc27a9b2fe0da71fdc36345c87b2b483fe474f8d499769d107cab6c335cf41f5575363b2a25edc2c1f88f7cd2
|
|
7
|
+
data.tar.gz: 0e6942690a71294c2402111e21605715d3191bae076167febf2df7277d50da3c8ac2682ef319e570b7f3763ce9515cb7410243f901fb9d3d6a5f86bb67177537
|
data/Gemfile.lock
CHANGED
|
@@ -7,7 +7,7 @@ GIT
|
|
|
7
7
|
PATH
|
|
8
8
|
remote: .
|
|
9
9
|
specs:
|
|
10
|
-
web_stat (0.2.9)
|
|
10
|
+
web_stat (0.2.10)
|
|
11
11
|
bundler (>= 2.0.2)
|
|
12
12
|
cld (>= 0.8.0)
|
|
13
13
|
mechanize (>= 2.7)
|
|
@@ -27,13 +27,13 @@ GEM
|
|
|
27
27
|
connection_pool (2.2.2)
|
|
28
28
|
crack (0.4.3)
|
|
29
29
|
safe_yaml (~> 1.0.0)
|
|
30
|
-
crass (1.0.
|
|
30
|
+
crass (1.0.6)
|
|
31
31
|
diff-lcs (1.3)
|
|
32
32
|
domain_name (0.5.20190701)
|
|
33
33
|
unf (>= 0.0.5, < 1.0.0)
|
|
34
|
-
ffi (1.
|
|
34
|
+
ffi (1.12.2)
|
|
35
35
|
guess_html_encoding (0.0.11)
|
|
36
|
-
hashdiff (1.0.
|
|
36
|
+
hashdiff (1.0.1)
|
|
37
37
|
http-cookie (1.0.3)
|
|
38
38
|
domain_name (~> 0.5)
|
|
39
39
|
mechanize (2.7.6)
|
|
@@ -45,39 +45,39 @@ GEM
|
|
|
45
45
|
nokogiri (~> 1.6)
|
|
46
46
|
ntlm-http (~> 0.1, >= 0.1.1)
|
|
47
47
|
webrobots (>= 0.0.9, < 0.2)
|
|
48
|
-
method_source (0.
|
|
48
|
+
method_source (1.0.0)
|
|
49
49
|
mime-types (3.3.1)
|
|
50
50
|
mime-types-data (~> 3.2015)
|
|
51
|
-
mime-types-data (3.
|
|
51
|
+
mime-types-data (3.2020.0425)
|
|
52
52
|
mini_portile2 (2.4.0)
|
|
53
|
-
natto (1.
|
|
53
|
+
natto (1.2.0)
|
|
54
54
|
ffi (>= 1.9.0)
|
|
55
55
|
net-http-digest_auth (1.4.1)
|
|
56
|
-
net-http-persistent (
|
|
56
|
+
net-http-persistent (4.0.0)
|
|
57
57
|
connection_pool (~> 2.2)
|
|
58
|
-
nokogiri (1.10.
|
|
58
|
+
nokogiri (1.10.9)
|
|
59
59
|
mini_portile2 (~> 2.4.0)
|
|
60
60
|
nokogumbo (2.0.2)
|
|
61
61
|
nokogiri (~> 1.8, >= 1.8.4)
|
|
62
62
|
ntlm-http (0.1.1)
|
|
63
|
-
pry (0.
|
|
64
|
-
coderay (~> 1.1
|
|
65
|
-
method_source (~>
|
|
66
|
-
public_suffix (4.0.
|
|
63
|
+
pry (0.13.1)
|
|
64
|
+
coderay (~> 1.1)
|
|
65
|
+
method_source (~> 1.0)
|
|
66
|
+
public_suffix (4.0.4)
|
|
67
67
|
rake (13.0.1)
|
|
68
68
|
rspec (3.9.0)
|
|
69
69
|
rspec-core (~> 3.9.0)
|
|
70
70
|
rspec-expectations (~> 3.9.0)
|
|
71
71
|
rspec-mocks (~> 3.9.0)
|
|
72
|
-
rspec-core (3.9.
|
|
73
|
-
rspec-support (~> 3.9.
|
|
74
|
-
rspec-expectations (3.9.
|
|
72
|
+
rspec-core (3.9.2)
|
|
73
|
+
rspec-support (~> 3.9.3)
|
|
74
|
+
rspec-expectations (3.9.1)
|
|
75
75
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
76
76
|
rspec-support (~> 3.9.0)
|
|
77
77
|
rspec-mocks (3.9.1)
|
|
78
78
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
79
79
|
rspec-support (~> 3.9.0)
|
|
80
|
-
rspec-support (3.9.
|
|
80
|
+
rspec-support (3.9.3)
|
|
81
81
|
ruby-readability (0.7.0)
|
|
82
82
|
guess_html_encoding (>= 0.0.4)
|
|
83
83
|
nokogiri (>= 1.6.0)
|
|
@@ -88,8 +88,8 @@ GEM
|
|
|
88
88
|
nokogumbo (~> 2.0)
|
|
89
89
|
unf (0.1.4)
|
|
90
90
|
unf_ext
|
|
91
|
-
unf_ext (0.0.7.
|
|
92
|
-
webmock (3.
|
|
91
|
+
unf_ext (0.0.7.7)
|
|
92
|
+
webmock (3.8.3)
|
|
93
93
|
addressable (>= 2.3.6)
|
|
94
94
|
crack (>= 0.3.2)
|
|
95
95
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
@@ -107,4 +107,4 @@ DEPENDENCIES
|
|
|
107
107
|
webmock (>= 3.6.0)
|
|
108
108
|
|
|
109
109
|
BUNDLED WITH
|
|
110
|
-
2.1.
|
|
110
|
+
2.1.4
|
data/README.md
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module WebStat
|
|
2
2
|
class Fetch
|
|
3
|
-
attr_accessor :url, :html, :nokogiri, :userdic
|
|
3
|
+
attr_accessor :url, :html, :nokogiri, :userdic, :status
|
|
4
4
|
|
|
5
5
|
# Get title
|
|
6
6
|
# @return [String] title
|
|
@@ -67,16 +67,24 @@ module WebStat
|
|
|
67
67
|
|
|
68
68
|
# Get url
|
|
69
69
|
# @param [String] url
|
|
70
|
+
# @param [String] body
|
|
70
71
|
def get_url(url)
|
|
71
72
|
agent = Mechanize.new { |_agent| _agent.user_agent = WebStat::Configure.get["user_agent"] }
|
|
72
73
|
# Enable to read Robots.txt
|
|
73
74
|
agent.robots = true
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
document.
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
begin
|
|
76
|
+
document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
|
77
|
+
if document.class == Mechanize::File
|
|
78
|
+
body = document.body
|
|
79
|
+
else
|
|
80
|
+
body = document.body.encode('UTF-8', document.encoding)
|
|
81
|
+
end
|
|
82
|
+
@status = document.code
|
|
83
|
+
rescue Mechanize::ResponseCodeError => e
|
|
84
|
+
body = e.page.body
|
|
85
|
+
@status = e.page.code
|
|
79
86
|
end
|
|
87
|
+
body
|
|
80
88
|
end
|
|
81
89
|
|
|
82
90
|
# Get the informations of @url
|
|
@@ -96,6 +104,7 @@ module WebStat
|
|
|
96
104
|
site_name: site_name,
|
|
97
105
|
content: clean_content,
|
|
98
106
|
language_code: language_code,
|
|
107
|
+
status: @status,
|
|
99
108
|
url: @url,
|
|
100
109
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
|
101
110
|
tags: tag.nouns
|
|
@@ -107,8 +116,9 @@ module WebStat
|
|
|
107
116
|
# Get original url
|
|
108
117
|
# @param [String] url
|
|
109
118
|
def original_url(url)
|
|
110
|
-
|
|
111
|
-
|
|
119
|
+
last_url = FinalRedirectUrl.final_redirect_url(url)
|
|
120
|
+
unless last_url.nil? || last_url.scrub('').empty?
|
|
121
|
+
last_url
|
|
112
122
|
else
|
|
113
123
|
url
|
|
114
124
|
end
|
data/lib/web_stat/version.rb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
module WebStat
|
|
2
|
-
VERSION = "0.2.9"
|
|
3
|
-
end
|
|
2
|
+
VERSION = "0.2.10"
|
|
3
|
+
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -59,9 +59,10 @@ end
|
|
|
59
59
|
|
|
60
60
|
# Set webmock
|
|
61
61
|
WebStatTestHelper.scheme_and_files.each do |url|
|
|
62
|
+
status = [200, 404, 503].sample
|
|
62
63
|
WebMock.stub_request(:get, url)
|
|
63
64
|
.to_return(
|
|
64
|
-
status:
|
|
65
|
+
status: status,
|
|
65
66
|
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
|
|
66
67
|
headers: {content_type: 'application/html; charset=utf-8'})
|
|
67
68
|
end
|
data/spec/web_stat/fetch_spec.rb
CHANGED
|
@@ -83,6 +83,7 @@ RSpec.describe WebStat::Fetch do
|
|
|
83
83
|
expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
|
|
84
84
|
expect(web_stat[:site_name]).to eq "newsdict.blog"
|
|
85
85
|
expect(web_stat[:content]).not_to eq nil
|
|
86
|
+
expect(web_stat[:status]).to eq("200").or eq("404").or eq("503")
|
|
86
87
|
expect(Sanitize.clean(web_stat[:content]).length).to eq web_stat[:content].length
|
|
87
88
|
expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
|
88
89
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: web_stat
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.9
|
|
4
|
+
version: 0.2.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yusuke abe
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-05-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -224,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
224
224
|
- !ruby/object:Gem::Version
|
|
225
225
|
version: '0'
|
|
226
226
|
requirements: []
|
|
227
|
-
rubygems_version: 3.
|
|
227
|
+
rubygems_version: 3.0.3
|
|
228
228
|
signing_key:
|
|
229
229
|
specification_version: 4
|
|
230
230
|
summary: Get the status of the web pages.
|