web_stat 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +15 -3
- data/README.md +4 -0
- data/lib/web_stat/fetch/fetch_as_html.rb +1 -1
- data/lib/web_stat/fetch/fetch_as_web.rb +1 -3
- data/lib/web_stat/fetch.rb +7 -11
- data/lib/web_stat/version.rb +1 -1
- data/lib/web_stat.rb +5 -1
- data/spec/fixtures/htmls/h1-title.html +3 -0
- data/spec/fixtures/images/facebook-3.jpg +0 -0
- data/spec/spec_helper.rb +30 -2
- data/spec/web_stat/fetch_spec.rb +15 -2
- data/web_stat.gemspec +1 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84692200a3ace269882025b441ea34bc24ef92e1d61e695db2af2612458d89a6
|
4
|
+
data.tar.gz: 85eb291966b9813c62ab1abdfb0bfc41b823602e86ca3fd3fcd44d8107634aab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59df286b73a5d95cacc3b9cbe56fd5cb9c28df8220ceb6fb302a77f8ebe53892d69e4b4b0b4788522db2e19fc1e899ab5f0638e0ce714811c32db12c4adec977
|
7
|
+
data.tar.gz: 945fdd95331fe8def50a2db97e9e45dd659af782b2c169ad6ae45eb9ccd8ea077d0ba32f263e54c49dfebe8c53247bc5c1077d0d63a61661d60ec0870ea56f4b
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
web_stat (0.1.7)
|
4
|
+
web_stat (0.1.8)
|
5
5
|
bundler (~> 2.0)
|
6
6
|
final_redirect_url (~> 0.1.0)
|
7
7
|
mechanize (~> 2.7)
|
@@ -13,15 +13,20 @@ PATH
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
+
addressable (2.6.0)
|
17
|
+
public_suffix (>= 2.0.2, < 4.0)
|
16
18
|
coderay (1.1.2)
|
17
19
|
connection_pool (2.2.2)
|
20
|
+
crack (0.4.3)
|
21
|
+
safe_yaml (~> 1.0.0)
|
18
22
|
crass (1.0.4)
|
19
23
|
diff-lcs (1.3)
|
20
|
-
domain_name (0.5.
|
24
|
+
domain_name (0.5.20190701)
|
21
25
|
unf (>= 0.0.5, < 1.0.0)
|
22
26
|
ffi (1.11.1)
|
23
27
|
final_redirect_url (0.1.0)
|
24
28
|
guess_html_encoding (0.0.11)
|
29
|
+
hashdiff (1.0.0)
|
25
30
|
http-cookie (1.0.3)
|
26
31
|
domain_name (~> 0.5)
|
27
32
|
mechanize (2.7.6)
|
@@ -41,7 +46,7 @@ GEM
|
|
41
46
|
natto (1.1.2)
|
42
47
|
ffi (>= 1.9.0)
|
43
48
|
net-http-digest_auth (1.4.1)
|
44
|
-
net-http-persistent (3.0
|
49
|
+
net-http-persistent (3.1.0)
|
45
50
|
connection_pool (~> 2.2)
|
46
51
|
nokogiri (1.10.3)
|
47
52
|
mini_portile2 (~> 2.4.0)
|
@@ -51,6 +56,7 @@ GEM
|
|
51
56
|
pry (0.12.2)
|
52
57
|
coderay (~> 1.1.0)
|
53
58
|
method_source (~> 0.9.0)
|
59
|
+
public_suffix (3.1.1)
|
54
60
|
rake (10.5.0)
|
55
61
|
rspec (3.8.0)
|
56
62
|
rspec-core (~> 3.8.0)
|
@@ -68,6 +74,7 @@ GEM
|
|
68
74
|
ruby-readability (0.7.0)
|
69
75
|
guess_html_encoding (>= 0.0.4)
|
70
76
|
nokogiri (>= 1.6.0)
|
77
|
+
safe_yaml (1.0.5)
|
71
78
|
sanitize (5.0.0)
|
72
79
|
crass (~> 1.0.2)
|
73
80
|
nokogiri (>= 1.8.0)
|
@@ -75,6 +82,10 @@ GEM
|
|
75
82
|
unf (0.1.4)
|
76
83
|
unf_ext
|
77
84
|
unf_ext (0.0.7.6)
|
85
|
+
webmock (3.6.0)
|
86
|
+
addressable (>= 2.3.6)
|
87
|
+
crack (>= 0.3.2)
|
88
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
78
89
|
webrobots (0.1.2)
|
79
90
|
|
80
91
|
PLATFORMS
|
@@ -85,6 +96,7 @@ DEPENDENCIES
|
|
85
96
|
rake (~> 10.0)
|
86
97
|
rspec (~> 3.0)
|
87
98
|
web_stat!
|
99
|
+
webmock (~> 3.6.0)
|
88
100
|
|
89
101
|
BUNDLED WITH
|
90
102
|
2.0.1
|
data/README.md
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
@@ -7,8 +7,8 @@ require 'ruby-readability'
|
|
7
7
|
require 'final_redirect_url'
|
8
8
|
module WebStat
|
9
9
|
class Fetch
|
10
|
-
attr_accessor :html, :nokogiri
|
11
|
-
|
10
|
+
attr_accessor :url, :html, :nokogiri
|
11
|
+
|
12
12
|
# Get title
|
13
13
|
# @return [String] title
|
14
14
|
def title
|
@@ -40,6 +40,7 @@ module WebStat
|
|
40
40
|
|
41
41
|
# Get temporary path of image
|
42
42
|
def eyecatch_image_path
|
43
|
+
# Reuse `path` in this method
|
43
44
|
path = nil
|
44
45
|
WebStat::Configure.get["eyecatch_image_xpaths"].each do |xpath|
|
45
46
|
if @nokogiri.xpath(xpath).first.respond_to?(:value)
|
@@ -47,20 +48,15 @@ module WebStat
|
|
47
48
|
break
|
48
49
|
end
|
49
50
|
end
|
50
|
-
if
|
51
|
-
|
52
|
-
path = "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
53
|
-
else
|
54
|
-
path = "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}/#{URI.parse(@url).path}/#{path}"
|
55
|
-
end
|
51
|
+
if path.match(/^\//)
|
52
|
+
"#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
56
53
|
end
|
57
|
-
path
|
58
54
|
end
|
59
55
|
|
60
56
|
# Get local path to save url
|
61
57
|
# @param [String] url
|
62
58
|
def save_local_path(url)
|
63
|
-
return nil if url.nil?
|
59
|
+
return nil if url.nil?
|
64
60
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
65
61
|
open(original_url(url)) do |remote_file|
|
66
62
|
File.open(tmp_file, "w+b") do |_file|
|
@@ -104,4 +100,4 @@ module WebStat
|
|
104
100
|
end
|
105
101
|
end
|
106
102
|
end
|
107
|
-
end
|
103
|
+
end
|
data/lib/web_stat/version.rb
CHANGED
data/lib/web_stat.rb
CHANGED
@@ -17,13 +17,17 @@ module WebStat
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Get web page's stat by url
|
20
|
+
# @param String url
|
20
21
|
def stat_by_url(url)
|
21
22
|
stat_by_web(url)
|
22
23
|
end
|
23
24
|
|
24
25
|
# Get web page's stat by html
|
25
|
-
|
26
|
+
# @param String html
|
27
|
+
# @param [String] url
|
28
|
+
def stat_by_html(html, url=nil)
|
26
29
|
web_stat = WebStat::FetchAsHtml.new(html)
|
30
|
+
web_stat.url = url unless url.nil?
|
27
31
|
web_stat.stat
|
28
32
|
end
|
29
33
|
end
|
@@ -7,5 +7,8 @@
|
|
7
7
|
</head>
|
8
8
|
<body class="post-template tag-rubygems">
|
9
9
|
<h1 class="post-title">gem作成でついまずいたところ</h1>
|
10
|
+
<p class="profile"><a href="https://newsdict.blog">
|
11
|
+
<img src="/content/images/size/w100/2019/03/facebook-3.jpg" alt="Yusuke Abe" class="avatar" /></a></p>
|
12
|
+
|
10
13
|
</body>
|
11
14
|
</html>
|
Binary file
|
data/spec/spec_helper.rb
CHANGED
@@ -3,6 +3,10 @@ require "bundler/setup"
|
|
3
3
|
require 'pry'
|
4
4
|
require "web_stat"
|
5
5
|
|
6
|
+
require 'webmock'
|
7
|
+
include WebMock::API
|
8
|
+
WebMock.enable!
|
9
|
+
|
6
10
|
RSpec.configure do |config|
|
7
11
|
# Enable flags like --only-failures and --next-failure
|
8
12
|
config.example_status_persistence_file_path = ".rspec_status"
|
@@ -47,8 +51,32 @@ module WebStatTestHelper
|
|
47
51
|
# Get htmls of fixture
|
48
52
|
def scheme_and_files
|
49
53
|
Dir.glob(File.join(File.dirname(__FILE__), "fixtures", "htmls", "*.html")).map do |file|
|
50
|
-
|
54
|
+
"https://newsdict.blog/#{File.basename(file)}"
|
51
55
|
end
|
52
56
|
end
|
53
57
|
end
|
54
|
-
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Set webmock
|
61
|
+
WebStatTestHelper.scheme_and_files.each do |url|
|
62
|
+
WebMock.stub_request(:get, url)
|
63
|
+
.to_return(
|
64
|
+
status: 200,
|
65
|
+
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
|
66
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
67
|
+
end
|
68
|
+
|
69
|
+
WebMock.stub_request(:get, "https://newsdict.blog/robots.txt")
|
70
|
+
.to_return(
|
71
|
+
status: 200,
|
72
|
+
body: "User-agent: *
|
73
|
+
Sitemap: https://newsdict.blog/sitemap.xml
|
74
|
+
Disallow: /ghost/
|
75
|
+
Disallow: /p/",
|
76
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
77
|
+
|
78
|
+
WebMock.stub_request(:get, "https://newsdict.blog/content/images/size/w100/2019/03/facebook-3.jpg")
|
79
|
+
.to_return(
|
80
|
+
status: 200,
|
81
|
+
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "facebook-3.jpg")),
|
82
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
data/spec/web_stat/fetch_spec.rb
CHANGED
@@ -30,9 +30,21 @@ RSpec.describe WebStat::Fetch do
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
+
it "Get eyecatch image blob by #{fetch[:class].to_s}" do
|
34
|
+
fetch[:fixture].each do |fixture|
|
35
|
+
web_stat = fetch[:class].new(fixture)
|
36
|
+
web_stat.url = "https://newsdict.blog"
|
37
|
+
unless web_stat.stat[:eyecatch_image_path].nil?
|
38
|
+
image = File.read(web_stat.stat[:eyecatch_image_path])
|
39
|
+
expect(image.encoding.to_s).to eq("UTF-8")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
33
44
|
it "Get eyecatch image path by #{fetch[:class].to_s}" do
|
34
45
|
fetch[:fixture].each do |fixture|
|
35
46
|
web_stat = fetch[:class].new(fixture)
|
47
|
+
web_stat.url = "https://newsdict.blog"
|
36
48
|
expect(web_stat.eyecatch_image_path).to be_string_or_nil
|
37
49
|
end
|
38
50
|
end
|
@@ -40,6 +52,7 @@ RSpec.describe WebStat::Fetch do
|
|
40
52
|
it "Get local path of eyecatch image by #{fetch[:class].to_s}" do
|
41
53
|
fetch[:fixture].each do |fixture|
|
42
54
|
web_stat = fetch[:class].new(fixture)
|
55
|
+
web_stat.url = "https://newsdict.blog"
|
43
56
|
expect(web_stat.stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
44
57
|
end
|
45
58
|
end
|
@@ -47,7 +60,7 @@ RSpec.describe WebStat::Fetch do
|
|
47
60
|
|
48
61
|
it "WebStat.stat_by_html" do
|
49
62
|
WebStatTestHelper.htmls.each do |fixture|
|
50
|
-
web_stat = WebStat.stat_by_html(fixture)
|
63
|
+
web_stat = WebStat.stat_by_html(fixture, "https://newsdict.blog")
|
51
64
|
expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
|
52
65
|
expect(web_stat[:site_name]).to eq "newsdict.blog"
|
53
66
|
expect(web_stat[:content]).not_to eq nil
|
@@ -66,4 +79,4 @@ RSpec.describe WebStat::Fetch do
|
|
66
79
|
expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
67
80
|
end
|
68
81
|
end
|
69
|
-
end
|
82
|
+
end
|
data/web_stat.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.12.2
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: webmock
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 3.6.0
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 3.6.0
|
153
167
|
description: Fetch the web pages and stat.
|
154
168
|
email:
|
155
169
|
- yube@newsdict.jp
|
@@ -180,6 +194,7 @@ files:
|
|
180
194
|
- spec/fixtures/htmls/blog.html
|
181
195
|
- spec/fixtures/htmls/h1-title.html
|
182
196
|
- spec/fixtures/htmls/image.html
|
197
|
+
- spec/fixtures/images/facebook-3.jpg
|
183
198
|
- spec/spec_helper.rb
|
184
199
|
- spec/web_stat/configure_spec.rb
|
185
200
|
- spec/web_stat/fetch_spec.rb
|
@@ -213,6 +228,7 @@ test_files:
|
|
213
228
|
- spec/fixtures/htmls/blog.html
|
214
229
|
- spec/fixtures/htmls/h1-title.html
|
215
230
|
- spec/fixtures/htmls/image.html
|
231
|
+
- spec/fixtures/images/facebook-3.jpg
|
216
232
|
- spec/spec_helper.rb
|
217
233
|
- spec/web_stat/configure_spec.rb
|
218
234
|
- spec/web_stat/fetch_spec.rb
|