web_stat 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +15 -3
- data/README.md +4 -0
- data/lib/web_stat/fetch/fetch_as_html.rb +1 -1
- data/lib/web_stat/fetch/fetch_as_web.rb +1 -3
- data/lib/web_stat/fetch.rb +7 -11
- data/lib/web_stat/version.rb +1 -1
- data/lib/web_stat.rb +5 -1
- data/spec/fixtures/htmls/h1-title.html +3 -0
- data/spec/fixtures/images/facebook-3.jpg +0 -0
- data/spec/spec_helper.rb +30 -2
- data/spec/web_stat/fetch_spec.rb +15 -2
- data/web_stat.gemspec +1 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84692200a3ace269882025b441ea34bc24ef92e1d61e695db2af2612458d89a6
|
4
|
+
data.tar.gz: 85eb291966b9813c62ab1abdfb0bfc41b823602e86ca3fd3fcd44d8107634aab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59df286b73a5d95cacc3b9cbe56fd5cb9c28df8220ceb6fb302a77f8ebe53892d69e4b4b0b4788522db2e19fc1e899ab5f0638e0ce714811c32db12c4adec977
|
7
|
+
data.tar.gz: 945fdd95331fe8def50a2db97e9e45dd659af782b2c169ad6ae45eb9ccd8ea077d0ba32f263e54c49dfebe8c53247bc5c1077d0d63a61661d60ec0870ea56f4b
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
web_stat (0.1.7)
|
4
|
+
web_stat (0.1.8)
|
5
5
|
bundler (~> 2.0)
|
6
6
|
final_redirect_url (~> 0.1.0)
|
7
7
|
mechanize (~> 2.7)
|
@@ -13,15 +13,20 @@ PATH
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
+
addressable (2.6.0)
|
17
|
+
public_suffix (>= 2.0.2, < 4.0)
|
16
18
|
coderay (1.1.2)
|
17
19
|
connection_pool (2.2.2)
|
20
|
+
crack (0.4.3)
|
21
|
+
safe_yaml (~> 1.0.0)
|
18
22
|
crass (1.0.4)
|
19
23
|
diff-lcs (1.3)
|
20
|
-
domain_name (0.5.
|
24
|
+
domain_name (0.5.20190701)
|
21
25
|
unf (>= 0.0.5, < 1.0.0)
|
22
26
|
ffi (1.11.1)
|
23
27
|
final_redirect_url (0.1.0)
|
24
28
|
guess_html_encoding (0.0.11)
|
29
|
+
hashdiff (1.0.0)
|
25
30
|
http-cookie (1.0.3)
|
26
31
|
domain_name (~> 0.5)
|
27
32
|
mechanize (2.7.6)
|
@@ -41,7 +46,7 @@ GEM
|
|
41
46
|
natto (1.1.2)
|
42
47
|
ffi (>= 1.9.0)
|
43
48
|
net-http-digest_auth (1.4.1)
|
44
|
-
net-http-persistent (3.0
|
49
|
+
net-http-persistent (3.1.0)
|
45
50
|
connection_pool (~> 2.2)
|
46
51
|
nokogiri (1.10.3)
|
47
52
|
mini_portile2 (~> 2.4.0)
|
@@ -51,6 +56,7 @@ GEM
|
|
51
56
|
pry (0.12.2)
|
52
57
|
coderay (~> 1.1.0)
|
53
58
|
method_source (~> 0.9.0)
|
59
|
+
public_suffix (3.1.1)
|
54
60
|
rake (10.5.0)
|
55
61
|
rspec (3.8.0)
|
56
62
|
rspec-core (~> 3.8.0)
|
@@ -68,6 +74,7 @@ GEM
|
|
68
74
|
ruby-readability (0.7.0)
|
69
75
|
guess_html_encoding (>= 0.0.4)
|
70
76
|
nokogiri (>= 1.6.0)
|
77
|
+
safe_yaml (1.0.5)
|
71
78
|
sanitize (5.0.0)
|
72
79
|
crass (~> 1.0.2)
|
73
80
|
nokogiri (>= 1.8.0)
|
@@ -75,6 +82,10 @@ GEM
|
|
75
82
|
unf (0.1.4)
|
76
83
|
unf_ext
|
77
84
|
unf_ext (0.0.7.6)
|
85
|
+
webmock (3.6.0)
|
86
|
+
addressable (>= 2.3.6)
|
87
|
+
crack (>= 0.3.2)
|
88
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
78
89
|
webrobots (0.1.2)
|
79
90
|
|
80
91
|
PLATFORMS
|
@@ -85,6 +96,7 @@ DEPENDENCIES
|
|
85
96
|
rake (~> 10.0)
|
86
97
|
rspec (~> 3.0)
|
87
98
|
web_stat!
|
99
|
+
webmock (~> 3.6.0)
|
88
100
|
|
89
101
|
BUNDLED WITH
|
90
102
|
2.0.1
|
data/README.md
CHANGED
data/lib/web_stat/fetch.rb
CHANGED
@@ -7,8 +7,8 @@ require 'ruby-readability'
|
|
7
7
|
require 'final_redirect_url'
|
8
8
|
module WebStat
|
9
9
|
class Fetch
|
10
|
-
attr_accessor :html, :nokogiri
|
11
|
-
|
10
|
+
attr_accessor :url, :html, :nokogiri
|
11
|
+
|
12
12
|
# Get title
|
13
13
|
# @return [String] title
|
14
14
|
def title
|
@@ -40,6 +40,7 @@ module WebStat
|
|
40
40
|
|
41
41
|
# Get temporary path of image
|
42
42
|
def eyecatch_image_path
|
43
|
+
# Reuse `path` in this method
|
43
44
|
path = nil
|
44
45
|
WebStat::Configure.get["eyecatch_image_xpaths"].each do |xpath|
|
45
46
|
if @nokogiri.xpath(xpath).first.respond_to?(:value)
|
@@ -47,20 +48,15 @@ module WebStat
|
|
47
48
|
break
|
48
49
|
end
|
49
50
|
end
|
50
|
-
if
|
51
|
-
|
52
|
-
path = "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
53
|
-
else
|
54
|
-
path = "#{URI.parse(@url).scheme}://#{URI.parse(@url).host}/#{URI.parse(@url).path}/#{path}"
|
55
|
-
end
|
51
|
+
if path.match(/^\//)
|
52
|
+
"#{URI.parse(@url).scheme}://#{URI.parse(@url).host}#{path}"
|
56
53
|
end
|
57
|
-
path
|
58
54
|
end
|
59
55
|
|
60
56
|
# Get local path to save url
|
61
57
|
# @param [String] url
|
62
58
|
def save_local_path(url)
|
63
|
-
return nil if url.nil?
|
59
|
+
return nil if url.nil?
|
64
60
|
tmp_file = "/tmp/#{Digest::SHA1.hexdigest(url)}"
|
65
61
|
open(original_url(url)) do |remote_file|
|
66
62
|
File.open(tmp_file, "w+b") do |_file|
|
@@ -104,4 +100,4 @@ module WebStat
|
|
104
100
|
end
|
105
101
|
end
|
106
102
|
end
|
107
|
-
end
|
103
|
+
end
|
data/lib/web_stat/version.rb
CHANGED
data/lib/web_stat.rb
CHANGED
@@ -17,13 +17,17 @@ module WebStat
|
|
17
17
|
end
|
18
18
|
|
19
19
|
# Get web page's stat by url
|
20
|
+
# @param String url
|
20
21
|
def stat_by_url(url)
|
21
22
|
stat_by_web(url)
|
22
23
|
end
|
23
24
|
|
24
25
|
# Get web page's stat by html
|
25
|
-
|
26
|
+
# @param String html
|
27
|
+
# @param [String] url
|
28
|
+
def stat_by_html(html, url=nil)
|
26
29
|
web_stat = WebStat::FetchAsHtml.new(html)
|
30
|
+
web_stat.url = url unless url.nil?
|
27
31
|
web_stat.stat
|
28
32
|
end
|
29
33
|
end
|
@@ -7,5 +7,8 @@
|
|
7
7
|
</head>
|
8
8
|
<body class="post-template tag-rubygems">
|
9
9
|
<h1 class="post-title">gem作成でついまずいたところ</h1>
|
10
|
+
<p class="profile"><a href="https://newsdict.blog">
|
11
|
+
<img src="/content/images/size/w100/2019/03/facebook-3.jpg" alt="Yusuke Abe" class="avatar" /></a></p>
|
12
|
+
|
10
13
|
</body>
|
11
14
|
</html>
|
Binary file
|
data/spec/spec_helper.rb
CHANGED
@@ -3,6 +3,10 @@ require "bundler/setup"
|
|
3
3
|
require 'pry'
|
4
4
|
require "web_stat"
|
5
5
|
|
6
|
+
require 'webmock'
|
7
|
+
include WebMock::API
|
8
|
+
WebMock.enable!
|
9
|
+
|
6
10
|
RSpec.configure do |config|
|
7
11
|
# Enable flags like --only-failures and --next-failure
|
8
12
|
config.example_status_persistence_file_path = ".rspec_status"
|
@@ -47,8 +51,32 @@ module WebStatTestHelper
|
|
47
51
|
# Get htmls of fixture
|
48
52
|
def scheme_and_files
|
49
53
|
Dir.glob(File.join(File.dirname(__FILE__), "fixtures", "htmls", "*.html")).map do |file|
|
50
|
-
|
54
|
+
"https://newsdict.blog/#{File.basename(file)}"
|
51
55
|
end
|
52
56
|
end
|
53
57
|
end
|
54
|
-
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Set webmock
|
61
|
+
WebStatTestHelper.scheme_and_files.each do |url|
|
62
|
+
WebMock.stub_request(:get, url)
|
63
|
+
.to_return(
|
64
|
+
status: 200,
|
65
|
+
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "htmls", File.basename(url))),
|
66
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
67
|
+
end
|
68
|
+
|
69
|
+
WebMock.stub_request(:get, "https://newsdict.blog/robots.txt")
|
70
|
+
.to_return(
|
71
|
+
status: 200,
|
72
|
+
body: "User-agent: *
|
73
|
+
Sitemap: https://newsdict.blog/sitemap.xml
|
74
|
+
Disallow: /ghost/
|
75
|
+
Disallow: /p/",
|
76
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
77
|
+
|
78
|
+
WebMock.stub_request(:get, "https://newsdict.blog/content/images/size/w100/2019/03/facebook-3.jpg")
|
79
|
+
.to_return(
|
80
|
+
status: 200,
|
81
|
+
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "facebook-3.jpg")),
|
82
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
data/spec/web_stat/fetch_spec.rb
CHANGED
@@ -30,9 +30,21 @@ RSpec.describe WebStat::Fetch do
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
+
it "Get eyecatch image blob by #{fetch[:class].to_s}" do
|
34
|
+
fetch[:fixture].each do |fixture|
|
35
|
+
web_stat = fetch[:class].new(fixture)
|
36
|
+
web_stat.url = "https://newsdict.blog"
|
37
|
+
unless web_stat.stat[:eyecatch_image_path].nil?
|
38
|
+
image = File.read(web_stat.stat[:eyecatch_image_path])
|
39
|
+
expect(image.encoding.to_s).to eq("UTF-8")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
33
44
|
it "Get eyecatch image path by #{fetch[:class].to_s}" do
|
34
45
|
fetch[:fixture].each do |fixture|
|
35
46
|
web_stat = fetch[:class].new(fixture)
|
47
|
+
web_stat.url = "https://newsdict.blog"
|
36
48
|
expect(web_stat.eyecatch_image_path).to be_string_or_nil
|
37
49
|
end
|
38
50
|
end
|
@@ -40,6 +52,7 @@ RSpec.describe WebStat::Fetch do
|
|
40
52
|
it "Get local path of eyecatch image by #{fetch[:class].to_s}" do
|
41
53
|
fetch[:fixture].each do |fixture|
|
42
54
|
web_stat = fetch[:class].new(fixture)
|
55
|
+
web_stat.url = "https://newsdict.blog"
|
43
56
|
expect(web_stat.stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
44
57
|
end
|
45
58
|
end
|
@@ -47,7 +60,7 @@ RSpec.describe WebStat::Fetch do
|
|
47
60
|
|
48
61
|
it "WebStat.stat_by_html" do
|
49
62
|
WebStatTestHelper.htmls.each do |fixture|
|
50
|
-
web_stat = WebStat.stat_by_html(fixture)
|
63
|
+
web_stat = WebStat.stat_by_html(fixture, "https://newsdict.blog")
|
51
64
|
expect(web_stat[:title]).to eq "gem作成でついまずいたところ"
|
52
65
|
expect(web_stat[:site_name]).to eq "newsdict.blog"
|
53
66
|
expect(web_stat[:content]).not_to eq nil
|
@@ -66,4 +79,4 @@ RSpec.describe WebStat::Fetch do
|
|
66
79
|
expect(web_stat[:eyecatch_image_path]).to be_tmp_file_or_nil
|
67
80
|
end
|
68
81
|
end
|
69
|
-
end
|
82
|
+
end
|
data/web_stat.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.12.2
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: webmock
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 3.6.0
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 3.6.0
|
153
167
|
description: Fetch the web pages and stat.
|
154
168
|
email:
|
155
169
|
- yube@newsdict.jp
|
@@ -180,6 +194,7 @@ files:
|
|
180
194
|
- spec/fixtures/htmls/blog.html
|
181
195
|
- spec/fixtures/htmls/h1-title.html
|
182
196
|
- spec/fixtures/htmls/image.html
|
197
|
+
- spec/fixtures/images/facebook-3.jpg
|
183
198
|
- spec/spec_helper.rb
|
184
199
|
- spec/web_stat/configure_spec.rb
|
185
200
|
- spec/web_stat/fetch_spec.rb
|
@@ -213,6 +228,7 @@ test_files:
|
|
213
228
|
- spec/fixtures/htmls/blog.html
|
214
229
|
- spec/fixtures/htmls/h1-title.html
|
215
230
|
- spec/fixtures/htmls/image.html
|
231
|
+
- spec/fixtures/images/facebook-3.jpg
|
216
232
|
- spec/spec_helper.rb
|
217
233
|
- spec/web_stat/configure_spec.rb
|
218
234
|
- spec/web_stat/fetch_spec.rb
|