web_stat 0.4.4 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +29 -25
- data/lib/web_stat/fetch.rb +22 -2
- data/lib/web_stat/version.rb +1 -1
- data/spec/spec_helper.rb +7 -1
- data/spec/web_stat/fetch_spec.rb +5 -0
- data/web_stat.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ce60bcf4a31f90024abc35cf1ecc57e32626dfb5ecf4bc4f2280bd72931ff34
|
4
|
+
data.tar.gz: 575f805a63a995b2d0e3bc909978dcd4e3b4f15462f717cf83d6aac00c96078d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7af1262b25163205eabdfa26e1671f95d53963387eceec6c2c99da0a3a17359b77aaa097ec9556010b8000ecc0772d1dc19a67d128310c8d6bdd8379d008a913
|
7
|
+
data.tar.gz: 855706ad8525609e8a2a50ca64091081794940d69187f951a79c02c66acfa5be9f53cdeacf0f99f6c0431ad41a71ce616691590d40bce0a56576275fba96c453
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
web_stat (0.4.4)
|
4
|
+
web_stat (0.4.6)
|
5
5
|
bundler (>= 2.0.2)
|
6
6
|
cld (>= 0.8.0)
|
7
|
-
mechanize (>= 2.7)
|
7
|
+
mechanize (>= 2.7.7)
|
8
8
|
natto (>= 1.1.2)
|
9
9
|
nokogiri (>= 1.10.4)
|
10
10
|
pdf-reader (= 2.4.0)
|
@@ -26,10 +26,11 @@ GEM
|
|
26
26
|
cld (0.8.0)
|
27
27
|
ffi
|
28
28
|
coderay (1.1.3)
|
29
|
-
|
30
|
-
|
29
|
+
connection_pool (2.2.3)
|
30
|
+
crack (0.4.5)
|
31
|
+
rexml
|
31
32
|
crass (1.0.6)
|
32
|
-
diff-lcs (1.
|
33
|
+
diff-lcs (1.4.4)
|
33
34
|
domain_name (0.5.20190701)
|
34
35
|
unf (>= 0.0.5, < 1.0.0)
|
35
36
|
ffi (1.14.2)
|
@@ -38,7 +39,7 @@ GEM
|
|
38
39
|
hashery (2.1.2)
|
39
40
|
http-cookie (1.0.3)
|
40
41
|
domain_name (~> 0.5)
|
41
|
-
mechanize (2.7.
|
42
|
+
mechanize (2.7.7)
|
42
43
|
domain_name (~> 0.5, >= 0.5.1)
|
43
44
|
http-cookie (~> 1.0)
|
44
45
|
mime-types (>= 1.17.2)
|
@@ -46,16 +47,20 @@ GEM
|
|
46
47
|
net-http-persistent (>= 2.5.2)
|
47
48
|
nokogiri (~> 1.6)
|
48
49
|
ntlm-http (~> 0.1, >= 0.1.1)
|
50
|
+
webrick (~> 1.7)
|
49
51
|
webrobots (>= 0.0.9, < 0.2)
|
50
52
|
method_source (1.0.0)
|
51
53
|
mime-types (3.3.1)
|
52
54
|
mime-types-data (~> 3.2015)
|
53
|
-
mime-types-data (3.
|
55
|
+
mime-types-data (3.2021.0212)
|
56
|
+
mini_portile2 (2.5.0)
|
54
57
|
natto (1.2.0)
|
55
58
|
ffi (>= 1.9.0)
|
56
59
|
net-http-digest_auth (1.4.1)
|
57
|
-
net-http-persistent (
|
58
|
-
|
60
|
+
net-http-persistent (4.0.1)
|
61
|
+
connection_pool (~> 2.2)
|
62
|
+
nokogiri (1.11.1)
|
63
|
+
mini_portile2 (~> 2.5.0)
|
59
64
|
racc (~> 1.4)
|
60
65
|
nokogumbo (2.0.4)
|
61
66
|
nokogiri (~> 1.8, >= 1.8.4)
|
@@ -72,30 +77,29 @@ GEM
|
|
72
77
|
pry-byebug (3.9.0)
|
73
78
|
byebug (~> 11.0)
|
74
79
|
pry (~> 0.13.0)
|
75
|
-
public_suffix (4.0.
|
80
|
+
public_suffix (4.0.6)
|
76
81
|
racc (1.5.2)
|
77
|
-
rake (13.0.
|
82
|
+
rake (13.0.3)
|
78
83
|
rexml (3.2.4)
|
79
|
-
rspec (3.
|
80
|
-
rspec-core (~> 3.
|
81
|
-
rspec-expectations (~> 3.
|
82
|
-
rspec-mocks (~> 3.
|
83
|
-
rspec-core (3.
|
84
|
-
rspec-support (~> 3.
|
85
|
-
rspec-expectations (3.
|
84
|
+
rspec (3.10.0)
|
85
|
+
rspec-core (~> 3.10.0)
|
86
|
+
rspec-expectations (~> 3.10.0)
|
87
|
+
rspec-mocks (~> 3.10.0)
|
88
|
+
rspec-core (3.10.1)
|
89
|
+
rspec-support (~> 3.10.0)
|
90
|
+
rspec-expectations (3.10.1)
|
86
91
|
diff-lcs (>= 1.2.0, < 2.0)
|
87
|
-
rspec-support (~> 3.
|
88
|
-
rspec-mocks (3.
|
92
|
+
rspec-support (~> 3.10.0)
|
93
|
+
rspec-mocks (3.10.2)
|
89
94
|
diff-lcs (>= 1.2.0, < 2.0)
|
90
|
-
rspec-support (~> 3.
|
91
|
-
rspec-support (3.
|
95
|
+
rspec-support (~> 3.10.0)
|
96
|
+
rspec-support (3.10.2)
|
92
97
|
ruby-rc4 (0.1.5)
|
93
98
|
ruby-readability (0.7.0)
|
94
99
|
guess_html_encoding (>= 0.0.4)
|
95
100
|
nokogiri (>= 1.6.0)
|
96
101
|
rubyzip (2.3.0)
|
97
|
-
|
98
|
-
sanitize (5.2.2)
|
102
|
+
sanitize (5.2.3)
|
99
103
|
crass (~> 1.0.2)
|
100
104
|
nokogiri (>= 1.8.0)
|
101
105
|
nokogumbo (~> 2.0)
|
@@ -106,7 +110,7 @@ GEM
|
|
106
110
|
unf (0.1.4)
|
107
111
|
unf_ext
|
108
112
|
unf_ext (0.0.7.7)
|
109
|
-
webmock (3.
|
113
|
+
webmock (3.11.2)
|
110
114
|
addressable (>= 2.3.6)
|
111
115
|
crack (>= 0.3.2)
|
112
116
|
hashdiff (>= 0.4.0, < 2.0.0)
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module WebStat
|
2
2
|
class Fetch
|
3
|
-
attr_accessor :url, :html, :nokogiri, :userdic, :status
|
3
|
+
attr_accessor :url, :html, :nokogiri, :userdic, :status, :header
|
4
4
|
# Get title
|
5
5
|
# @return [String] title
|
6
6
|
def title
|
@@ -95,12 +95,13 @@ module WebStat
|
|
95
95
|
if mech.agent.robots_disallowed?(url)
|
96
96
|
raise Mechanize::RobotsDisallowedError.new(url)
|
97
97
|
end
|
98
|
+
document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
99
|
+
@header = document.header
|
98
100
|
begin
|
99
101
|
raise 'not_use_chromedirver' unless WebStat::Configure.get["use_chromedirver"]
|
100
102
|
body = WebStat::WebDriverHelper.get_source(url)
|
101
103
|
@status = 200
|
102
104
|
rescue
|
103
|
-
document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
104
105
|
if document.class == Mechanize::File
|
105
106
|
body = document.body
|
106
107
|
else
|
@@ -114,6 +115,24 @@ module WebStat
|
|
114
115
|
end
|
115
116
|
body
|
116
117
|
end
|
118
|
+
|
119
|
+
# Return Date or last modified header.
|
120
|
+
# @param [String] url
|
121
|
+
# @return [DateTime]
|
122
|
+
def get_last_modified
|
123
|
+
@header = @header || {}
|
124
|
+
if @header.has_key?("date") && @header.has_key?("last-modified")
|
125
|
+
if DateTime.parse(@header["date"]) >= DateTime.parse(@header["last-modified"])
|
126
|
+
DateTime.parse(@header["date"])
|
127
|
+
else
|
128
|
+
DateTime.parse(@header["last-modified"])
|
129
|
+
end
|
130
|
+
elsif @header.has_key?("date")
|
131
|
+
DateTime.parse(@header["date"])
|
132
|
+
elsif @header.has_key?("last-modified")
|
133
|
+
DateTime.parse(@header["last-modified"])
|
134
|
+
end
|
135
|
+
end
|
117
136
|
|
118
137
|
# Get the information of @url
|
119
138
|
# @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
|
@@ -134,6 +153,7 @@ module WebStat
|
|
134
153
|
language_code: language_code,
|
135
154
|
status: @status,
|
136
155
|
url: @url,
|
156
|
+
last_modified_at: get_last_modified,
|
137
157
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
138
158
|
tags: tag.nouns
|
139
159
|
}
|
data/lib/web_stat/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -102,4 +102,10 @@ WebMock.stub_request(:get, "https://cdn.newsdict.jp/assets/newsdict-5d8601394c3f
|
|
102
102
|
.to_return(
|
103
103
|
status: 200,
|
104
104
|
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "newsdict-5d8601394c3f4eea2d7161ab92ab327ac7099e22214c853327011b3a71859b8e.png")),
|
105
|
-
headers: {content_type: 'application/html; charset=utf-8'})
|
105
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
106
|
+
|
107
|
+
WebMock.stub_request(:get, "https://newsdict.blog/last_modified_at")
|
108
|
+
.to_return(
|
109
|
+
status: 200,
|
110
|
+
body: "ok",
|
111
|
+
headers: {content_type: 'application/html; charset=utf-8', date: "Tue, 05 Apr 2016 07:43:08 GMT", "Last-Modified": "Tue, 05 Apr 2020 07:43:08 JST"})
|
data/spec/web_stat/fetch_spec.rb
CHANGED
@@ -204,4 +204,9 @@ RSpec.describe WebStat::Fetch do
|
|
204
204
|
expect(WebStat::FetchAsWeb.url_valid?("https://status.cloud.google.com/incident/cloud-functions/19010")).to be true
|
205
205
|
expect(WebStat::FetchAsWeb.url_valid?("http://g.co/arts/SK1jZHJpT8N1BGaM7")).to be true
|
206
206
|
end
|
207
|
+
|
208
|
+
it "get_last_modified" do
|
209
|
+
web_stat = WebStat::FetchAsWeb.new("https://newsdict.blog/last_modified_at")
|
210
|
+
web_stat.stat[:last_modified_at] === DateTime.parse("Tue, 05 Apr 2020 07:43:08 JST")
|
211
|
+
end
|
207
212
|
end
|
data/web_stat.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
|
23
23
|
spec.add_runtime_dependency "bundler", ">= 2.0.2"
|
24
24
|
spec.add_runtime_dependency "nokogiri", ">= 1.10.4"
|
25
|
-
spec.add_runtime_dependency "mechanize", ">= 2.7"
|
25
|
+
spec.add_runtime_dependency "mechanize", ">= 2.7.7"
|
26
26
|
spec.add_runtime_dependency "ruby-readability", ">= 0.7"
|
27
27
|
spec.add_runtime_dependency "natto", ">= 1.1.2"
|
28
28
|
spec.add_runtime_dependency "sanitize", ">= 5.0.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '2.7'
|
47
|
+
version: 2.7.7
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '2.7'
|
54
|
+
version: 2.7.7
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: ruby-readability
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|