web_stat 0.4.4 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +29 -25
- data/lib/web_stat/fetch.rb +22 -2
- data/lib/web_stat/version.rb +1 -1
- data/spec/spec_helper.rb +7 -1
- data/spec/web_stat/fetch_spec.rb +5 -0
- data/web_stat.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ce60bcf4a31f90024abc35cf1ecc57e32626dfb5ecf4bc4f2280bd72931ff34
|
4
|
+
data.tar.gz: 575f805a63a995b2d0e3bc909978dcd4e3b4f15462f717cf83d6aac00c96078d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7af1262b25163205eabdfa26e1671f95d53963387eceec6c2c99da0a3a17359b77aaa097ec9556010b8000ecc0772d1dc19a67d128310c8d6bdd8379d008a913
|
7
|
+
data.tar.gz: 855706ad8525609e8a2a50ca64091081794940d69187f951a79c02c66acfa5be9f53cdeacf0f99f6c0431ad41a71ce616691590d40bce0a56576275fba96c453
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
web_stat (0.4.
|
4
|
+
web_stat (0.4.6)
|
5
5
|
bundler (>= 2.0.2)
|
6
6
|
cld (>= 0.8.0)
|
7
|
-
mechanize (>= 2.7)
|
7
|
+
mechanize (>= 2.7.7)
|
8
8
|
natto (>= 1.1.2)
|
9
9
|
nokogiri (>= 1.10.4)
|
10
10
|
pdf-reader (= 2.4.0)
|
@@ -26,10 +26,11 @@ GEM
|
|
26
26
|
cld (0.8.0)
|
27
27
|
ffi
|
28
28
|
coderay (1.1.3)
|
29
|
-
|
30
|
-
|
29
|
+
connection_pool (2.2.3)
|
30
|
+
crack (0.4.5)
|
31
|
+
rexml
|
31
32
|
crass (1.0.6)
|
32
|
-
diff-lcs (1.
|
33
|
+
diff-lcs (1.4.4)
|
33
34
|
domain_name (0.5.20190701)
|
34
35
|
unf (>= 0.0.5, < 1.0.0)
|
35
36
|
ffi (1.14.2)
|
@@ -38,7 +39,7 @@ GEM
|
|
38
39
|
hashery (2.1.2)
|
39
40
|
http-cookie (1.0.3)
|
40
41
|
domain_name (~> 0.5)
|
41
|
-
mechanize (2.7.
|
42
|
+
mechanize (2.7.7)
|
42
43
|
domain_name (~> 0.5, >= 0.5.1)
|
43
44
|
http-cookie (~> 1.0)
|
44
45
|
mime-types (>= 1.17.2)
|
@@ -46,16 +47,20 @@ GEM
|
|
46
47
|
net-http-persistent (>= 2.5.2)
|
47
48
|
nokogiri (~> 1.6)
|
48
49
|
ntlm-http (~> 0.1, >= 0.1.1)
|
50
|
+
webrick (~> 1.7)
|
49
51
|
webrobots (>= 0.0.9, < 0.2)
|
50
52
|
method_source (1.0.0)
|
51
53
|
mime-types (3.3.1)
|
52
54
|
mime-types-data (~> 3.2015)
|
53
|
-
mime-types-data (3.
|
55
|
+
mime-types-data (3.2021.0212)
|
56
|
+
mini_portile2 (2.5.0)
|
54
57
|
natto (1.2.0)
|
55
58
|
ffi (>= 1.9.0)
|
56
59
|
net-http-digest_auth (1.4.1)
|
57
|
-
net-http-persistent (
|
58
|
-
|
60
|
+
net-http-persistent (4.0.1)
|
61
|
+
connection_pool (~> 2.2)
|
62
|
+
nokogiri (1.11.1)
|
63
|
+
mini_portile2 (~> 2.5.0)
|
59
64
|
racc (~> 1.4)
|
60
65
|
nokogumbo (2.0.4)
|
61
66
|
nokogiri (~> 1.8, >= 1.8.4)
|
@@ -72,30 +77,29 @@ GEM
|
|
72
77
|
pry-byebug (3.9.0)
|
73
78
|
byebug (~> 11.0)
|
74
79
|
pry (~> 0.13.0)
|
75
|
-
public_suffix (4.0.
|
80
|
+
public_suffix (4.0.6)
|
76
81
|
racc (1.5.2)
|
77
|
-
rake (13.0.
|
82
|
+
rake (13.0.3)
|
78
83
|
rexml (3.2.4)
|
79
|
-
rspec (3.
|
80
|
-
rspec-core (~> 3.
|
81
|
-
rspec-expectations (~> 3.
|
82
|
-
rspec-mocks (~> 3.
|
83
|
-
rspec-core (3.
|
84
|
-
rspec-support (~> 3.
|
85
|
-
rspec-expectations (3.
|
84
|
+
rspec (3.10.0)
|
85
|
+
rspec-core (~> 3.10.0)
|
86
|
+
rspec-expectations (~> 3.10.0)
|
87
|
+
rspec-mocks (~> 3.10.0)
|
88
|
+
rspec-core (3.10.1)
|
89
|
+
rspec-support (~> 3.10.0)
|
90
|
+
rspec-expectations (3.10.1)
|
86
91
|
diff-lcs (>= 1.2.0, < 2.0)
|
87
|
-
rspec-support (~> 3.
|
88
|
-
rspec-mocks (3.
|
92
|
+
rspec-support (~> 3.10.0)
|
93
|
+
rspec-mocks (3.10.2)
|
89
94
|
diff-lcs (>= 1.2.0, < 2.0)
|
90
|
-
rspec-support (~> 3.
|
91
|
-
rspec-support (3.
|
95
|
+
rspec-support (~> 3.10.0)
|
96
|
+
rspec-support (3.10.2)
|
92
97
|
ruby-rc4 (0.1.5)
|
93
98
|
ruby-readability (0.7.0)
|
94
99
|
guess_html_encoding (>= 0.0.4)
|
95
100
|
nokogiri (>= 1.6.0)
|
96
101
|
rubyzip (2.3.0)
|
97
|
-
|
98
|
-
sanitize (5.2.2)
|
102
|
+
sanitize (5.2.3)
|
99
103
|
crass (~> 1.0.2)
|
100
104
|
nokogiri (>= 1.8.0)
|
101
105
|
nokogumbo (~> 2.0)
|
@@ -106,7 +110,7 @@ GEM
|
|
106
110
|
unf (0.1.4)
|
107
111
|
unf_ext
|
108
112
|
unf_ext (0.0.7.7)
|
109
|
-
webmock (3.
|
113
|
+
webmock (3.11.2)
|
110
114
|
addressable (>= 2.3.6)
|
111
115
|
crack (>= 0.3.2)
|
112
116
|
hashdiff (>= 0.4.0, < 2.0.0)
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module WebStat
|
2
2
|
class Fetch
|
3
|
-
attr_accessor :url, :html, :nokogiri, :userdic, :status
|
3
|
+
attr_accessor :url, :html, :nokogiri, :userdic, :status, :header
|
4
4
|
# Get title
|
5
5
|
# @return [String] title
|
6
6
|
def title
|
@@ -95,12 +95,13 @@ module WebStat
|
|
95
95
|
if mech.agent.robots_disallowed?(url)
|
96
96
|
raise Mechanize::RobotsDisallowedError.new(url)
|
97
97
|
end
|
98
|
+
document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
99
|
+
@header = document.header
|
98
100
|
begin
|
99
101
|
raise 'not_use_chromedirver' unless WebStat::Configure.get["use_chromedirver"]
|
100
102
|
body = WebStat::WebDriverHelper.get_source(url)
|
101
103
|
@status = 200
|
102
104
|
rescue
|
103
|
-
document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
|
104
105
|
if document.class == Mechanize::File
|
105
106
|
body = document.body
|
106
107
|
else
|
@@ -114,6 +115,24 @@ module WebStat
|
|
114
115
|
end
|
115
116
|
body
|
116
117
|
end
|
118
|
+
|
119
|
+
# Return the later of the Date and Last-Modified response headers, or whichever one is present.
|
120
|
+
# @param [String] url
|
121
|
+
# @return [DateTime, nil] nil when neither header is present
|
122
|
+
def get_last_modified
|
123
|
+
@header = @header || {}
|
124
|
+
if @header.has_key?("date") && @header.has_key?("last-modified")
|
125
|
+
if DateTime.parse(@header["date"]) >= DateTime.parse(@header["last-modified"])
|
126
|
+
DateTime.parse(@header["date"])
|
127
|
+
else
|
128
|
+
DateTime.parse(@header["last-modified"])
|
129
|
+
end
|
130
|
+
elsif @header.has_key?("date")
|
131
|
+
DateTime.parse(@header["date"])
|
132
|
+
elsif @header.has_key?("last-modified")
|
133
|
+
DateTime.parse(@header["last-modified"])
|
134
|
+
end
|
135
|
+
end
|
117
136
|
|
118
137
|
# Get the information about @url
|
119
138
|
# @param [Hash] Specify a dictionary for each language code. Example: {"ja": /***/**.dic, "other": /***/***.dic}
|
@@ -134,6 +153,7 @@ module WebStat
|
|
134
153
|
language_code: language_code,
|
135
154
|
status: @status,
|
136
155
|
url: @url,
|
156
|
+
last_modified_at: get_last_modified,
|
137
157
|
eyecatch_image_path: save_local_path(eyecatch_image_path),
|
138
158
|
tags: tag.nouns
|
139
159
|
}
|
data/lib/web_stat/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -102,4 +102,10 @@ WebMock.stub_request(:get, "https://cdn.newsdict.jp/assets/newsdict-5d8601394c3f
|
|
102
102
|
.to_return(
|
103
103
|
status: 200,
|
104
104
|
body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "newsdict-5d8601394c3f4eea2d7161ab92ab327ac7099e22214c853327011b3a71859b8e.png")),
|
105
|
-
headers: {content_type: 'application/html; charset=utf-8'})
|
105
|
+
headers: {content_type: 'application/html; charset=utf-8'})
|
106
|
+
|
107
|
+
WebMock.stub_request(:get, "https://newsdict.blog/last_modified_at")
|
108
|
+
.to_return(
|
109
|
+
status: 200,
|
110
|
+
body: "ok",
|
111
|
+
headers: {content_type: 'application/html; charset=utf-8', date: "Tue, 05 Apr 2016 07:43:08 GMT", "Last-Modified": "Tue, 05 Apr 2020 07:43:08 JST"})
|
data/spec/web_stat/fetch_spec.rb
CHANGED
@@ -204,4 +204,9 @@ RSpec.describe WebStat::Fetch do
|
|
204
204
|
expect(WebStat::FetchAsWeb.url_valid?("https://status.cloud.google.com/incident/cloud-functions/19010")).to be true
|
205
205
|
expect(WebStat::FetchAsWeb.url_valid?("http://g.co/arts/SK1jZHJpT8N1BGaM7")).to be true
|
206
206
|
end
|
207
|
+
|
208
|
+
it "get_last_modified" do
|
209
|
+
web_stat = WebStat::FetchAsWeb.new("https://newsdict.blog/last_modified_at")
|
210
|
+
web_stat.stat[:last_modified_at] === DateTime.parse("Tue, 05 Apr 2020 07:43:08 JST")
|
211
|
+
end
|
207
212
|
end
|
data/web_stat.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
|
23
23
|
spec.add_runtime_dependency "bundler", ">= 2.0.2"
|
24
24
|
spec.add_runtime_dependency "nokogiri", ">= 1.10.4"
|
25
|
-
spec.add_runtime_dependency "mechanize", ">= 2.7"
|
25
|
+
spec.add_runtime_dependency "mechanize", ">= 2.7.7"
|
26
26
|
spec.add_runtime_dependency "ruby-readability", ">= 0.7"
|
27
27
|
spec.add_runtime_dependency "natto", ">= 1.1.2"
|
28
28
|
spec.add_runtime_dependency "sanitize", ">= 5.0.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 2.7.7
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 2.7.7
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: ruby-readability
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|