web_stat 0.4.4 → 0.4.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc14d85bf9edb790963b56d1759fe01c60836f7c0aafb7c8acc67cbce480b21c
4
- data.tar.gz: b1c321d0509f68cee0c443eed22ca6815ff9aede85aa66896a084bff43f5f44c
3
+ metadata.gz: 3ce60bcf4a31f90024abc35cf1ecc57e32626dfb5ecf4bc4f2280bd72931ff34
4
+ data.tar.gz: 575f805a63a995b2d0e3bc909978dcd4e3b4f15462f717cf83d6aac00c96078d
5
5
  SHA512:
6
- metadata.gz: d8892eeb6c3c917a87f1cf4ca0627b6f0ac4c0c8025fbd65bf7df5ff60bcb6d0c21994f25540b6fb82b9b6a4ac4230035de2f34f694534bb91b84bd3d03f7a44
7
- data.tar.gz: 2594de9b8a15705be360e9b0cd7e01d5cfc47b8cb4c91a1a25cd38f2b592e6607924c78f9d16093a2b03b47e097e31e3a153c6cd77852a91d7fc4f09625a6f59
6
+ metadata.gz: 7af1262b25163205eabdfa26e1671f95d53963387eceec6c2c99da0a3a17359b77aaa097ec9556010b8000ecc0772d1dc19a67d128310c8d6bdd8379d008a913
7
+ data.tar.gz: 855706ad8525609e8a2a50ca64091081794940d69187f951a79c02c66acfa5be9f53cdeacf0f99f6c0431ad41a71ce616691590d40bce0a56576275fba96c453
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.4.4)
4
+ web_stat (0.4.6)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
- mechanize (>= 2.7)
7
+ mechanize (>= 2.7.7)
8
8
  natto (>= 1.1.2)
9
9
  nokogiri (>= 1.10.4)
10
10
  pdf-reader (= 2.4.0)
@@ -26,10 +26,11 @@ GEM
26
26
  cld (0.8.0)
27
27
  ffi
28
28
  coderay (1.1.3)
29
- crack (0.4.3)
30
- safe_yaml (~> 1.0.0)
29
+ connection_pool (2.2.3)
30
+ crack (0.4.5)
31
+ rexml
31
32
  crass (1.0.6)
32
- diff-lcs (1.3)
33
+ diff-lcs (1.4.4)
33
34
  domain_name (0.5.20190701)
34
35
  unf (>= 0.0.5, < 1.0.0)
35
36
  ffi (1.14.2)
@@ -38,7 +39,7 @@ GEM
38
39
  hashery (2.1.2)
39
40
  http-cookie (1.0.3)
40
41
  domain_name (~> 0.5)
41
- mechanize (2.7.6)
42
+ mechanize (2.7.7)
42
43
  domain_name (~> 0.5, >= 0.5.1)
43
44
  http-cookie (~> 1.0)
44
45
  mime-types (>= 1.17.2)
@@ -46,16 +47,20 @@ GEM
46
47
  net-http-persistent (>= 2.5.2)
47
48
  nokogiri (~> 1.6)
48
49
  ntlm-http (~> 0.1, >= 0.1.1)
50
+ webrick (~> 1.7)
49
51
  webrobots (>= 0.0.9, < 0.2)
50
52
  method_source (1.0.0)
51
53
  mime-types (3.3.1)
52
54
  mime-types-data (~> 3.2015)
53
- mime-types-data (3.2020.1104)
55
+ mime-types-data (3.2021.0212)
56
+ mini_portile2 (2.5.0)
54
57
  natto (1.2.0)
55
58
  ffi (>= 1.9.0)
56
59
  net-http-digest_auth (1.4.1)
57
- net-http-persistent (2.9.4)
58
- nokogiri (1.11.1-x86_64-linux)
60
+ net-http-persistent (4.0.1)
61
+ connection_pool (~> 2.2)
62
+ nokogiri (1.11.1)
63
+ mini_portile2 (~> 2.5.0)
59
64
  racc (~> 1.4)
60
65
  nokogumbo (2.0.4)
61
66
  nokogiri (~> 1.8, >= 1.8.4)
@@ -72,30 +77,29 @@ GEM
72
77
  pry-byebug (3.9.0)
73
78
  byebug (~> 11.0)
74
79
  pry (~> 0.13.0)
75
- public_suffix (4.0.5)
80
+ public_suffix (4.0.6)
76
81
  racc (1.5.2)
77
- rake (13.0.1)
82
+ rake (13.0.3)
78
83
  rexml (3.2.4)
79
- rspec (3.9.0)
80
- rspec-core (~> 3.9.0)
81
- rspec-expectations (~> 3.9.0)
82
- rspec-mocks (~> 3.9.0)
83
- rspec-core (3.9.2)
84
- rspec-support (~> 3.9.3)
85
- rspec-expectations (3.9.2)
84
+ rspec (3.10.0)
85
+ rspec-core (~> 3.10.0)
86
+ rspec-expectations (~> 3.10.0)
87
+ rspec-mocks (~> 3.10.0)
88
+ rspec-core (3.10.1)
89
+ rspec-support (~> 3.10.0)
90
+ rspec-expectations (3.10.1)
86
91
  diff-lcs (>= 1.2.0, < 2.0)
87
- rspec-support (~> 3.9.0)
88
- rspec-mocks (3.9.1)
92
+ rspec-support (~> 3.10.0)
93
+ rspec-mocks (3.10.2)
89
94
  diff-lcs (>= 1.2.0, < 2.0)
90
- rspec-support (~> 3.9.0)
91
- rspec-support (3.9.3)
95
+ rspec-support (~> 3.10.0)
96
+ rspec-support (3.10.2)
92
97
  ruby-rc4 (0.1.5)
93
98
  ruby-readability (0.7.0)
94
99
  guess_html_encoding (>= 0.0.4)
95
100
  nokogiri (>= 1.6.0)
96
101
  rubyzip (2.3.0)
97
- safe_yaml (1.0.5)
98
- sanitize (5.2.2)
102
+ sanitize (5.2.3)
99
103
  crass (~> 1.0.2)
100
104
  nokogiri (>= 1.8.0)
101
105
  nokogumbo (~> 2.0)
@@ -106,7 +110,7 @@ GEM
106
110
  unf (0.1.4)
107
111
  unf_ext
108
112
  unf_ext (0.0.7.7)
109
- webmock (3.8.3)
113
+ webmock (3.11.2)
110
114
  addressable (>= 2.3.6)
111
115
  crack (>= 0.3.2)
112
116
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -1,6 +1,6 @@
1
1
  module WebStat
2
2
  class Fetch
3
- attr_accessor :url, :html, :nokogiri, :userdic, :status
3
+ attr_accessor :url, :html, :nokogiri, :userdic, :status, :header
4
4
  # Get title
5
5
  # @return [String] title
6
6
  def title
@@ -95,12 +95,13 @@ module WebStat
95
95
  if mech.agent.robots_disallowed?(url)
96
96
  raise Mechanize::RobotsDisallowedError.new(url)
97
97
  end
98
+ document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
99
+ @header = document.header
98
100
  begin
99
101
  raise 'not_use_chromedirver' unless WebStat::Configure.get["use_chromedirver"]
100
102
  body = WebStat::WebDriverHelper.get_source(url)
101
103
  @status = 200
102
104
  rescue
103
- document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
104
105
  if document.class == Mechanize::File
105
106
  body = document.body
106
107
  else
@@ -114,6 +115,24 @@ module WebStat
114
115
  end
115
116
  body
116
117
  end
118
+
119
+ # Return the later of the Date and Last-Modified response headers (or whichever one is present).
120
+ # @param [String] url
121
+ # @return [DateTime, nil]
122
+ def get_last_modified
123
+ @header = @header || {}
124
+ if @header.has_key?("date") && @header.has_key?("last-modified")
125
+ if DateTime.parse(@header["date"]) >= DateTime.parse(@header["last-modified"])
126
+ DateTime.parse(@header["date"])
127
+ else
128
+ DateTime.parse(@header["last-modified"])
129
+ end
130
+ elsif @header.has_key?("date")
131
+ DateTime.parse(@header["date"])
132
+ elsif @header.has_key?("last-modified")
133
+ DateTime.parse(@header["last-modified"])
134
+ end
135
+ end
117
136
 
118
137
  # Get information about @url
119
138
  # @param [Hash] Specify a dictionary for each language code. Example: {"ja": /***/**.dic, "other": /***/***.dic}
@@ -134,6 +153,7 @@ module WebStat
134
153
  language_code: language_code,
135
154
  status: @status,
136
155
  url: @url,
156
+ last_modified_at: get_last_modified,
137
157
  eyecatch_image_path: save_local_path(eyecatch_image_path),
138
158
  tags: tag.nouns
139
159
  }
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.4"
2
+ VERSION = "0.4.6"
3
3
  end
data/spec/spec_helper.rb CHANGED
@@ -102,4 +102,10 @@ WebMock.stub_request(:get, "https://cdn.newsdict.jp/assets/newsdict-5d8601394c3f
102
102
  .to_return(
103
103
  status: 200,
104
104
  body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "newsdict-5d8601394c3f4eea2d7161ab92ab327ac7099e22214c853327011b3a71859b8e.png")),
105
- headers: {content_type: 'application/html; charset=utf-8'})
105
+ headers: {content_type: 'application/html; charset=utf-8'})
106
+
107
+ WebMock.stub_request(:get, "https://newsdict.blog/last_modified_at")
108
+ .to_return(
109
+ status: 200,
110
+ body: "ok",
111
+ headers: {content_type: 'application/html; charset=utf-8', date: "Tue, 05 Apr 2016 07:43:08 GMT", "Last-Modified": "Tue, 05 Apr 2020 07:43:08 JST"})
@@ -204,4 +204,9 @@ RSpec.describe WebStat::Fetch do
204
204
  expect(WebStat::FetchAsWeb.url_valid?("https://status.cloud.google.com/incident/cloud-functions/19010")).to be true
205
205
  expect(WebStat::FetchAsWeb.url_valid?("http://g.co/arts/SK1jZHJpT8N1BGaM7")).to be true
206
206
  end
207
+
208
+ it "get_last_modified" do
209
+ web_stat = WebStat::FetchAsWeb.new("https://newsdict.blog/last_modified_at")
210
+ web_stat.stat[:last_modified_at] === DateTime.parse("Tue, 05 Apr 2020 07:43:08 JST")
211
+ end
207
212
  end
data/web_stat.gemspec CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_runtime_dependency "bundler", ">= 2.0.2"
24
24
  spec.add_runtime_dependency "nokogiri", ">= 1.10.4"
25
- spec.add_runtime_dependency "mechanize", ">= 2.7"
25
+ spec.add_runtime_dependency "mechanize", ">= 2.7.7"
26
26
  spec.add_runtime_dependency "ruby-readability", ">= 0.7"
27
27
  spec.add_runtime_dependency "natto", ">= 1.1.2"
28
28
  spec.add_runtime_dependency "sanitize", ">= 5.0.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-31 00:00:00.000000000 Z
11
+ date: 2021-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '2.7'
47
+ version: 2.7.7
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '2.7'
54
+ version: 2.7.7
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: ruby-readability
57
57
  requirement: !ruby/object:Gem::Requirement