web_stat 0.4.4 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc14d85bf9edb790963b56d1759fe01c60836f7c0aafb7c8acc67cbce480b21c
4
- data.tar.gz: b1c321d0509f68cee0c443eed22ca6815ff9aede85aa66896a084bff43f5f44c
3
+ metadata.gz: 3ce60bcf4a31f90024abc35cf1ecc57e32626dfb5ecf4bc4f2280bd72931ff34
4
+ data.tar.gz: 575f805a63a995b2d0e3bc909978dcd4e3b4f15462f717cf83d6aac00c96078d
5
5
  SHA512:
6
- metadata.gz: d8892eeb6c3c917a87f1cf4ca0627b6f0ac4c0c8025fbd65bf7df5ff60bcb6d0c21994f25540b6fb82b9b6a4ac4230035de2f34f694534bb91b84bd3d03f7a44
7
- data.tar.gz: 2594de9b8a15705be360e9b0cd7e01d5cfc47b8cb4c91a1a25cd38f2b592e6607924c78f9d16093a2b03b47e097e31e3a153c6cd77852a91d7fc4f09625a6f59
6
+ metadata.gz: 7af1262b25163205eabdfa26e1671f95d53963387eceec6c2c99da0a3a17359b77aaa097ec9556010b8000ecc0772d1dc19a67d128310c8d6bdd8379d008a913
7
+ data.tar.gz: 855706ad8525609e8a2a50ca64091081794940d69187f951a79c02c66acfa5be9f53cdeacf0f99f6c0431ad41a71ce616691590d40bce0a56576275fba96c453
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.4.4)
4
+ web_stat (0.4.6)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
- mechanize (>= 2.7)
7
+ mechanize (>= 2.7.7)
8
8
  natto (>= 1.1.2)
9
9
  nokogiri (>= 1.10.4)
10
10
  pdf-reader (= 2.4.0)
@@ -26,10 +26,11 @@ GEM
26
26
  cld (0.8.0)
27
27
  ffi
28
28
  coderay (1.1.3)
29
- crack (0.4.3)
30
- safe_yaml (~> 1.0.0)
29
+ connection_pool (2.2.3)
30
+ crack (0.4.5)
31
+ rexml
31
32
  crass (1.0.6)
32
- diff-lcs (1.3)
33
+ diff-lcs (1.4.4)
33
34
  domain_name (0.5.20190701)
34
35
  unf (>= 0.0.5, < 1.0.0)
35
36
  ffi (1.14.2)
@@ -38,7 +39,7 @@ GEM
38
39
  hashery (2.1.2)
39
40
  http-cookie (1.0.3)
40
41
  domain_name (~> 0.5)
41
- mechanize (2.7.6)
42
+ mechanize (2.7.7)
42
43
  domain_name (~> 0.5, >= 0.5.1)
43
44
  http-cookie (~> 1.0)
44
45
  mime-types (>= 1.17.2)
@@ -46,16 +47,20 @@ GEM
46
47
  net-http-persistent (>= 2.5.2)
47
48
  nokogiri (~> 1.6)
48
49
  ntlm-http (~> 0.1, >= 0.1.1)
50
+ webrick (~> 1.7)
49
51
  webrobots (>= 0.0.9, < 0.2)
50
52
  method_source (1.0.0)
51
53
  mime-types (3.3.1)
52
54
  mime-types-data (~> 3.2015)
53
- mime-types-data (3.2020.1104)
55
+ mime-types-data (3.2021.0212)
56
+ mini_portile2 (2.5.0)
54
57
  natto (1.2.0)
55
58
  ffi (>= 1.9.0)
56
59
  net-http-digest_auth (1.4.1)
57
- net-http-persistent (2.9.4)
58
- nokogiri (1.11.1-x86_64-linux)
60
+ net-http-persistent (4.0.1)
61
+ connection_pool (~> 2.2)
62
+ nokogiri (1.11.1)
63
+ mini_portile2 (~> 2.5.0)
59
64
  racc (~> 1.4)
60
65
  nokogumbo (2.0.4)
61
66
  nokogiri (~> 1.8, >= 1.8.4)
@@ -72,30 +77,29 @@ GEM
72
77
  pry-byebug (3.9.0)
73
78
  byebug (~> 11.0)
74
79
  pry (~> 0.13.0)
75
- public_suffix (4.0.5)
80
+ public_suffix (4.0.6)
76
81
  racc (1.5.2)
77
- rake (13.0.1)
82
+ rake (13.0.3)
78
83
  rexml (3.2.4)
79
- rspec (3.9.0)
80
- rspec-core (~> 3.9.0)
81
- rspec-expectations (~> 3.9.0)
82
- rspec-mocks (~> 3.9.0)
83
- rspec-core (3.9.2)
84
- rspec-support (~> 3.9.3)
85
- rspec-expectations (3.9.2)
84
+ rspec (3.10.0)
85
+ rspec-core (~> 3.10.0)
86
+ rspec-expectations (~> 3.10.0)
87
+ rspec-mocks (~> 3.10.0)
88
+ rspec-core (3.10.1)
89
+ rspec-support (~> 3.10.0)
90
+ rspec-expectations (3.10.1)
86
91
  diff-lcs (>= 1.2.0, < 2.0)
87
- rspec-support (~> 3.9.0)
88
- rspec-mocks (3.9.1)
92
+ rspec-support (~> 3.10.0)
93
+ rspec-mocks (3.10.2)
89
94
  diff-lcs (>= 1.2.0, < 2.0)
90
- rspec-support (~> 3.9.0)
91
- rspec-support (3.9.3)
95
+ rspec-support (~> 3.10.0)
96
+ rspec-support (3.10.2)
92
97
  ruby-rc4 (0.1.5)
93
98
  ruby-readability (0.7.0)
94
99
  guess_html_encoding (>= 0.0.4)
95
100
  nokogiri (>= 1.6.0)
96
101
  rubyzip (2.3.0)
97
- safe_yaml (1.0.5)
98
- sanitize (5.2.2)
102
+ sanitize (5.2.3)
99
103
  crass (~> 1.0.2)
100
104
  nokogiri (>= 1.8.0)
101
105
  nokogumbo (~> 2.0)
@@ -106,7 +110,7 @@ GEM
106
110
  unf (0.1.4)
107
111
  unf_ext
108
112
  unf_ext (0.0.7.7)
109
- webmock (3.8.3)
113
+ webmock (3.11.2)
110
114
  addressable (>= 2.3.6)
111
115
  crack (>= 0.3.2)
112
116
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -1,6 +1,6 @@
1
1
  module WebStat
2
2
  class Fetch
3
- attr_accessor :url, :html, :nokogiri, :userdic, :status
3
+ attr_accessor :url, :html, :nokogiri, :userdic, :status, :header
4
4
  # Get title
5
5
  # @return [String] title
6
6
  def title
@@ -95,12 +95,13 @@ module WebStat
95
95
  if mech.agent.robots_disallowed?(url)
96
96
  raise Mechanize::RobotsDisallowedError.new(url)
97
97
  end
98
+ document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
99
+ @header = document.header
98
100
  begin
99
101
  raise 'not_use_chromedirver' unless WebStat::Configure.get["use_chromedirver"]
100
102
  body = WebStat::WebDriverHelper.get_source(url)
101
103
  @status = 200
102
104
  rescue
103
- document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
104
105
  if document.class == Mechanize::File
105
106
  body = document.body
106
107
  else
@@ -114,6 +115,24 @@ module WebStat
114
115
  end
115
116
  body
116
117
  end
118
+
119
# Return the later of the response's "date" and "last-modified" headers.
# Reads @header (initialising it to an empty Hash when unset). A header that
# is absent, or whose value cannot be parsed as a date, is ignored.
# @return [DateTime, nil] the most recent timestamp, or nil when neither header yields one
def get_last_modified
  @header ||= {}
  timestamps = %w[date last-modified].map do |name|
    next unless @header.has_key?(name)
    begin
      DateTime.parse(@header[name])
    rescue ArgumentError, TypeError
      # Header values come from the remote server; a malformed one should
      # not abort the whole stat, so treat it as missing.
      nil
    end
  end.compact
  # Strict comparison keeps the "date" value when both timestamps are equal.
  timestamps.reduce { |latest, candidate| candidate > latest ? candidate : latest }
end
117
136
 
118
137
  # Get information about @url
119
138
  # @param [Hash] Specify a dictionary for each language code, e.g. {"ja": /***/**.dic, "other": /***/***.dic}
@@ -134,6 +153,7 @@ module WebStat
134
153
  language_code: language_code,
135
154
  status: @status,
136
155
  url: @url,
156
+ last_modified_at: get_last_modified,
137
157
  eyecatch_image_path: save_local_path(eyecatch_image_path),
138
158
  tags: tag.nouns
139
159
  }
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.4"
2
+ VERSION = "0.4.6"
3
3
  end
data/spec/spec_helper.rb CHANGED
@@ -102,4 +102,10 @@ WebMock.stub_request(:get, "https://cdn.newsdict.jp/assets/newsdict-5d8601394c3f
102
102
  .to_return(
103
103
  status: 200,
104
104
  body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "newsdict-5d8601394c3f4eea2d7161ab92ab327ac7099e22214c853327011b3a71859b8e.png")),
105
- headers: {content_type: 'application/html; charset=utf-8'})
105
+ headers: {content_type: 'application/html; charset=utf-8'})
106
+
107
+ WebMock.stub_request(:get, "https://newsdict.blog/last_modified_at")
108
+ .to_return(
109
+ status: 200,
110
+ body: "ok",
111
+ headers: {content_type: 'application/html; charset=utf-8', date: "Tue, 05 Apr 2016 07:43:08 GMT", "Last-Modified": "Tue, 05 Apr 2020 07:43:08 JST"})
@@ -204,4 +204,9 @@ RSpec.describe WebStat::Fetch do
204
204
  expect(WebStat::FetchAsWeb.url_valid?("https://status.cloud.google.com/incident/cloud-functions/19010")).to be true
205
205
  expect(WebStat::FetchAsWeb.url_valid?("http://g.co/arts/SK1jZHJpT8N1BGaM7")).to be true
206
206
  end
207
+
208
# The stubbed response for this URL carries a Date in 2016 and a Last-Modified
# in 2020, so the later Last-Modified value is what stat should report.
it "get_last_modified" do
  web_stat = WebStat::FetchAsWeb.new("https://newsdict.blog/last_modified_at")
  # Wrap the comparison in expect: a bare `===` result is discarded and the example could never fail.
  expect(web_stat.stat[:last_modified_at]).to eq(DateTime.parse("Tue, 05 Apr 2020 07:43:08 JST"))
end
207
212
  end
data/web_stat.gemspec CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_runtime_dependency "bundler", ">= 2.0.2"
24
24
  spec.add_runtime_dependency "nokogiri", ">= 1.10.4"
25
- spec.add_runtime_dependency "mechanize", ">= 2.7"
25
+ spec.add_runtime_dependency "mechanize", ">= 2.7.7"
26
26
  spec.add_runtime_dependency "ruby-readability", ">= 0.7"
27
27
  spec.add_runtime_dependency "natto", ">= 1.1.2"
28
28
  spec.add_runtime_dependency "sanitize", ">= 5.0.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-31 00:00:00.000000000 Z
11
+ date: 2021-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '2.7'
47
+ version: 2.7.7
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '2.7'
54
+ version: 2.7.7
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: ruby-readability
57
57
  requirement: !ruby/object:Gem::Requirement