web_stat 0.4.1 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fc357cc98a214af7a391fe834ec927b43649ca8d8c5b1105e7fb1947dd7127a
4
- data.tar.gz: f7c5f6d28101ff677aaa4db5b9b486d067798b0a8461f91352db252f68374513
3
+ metadata.gz: 234586f7faedd6315e64b118b3f63532d2ea64f5a56fa046c78d56ae3be52935
4
+ data.tar.gz: 71b73587d78fa05da29ffe15c2c381e3fccd31b76025060bbeedcff1f5ea5cf5
5
5
  SHA512:
6
- metadata.gz: 1616c4f1a64ebc693ee4e822b06150028783736448850d8aca80c90909ca879580a82f871f95375904b38813b10155105fd5d1121ed0321ae9cc898a4b95549e
7
- data.tar.gz: 1637fb0b6b6f8ee83b133d95e3ab31a6c4ad2a64399f4ac19173f215ec7a79eac851c1348d9efa1efbfdc88a7ac8e5d6ecd9f9c067c4295b0a82f2662ef4c448
6
+ metadata.gz: 34b7f3a68413b53865c2c5e57dc78ab2627689e656ed6ebc9d69938804cbbbf490e4148f2e0e44afbd8da4c9052423427bd053f90bf24fe65195c4ded0ede57d
7
+ data.tar.gz: ae11480e79de83e5c082e3663fa6570a64e85da5739b3c009a500ad4bd88bbd95f9fa7045362f211aff014f3e3aed9d363158f91e33ab87d60fc09e9dc26b3d3
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.4.1)
4
+ web_stat (0.4.7)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
- mechanize (>= 2.7)
7
+ mechanize (>= 2.7.7)
8
8
  natto (>= 1.1.2)
9
9
  nokogiri (>= 1.10.4)
10
10
  pdf-reader (= 2.4.0)
@@ -26,10 +26,11 @@ GEM
26
26
  cld (0.8.0)
27
27
  ffi
28
28
  coderay (1.1.3)
29
- crack (0.4.3)
30
- safe_yaml (~> 1.0.0)
29
+ connection_pool (2.2.3)
30
+ crack (0.4.5)
31
+ rexml
31
32
  crass (1.0.6)
32
- diff-lcs (1.3)
33
+ diff-lcs (1.4.4)
33
34
  domain_name (0.5.20190701)
34
35
  unf (>= 0.0.5, < 1.0.0)
35
36
  ffi (1.14.2)
@@ -38,7 +39,7 @@ GEM
38
39
  hashery (2.1.2)
39
40
  http-cookie (1.0.3)
40
41
  domain_name (~> 0.5)
41
- mechanize (2.7.6)
42
+ mechanize (2.7.7)
42
43
  domain_name (~> 0.5, >= 0.5.1)
43
44
  http-cookie (~> 1.0)
44
45
  mime-types (>= 1.17.2)
@@ -46,15 +47,17 @@ GEM
46
47
  net-http-persistent (>= 2.5.2)
47
48
  nokogiri (~> 1.6)
48
49
  ntlm-http (~> 0.1, >= 0.1.1)
50
+ webrick (~> 1.7)
49
51
  webrobots (>= 0.0.9, < 0.2)
50
52
  method_source (1.0.0)
51
53
  mime-types (3.3.1)
52
54
  mime-types-data (~> 3.2015)
53
- mime-types-data (3.2020.1104)
55
+ mime-types-data (3.2021.0225)
54
56
  natto (1.2.0)
55
57
  ffi (>= 1.9.0)
56
58
  net-http-digest_auth (1.4.1)
57
- net-http-persistent (2.9.4)
59
+ net-http-persistent (4.0.1)
60
+ connection_pool (~> 2.2)
58
61
  nokogiri (1.11.1-x86_64-linux)
59
62
  racc (~> 1.4)
60
63
  nokogumbo (2.0.4)
@@ -72,30 +75,29 @@ GEM
72
75
  pry-byebug (3.9.0)
73
76
  byebug (~> 11.0)
74
77
  pry (~> 0.13.0)
75
- public_suffix (4.0.5)
78
+ public_suffix (4.0.6)
76
79
  racc (1.5.2)
77
- rake (13.0.1)
80
+ rake (13.0.3)
78
81
  rexml (3.2.4)
79
- rspec (3.9.0)
80
- rspec-core (~> 3.9.0)
81
- rspec-expectations (~> 3.9.0)
82
- rspec-mocks (~> 3.9.0)
83
- rspec-core (3.9.2)
84
- rspec-support (~> 3.9.3)
85
- rspec-expectations (3.9.2)
82
+ rspec (3.10.0)
83
+ rspec-core (~> 3.10.0)
84
+ rspec-expectations (~> 3.10.0)
85
+ rspec-mocks (~> 3.10.0)
86
+ rspec-core (3.10.1)
87
+ rspec-support (~> 3.10.0)
88
+ rspec-expectations (3.10.1)
86
89
  diff-lcs (>= 1.2.0, < 2.0)
87
- rspec-support (~> 3.9.0)
88
- rspec-mocks (3.9.1)
90
+ rspec-support (~> 3.10.0)
91
+ rspec-mocks (3.10.2)
89
92
  diff-lcs (>= 1.2.0, < 2.0)
90
- rspec-support (~> 3.9.0)
91
- rspec-support (3.9.3)
93
+ rspec-support (~> 3.10.0)
94
+ rspec-support (3.10.2)
92
95
  ruby-rc4 (0.1.5)
93
96
  ruby-readability (0.7.0)
94
97
  guess_html_encoding (>= 0.0.4)
95
98
  nokogiri (>= 1.6.0)
96
99
  rubyzip (2.3.0)
97
- safe_yaml (1.0.5)
98
- sanitize (5.2.2)
100
+ sanitize (5.2.3)
99
101
  crass (~> 1.0.2)
100
102
  nokogiri (>= 1.8.0)
101
103
  nokogumbo (~> 2.0)
@@ -106,7 +108,7 @@ GEM
106
108
  unf (0.1.4)
107
109
  unf_ext
108
110
  unf_ext (0.0.7.7)
109
- webmock (3.8.3)
111
+ webmock (3.11.2)
110
112
  addressable (>= 2.3.6)
111
113
  crack (>= 0.3.2)
112
114
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -1,6 +1,6 @@
1
1
  module WebStat
2
2
  class Fetch
3
- attr_accessor :url, :html, :nokogiri, :userdic, :status
3
+ attr_accessor :url, :html, :nokogiri, :userdic, :status, :header
4
4
  # Get title
5
5
  # @return [String] title
6
6
  def title
@@ -82,6 +82,8 @@ module WebStat
82
82
  end
83
83
  end
84
84
  tmp_file
85
+ rescue
86
+ false
85
87
  end
86
88
 
87
89
  # Get url
@@ -95,20 +97,13 @@ module WebStat
95
97
  if mech.agent.robots_disallowed?(url)
96
98
  raise Mechanize::RobotsDisallowedError.new(url)
97
99
  end
98
- if WebStat::Configure.get["use_chromedirver"]
99
- begin
100
- body = WebStat::WebDriverHelper.get_source(url)
101
- rescue Selenium::WebDriver::Error::UnknownError => e
102
- document = agent.get(url, [], nil, { 'Accept-Language' => 'ja'})
103
- if document.class == Mechanize::File
104
- body = document.body
105
- else
106
- body = document.body.encode('UTF-8', document.encoding)
107
- end
108
- end
100
+ document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
101
+ @header = document.header
102
+ begin
103
+ raise 'not_use_chromedirver' unless WebStat::Configure.get["use_chromedirver"]
104
+ body = WebStat::WebDriverHelper.get_source(url)
109
105
  @status = 200
110
- else
111
- document = mech.get(url, [], nil, { 'Accept-Language' => 'ja'})
106
+ rescue
112
107
  if document.class == Mechanize::File
113
108
  body = document.body
114
109
  else
@@ -122,6 +117,24 @@ module WebStat
122
117
  end
123
118
  body
124
119
  end
120
+
121
+ # Return Date or last modified header.
122
+ # @param [String] url
123
+ # @return DataTime
124
+ def get_last_modified
125
+ @header = @header || {}
126
+ if @header.has_key?("date") && @header.has_key?("last-modified")
127
+ if DateTime.parse(@header["date"]) >= DateTime.parse(@header["last-modified"])
128
+ DateTime.parse(@header["date"])
129
+ else
130
+ DateTime.parse(@header["last-modified"])
131
+ end
132
+ elsif @header.has_key?("date")
133
+ DateTime.parse(@header["date"])
134
+ elsif @header.has_key?("last-modified")
135
+ DateTime.parse(@header["last-modified"])
136
+ end
137
+ end
125
138
 
126
139
  # Get the informations of @url
127
140
  # @param [Hash] Specify a dictionary for each language code. example ) {"ja": /***/**.dic, "other": /***/***.dic}
@@ -142,6 +155,7 @@ module WebStat
142
155
  language_code: language_code,
143
156
  status: @status,
144
157
  url: @url,
158
+ last_modified_at: get_last_modified,
145
159
  eyecatch_image_path: save_local_path(eyecatch_image_path),
146
160
  tags: tag.nouns
147
161
  }
@@ -5,7 +5,7 @@ module WebStat
5
5
  # initialize class
6
6
  # @param [String] url
7
7
  def initialize(url)
8
- unless url_valid?(url)
8
+ unless FetchAsWeb.url_valid?(url)
9
9
  raise WebStat::INVALID_URL, url
10
10
  end
11
11
  @url = original_url(url)
@@ -36,11 +36,12 @@ module WebStat
36
36
  end
37
37
  @nokogiri = ::Nokogiri::HTML(@html)
38
38
  end
39
-
40
- # Validation url
41
- def url_valid?(url)
42
- regexp = Regexp.new("^https?://([a-zA-Z0-9][a-zA-Z0-9\\\-\.]{1,61}[a-zA-Z0-9])\\\.([a-zA-Z]{2,})(.*)?$", Regexp::IGNORECASE)
43
- regexp.match?(url)
44
- end
39
+ class << self
40
+ # Validation url
41
+ def url_valid?(url)
42
+ regexp = Regexp.new("^https?://([a-z0-9][a-z0-9\\\-\.]{0,61})\\\.([a-z]{2,})(.*)?$", Regexp::IGNORECASE)
43
+ regexp.match?(url)
44
+ end
45
+ end
45
46
  end
46
47
  end
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.1"
2
+ VERSION = "0.4.7"
3
3
  end
data/spec/spec_helper.rb CHANGED
@@ -102,4 +102,10 @@ WebMock.stub_request(:get, "https://cdn.newsdict.jp/assets/newsdict-5d8601394c3f
102
102
  .to_return(
103
103
  status: 200,
104
104
  body: File.new(File.join(File.dirname(__FILE__), "fixtures", "images", "newsdict-5d8601394c3f4eea2d7161ab92ab327ac7099e22214c853327011b3a71859b8e.png")),
105
- headers: {content_type: 'application/html; charset=utf-8'})
105
+ headers: {content_type: 'application/html; charset=utf-8'})
106
+
107
+ WebMock.stub_request(:get, "https://newsdict.blog/last_modified_at")
108
+ .to_return(
109
+ status: 200,
110
+ body: "ok",
111
+ headers: {content_type: 'application/html; charset=utf-8', date: "Tue, 05 Apr 2016 07:43:08 GMT", "Last-Modified": "Tue, 05 Apr 2020 07:43:08 JST"})
@@ -197,11 +197,16 @@ RSpec.describe WebStat::Fetch do
197
197
  end
198
198
 
199
199
  it "valid url" do
200
- web_stat_fetch_web_class = WebStat::FetchAsWeb.new("https://newsdict.blog/content/images/size/w100/2019/03/facebook-3.jpg")
201
- expect(web_stat_fetch_web_class.url_valid?("http://status.aws.amazon.com/#cloudfront_12345")).to be true
202
- expect(web_stat_fetch_web_class.url_valid?("https://findy-code.io?h=NWsZey5UgJ51u&t=omikuji-22")).to be true
203
- expect(web_stat_fetch_web_class.url_valid?("https://www.meetup.com/pro/docker")).to be true
204
- expect(web_stat_fetch_web_class.url_valid?("https://gxyt4.app.goo.gl/Mn64U")).to be true
205
- expect(web_stat_fetch_web_class.url_valid?("https://status.cloud.google.com/incident/cloud-functions/19010")).to be true
200
+ expect(WebStat::FetchAsWeb.url_valid?("http://status.aws.amazon.com/#cloudfront_12345")).to be true
201
+ expect(WebStat::FetchAsWeb.url_valid?("https://findy-code.io?h=NWsZey5UgJ51u&t=omikuji-22")).to be true
202
+ expect(WebStat::FetchAsWeb.url_valid?("https://www.meetup.com/pro/docker")).to be true
203
+ expect(WebStat::FetchAsWeb.url_valid?("https://gxyt4.app.goo.gl/Mn64U")).to be true
204
+ expect(WebStat::FetchAsWeb.url_valid?("https://status.cloud.google.com/incident/cloud-functions/19010")).to be true
205
+ expect(WebStat::FetchAsWeb.url_valid?("http://g.co/arts/SK1jZHJpT8N1BGaM7")).to be true
206
+ end
207
+
208
+ it "get_last_modified" do
209
+ web_stat = WebStat::FetchAsWeb.new("https://newsdict.blog/last_modified_at")
210
+ web_stat.stat[:last_modified_at] === DateTime.parse("Tue, 05 Apr 2020 07:43:08 JST")
206
211
  end
207
212
  end
data/web_stat.gemspec CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
22
22
 
23
23
  spec.add_runtime_dependency "bundler", ">= 2.0.2"
24
24
  spec.add_runtime_dependency "nokogiri", ">= 1.10.4"
25
- spec.add_runtime_dependency "mechanize", ">= 2.7"
25
+ spec.add_runtime_dependency "mechanize", ">= 2.7.7"
26
26
  spec.add_runtime_dependency "ruby-readability", ">= 0.7"
27
27
  spec.add_runtime_dependency "natto", ">= 1.1.2"
28
28
  spec.add_runtime_dependency "sanitize", ">= 5.0.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-31 00:00:00.000000000 Z
11
+ date: 2021-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '2.7'
47
+ version: 2.7.7
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '2.7'
54
+ version: 2.7.7
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: ruby-readability
57
57
  requirement: !ruby/object:Gem::Requirement