web_stat 0.4.7 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 234586f7faedd6315e64b118b3f63532d2ea64f5a56fa046c78d56ae3be52935
4
- data.tar.gz: 71b73587d78fa05da29ffe15c2c381e3fccd31b76025060bbeedcff1f5ea5cf5
3
+ metadata.gz: d3f6f4f9692846a2c50a365125efb42dfd4a0b44fddef1c108621d94fd6dcde4
4
+ data.tar.gz: 2f6d400bce014144c940edcafbad9dc4e8c164fce356c70bfd7dbd1ad0c2f862
5
5
  SHA512:
6
- metadata.gz: 34b7f3a68413b53865c2c5e57dc78ab2627689e656ed6ebc9d69938804cbbbf490e4148f2e0e44afbd8da4c9052423427bd053f90bf24fe65195c4ded0ede57d
7
- data.tar.gz: ae11480e79de83e5c082e3663fa6570a64e85da5739b3c009a500ad4bd88bbd95f9fa7045362f211aff014f3e3aed9d363158f91e33ab87d60fc09e9dc26b3d3
6
+ metadata.gz: 6518aaed72267de7612257c43067a762b933d2e1a64d04defb07d7471eacd8bd1a4ca1cadc96dad5b279cc09a4bbc5a3453abfe10715bde5dc614d08d1953098
7
+ data.tar.gz: 3cd03188f34030da0c9ead6633bd1c67550524c22368b5c52dd69132e54be69edff6a28d7a91b6ff76ad02a6f3bb2becebb2df1e03db72fd7f3f16262d8137e2
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.4.7)
4
+ web_stat (0.5.0)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
+ google-api-client (>= 0.53.0)
7
8
  mechanize (>= 2.7.7)
8
9
  natto (>= 1.1.2)
9
10
  nokogiri (>= 1.10.4)
@@ -18,6 +19,12 @@ GEM
18
19
  remote: https://rubygems.org/
19
20
  specs:
20
21
  Ascii85 (1.0.3)
22
+ activesupport (6.1.3.2)
23
+ concurrent-ruby (~> 1.0, >= 1.0.2)
24
+ i18n (>= 1.6, < 2)
25
+ minitest (>= 5.1)
26
+ tzinfo (~> 2.0)
27
+ zeitwerk (~> 2.3)
21
28
  addressable (2.7.0)
22
29
  public_suffix (>= 2.0.2, < 5.0)
23
30
  afm (0.2.2)
@@ -26,43 +33,99 @@ GEM
26
33
  cld (0.8.0)
27
34
  ffi
28
35
  coderay (1.1.3)
29
- connection_pool (2.2.3)
36
+ concurrent-ruby (1.1.9)
37
+ connection_pool (2.2.5)
30
38
  crack (0.4.5)
31
39
  rexml
32
40
  crass (1.0.6)
41
+ declarative (0.0.20)
33
42
  diff-lcs (1.4.4)
34
43
  domain_name (0.5.20190701)
35
44
  unf (>= 0.0.5, < 1.0.0)
36
- ffi (1.14.2)
45
+ faraday (1.4.2)
46
+ faraday-em_http (~> 1.0)
47
+ faraday-em_synchrony (~> 1.0)
48
+ faraday-excon (~> 1.1)
49
+ faraday-net_http (~> 1.0)
50
+ faraday-net_http_persistent (~> 1.1)
51
+ multipart-post (>= 1.2, < 3)
52
+ ruby2_keywords (>= 0.0.4)
53
+ faraday-em_http (1.0.0)
54
+ faraday-em_synchrony (1.0.0)
55
+ faraday-excon (1.1.0)
56
+ faraday-net_http (1.0.1)
57
+ faraday-net_http_persistent (1.1.0)
58
+ ffi (1.15.3)
59
+ gems (1.2.0)
60
+ google-api-client (0.53.0)
61
+ google-apis-core (~> 0.1)
62
+ google-apis-generator (~> 0.1)
63
+ google-apis-core (0.3.0)
64
+ addressable (~> 2.5, >= 2.5.1)
65
+ googleauth (~> 0.14)
66
+ httpclient (>= 2.8.1, < 3.0)
67
+ mini_mime (~> 1.0)
68
+ representable (~> 3.0)
69
+ retriable (>= 2.0, < 4.0)
70
+ rexml
71
+ signet (~> 0.14)
72
+ webrick
73
+ google-apis-discovery_v1 (0.4.0)
74
+ google-apis-core (~> 0.1)
75
+ google-apis-generator (0.3.0)
76
+ activesupport (>= 5.0)
77
+ gems (~> 1.2)
78
+ google-apis-core (~> 0.1)
79
+ google-apis-discovery_v1 (~> 0.0)
80
+ thor (>= 0.20, < 2.a)
81
+ googleauth (0.16.2)
82
+ faraday (>= 0.17.3, < 2.0)
83
+ jwt (>= 1.4, < 3.0)
84
+ memoist (~> 0.16)
85
+ multi_json (~> 1.11)
86
+ os (>= 0.9, < 2.0)
87
+ signet (~> 0.14)
37
88
  guess_html_encoding (0.0.11)
38
89
  hashdiff (1.0.1)
39
90
  hashery (2.1.2)
40
- http-cookie (1.0.3)
91
+ http-cookie (1.0.4)
41
92
  domain_name (~> 0.5)
42
- mechanize (2.7.7)
43
- domain_name (~> 0.5, >= 0.5.1)
44
- http-cookie (~> 1.0)
45
- mime-types (>= 1.17.2)
46
- net-http-digest_auth (~> 1.1, >= 1.1.1)
47
- net-http-persistent (>= 2.5.2)
48
- nokogiri (~> 1.6)
49
- ntlm-http (~> 0.1, >= 0.1.1)
93
+ httpclient (2.8.3)
94
+ i18n (1.8.10)
95
+ concurrent-ruby (~> 1.0)
96
+ jwt (2.2.3)
97
+ mechanize (2.8.1)
98
+ addressable (~> 2.7)
99
+ domain_name (~> 0.5, >= 0.5.20190701)
100
+ http-cookie (~> 1.0, >= 1.0.3)
101
+ mime-types (~> 3.0)
102
+ net-http-digest_auth (~> 1.4, >= 1.4.1)
103
+ net-http-persistent (>= 2.5.2, < 5.0.dev)
104
+ nokogiri (~> 1.11, >= 1.11.2)
105
+ rubyntlm (~> 0.6, >= 0.6.3)
50
106
  webrick (~> 1.7)
51
- webrobots (>= 0.0.9, < 0.2)
107
+ webrobots (~> 0.1.2)
108
+ memoist (0.16.2)
52
109
  method_source (1.0.0)
53
110
  mime-types (3.3.1)
54
111
  mime-types-data (~> 3.2015)
55
112
  mime-types-data (3.2021.0225)
113
+ mini_mime (1.1.0)
114
+ mini_portile2 (2.5.3)
115
+ minitest (5.14.4)
116
+ multi_json (1.15.0)
117
+ multipart-post (2.1.1)
56
118
  natto (1.2.0)
57
119
  ffi (>= 1.9.0)
58
120
  net-http-digest_auth (1.4.1)
59
121
  net-http-persistent (4.0.1)
60
122
  connection_pool (~> 2.2)
61
- nokogiri (1.11.1-x86_64-linux)
123
+ nokogiri (1.11.7)
124
+ mini_portile2 (~> 2.5.0)
62
125
  racc (~> 1.4)
63
- nokogumbo (2.0.4)
126
+ nokogumbo (2.0.5)
64
127
  nokogiri (~> 1.8, >= 1.8.4)
65
- ntlm-http (0.1.1)
128
+ os (1.1.1)
66
129
  pdf-reader (2.4.0)
67
130
  Ascii85 (~> 1.0.0)
68
131
  afm (~> 0.2.1)
@@ -78,6 +141,11 @@ GEM
78
141
  public_suffix (4.0.6)
79
142
  racc (1.5.2)
80
143
  rake (13.0.3)
144
+ representable (3.1.1)
145
+ declarative (< 0.1.0)
146
+ trailblazer-option (>= 0.1.1, < 0.2.0)
147
+ uber (< 0.2.0)
148
+ retriable (3.1.2)
81
149
  rexml (3.2.4)
82
150
  rspec (3.10.0)
83
151
  rspec-core (~> 3.10.0)
@@ -96,6 +164,8 @@ GEM
96
164
  ruby-readability (0.7.0)
97
165
  guess_html_encoding (>= 0.0.4)
98
166
  nokogiri (>= 1.6.0)
167
+ ruby2_keywords (0.0.4)
168
+ rubyntlm (0.6.3)
99
169
  rubyzip (2.3.0)
100
170
  sanitize (5.2.3)
101
171
  crass (~> 1.0.2)
@@ -104,7 +174,17 @@ GEM
104
174
  selenium-webdriver (3.142.7)
105
175
  childprocess (>= 0.5, < 4.0)
106
176
  rubyzip (>= 1.2.2)
177
+ signet (0.15.0)
178
+ addressable (~> 2.3)
179
+ faraday (>= 0.17.3, < 2.0)
180
+ jwt (>= 1.5, < 3.0)
181
+ multi_json (~> 1.10)
182
+ thor (1.1.0)
183
+ trailblazer-option (0.1.1)
107
184
  ttfunk (1.7.0)
185
+ tzinfo (2.0.4)
186
+ concurrent-ruby (~> 1.0)
187
+ uber (0.1.0)
108
188
  unf (0.1.4)
109
189
  unf_ext
110
190
  unf_ext (0.0.7.7)
@@ -114,6 +194,7 @@ GEM
114
194
  hashdiff (>= 0.4.0, < 2.0.0)
115
195
  webrick (1.7.0)
116
196
  webrobots (0.1.2)
197
+ zeitwerk (2.4.2)
117
198
 
118
199
  PLATFORMS
119
200
  ruby
File without changes
data/lib/web_stat.rb CHANGED
@@ -11,6 +11,7 @@ require 'net/http'
11
11
  require 'pdf/reader'
12
12
  require 'ruby-readability'
13
13
  require 'selenium-webdriver'
14
+ require 'google/apis/youtube_v3'
14
15
 
15
16
  require "helpers/web_drive_helper"
16
17
  require "web_stat/final_redirect_url"
@@ -18,6 +19,7 @@ require "web_stat/categorize"
18
19
  require "web_stat/configure"
19
20
  require "web_stat/errors"
20
21
  require "web_stat/fetch"
22
+
21
23
  require "web_stat/tag"
22
24
  require "web_stat/version"
23
25
  require "web_stat/fetch/fetch_as_html"
@@ -14,10 +14,12 @@ development: &development
14
14
  - '//img/@src'
15
15
  userdic: ""
16
16
  use_chromedirver: false
17
+ id_extraction_regexs:
18
+ youtube: '^https://www.youtube.com/watch\?v=([^&]+)'
17
19
  thumbnail_regex:
18
- youtube:
19
- - '%r{^https://www.youtube.com/watch\?v=([^&]+)}'
20
- - 'http://img.youtube.com/vi/\1/default.jpg'
20
+ youtube: 'http://img.youtube.com/vi/\1/default.jpg'
21
+ api_keys:
22
+ youtube: "dummy-key"
21
23
  test:
22
24
  <<: *development
23
25
  production:
@@ -34,7 +34,23 @@ module WebStat
34
34
  end
35
35
  # Get main section
36
36
  def content
37
- Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
37
+ if @url.match(WebStat::Configure.get["id_extraction_regexs"]["youtube"])
38
+ youtube_decscription
39
+ else
40
+ Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
41
+ end
42
+ end
43
+
44
+ # Get the description of a YouTube video.
45
+ def youtube_decscription
46
+ regex_string = WebStat::Configure.get["id_extraction_regexs"]["youtube"]
47
+ if @url.match(regex_string)
48
+ id = @url.gsub(%r{#{regex_string}}, '\1')
49
+ youtube = Google::Apis::YoutubeV3::YouTubeService.new
50
+ youtube.key = WebStat::Configure.get["api_keys"]["youtube"]
51
+ response = youtube.list_videos(:snippet, id: id)
52
+ response.items.first.snippet.description
53
+ end
38
54
  end
39
55
 
40
56
  # Get temporary path of image
@@ -48,9 +64,9 @@ module WebStat
48
64
  end
49
65
  end
50
66
  # If there is a thumbnail rule, apply it.
51
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
52
- if @url.match(v[0])
53
- return @url.gsub(v[0], v[1])
67
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
68
+ if @url.match(regex_string)
69
+ return @url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])
54
70
  end
55
71
  end
56
72
  readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body').to_s).content)
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.7"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -14,14 +14,15 @@ RSpec.describe WebStat::Configure do
14
14
  it "Get thumbnail_regex.youtube." do
15
15
  config = WebStat::Configure.get
16
16
  expect(config["thumbnail_regex"]["yotube"].nil?).to eq true
17
- expect(config["thumbnail_regex"]["youtube"].count).to eq 2
17
+ expect(config["id_extraction_regexs"]["youtube"]).to be_a String
18
+ expect(config["thumbnail_regex"]["youtube"]).to be_a String
18
19
  end
19
20
 
20
21
  it "Match youtube url." do
21
22
  sample_url = "https://www.youtube.com/watch?v=aChpsuUffUM"
22
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
23
- if sample_url.match(v[0])
24
- expect(sample_url.gsub(v[0], v[1])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
23
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
24
+ if sample_url.match(regex_string)
25
+ expect(sample_url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
25
26
  end
26
27
  end
27
28
  end
data/web_stat.gemspec CHANGED
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_runtime_dependency "pdf-reader", "2.4.0"
32
32
  spec.add_runtime_dependency "webrick", ">= 1.7.0"
33
33
  spec.add_runtime_dependency "rexml", ">= 3.2.4"
34
+ spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
34
35
 
35
36
  spec.add_development_dependency "rake", ">= 10.0"
36
37
  spec.add_development_dependency "rspec", ">= 3.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.7
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-28 00:00:00.000000000 Z
11
+ date: 2021-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: 3.2.4
167
+ - !ruby/object:Gem::Dependency
168
+ name: google-api-client
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: 0.53.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: 0.53.0
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: rake
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -238,7 +252,7 @@ description: Fetch the web pages and stat.
238
252
  email:
239
253
  - yube@newsdict.jp
240
254
  executables:
241
- - fetch_as_html
255
+ - fetch_as_url
242
256
  extensions: []
243
257
  extra_rdoc_files: []
244
258
  files:
@@ -253,7 +267,7 @@ files:
253
267
  - LICENSE.txt
254
268
  - README.md
255
269
  - Rakefile
256
- - bin/fetch_as_html
270
+ - bin/fetch_as_url
257
271
  - docker-compose.yml
258
272
  - docker/exec
259
273
  - docker/start