web_stat 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 234586f7faedd6315e64b118b3f63532d2ea64f5a56fa046c78d56ae3be52935
4
- data.tar.gz: 71b73587d78fa05da29ffe15c2c381e3fccd31b76025060bbeedcff1f5ea5cf5
3
+ metadata.gz: d3f6f4f9692846a2c50a365125efb42dfd4a0b44fddef1c108621d94fd6dcde4
4
+ data.tar.gz: 2f6d400bce014144c940edcafbad9dc4e8c164fce356c70bfd7dbd1ad0c2f862
5
5
  SHA512:
6
- metadata.gz: 34b7f3a68413b53865c2c5e57dc78ab2627689e656ed6ebc9d69938804cbbbf490e4148f2e0e44afbd8da4c9052423427bd053f90bf24fe65195c4ded0ede57d
7
- data.tar.gz: ae11480e79de83e5c082e3663fa6570a64e85da5739b3c009a500ad4bd88bbd95f9fa7045362f211aff014f3e3aed9d363158f91e33ab87d60fc09e9dc26b3d3
6
+ metadata.gz: 6518aaed72267de7612257c43067a762b933d2e1a64d04defb07d7471eacd8bd1a4ca1cadc96dad5b279cc09a4bbc5a3453abfe10715bde5dc614d08d1953098
7
+ data.tar.gz: 3cd03188f34030da0c9ead6633bd1c67550524c22368b5c52dd69132e54be69edff6a28d7a91b6ff76ad02a6f3bb2becebb2df1e03db72fd7f3f16262d8137e2
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- web_stat (0.4.7)
4
+ web_stat (0.5.0)
5
5
  bundler (>= 2.0.2)
6
6
  cld (>= 0.8.0)
7
+ google-api-client (>= 0.53.0)
7
8
  mechanize (>= 2.7.7)
8
9
  natto (>= 1.1.2)
9
10
  nokogiri (>= 1.10.4)
@@ -18,6 +19,12 @@ GEM
18
19
  remote: https://rubygems.org/
19
20
  specs:
20
21
  Ascii85 (1.0.3)
22
+ activesupport (6.1.3.2)
23
+ concurrent-ruby (~> 1.0, >= 1.0.2)
24
+ i18n (>= 1.6, < 2)
25
+ minitest (>= 5.1)
26
+ tzinfo (~> 2.0)
27
+ zeitwerk (~> 2.3)
21
28
  addressable (2.7.0)
22
29
  public_suffix (>= 2.0.2, < 5.0)
23
30
  afm (0.2.2)
@@ -26,43 +33,99 @@ GEM
26
33
  cld (0.8.0)
27
34
  ffi
28
35
  coderay (1.1.3)
29
- connection_pool (2.2.3)
36
+ concurrent-ruby (1.1.9)
37
+ connection_pool (2.2.5)
30
38
  crack (0.4.5)
31
39
  rexml
32
40
  crass (1.0.6)
41
+ declarative (0.0.20)
33
42
  diff-lcs (1.4.4)
34
43
  domain_name (0.5.20190701)
35
44
  unf (>= 0.0.5, < 1.0.0)
36
- ffi (1.14.2)
45
+ faraday (1.4.2)
46
+ faraday-em_http (~> 1.0)
47
+ faraday-em_synchrony (~> 1.0)
48
+ faraday-excon (~> 1.1)
49
+ faraday-net_http (~> 1.0)
50
+ faraday-net_http_persistent (~> 1.1)
51
+ multipart-post (>= 1.2, < 3)
52
+ ruby2_keywords (>= 0.0.4)
53
+ faraday-em_http (1.0.0)
54
+ faraday-em_synchrony (1.0.0)
55
+ faraday-excon (1.1.0)
56
+ faraday-net_http (1.0.1)
57
+ faraday-net_http_persistent (1.1.0)
58
+ ffi (1.15.3)
59
+ gems (1.2.0)
60
+ google-api-client (0.53.0)
61
+ google-apis-core (~> 0.1)
62
+ google-apis-generator (~> 0.1)
63
+ google-apis-core (0.3.0)
64
+ addressable (~> 2.5, >= 2.5.1)
65
+ googleauth (~> 0.14)
66
+ httpclient (>= 2.8.1, < 3.0)
67
+ mini_mime (~> 1.0)
68
+ representable (~> 3.0)
69
+ retriable (>= 2.0, < 4.0)
70
+ rexml
71
+ signet (~> 0.14)
72
+ webrick
73
+ google-apis-discovery_v1 (0.4.0)
74
+ google-apis-core (~> 0.1)
75
+ google-apis-generator (0.3.0)
76
+ activesupport (>= 5.0)
77
+ gems (~> 1.2)
78
+ google-apis-core (~> 0.1)
79
+ google-apis-discovery_v1 (~> 0.0)
80
+ thor (>= 0.20, < 2.a)
81
+ googleauth (0.16.2)
82
+ faraday (>= 0.17.3, < 2.0)
83
+ jwt (>= 1.4, < 3.0)
84
+ memoist (~> 0.16)
85
+ multi_json (~> 1.11)
86
+ os (>= 0.9, < 2.0)
87
+ signet (~> 0.14)
37
88
  guess_html_encoding (0.0.11)
38
89
  hashdiff (1.0.1)
39
90
  hashery (2.1.2)
40
- http-cookie (1.0.3)
91
+ http-cookie (1.0.4)
41
92
  domain_name (~> 0.5)
42
- mechanize (2.7.7)
43
- domain_name (~> 0.5, >= 0.5.1)
44
- http-cookie (~> 1.0)
45
- mime-types (>= 1.17.2)
46
- net-http-digest_auth (~> 1.1, >= 1.1.1)
47
- net-http-persistent (>= 2.5.2)
48
- nokogiri (~> 1.6)
49
- ntlm-http (~> 0.1, >= 0.1.1)
93
+ httpclient (2.8.3)
94
+ i18n (1.8.10)
95
+ concurrent-ruby (~> 1.0)
96
+ jwt (2.2.3)
97
+ mechanize (2.8.1)
98
+ addressable (~> 2.7)
99
+ domain_name (~> 0.5, >= 0.5.20190701)
100
+ http-cookie (~> 1.0, >= 1.0.3)
101
+ mime-types (~> 3.0)
102
+ net-http-digest_auth (~> 1.4, >= 1.4.1)
103
+ net-http-persistent (>= 2.5.2, < 5.0.dev)
104
+ nokogiri (~> 1.11, >= 1.11.2)
105
+ rubyntlm (~> 0.6, >= 0.6.3)
50
106
  webrick (~> 1.7)
51
- webrobots (>= 0.0.9, < 0.2)
107
+ webrobots (~> 0.1.2)
108
+ memoist (0.16.2)
52
109
  method_source (1.0.0)
53
110
  mime-types (3.3.1)
54
111
  mime-types-data (~> 3.2015)
55
112
  mime-types-data (3.2021.0225)
113
+ mini_mime (1.1.0)
114
+ mini_portile2 (2.5.3)
115
+ minitest (5.14.4)
116
+ multi_json (1.15.0)
117
+ multipart-post (2.1.1)
56
118
  natto (1.2.0)
57
119
  ffi (>= 1.9.0)
58
120
  net-http-digest_auth (1.4.1)
59
121
  net-http-persistent (4.0.1)
60
122
  connection_pool (~> 2.2)
61
- nokogiri (1.11.1-x86_64-linux)
123
+ nokogiri (1.11.7)
124
+ mini_portile2 (~> 2.5.0)
62
125
  racc (~> 1.4)
63
- nokogumbo (2.0.4)
126
+ nokogumbo (2.0.5)
64
127
  nokogiri (~> 1.8, >= 1.8.4)
65
- ntlm-http (0.1.1)
128
+ os (1.1.1)
66
129
  pdf-reader (2.4.0)
67
130
  Ascii85 (~> 1.0.0)
68
131
  afm (~> 0.2.1)
@@ -78,6 +141,11 @@ GEM
78
141
  public_suffix (4.0.6)
79
142
  racc (1.5.2)
80
143
  rake (13.0.3)
144
+ representable (3.1.1)
145
+ declarative (< 0.1.0)
146
+ trailblazer-option (>= 0.1.1, < 0.2.0)
147
+ uber (< 0.2.0)
148
+ retriable (3.1.2)
81
149
  rexml (3.2.4)
82
150
  rspec (3.10.0)
83
151
  rspec-core (~> 3.10.0)
@@ -96,6 +164,8 @@ GEM
96
164
  ruby-readability (0.7.0)
97
165
  guess_html_encoding (>= 0.0.4)
98
166
  nokogiri (>= 1.6.0)
167
+ ruby2_keywords (0.0.4)
168
+ rubyntlm (0.6.3)
99
169
  rubyzip (2.3.0)
100
170
  sanitize (5.2.3)
101
171
  crass (~> 1.0.2)
@@ -104,7 +174,17 @@ GEM
104
174
  selenium-webdriver (3.142.7)
105
175
  childprocess (>= 0.5, < 4.0)
106
176
  rubyzip (>= 1.2.2)
177
+ signet (0.15.0)
178
+ addressable (~> 2.3)
179
+ faraday (>= 0.17.3, < 2.0)
180
+ jwt (>= 1.5, < 3.0)
181
+ multi_json (~> 1.10)
182
+ thor (1.1.0)
183
+ trailblazer-option (0.1.1)
107
184
  ttfunk (1.7.0)
185
+ tzinfo (2.0.4)
186
+ concurrent-ruby (~> 1.0)
187
+ uber (0.1.0)
108
188
  unf (0.1.4)
109
189
  unf_ext
110
190
  unf_ext (0.0.7.7)
@@ -114,6 +194,7 @@ GEM
114
194
  hashdiff (>= 0.4.0, < 2.0.0)
115
195
  webrick (1.7.0)
116
196
  webrobots (0.1.2)
197
+ zeitwerk (2.4.2)
117
198
 
118
199
  PLATFORMS
119
200
  ruby
File without changes
data/lib/web_stat.rb CHANGED
@@ -11,6 +11,7 @@ require 'net/http'
11
11
  require 'pdf/reader'
12
12
  require 'ruby-readability'
13
13
  require 'selenium-webdriver'
14
+ require 'google/apis/youtube_v3'
14
15
 
15
16
  require "helpers/web_drive_helper"
16
17
  require "web_stat/final_redirect_url"
@@ -18,6 +19,7 @@ require "web_stat/categorize"
18
19
  require "web_stat/configure"
19
20
  require "web_stat/errors"
20
21
  require "web_stat/fetch"
22
+
21
23
  require "web_stat/tag"
22
24
  require "web_stat/version"
23
25
  require "web_stat/fetch/fetch_as_html"
@@ -14,10 +14,12 @@ development: &development
14
14
  - '//img/@src'
15
15
  userdic: ""
16
16
  use_chromedirver: false
17
+ id_extraction_regexs:
18
+ youtube: '^https://www.youtube.com/watch\?v=([^&]+)'
17
19
  thumbnail_regex:
18
- youtube:
19
- - '%r{^https://www.youtube.com/watch\?v=([^&]+)}'
20
- - 'http://img.youtube.com/vi/\1/default.jpg'
20
+ youtube: 'http://img.youtube.com/vi/\1/default.jpg'
21
+ api_keys:
22
+ youtube: "dummy-key"
21
23
  test:
22
24
  <<: *development
23
25
  production:
@@ -34,7 +34,23 @@ module WebStat
34
34
  end
35
35
  # Get main section
36
36
  def content
37
- Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
37
+ if @url.match(WebStat::Configure.get["id_extraction_regexs"]["youtube"])
38
+ youtube_decscription
39
+ else
40
+ Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
41
+ end
42
+ end
43
+
44
+ # Get describe of youtube movie.
45
+ def youtube_decscription
46
+ regex_string = WebStat::Configure.get["id_extraction_regexs"]["youtube"]
47
+ if @url.match(regex_string)
48
+ id = @url.gsub(%r{#{regex_string}}, '\1')
49
+ youtube = Google::Apis::YoutubeV3::YouTubeService.new
50
+ youtube.key = WebStat::Configure.get["api_keys"]["youtube"]
51
+ response = youtube.list_videos(:snippet, id: id)
52
+ response.items.first.snippet.description
53
+ end
38
54
  end
39
55
 
40
56
  # Get temporary path of image
@@ -48,9 +64,9 @@ module WebStat
48
64
  end
49
65
  end
50
66
  # If there is a thumbnail rule, apply it.
51
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
52
- if @url.match(v[0])
53
- return @url.gsub(v[0], v[1])
67
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
68
+ if @url.match(regex_string)
69
+ return @url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])
54
70
  end
55
71
  end
56
72
  readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body').to_s).content)
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.7"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -14,14 +14,15 @@ RSpec.describe WebStat::Configure do
14
14
  it "Get thumbnail_regex.youtube." do
15
15
  config = WebStat::Configure.get
16
16
  expect(config["thumbnail_regex"]["yotube"].nil?).to eq true
17
- expect(config["thumbnail_regex"]["youtube"].count).to eq 2
17
+ expect(config["id_extraction_regexs"]["youtube"]).to be_a String
18
+ expect(config["thumbnail_regex"]["youtube"]).to be_a String
18
19
  end
19
20
 
20
21
  it "Match youtube url." do
21
22
  sample_url = "https://www.youtube.com/watch?v=aChpsuUffUM"
22
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
23
- if sample_url.match(v[0])
24
- expect(sample_url.gsub(v[0], v[1])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
23
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
24
+ if sample_url.match(regex_string)
25
+ expect(sample_url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
25
26
  end
26
27
  end
27
28
  end
data/web_stat.gemspec CHANGED
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_runtime_dependency "pdf-reader", "2.4.0"
32
32
  spec.add_runtime_dependency "webrick", ">= 1.7.0"
33
33
  spec.add_runtime_dependency "rexml", ">= 3.2.4"
34
+ spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
34
35
 
35
36
  spec.add_development_dependency "rake", ">= 10.0"
36
37
  spec.add_development_dependency "rspec", ">= 3.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.7
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-28 00:00:00.000000000 Z
11
+ date: 2021-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: 3.2.4
167
+ - !ruby/object:Gem::Dependency
168
+ name: google-api-client
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: 0.53.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: 0.53.0
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: rake
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -238,7 +252,7 @@ description: Fetch the web pages and stat.
238
252
  email:
239
253
  - yube@newsdict.jp
240
254
  executables:
241
- - fetch_as_html
255
+ - fetch_as_url
242
256
  extensions: []
243
257
  extra_rdoc_files: []
244
258
  files:
@@ -253,7 +267,7 @@ files:
253
267
  - LICENSE.txt
254
268
  - README.md
255
269
  - Rakefile
256
- - bin/fetch_as_html
270
+ - bin/fetch_as_url
257
271
  - docker-compose.yml
258
272
  - docker/exec
259
273
  - docker/start