web_stat 0.4.6 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ce60bcf4a31f90024abc35cf1ecc57e32626dfb5ecf4bc4f2280bd72931ff34
4
- data.tar.gz: 575f805a63a995b2d0e3bc909978dcd4e3b4f15462f717cf83d6aac00c96078d
3
+ metadata.gz: e09b39af40b8307541b0c9be774a603c893fa15a1e3dbf0ef4cc3b75912a7c56
4
+ data.tar.gz: d0f291a214b07adeba7679c82c1a710a60797eb4a3924badcd2d2fee96d750a2
5
5
  SHA512:
6
- metadata.gz: 7af1262b25163205eabdfa26e1671f95d53963387eceec6c2c99da0a3a17359b77aaa097ec9556010b8000ecc0772d1dc19a67d128310c8d6bdd8379d008a913
7
- data.tar.gz: 855706ad8525609e8a2a50ca64091081794940d69187f951a79c02c66acfa5be9f53cdeacf0f99f6c0431ad41a71ce616691590d40bce0a56576275fba96c453
6
+ metadata.gz: 3a150d48af70feb1840c1aeaebe5720c8ea8b0600c2571427130de835414d96d0fa2a14b7cff85e36484ed801102e394d96971b2c681725db6eb3d3333a3f641
7
+ data.tar.gz: 708a5bcce8bd1313d05ace67ad5db3351e54f7f7e6c1798061256699c6c99129e2a7c3d9e05ad7052dbdd497c15b8fb0d8afe091484697fe1b294731fb3fcc24
File without changes
data/lib/web_stat.rb CHANGED
@@ -2,6 +2,7 @@ require "bundler"
2
2
 
3
3
  require 'cld'
4
4
  require 'uri'
5
+ require 'erb'
5
6
  require 'digest'
6
7
  require 'logger'
7
8
  require 'sanitize'
@@ -11,6 +12,7 @@ require 'net/http'
11
12
  require 'pdf/reader'
12
13
  require 'ruby-readability'
13
14
  require 'selenium-webdriver'
15
+ require 'google/apis/youtube_v3'
14
16
 
15
17
  require "helpers/web_drive_helper"
16
18
  require "web_stat/final_redirect_url"
@@ -18,6 +20,7 @@ require "web_stat/categorize"
18
20
  require "web_stat/configure"
19
21
  require "web_stat/errors"
20
22
  require "web_stat/fetch"
23
+
21
24
  require "web_stat/tag"
22
25
  require "web_stat/version"
23
26
  require "web_stat/fetch/fetch_as_html"
@@ -14,10 +14,13 @@ development: &development
14
14
  - '//img/@src'
15
15
  userdic: ""
16
16
  use_chromedirver: false
17
+ id_extraction_regexs:
18
+ youtube: '^https://www.youtube.com/watch\?v=([^&]+)'
17
19
  thumbnail_regex:
18
- youtube:
19
- - '%r{^https://www.youtube.com/watch\?v=([^&]+)}'
20
- - 'http://img.youtube.com/vi/\1/default.jpg'
20
+ youtube: 'http://img.youtube.com/vi/\1/default.jpg'
21
+ api_keys:
22
+ youtube: "dummy-key"
23
+ lang: <%= ENV['LANG'] %>
21
24
  test:
22
25
  <<: *development
23
26
  production:
@@ -7,9 +7,9 @@ module WebStat
7
7
  # Get yaml
8
8
  def get
9
9
  if defined? Rails
10
- YAML.load_file(get_configure_path)[Rails.env]
10
+ YAML.load(ERB.new(File.read(get_configure_path)).result)[Rails.env]
11
11
  else
12
- YAML.load_file(get_configure_path)[ENV["ENV"] || "production"]
12
+ YAML.load(ERB.new(File.read(get_configure_path)).result)[ENV["ENV"] || "production"]
13
13
  end
14
14
  end
15
15
 
@@ -34,7 +34,23 @@ module WebStat
34
34
  end
35
35
  # Get main section
36
36
  def content
37
- Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
37
+ if @url&.match(WebStat::Configure.get["id_extraction_regexs"]["youtube"])
38
+ youtube_decscription
39
+ else
40
+ Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
41
+ end
42
+ end
43
+
44
+ # Get describe of youtube movie.
45
+ def youtube_decscription
46
+ regex_string = WebStat::Configure.get["id_extraction_regexs"]["youtube"]
47
+ if @url.match(regex_string)
48
+ id = @url.gsub(%r{#{regex_string}.*$}, '\1')
49
+ youtube = Google::Apis::YoutubeV3::YouTubeService.new
50
+ youtube.key = WebStat::Configure.get["api_keys"]["youtube"]
51
+ response = youtube.list_videos(:snippet, id: id)
52
+ response.items.first.snippet.description
53
+ end
38
54
  end
39
55
 
40
56
  # Get temporary path of image
@@ -48,9 +64,9 @@ module WebStat
48
64
  end
49
65
  end
50
66
  # If there is a thumbnail rule, apply it.
51
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
52
- if @url.match(v[0])
53
- return @url.gsub(v[0], v[1])
67
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
68
+ if @url.match(regex_string)
69
+ return @url.gsub(%r{#{regex_string}.*$}, WebStat::Configure.get["thumbnail_regex"][provider])
54
70
  end
55
71
  end
56
72
  readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body').to_s).content)
@@ -82,6 +98,8 @@ module WebStat
82
98
  end
83
99
  end
84
100
  tmp_file
101
+ rescue
102
+ false
85
103
  end
86
104
 
87
105
  # Get url
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.6"
3
- end
2
+ VERSION = "0.5.3"
3
+ end
@@ -14,15 +14,20 @@ RSpec.describe WebStat::Configure do
14
14
  it "Get thumbnail_regex.youtube." do
15
15
  config = WebStat::Configure.get
16
16
  expect(config["thumbnail_regex"]["yotube"].nil?).to eq true
17
- expect(config["thumbnail_regex"]["youtube"].count).to eq 2
17
+ expect(config["id_extraction_regexs"]["youtube"]).to be_a String
18
+ expect(config["thumbnail_regex"]["youtube"]).to be_a String
18
19
  end
19
20
 
20
21
  it "Match youtube url." do
21
22
  sample_url = "https://www.youtube.com/watch?v=aChpsuUffUM"
22
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
23
- if sample_url.match(v[0])
24
- expect(sample_url.gsub(v[0], v[1])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
23
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
24
+ if sample_url.match(regex_string)
25
+ expect(sample_url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
25
26
  end
26
27
  end
27
28
  end
29
+
30
+ it "Environment variables can be read on YAML" do
31
+ expect(WebStat::Configure.get["lang"]).to eq "C.UTF-8"
32
+ end
28
33
  end
data/web_stat.gemspec CHANGED
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_runtime_dependency "pdf-reader", "2.4.0"
32
32
  spec.add_runtime_dependency "webrick", ">= 1.7.0"
33
33
  spec.add_runtime_dependency "rexml", ">= 3.2.4"
34
+ spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
34
35
 
35
36
  spec.add_development_dependency "rake", ">= 10.0"
36
37
  spec.add_development_dependency "rspec", ">= 3.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-24 00:00:00.000000000 Z
11
+ date: 2021-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: 3.2.4
167
+ - !ruby/object:Gem::Dependency
168
+ name: google-api-client
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: 0.53.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: 0.53.0
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: rake
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -238,7 +252,7 @@ description: Fetch the web pages and stat.
238
252
  email:
239
253
  - yube@newsdict.jp
240
254
  executables:
241
- - fetch_as_html
255
+ - fetch_as_url
242
256
  extensions: []
243
257
  extra_rdoc_files: []
244
258
  files:
@@ -249,11 +263,10 @@ files:
249
263
  - CODE_OF_CONDUCT.md
250
264
  - Dockerfile
251
265
  - Gemfile
252
- - Gemfile.lock
253
266
  - LICENSE.txt
254
267
  - README.md
255
268
  - Rakefile
256
- - bin/fetch_as_html
269
+ - bin/fetch_as_url
257
270
  - docker-compose.yml
258
271
  - docker/exec
259
272
  - docker/start
data/Gemfile.lock DELETED
@@ -1,132 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- web_stat (0.4.6)
5
- bundler (>= 2.0.2)
6
- cld (>= 0.8.0)
7
- mechanize (>= 2.7.7)
8
- natto (>= 1.1.2)
9
- nokogiri (>= 1.10.4)
10
- pdf-reader (= 2.4.0)
11
- rexml (>= 3.2.4)
12
- ruby-readability (>= 0.7)
13
- sanitize (>= 5.0.0)
14
- selenium-webdriver (= 3.142.7)
15
- webrick (>= 1.7.0)
16
-
17
- GEM
18
- remote: https://rubygems.org/
19
- specs:
20
- Ascii85 (1.0.3)
21
- addressable (2.7.0)
22
- public_suffix (>= 2.0.2, < 5.0)
23
- afm (0.2.2)
24
- byebug (11.1.3)
25
- childprocess (3.0.0)
26
- cld (0.8.0)
27
- ffi
28
- coderay (1.1.3)
29
- connection_pool (2.2.3)
30
- crack (0.4.5)
31
- rexml
32
- crass (1.0.6)
33
- diff-lcs (1.4.4)
34
- domain_name (0.5.20190701)
35
- unf (>= 0.0.5, < 1.0.0)
36
- ffi (1.14.2)
37
- guess_html_encoding (0.0.11)
38
- hashdiff (1.0.1)
39
- hashery (2.1.2)
40
- http-cookie (1.0.3)
41
- domain_name (~> 0.5)
42
- mechanize (2.7.7)
43
- domain_name (~> 0.5, >= 0.5.1)
44
- http-cookie (~> 1.0)
45
- mime-types (>= 1.17.2)
46
- net-http-digest_auth (~> 1.1, >= 1.1.1)
47
- net-http-persistent (>= 2.5.2)
48
- nokogiri (~> 1.6)
49
- ntlm-http (~> 0.1, >= 0.1.1)
50
- webrick (~> 1.7)
51
- webrobots (>= 0.0.9, < 0.2)
52
- method_source (1.0.0)
53
- mime-types (3.3.1)
54
- mime-types-data (~> 3.2015)
55
- mime-types-data (3.2021.0212)
56
- mini_portile2 (2.5.0)
57
- natto (1.2.0)
58
- ffi (>= 1.9.0)
59
- net-http-digest_auth (1.4.1)
60
- net-http-persistent (4.0.1)
61
- connection_pool (~> 2.2)
62
- nokogiri (1.11.1)
63
- mini_portile2 (~> 2.5.0)
64
- racc (~> 1.4)
65
- nokogumbo (2.0.4)
66
- nokogiri (~> 1.8, >= 1.8.4)
67
- ntlm-http (0.1.1)
68
- pdf-reader (2.4.0)
69
- Ascii85 (~> 1.0.0)
70
- afm (~> 0.2.1)
71
- hashery (~> 2.0)
72
- ruby-rc4
73
- ttfunk
74
- pry (0.13.1)
75
- coderay (~> 1.1)
76
- method_source (~> 1.0)
77
- pry-byebug (3.9.0)
78
- byebug (~> 11.0)
79
- pry (~> 0.13.0)
80
- public_suffix (4.0.6)
81
- racc (1.5.2)
82
- rake (13.0.3)
83
- rexml (3.2.4)
84
- rspec (3.10.0)
85
- rspec-core (~> 3.10.0)
86
- rspec-expectations (~> 3.10.0)
87
- rspec-mocks (~> 3.10.0)
88
- rspec-core (3.10.1)
89
- rspec-support (~> 3.10.0)
90
- rspec-expectations (3.10.1)
91
- diff-lcs (>= 1.2.0, < 2.0)
92
- rspec-support (~> 3.10.0)
93
- rspec-mocks (3.10.2)
94
- diff-lcs (>= 1.2.0, < 2.0)
95
- rspec-support (~> 3.10.0)
96
- rspec-support (3.10.2)
97
- ruby-rc4 (0.1.5)
98
- ruby-readability (0.7.0)
99
- guess_html_encoding (>= 0.0.4)
100
- nokogiri (>= 1.6.0)
101
- rubyzip (2.3.0)
102
- sanitize (5.2.3)
103
- crass (~> 1.0.2)
104
- nokogiri (>= 1.8.0)
105
- nokogumbo (~> 2.0)
106
- selenium-webdriver (3.142.7)
107
- childprocess (>= 0.5, < 4.0)
108
- rubyzip (>= 1.2.2)
109
- ttfunk (1.7.0)
110
- unf (0.1.4)
111
- unf_ext
112
- unf_ext (0.0.7.7)
113
- webmock (3.11.2)
114
- addressable (>= 2.3.6)
115
- crack (>= 0.3.2)
116
- hashdiff (>= 0.4.0, < 2.0.0)
117
- webrick (1.7.0)
118
- webrobots (0.1.2)
119
-
120
- PLATFORMS
121
- ruby
122
-
123
- DEPENDENCIES
124
- pry (>= 0.13.1)
125
- pry-byebug (= 3.9.0)
126
- rake (>= 10.0)
127
- rspec (>= 3.0)
128
- web_stat!
129
- webmock (>= 3.8.3)
130
-
131
- BUNDLED WITH
132
- 2.2.4