web_stat 0.4.6 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ce60bcf4a31f90024abc35cf1ecc57e32626dfb5ecf4bc4f2280bd72931ff34
4
- data.tar.gz: 575f805a63a995b2d0e3bc909978dcd4e3b4f15462f717cf83d6aac00c96078d
3
+ metadata.gz: e09b39af40b8307541b0c9be774a603c893fa15a1e3dbf0ef4cc3b75912a7c56
4
+ data.tar.gz: d0f291a214b07adeba7679c82c1a710a60797eb4a3924badcd2d2fee96d750a2
5
5
  SHA512:
6
- metadata.gz: 7af1262b25163205eabdfa26e1671f95d53963387eceec6c2c99da0a3a17359b77aaa097ec9556010b8000ecc0772d1dc19a67d128310c8d6bdd8379d008a913
7
- data.tar.gz: 855706ad8525609e8a2a50ca64091081794940d69187f951a79c02c66acfa5be9f53cdeacf0f99f6c0431ad41a71ce616691590d40bce0a56576275fba96c453
6
+ metadata.gz: 3a150d48af70feb1840c1aeaebe5720c8ea8b0600c2571427130de835414d96d0fa2a14b7cff85e36484ed801102e394d96971b2c681725db6eb3d3333a3f641
7
+ data.tar.gz: 708a5bcce8bd1313d05ace67ad5db3351e54f7f7e6c1798061256699c6c99129e2a7c3d9e05ad7052dbdd497c15b8fb0d8afe091484697fe1b294731fb3fcc24
File without changes
data/lib/web_stat.rb CHANGED
@@ -2,6 +2,7 @@ require "bundler"
2
2
 
3
3
  require 'cld'
4
4
  require 'uri'
5
+ require 'erb'
5
6
  require 'digest'
6
7
  require 'logger'
7
8
  require 'sanitize'
@@ -11,6 +12,7 @@ require 'net/http'
11
12
  require 'pdf/reader'
12
13
  require 'ruby-readability'
13
14
  require 'selenium-webdriver'
15
+ require 'google/apis/youtube_v3'
14
16
 
15
17
  require "helpers/web_drive_helper"
16
18
  require "web_stat/final_redirect_url"
@@ -18,6 +20,7 @@ require "web_stat/categorize"
18
20
  require "web_stat/configure"
19
21
  require "web_stat/errors"
20
22
  require "web_stat/fetch"
23
+
21
24
  require "web_stat/tag"
22
25
  require "web_stat/version"
23
26
  require "web_stat/fetch/fetch_as_html"
@@ -14,10 +14,13 @@ development: &development
14
14
  - '//img/@src'
15
15
  userdic: ""
16
16
  use_chromedirver: false
17
+ id_extraction_regexs:
18
+ youtube: '^https://www.youtube.com/watch\?v=([^&]+)'
17
19
  thumbnail_regex:
18
- youtube:
19
- - '%r{^https://www.youtube.com/watch\?v=([^&]+)}'
20
- - 'http://img.youtube.com/vi/\1/default.jpg'
20
+ youtube: 'http://img.youtube.com/vi/\1/default.jpg'
21
+ api_keys:
22
+ youtube: "dummy-key"
23
+ lang: <%= ENV['LANG'] %>
21
24
  test:
22
25
  <<: *development
23
26
  production:
@@ -7,9 +7,9 @@ module WebStat
7
7
  # Get yaml
8
8
  def get
9
9
  if defined? Rails
10
- YAML.load_file(get_configure_path)[Rails.env]
10
+ YAML.load(ERB.new(File.read(get_configure_path)).result)[Rails.env]
11
11
  else
12
- YAML.load_file(get_configure_path)[ENV["ENV"] || "production"]
12
+ YAML.load(ERB.new(File.read(get_configure_path)).result)[ENV["ENV"] || "production"]
13
13
  end
14
14
  end
15
15
 
@@ -34,7 +34,23 @@ module WebStat
34
34
  end
35
35
  # Get main section
36
36
  def content
37
- Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
37
+ if @url&.match(WebStat::Configure.get["id_extraction_regexs"]["youtube"])
38
+ youtube_decscription
39
+ else
40
+ Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
41
+ end
42
+ end
43
+
44
+ # Get describe of youtube movie.
45
+ def youtube_decscription
46
+ regex_string = WebStat::Configure.get["id_extraction_regexs"]["youtube"]
47
+ if @url.match(regex_string)
48
+ id = @url.gsub(%r{#{regex_string}.*$}, '\1')
49
+ youtube = Google::Apis::YoutubeV3::YouTubeService.new
50
+ youtube.key = WebStat::Configure.get["api_keys"]["youtube"]
51
+ response = youtube.list_videos(:snippet, id: id)
52
+ response.items.first.snippet.description
53
+ end
38
54
  end
39
55
 
40
56
  # Get temporary path of image
@@ -48,9 +64,9 @@ module WebStat
48
64
  end
49
65
  end
50
66
  # If there is a thumbnail rule, apply it.
51
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
52
- if @url.match(v[0])
53
- return @url.gsub(v[0], v[1])
67
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
68
+ if @url.match(regex_string)
69
+ return @url.gsub(%r{#{regex_string}.*$}, WebStat::Configure.get["thumbnail_regex"][provider])
54
70
  end
55
71
  end
56
72
  readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body').to_s).content)
@@ -82,6 +98,8 @@ module WebStat
82
98
  end
83
99
  end
84
100
  tmp_file
101
+ rescue
102
+ false
85
103
  end
86
104
 
87
105
  # Get url
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.4.6"
3
- end
2
+ VERSION = "0.5.3"
3
+ end
@@ -14,15 +14,20 @@ RSpec.describe WebStat::Configure do
14
14
  it "Get thumbnail_regex.youtube." do
15
15
  config = WebStat::Configure.get
16
16
  expect(config["thumbnail_regex"]["yotube"].nil?).to eq true
17
- expect(config["thumbnail_regex"]["youtube"].count).to eq 2
17
+ expect(config["id_extraction_regexs"]["youtube"]).to be_a String
18
+ expect(config["thumbnail_regex"]["youtube"]).to be_a String
18
19
  end
19
20
 
20
21
  it "Match youtube url." do
21
22
  sample_url = "https://www.youtube.com/watch?v=aChpsuUffUM"
22
- WebStat::Configure.get["thumbnail_regex"].each do |provider, v|
23
- if sample_url.match(v[0])
24
- expect(sample_url.gsub(v[0], v[1])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
23
+ WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
24
+ if sample_url.match(regex_string)
25
+ expect(sample_url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
25
26
  end
26
27
  end
27
28
  end
29
+
30
+ it "Environment variables can be read on YAML" do
31
+ expect(WebStat::Configure.get["lang"]).to eq "C.UTF-8"
32
+ end
28
33
  end
data/web_stat.gemspec CHANGED
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_runtime_dependency "pdf-reader", "2.4.0"
32
32
  spec.add_runtime_dependency "webrick", ">= 1.7.0"
33
33
  spec.add_runtime_dependency "rexml", ">= 3.2.4"
34
+ spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
34
35
 
35
36
  spec.add_development_dependency "rake", ">= 10.0"
36
37
  spec.add_development_dependency "rspec", ">= 3.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-24 00:00:00.000000000 Z
11
+ date: 2021-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: 3.2.4
167
+ - !ruby/object:Gem::Dependency
168
+ name: google-api-client
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: 0.53.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: 0.53.0
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: rake
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -238,7 +252,7 @@ description: Fetch the web pages and stat.
238
252
  email:
239
253
  - yube@newsdict.jp
240
254
  executables:
241
- - fetch_as_html
255
+ - fetch_as_url
242
256
  extensions: []
243
257
  extra_rdoc_files: []
244
258
  files:
@@ -249,11 +263,10 @@ files:
249
263
  - CODE_OF_CONDUCT.md
250
264
  - Dockerfile
251
265
  - Gemfile
252
- - Gemfile.lock
253
266
  - LICENSE.txt
254
267
  - README.md
255
268
  - Rakefile
256
- - bin/fetch_as_html
269
+ - bin/fetch_as_url
257
270
  - docker-compose.yml
258
271
  - docker/exec
259
272
  - docker/start
data/Gemfile.lock DELETED
@@ -1,132 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- web_stat (0.4.6)
5
- bundler (>= 2.0.2)
6
- cld (>= 0.8.0)
7
- mechanize (>= 2.7.7)
8
- natto (>= 1.1.2)
9
- nokogiri (>= 1.10.4)
10
- pdf-reader (= 2.4.0)
11
- rexml (>= 3.2.4)
12
- ruby-readability (>= 0.7)
13
- sanitize (>= 5.0.0)
14
- selenium-webdriver (= 3.142.7)
15
- webrick (>= 1.7.0)
16
-
17
- GEM
18
- remote: https://rubygems.org/
19
- specs:
20
- Ascii85 (1.0.3)
21
- addressable (2.7.0)
22
- public_suffix (>= 2.0.2, < 5.0)
23
- afm (0.2.2)
24
- byebug (11.1.3)
25
- childprocess (3.0.0)
26
- cld (0.8.0)
27
- ffi
28
- coderay (1.1.3)
29
- connection_pool (2.2.3)
30
- crack (0.4.5)
31
- rexml
32
- crass (1.0.6)
33
- diff-lcs (1.4.4)
34
- domain_name (0.5.20190701)
35
- unf (>= 0.0.5, < 1.0.0)
36
- ffi (1.14.2)
37
- guess_html_encoding (0.0.11)
38
- hashdiff (1.0.1)
39
- hashery (2.1.2)
40
- http-cookie (1.0.3)
41
- domain_name (~> 0.5)
42
- mechanize (2.7.7)
43
- domain_name (~> 0.5, >= 0.5.1)
44
- http-cookie (~> 1.0)
45
- mime-types (>= 1.17.2)
46
- net-http-digest_auth (~> 1.1, >= 1.1.1)
47
- net-http-persistent (>= 2.5.2)
48
- nokogiri (~> 1.6)
49
- ntlm-http (~> 0.1, >= 0.1.1)
50
- webrick (~> 1.7)
51
- webrobots (>= 0.0.9, < 0.2)
52
- method_source (1.0.0)
53
- mime-types (3.3.1)
54
- mime-types-data (~> 3.2015)
55
- mime-types-data (3.2021.0212)
56
- mini_portile2 (2.5.0)
57
- natto (1.2.0)
58
- ffi (>= 1.9.0)
59
- net-http-digest_auth (1.4.1)
60
- net-http-persistent (4.0.1)
61
- connection_pool (~> 2.2)
62
- nokogiri (1.11.1)
63
- mini_portile2 (~> 2.5.0)
64
- racc (~> 1.4)
65
- nokogumbo (2.0.4)
66
- nokogiri (~> 1.8, >= 1.8.4)
67
- ntlm-http (0.1.1)
68
- pdf-reader (2.4.0)
69
- Ascii85 (~> 1.0.0)
70
- afm (~> 0.2.1)
71
- hashery (~> 2.0)
72
- ruby-rc4
73
- ttfunk
74
- pry (0.13.1)
75
- coderay (~> 1.1)
76
- method_source (~> 1.0)
77
- pry-byebug (3.9.0)
78
- byebug (~> 11.0)
79
- pry (~> 0.13.0)
80
- public_suffix (4.0.6)
81
- racc (1.5.2)
82
- rake (13.0.3)
83
- rexml (3.2.4)
84
- rspec (3.10.0)
85
- rspec-core (~> 3.10.0)
86
- rspec-expectations (~> 3.10.0)
87
- rspec-mocks (~> 3.10.0)
88
- rspec-core (3.10.1)
89
- rspec-support (~> 3.10.0)
90
- rspec-expectations (3.10.1)
91
- diff-lcs (>= 1.2.0, < 2.0)
92
- rspec-support (~> 3.10.0)
93
- rspec-mocks (3.10.2)
94
- diff-lcs (>= 1.2.0, < 2.0)
95
- rspec-support (~> 3.10.0)
96
- rspec-support (3.10.2)
97
- ruby-rc4 (0.1.5)
98
- ruby-readability (0.7.0)
99
- guess_html_encoding (>= 0.0.4)
100
- nokogiri (>= 1.6.0)
101
- rubyzip (2.3.0)
102
- sanitize (5.2.3)
103
- crass (~> 1.0.2)
104
- nokogiri (>= 1.8.0)
105
- nokogumbo (~> 2.0)
106
- selenium-webdriver (3.142.7)
107
- childprocess (>= 0.5, < 4.0)
108
- rubyzip (>= 1.2.2)
109
- ttfunk (1.7.0)
110
- unf (0.1.4)
111
- unf_ext
112
- unf_ext (0.0.7.7)
113
- webmock (3.11.2)
114
- addressable (>= 2.3.6)
115
- crack (>= 0.3.2)
116
- hashdiff (>= 0.4.0, < 2.0.0)
117
- webrick (1.7.0)
118
- webrobots (0.1.2)
119
-
120
- PLATFORMS
121
- ruby
122
-
123
- DEPENDENCIES
124
- pry (>= 0.13.1)
125
- pry-byebug (= 3.9.0)
126
- rake (>= 10.0)
127
- rspec (>= 3.0)
128
- web_stat!
129
- webmock (>= 3.8.3)
130
-
131
- BUNDLED WITH
132
- 2.2.4