web_stat 0.4.6 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/{fetch_as_html → fetch_as_url} +0 -0
- data/lib/web_stat.rb +3 -0
- data/lib/web_stat/config/web_stat.yml +6 -3
- data/lib/web_stat/configure.rb +2 -2
- data/lib/web_stat/fetch.rb +22 -4
- data/lib/web_stat/version.rb +2 -2
- data/spec/web_stat/configure_spec.rb +9 -4
- data/web_stat.gemspec +1 -0
- metadata +18 -5
- data/Gemfile.lock +0 -132
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e09b39af40b8307541b0c9be774a603c893fa15a1e3dbf0ef4cc3b75912a7c56
|
4
|
+
data.tar.gz: d0f291a214b07adeba7679c82c1a710a60797eb4a3924badcd2d2fee96d750a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a150d48af70feb1840c1aeaebe5720c8ea8b0600c2571427130de835414d96d0fa2a14b7cff85e36484ed801102e394d96971b2c681725db6eb3d3333a3f641
|
7
|
+
data.tar.gz: 708a5bcce8bd1313d05ace67ad5db3351e54f7f7e6c1798061256699c6c99129e2a7c3d9e05ad7052dbdd497c15b8fb0d8afe091484697fe1b294731fb3fcc24
|
File without changes
|
data/lib/web_stat.rb
CHANGED
@@ -2,6 +2,7 @@ require "bundler"
|
|
2
2
|
|
3
3
|
require 'cld'
|
4
4
|
require 'uri'
|
5
|
+
require 'erb'
|
5
6
|
require 'digest'
|
6
7
|
require 'logger'
|
7
8
|
require 'sanitize'
|
@@ -11,6 +12,7 @@ require 'net/http'
|
|
11
12
|
require 'pdf/reader'
|
12
13
|
require 'ruby-readability'
|
13
14
|
require 'selenium-webdriver'
|
15
|
+
require 'google/apis/youtube_v3'
|
14
16
|
|
15
17
|
require "helpers/web_drive_helper"
|
16
18
|
require "web_stat/final_redirect_url"
|
@@ -18,6 +20,7 @@ require "web_stat/categorize"
|
|
18
20
|
require "web_stat/configure"
|
19
21
|
require "web_stat/errors"
|
20
22
|
require "web_stat/fetch"
|
23
|
+
|
21
24
|
require "web_stat/tag"
|
22
25
|
require "web_stat/version"
|
23
26
|
require "web_stat/fetch/fetch_as_html"
|
@@ -14,10 +14,13 @@ development: &development
|
|
14
14
|
- '//img/@src'
|
15
15
|
userdic: ""
|
16
16
|
use_chromedirver: false
|
17
|
+
id_extraction_regexs:
|
18
|
+
youtube: '^https://www.youtube.com/watch\?v=([^&]+)'
|
17
19
|
thumbnail_regex:
|
18
|
-
youtube:
|
19
|
-
|
20
|
-
|
20
|
+
youtube: 'http://img.youtube.com/vi/\1/default.jpg'
|
21
|
+
api_keys:
|
22
|
+
youtube: "dummy-key"
|
23
|
+
lang: <%= ENV['LANG'] %>
|
21
24
|
test:
|
22
25
|
<<: *development
|
23
26
|
production:
|
data/lib/web_stat/configure.rb
CHANGED
@@ -7,9 +7,9 @@ module WebStat
|
|
7
7
|
# Get yaml
|
8
8
|
def get
|
9
9
|
if defined? Rails
|
10
|
-
YAML.
|
10
|
+
YAML.load(ERB.new(File.read(get_configure_path)).result)[Rails.env]
|
11
11
|
else
|
12
|
-
YAML.
|
12
|
+
YAML.load(ERB.new(File.read(get_configure_path)).result)[ENV["ENV"] || "production"]
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
data/lib/web_stat/fetch.rb
CHANGED
@@ -34,7 +34,23 @@ module WebStat
|
|
34
34
|
end
|
35
35
|
# Get main section
|
36
36
|
def content
|
37
|
-
|
37
|
+
if @url&.match(WebStat::Configure.get["id_extraction_regexs"]["youtube"])
|
38
|
+
youtube_decscription
|
39
|
+
else
|
40
|
+
Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# Get describe of youtube movie.
|
45
|
+
def youtube_decscription
|
46
|
+
regex_string = WebStat::Configure.get["id_extraction_regexs"]["youtube"]
|
47
|
+
if @url.match(regex_string)
|
48
|
+
id = @url.gsub(%r{#{regex_string}.*$}, '\1')
|
49
|
+
youtube = Google::Apis::YoutubeV3::YouTubeService.new
|
50
|
+
youtube.key = WebStat::Configure.get["api_keys"]["youtube"]
|
51
|
+
response = youtube.list_videos(:snippet, id: id)
|
52
|
+
response.items.first.snippet.description
|
53
|
+
end
|
38
54
|
end
|
39
55
|
|
40
56
|
# Get temporary path of image
|
@@ -48,9 +64,9 @@ module WebStat
|
|
48
64
|
end
|
49
65
|
end
|
50
66
|
# If there is a thumbnail rule, apply it.
|
51
|
-
WebStat::Configure.get["
|
52
|
-
if @url.match(
|
53
|
-
return @url.gsub(
|
67
|
+
WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
|
68
|
+
if @url.match(regex_string)
|
69
|
+
return @url.gsub(%r{#{regex_string}.*$}, WebStat::Configure.get["thumbnail_regex"][provider])
|
54
70
|
end
|
55
71
|
end
|
56
72
|
readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body').to_s).content)
|
@@ -82,6 +98,8 @@ module WebStat
|
|
82
98
|
end
|
83
99
|
end
|
84
100
|
tmp_file
|
101
|
+
rescue
|
102
|
+
false
|
85
103
|
end
|
86
104
|
|
87
105
|
# Get url
|
data/lib/web_stat/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module WebStat
|
2
|
-
VERSION = "0.
|
3
|
-
end
|
2
|
+
VERSION = "0.5.3"
|
3
|
+
end
|
@@ -14,15 +14,20 @@ RSpec.describe WebStat::Configure do
|
|
14
14
|
it "Get thumbnail_regex.youtube." do
|
15
15
|
config = WebStat::Configure.get
|
16
16
|
expect(config["thumbnail_regex"]["yotube"].nil?).to eq true
|
17
|
-
expect(config["
|
17
|
+
expect(config["id_extraction_regexs"]["youtube"]).to be_a String
|
18
|
+
expect(config["thumbnail_regex"]["youtube"]).to be_a String
|
18
19
|
end
|
19
20
|
|
20
21
|
it "Match youtube url." do
|
21
22
|
sample_url = "https://www.youtube.com/watch?v=aChpsuUffUM"
|
22
|
-
WebStat::Configure.get["
|
23
|
-
if sample_url.match(
|
24
|
-
expect(sample_url.gsub(
|
23
|
+
WebStat::Configure.get["id_extraction_regexs"].each do |provider, regex_string|
|
24
|
+
if sample_url.match(regex_string)
|
25
|
+
expect(sample_url.gsub(%r{#{regex_string}}, WebStat::Configure.get["thumbnail_regex"][provider])).to eq 'http://img.youtube.com/vi/aChpsuUffUM/default.jpg'
|
25
26
|
end
|
26
27
|
end
|
27
28
|
end
|
29
|
+
|
30
|
+
it "Environment variables can be read on YAML" do
|
31
|
+
expect(WebStat::Configure.get["lang"]).to eq "C.UTF-8"
|
32
|
+
end
|
28
33
|
end
|
data/web_stat.gemspec
CHANGED
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_runtime_dependency "pdf-reader", "2.4.0"
|
32
32
|
spec.add_runtime_dependency "webrick", ">= 1.7.0"
|
33
33
|
spec.add_runtime_dependency "rexml", ">= 3.2.4"
|
34
|
+
spec.add_runtime_dependency "google-api-client", ">= 0.53.0"
|
34
35
|
|
35
36
|
spec.add_development_dependency "rake", ">= 10.0"
|
36
37
|
spec.add_development_dependency "rspec", ">= 3.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web_stat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yusuke abe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: 3.2.4
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: google-api-client
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: 0.53.0
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: 0.53.0
|
167
181
|
- !ruby/object:Gem::Dependency
|
168
182
|
name: rake
|
169
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -238,7 +252,7 @@ description: Fetch the web pages and stat.
|
|
238
252
|
email:
|
239
253
|
- yube@newsdict.jp
|
240
254
|
executables:
|
241
|
-
-
|
255
|
+
- fetch_as_url
|
242
256
|
extensions: []
|
243
257
|
extra_rdoc_files: []
|
244
258
|
files:
|
@@ -249,11 +263,10 @@ files:
|
|
249
263
|
- CODE_OF_CONDUCT.md
|
250
264
|
- Dockerfile
|
251
265
|
- Gemfile
|
252
|
-
- Gemfile.lock
|
253
266
|
- LICENSE.txt
|
254
267
|
- README.md
|
255
268
|
- Rakefile
|
256
|
-
- bin/
|
269
|
+
- bin/fetch_as_url
|
257
270
|
- docker-compose.yml
|
258
271
|
- docker/exec
|
259
272
|
- docker/start
|
data/Gemfile.lock
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
web_stat (0.4.6)
|
5
|
-
bundler (>= 2.0.2)
|
6
|
-
cld (>= 0.8.0)
|
7
|
-
mechanize (>= 2.7.7)
|
8
|
-
natto (>= 1.1.2)
|
9
|
-
nokogiri (>= 1.10.4)
|
10
|
-
pdf-reader (= 2.4.0)
|
11
|
-
rexml (>= 3.2.4)
|
12
|
-
ruby-readability (>= 0.7)
|
13
|
-
sanitize (>= 5.0.0)
|
14
|
-
selenium-webdriver (= 3.142.7)
|
15
|
-
webrick (>= 1.7.0)
|
16
|
-
|
17
|
-
GEM
|
18
|
-
remote: https://rubygems.org/
|
19
|
-
specs:
|
20
|
-
Ascii85 (1.0.3)
|
21
|
-
addressable (2.7.0)
|
22
|
-
public_suffix (>= 2.0.2, < 5.0)
|
23
|
-
afm (0.2.2)
|
24
|
-
byebug (11.1.3)
|
25
|
-
childprocess (3.0.0)
|
26
|
-
cld (0.8.0)
|
27
|
-
ffi
|
28
|
-
coderay (1.1.3)
|
29
|
-
connection_pool (2.2.3)
|
30
|
-
crack (0.4.5)
|
31
|
-
rexml
|
32
|
-
crass (1.0.6)
|
33
|
-
diff-lcs (1.4.4)
|
34
|
-
domain_name (0.5.20190701)
|
35
|
-
unf (>= 0.0.5, < 1.0.0)
|
36
|
-
ffi (1.14.2)
|
37
|
-
guess_html_encoding (0.0.11)
|
38
|
-
hashdiff (1.0.1)
|
39
|
-
hashery (2.1.2)
|
40
|
-
http-cookie (1.0.3)
|
41
|
-
domain_name (~> 0.5)
|
42
|
-
mechanize (2.7.7)
|
43
|
-
domain_name (~> 0.5, >= 0.5.1)
|
44
|
-
http-cookie (~> 1.0)
|
45
|
-
mime-types (>= 1.17.2)
|
46
|
-
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
47
|
-
net-http-persistent (>= 2.5.2)
|
48
|
-
nokogiri (~> 1.6)
|
49
|
-
ntlm-http (~> 0.1, >= 0.1.1)
|
50
|
-
webrick (~> 1.7)
|
51
|
-
webrobots (>= 0.0.9, < 0.2)
|
52
|
-
method_source (1.0.0)
|
53
|
-
mime-types (3.3.1)
|
54
|
-
mime-types-data (~> 3.2015)
|
55
|
-
mime-types-data (3.2021.0212)
|
56
|
-
mini_portile2 (2.5.0)
|
57
|
-
natto (1.2.0)
|
58
|
-
ffi (>= 1.9.0)
|
59
|
-
net-http-digest_auth (1.4.1)
|
60
|
-
net-http-persistent (4.0.1)
|
61
|
-
connection_pool (~> 2.2)
|
62
|
-
nokogiri (1.11.1)
|
63
|
-
mini_portile2 (~> 2.5.0)
|
64
|
-
racc (~> 1.4)
|
65
|
-
nokogumbo (2.0.4)
|
66
|
-
nokogiri (~> 1.8, >= 1.8.4)
|
67
|
-
ntlm-http (0.1.1)
|
68
|
-
pdf-reader (2.4.0)
|
69
|
-
Ascii85 (~> 1.0.0)
|
70
|
-
afm (~> 0.2.1)
|
71
|
-
hashery (~> 2.0)
|
72
|
-
ruby-rc4
|
73
|
-
ttfunk
|
74
|
-
pry (0.13.1)
|
75
|
-
coderay (~> 1.1)
|
76
|
-
method_source (~> 1.0)
|
77
|
-
pry-byebug (3.9.0)
|
78
|
-
byebug (~> 11.0)
|
79
|
-
pry (~> 0.13.0)
|
80
|
-
public_suffix (4.0.6)
|
81
|
-
racc (1.5.2)
|
82
|
-
rake (13.0.3)
|
83
|
-
rexml (3.2.4)
|
84
|
-
rspec (3.10.0)
|
85
|
-
rspec-core (~> 3.10.0)
|
86
|
-
rspec-expectations (~> 3.10.0)
|
87
|
-
rspec-mocks (~> 3.10.0)
|
88
|
-
rspec-core (3.10.1)
|
89
|
-
rspec-support (~> 3.10.0)
|
90
|
-
rspec-expectations (3.10.1)
|
91
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
92
|
-
rspec-support (~> 3.10.0)
|
93
|
-
rspec-mocks (3.10.2)
|
94
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
95
|
-
rspec-support (~> 3.10.0)
|
96
|
-
rspec-support (3.10.2)
|
97
|
-
ruby-rc4 (0.1.5)
|
98
|
-
ruby-readability (0.7.0)
|
99
|
-
guess_html_encoding (>= 0.0.4)
|
100
|
-
nokogiri (>= 1.6.0)
|
101
|
-
rubyzip (2.3.0)
|
102
|
-
sanitize (5.2.3)
|
103
|
-
crass (~> 1.0.2)
|
104
|
-
nokogiri (>= 1.8.0)
|
105
|
-
nokogumbo (~> 2.0)
|
106
|
-
selenium-webdriver (3.142.7)
|
107
|
-
childprocess (>= 0.5, < 4.0)
|
108
|
-
rubyzip (>= 1.2.2)
|
109
|
-
ttfunk (1.7.0)
|
110
|
-
unf (0.1.4)
|
111
|
-
unf_ext
|
112
|
-
unf_ext (0.0.7.7)
|
113
|
-
webmock (3.11.2)
|
114
|
-
addressable (>= 2.3.6)
|
115
|
-
crack (>= 0.3.2)
|
116
|
-
hashdiff (>= 0.4.0, < 2.0.0)
|
117
|
-
webrick (1.7.0)
|
118
|
-
webrobots (0.1.2)
|
119
|
-
|
120
|
-
PLATFORMS
|
121
|
-
ruby
|
122
|
-
|
123
|
-
DEPENDENCIES
|
124
|
-
pry (>= 0.13.1)
|
125
|
-
pry-byebug (= 3.9.0)
|
126
|
-
rake (>= 10.0)
|
127
|
-
rspec (>= 3.0)
|
128
|
-
web_stat!
|
129
|
-
webmock (>= 3.8.3)
|
130
|
-
|
131
|
-
BUNDLED WITH
|
132
|
-
2.2.4
|