web_stat 0.3.19 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Dockerfile +1 -1
- data/Gemfile.lock +14 -12
- data/README.md +0 -23
- data/lib/web_stat/fetch.rb +2 -2
- data/lib/web_stat/version.rb +1 -1
- data/web_stat.gemspec +2 -0
- metadata +31 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 29d04d8379978d1e295b829193a330fd9bab8cb92326e83aab39d066059dcb02
|
|
4
|
+
data.tar.gz: c2df9896f21ec9d777dea101a1c4c4be513ad28c11428fc1104ccefe08f98b40
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 728bc129ddced4cc58081ca4d8c02b761ba77e4adf277b736115ce51b1e7293f7b208adc5742fe50ba4e406f2a7c3f65af19d5c55f0cf0188a4a4b25704719bd
|
|
7
|
+
data.tar.gz: ef8ee476834a6fb75fed33476d070e1b00044f9115b85eaf3148f1901af10e4cd32f79ef3b369e59366a56ab7e8560180da3def8562e2b396f2b8b6bfc26196b
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
3.0.0
|
data/Dockerfile
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# Define base image, you can use --build-arg
|
|
2
|
-
ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.
|
|
2
|
+
ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.37.0_nodev15.2.1_rubyv3.0.0_sasscv2.4.0_ffiv1.13.1_chromedriver"
|
|
3
3
|
FROM $base_image
|
|
4
4
|
|
|
5
5
|
# Set locale
|
data/Gemfile.lock
CHANGED
|
@@ -8,9 +8,11 @@ PATH
|
|
|
8
8
|
natto (>= 1.1.2)
|
|
9
9
|
nokogiri (>= 1.10.4)
|
|
10
10
|
pdf-reader (= 2.4.0)
|
|
11
|
+
rexml (>= 3.2.4)
|
|
11
12
|
ruby-readability (>= 0.7)
|
|
12
13
|
sanitize (>= 5.0.0)
|
|
13
14
|
selenium-webdriver (= 3.142.7)
|
|
15
|
+
webrick (>= 1.7.0)
|
|
14
16
|
|
|
15
17
|
GEM
|
|
16
18
|
remote: https://rubygems.org/
|
|
@@ -24,14 +26,13 @@ GEM
|
|
|
24
26
|
cld (0.8.0)
|
|
25
27
|
ffi
|
|
26
28
|
coderay (1.1.3)
|
|
27
|
-
connection_pool (2.2.3)
|
|
28
29
|
crack (0.4.3)
|
|
29
30
|
safe_yaml (~> 1.0.0)
|
|
30
31
|
crass (1.0.6)
|
|
31
32
|
diff-lcs (1.3)
|
|
32
33
|
domain_name (0.5.20190701)
|
|
33
34
|
unf (>= 0.0.5, < 1.0.0)
|
|
34
|
-
ffi (1.
|
|
35
|
+
ffi (1.14.2)
|
|
35
36
|
guess_html_encoding (0.0.11)
|
|
36
37
|
hashdiff (1.0.1)
|
|
37
38
|
hashery (2.1.2)
|
|
@@ -49,16 +50,14 @@ GEM
|
|
|
49
50
|
method_source (1.0.0)
|
|
50
51
|
mime-types (3.3.1)
|
|
51
52
|
mime-types-data (~> 3.2015)
|
|
52
|
-
mime-types-data (3.2020.
|
|
53
|
-
mini_portile2 (2.4.0)
|
|
53
|
+
mime-types-data (3.2020.1104)
|
|
54
54
|
natto (1.2.0)
|
|
55
55
|
ffi (>= 1.9.0)
|
|
56
56
|
net-http-digest_auth (1.4.1)
|
|
57
|
-
net-http-persistent (
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
nokogumbo (2.0.2)
|
|
57
|
+
net-http-persistent (2.9.4)
|
|
58
|
+
nokogiri (1.11.1-x86_64-linux)
|
|
59
|
+
racc (~> 1.4)
|
|
60
|
+
nokogumbo (2.0.4)
|
|
62
61
|
nokogiri (~> 1.8, >= 1.8.4)
|
|
63
62
|
ntlm-http (0.1.1)
|
|
64
63
|
pdf-reader (2.4.0)
|
|
@@ -74,7 +73,9 @@ GEM
|
|
|
74
73
|
byebug (~> 11.0)
|
|
75
74
|
pry (~> 0.13.0)
|
|
76
75
|
public_suffix (4.0.5)
|
|
76
|
+
racc (1.5.2)
|
|
77
77
|
rake (13.0.1)
|
|
78
|
+
rexml (3.2.4)
|
|
78
79
|
rspec (3.9.0)
|
|
79
80
|
rspec-core (~> 3.9.0)
|
|
80
81
|
rspec-expectations (~> 3.9.0)
|
|
@@ -94,14 +95,14 @@ GEM
|
|
|
94
95
|
nokogiri (>= 1.6.0)
|
|
95
96
|
rubyzip (2.3.0)
|
|
96
97
|
safe_yaml (1.0.5)
|
|
97
|
-
sanitize (5.2.
|
|
98
|
+
sanitize (5.2.2)
|
|
98
99
|
crass (~> 1.0.2)
|
|
99
100
|
nokogiri (>= 1.8.0)
|
|
100
101
|
nokogumbo (~> 2.0)
|
|
101
102
|
selenium-webdriver (3.142.7)
|
|
102
103
|
childprocess (>= 0.5, < 4.0)
|
|
103
104
|
rubyzip (>= 1.2.2)
|
|
104
|
-
ttfunk (1.
|
|
105
|
+
ttfunk (1.7.0)
|
|
105
106
|
unf (0.1.4)
|
|
106
107
|
unf_ext
|
|
107
108
|
unf_ext (0.0.7.7)
|
|
@@ -109,6 +110,7 @@ GEM
|
|
|
109
110
|
addressable (>= 2.3.6)
|
|
110
111
|
crack (>= 0.3.2)
|
|
111
112
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
113
|
+
webrick (1.7.0)
|
|
112
114
|
webrobots (0.1.2)
|
|
113
115
|
|
|
114
116
|
PLATFORMS
|
|
@@ -123,4 +125,4 @@ DEPENDENCIES
|
|
|
123
125
|
webmock (>= 3.8.3)
|
|
124
126
|
|
|
125
127
|
BUNDLED WITH
|
|
126
|
-
2.
|
|
128
|
+
2.2.4
|
data/README.md
CHANGED
|
@@ -1,30 +1,7 @@
|
|
|
1
|
-
# !!!!! Precautions when using with Rails !!!!!
|
|
2
|
-
|
|
3
|
-
Write this line your Gemfile.
|
|
4
|
-
```
|
|
5
|
-
gem "final_redirect_url", :git => "git@github.com:yubele/final_redirect_url"
|
|
6
|
-
```
|
|
7
|
-
|
|
8
1
|
# WebStat
|
|
9
2
|
|
|
10
3
|
Fetch the web pages and stat.
|
|
11
4
|
|
|
12
|
-
## Requirements
|
|
13
|
-
|
|
14
|
-
- [MeCab _0.996_](http://taku910.github.io/mecab/#download)
|
|
15
|
-
- add runtime dependency
|
|
16
|
-
- "bundler", "~> 2.0"
|
|
17
|
-
- "nokogiri", "~> 1.10"
|
|
18
|
-
- "mechanize", "~> 2.7"
|
|
19
|
-
- "ruby-readability", "~> 0.7"
|
|
20
|
-
- "final_redirect_url", "~> 0.1.0"
|
|
21
|
-
- "natto", "~> 1.1.2"
|
|
22
|
-
- add development dependency
|
|
23
|
-
- "rake", "~> 10.0"
|
|
24
|
-
- "rspec", "~> 3.0"
|
|
25
|
-
- "rake", "~> 10.0"
|
|
26
|
-
- "rspec", "~> 3.0"
|
|
27
|
-
|
|
28
5
|
### Install mecab
|
|
29
6
|
|
|
30
7
|
$ sudo apt install mecab-ipadic-utf8 libmecab
|
data/lib/web_stat/fetch.rb
CHANGED
|
@@ -34,7 +34,7 @@ module WebStat
|
|
|
34
34
|
end
|
|
35
35
|
# Get main section
|
|
36
36
|
def content
|
|
37
|
-
Sanitize.clean(Readability::Document.new(@nokogiri.at('body')).content)
|
|
37
|
+
Sanitize.clean(Readability::Document.new(@nokogiri.at('body').to_s).content)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
# Get temporary path of image
|
|
@@ -53,7 +53,7 @@ module WebStat
|
|
|
53
53
|
return @url.gsub(v[0], v[1])
|
|
54
54
|
end
|
|
55
55
|
end
|
|
56
|
-
readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body')).content)
|
|
56
|
+
readability_content = ::Nokogiri::HTML(Readability::Document.new(@nokogiri.at('body').to_s).content)
|
|
57
57
|
if (path.nil? || path.empty?) && readability_content.xpath('//img').first
|
|
58
58
|
path = readability_content.xpath('//img').first.attr('src')
|
|
59
59
|
end
|
data/lib/web_stat/version.rb
CHANGED
data/web_stat.gemspec
CHANGED
|
@@ -29,6 +29,8 @@ Gem::Specification.new do |spec|
|
|
|
29
29
|
spec.add_runtime_dependency "cld", ">= 0.8.0"
|
|
30
30
|
spec.add_runtime_dependency "selenium-webdriver", "= 3.142.7"
|
|
31
31
|
spec.add_runtime_dependency "pdf-reader", "2.4.0"
|
|
32
|
+
spec.add_runtime_dependency "webrick", ">= 1.7.0"
|
|
33
|
+
spec.add_runtime_dependency "rexml", ">= 3.2.4"
|
|
32
34
|
|
|
33
35
|
spec.add_development_dependency "rake", ">= 10.0"
|
|
34
36
|
spec.add_development_dependency "rspec", ">= 3.0"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: web_stat
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yusuke abe
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2021-01-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -136,6 +136,34 @@ dependencies:
|
|
|
136
136
|
- - '='
|
|
137
137
|
- !ruby/object:Gem::Version
|
|
138
138
|
version: 2.4.0
|
|
139
|
+
- !ruby/object:Gem::Dependency
|
|
140
|
+
name: webrick
|
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
|
142
|
+
requirements:
|
|
143
|
+
- - ">="
|
|
144
|
+
- !ruby/object:Gem::Version
|
|
145
|
+
version: 1.7.0
|
|
146
|
+
type: :runtime
|
|
147
|
+
prerelease: false
|
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
149
|
+
requirements:
|
|
150
|
+
- - ">="
|
|
151
|
+
- !ruby/object:Gem::Version
|
|
152
|
+
version: 1.7.0
|
|
153
|
+
- !ruby/object:Gem::Dependency
|
|
154
|
+
name: rexml
|
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
|
156
|
+
requirements:
|
|
157
|
+
- - ">="
|
|
158
|
+
- !ruby/object:Gem::Version
|
|
159
|
+
version: 3.2.4
|
|
160
|
+
type: :runtime
|
|
161
|
+
prerelease: false
|
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
163
|
+
requirements:
|
|
164
|
+
- - ">="
|
|
165
|
+
- !ruby/object:Gem::Version
|
|
166
|
+
version: 3.2.4
|
|
139
167
|
- !ruby/object:Gem::Dependency
|
|
140
168
|
name: rake
|
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -274,7 +302,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
274
302
|
- !ruby/object:Gem::Version
|
|
275
303
|
version: '0'
|
|
276
304
|
requirements: []
|
|
277
|
-
rubygems_version: 3.
|
|
305
|
+
rubygems_version: 3.2.3
|
|
278
306
|
signing_key:
|
|
279
307
|
specification_version: 4
|
|
280
308
|
summary: Get the status of the web pages.
|