web_stat 0.2.11 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bcfaeb202076ea30cae6205877d0b0ad9060eb84116b84ef7bd3580cc4349aac
4
- data.tar.gz: 98e586440c8f3aed29e38a003f6d1afc96b26b2ae4c21e43d47e1dfaf6aaa16a
3
+ metadata.gz: 0f62b7ce2720dcd1851a3c0620af8da6e00313e1b5de918e8b1e23ceeeacee89
4
+ data.tar.gz: 1a52579415bf2a4f96efd39931ef41849751f693658f5cfbcd88202c680a446c
5
5
  SHA512:
6
- metadata.gz: dfbe6264256f08550ebb42244d92bd81c976dd5dd72a6a4dabffdf6a1366a8f010e8a20527d0bbe7a8334cdb028e86ecae0c63d6cc4368741abe815f1fcb3092
7
- data.tar.gz: 9098d904f26dfdfe14c87352cb47ca3f0333f5424ef26d2cd8232c088c2cf8d7dcbeb07012a9294a5c4e4949b51fe71e94d0995465400d87cf7c037fd09ba978
6
+ metadata.gz: 51b5d4fbc2e0969913cec3c1ae4b745cca8a790b4f9638d9a920d844840500424c69f18e7d3f58d2d870b8391e0817f06781699b302f36b2a68d81467608a867
7
+ data.tar.gz: 2d4ba4bfac9c5d5c76fcc48ba0202b7dbf223b4f42b7d6a649cce22c0742d6b7605c3386c93be2cb29487308714fc94fd2bf1063faf78bdcd5b7b6c5ab060d8b
data/.gitignore CHANGED
@@ -6,6 +6,7 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ /.bundle
9
10
 
10
11
  # rspec failure tracking
11
12
  .rspec_status
@@ -1 +1 @@
1
- 2.7.0
1
+ 2.7.1
@@ -0,0 +1,25 @@
1
+ # Define base image, you can use --build-arg
2
+ ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.35.2_nodev14.3.0_rubyv2.7.1_sasscv2.3.0_ffiv1.13.1_chromedriver"
3
+ FROM $base_image
4
+
5
+ # Set locale
6
+ ENV LANG "C.UTF-8"
7
+ ENV NOKOGIRI_USE_SYSTEM_LIBRARIES "YES"
8
+
9
+ # Set correct environment variables.
10
+ RUN mkdir -p /var/www/docker
11
+ WORKDIR /var/www/docker
12
+
13
+ # Set up application
14
+ COPY . .
15
+
16
+ # Init gems
17
+ RUN echo "gem: --no-rdoc --no-ri" > ~/.gemrc
18
+ RUN . /etc/profile.d/rvm.sh && \
19
+ bundle config --global with 'development test' && \
20
+ bundle config --global system true && \
21
+ bundle config --global jobs 10 && \
22
+ bundle config --global build.nokogiri --use-system-libraries && \
23
+ bundle install
24
+
25
+ CMD ["bash"]
data/Gemfile CHANGED
@@ -1,6 +1,4 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- gem "final_redirect_url", :git => "git@github.com:yubele/final_redirect_url"
4
-
5
3
  # Specify your gem's dependencies in web_stat.gemspec
6
4
  gemspec
@@ -1,13 +1,7 @@
1
- GIT
2
- remote: git@github.com:yubele/final_redirect_url
3
- revision: 45df878ec9495ebbfa06dc0a60cc5043c2519e16
4
- specs:
5
- final_redirect_url (0.1.1)
6
-
7
1
  PATH
8
2
  remote: .
9
3
  specs:
10
- web_stat (0.2.11)
4
+ web_stat (0.3.0)
11
5
  bundler (>= 2.0.2)
12
6
  cld (>= 0.8.0)
13
7
  mechanize (>= 2.7)
@@ -15,23 +9,26 @@ PATH
15
9
  nokogiri (>= 1.10.4)
16
10
  ruby-readability (>= 0.7)
17
11
  sanitize (>= 5.0.0)
12
+ selenium-webdriver (= 3.142.7)
18
13
 
19
14
  GEM
20
15
  remote: https://rubygems.org/
21
16
  specs:
22
17
  addressable (2.7.0)
23
18
  public_suffix (>= 2.0.2, < 5.0)
19
+ byebug (11.1.3)
20
+ childprocess (3.0.0)
24
21
  cld (0.8.0)
25
22
  ffi
26
- coderay (1.1.2)
27
- connection_pool (2.2.2)
23
+ coderay (1.1.3)
24
+ connection_pool (2.2.3)
28
25
  crack (0.4.3)
29
26
  safe_yaml (~> 1.0.0)
30
27
  crass (1.0.6)
31
28
  diff-lcs (1.3)
32
29
  domain_name (0.5.20190701)
33
30
  unf (>= 0.0.5, < 1.0.0)
34
- ffi (1.12.2)
31
+ ffi (1.13.1)
35
32
  guess_html_encoding (0.0.11)
36
33
  hashdiff (1.0.1)
37
34
  http-cookie (1.0.3)
@@ -48,7 +45,7 @@ GEM
48
45
  method_source (1.0.0)
49
46
  mime-types (3.3.1)
50
47
  mime-types-data (~> 3.2015)
51
- mime-types-data (3.2020.0425)
48
+ mime-types-data (3.2020.0512)
52
49
  mini_portile2 (2.4.0)
53
50
  natto (1.2.0)
54
51
  ffi (>= 1.9.0)
@@ -63,7 +60,10 @@ GEM
63
60
  pry (0.13.1)
64
61
  coderay (~> 1.1)
65
62
  method_source (~> 1.0)
66
- public_suffix (4.0.4)
63
+ pry-byebug (3.9.0)
64
+ byebug (~> 11.0)
65
+ pry (~> 0.13.0)
66
+ public_suffix (4.0.5)
67
67
  rake (13.0.1)
68
68
  rspec (3.9.0)
69
69
  rspec-core (~> 3.9.0)
@@ -71,7 +71,7 @@ GEM
71
71
  rspec-mocks (~> 3.9.0)
72
72
  rspec-core (3.9.2)
73
73
  rspec-support (~> 3.9.3)
74
- rspec-expectations (3.9.1)
74
+ rspec-expectations (3.9.2)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.9.0)
77
77
  rspec-mocks (3.9.1)
@@ -81,11 +81,15 @@ GEM
81
81
  ruby-readability (0.7.0)
82
82
  guess_html_encoding (>= 0.0.4)
83
83
  nokogiri (>= 1.6.0)
84
+ rubyzip (2.3.0)
84
85
  safe_yaml (1.0.5)
85
- sanitize (5.1.0)
86
+ sanitize (5.2.0)
86
87
  crass (~> 1.0.2)
87
88
  nokogiri (>= 1.8.0)
88
89
  nokogumbo (~> 2.0)
90
+ selenium-webdriver (3.142.7)
91
+ childprocess (>= 0.5, < 4.0)
92
+ rubyzip (>= 1.2.2)
89
93
  unf (0.1.4)
90
94
  unf_ext
91
95
  unf_ext (0.0.7.7)
@@ -99,8 +103,8 @@ PLATFORMS
99
103
  ruby
100
104
 
101
105
  DEPENDENCIES
102
- final_redirect_url!
103
- pry (>= 0.12.2)
106
+ pry (>= 0.13.1)
107
+ pry-byebug (= 3.9.0)
104
108
  rake (>= 10.0)
105
109
  rspec (>= 3.0)
106
110
  web_stat!
@@ -0,0 +1,18 @@
1
+ version: "3.8"
2
+ networks:
3
+ app-tier:
4
+ driver: bridge
5
+ services:
6
+ web_stat:
7
+ tty: true
8
+ stdin_open: true
9
+ container_name: web_stat
10
+ build:
11
+ context: .
12
+ dockerfile: Dockerfile
13
+ volumes:
14
+ - ./:/var/www/docker:cached
15
+ working_dir: /var/www/docker
16
+ command: bash
17
+ networks:
18
+ - app-tier
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ # Check to support tty.
3
+ if [ "$(tty>/dev/null;echo $?)" != "0" ];then
4
+ DOCKERCOMPOSE_EXEC="docker-compose exec -T"
5
+ else
6
+ DOCKERCOMPOSE_EXEC="docker-compose exec"
7
+ fi
8
+ $DOCKERCOMPOSE_EXEC web_stat /bin/bash -c ". /etc/profile.d/rvm.sh && . /root/.nvm/nvm.sh && $*"
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+ if [ "$1" = "rm" ];then
4
+ shift
5
+ docker-compose rm -f
6
+ fi
7
+ docker-compose stop
8
+ if [ "$1" = "attach" ]; then
9
+ docker-compose up -d
10
+ docker attach $2
11
+ else
12
+ docker-compose up $@
13
+ fi
@@ -0,0 +1,28 @@
1
+ class WebDriverHelper
2
+ class << self
3
+ # Get last url
4
+ # @param [String] url
5
+ # @param [Integer] delay
6
+ def get_last_url(url, delay=nil)
7
+ Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
8
+ Selenium::WebDriver.logger.level = :info
9
+ options = Selenium::WebDriver::Chrome::Options.new(args: [
10
+ 'headless',
11
+ 'no-sandbox',
12
+ 'disable-gpu',
13
+ 'start-maximized',
14
+ 'window-size=1920,1080'
15
+ ])
16
+ driver = Selenium::WebDriver.for(:chrome, options: options)
17
+ driver.manage.timeouts.implicit_wait = 10
18
+ Selenium::WebDriver::Wait.new(timeout: 10)
19
+ driver.get(url)
20
+ if delay.is_a?(Integer)
21
+ sleep delay
22
+ end
23
+ last_url = driver.current_url
24
+ driver.quit
25
+ last_url
26
+ end
27
+ end
28
+ end
@@ -1,14 +1,18 @@
1
1
  require "bundler"
2
2
 
3
+ require 'cld'
3
4
  require 'uri'
4
5
  require 'digest'
6
+ require 'logger'
5
7
  require 'sanitize'
6
8
  require 'nokogiri'
7
9
  require 'open-uri'
10
+ require 'net/http'
8
11
  require 'ruby-readability'
9
- require 'final_redirect_url'
10
- require 'cld'
12
+ require 'selenium-webdriver'
11
13
 
14
+ require "helpers/web_drive_helper"
15
+ require "web_stat/final_redirect_url"
12
16
  require "web_stat/categorize"
13
17
  require "web_stat/configure"
14
18
  require "web_stat/errors"
@@ -19,7 +19,6 @@ module WebStat
19
19
  title.strip
20
20
  end
21
21
  end
22
-
23
22
  # Get name of domain
24
23
  def site_name
25
24
  begin
@@ -33,7 +32,6 @@ module WebStat
33
32
  site_name.strip
34
33
  end
35
34
  end
36
- []
37
35
  # Get main section
38
36
  def content
39
37
  Sanitize.clean(Readability::Document.new(@nokogiri.at('body')).content)
@@ -0,0 +1,50 @@
1
+ # ref) https://github.com/indyarocks/final_redirect_url
2
+ # customize
3
+ # Changed
4
+
5
+ module FinalRedirectUrl
6
+
7
+ def self.final_redirect_url(url, options={})
8
+ final_url = ''
9
+ if is_valid_url?(url)
10
+ begin
11
+ redirect_lookup_depth = options[:depth].to_i > 0 ? options[:depth].to_i : 10
12
+ response_uri = get_final_redirect_url(url, redirect_lookup_depth)
13
+ final_url = url_string_from_uri(response_uri)
14
+ rescue Exception => ex
15
+ # nothing
16
+ end
17
+ end
18
+ final_url
19
+ end
20
+
21
+ private
22
+ def self.is_valid_url?(url)
23
+ url.to_s.match? URI::regexp(['http', 'https'])
24
+ end
25
+
26
+ def self.get_final_redirect_url(url, limit = 10)
27
+ return url if limit <= 0
28
+ uri = URI.parse(url)
29
+ response = ::Net::HTTP.get_response(uri)
30
+ if response.class == Net::HTTPOK
31
+ return URI.parse(WebDriverHelper.get_last_url(uri))
32
+ else
33
+ redirect_location = response['location']
34
+ location_uri = URI.parse(redirect_location)
35
+ if location_uri.host.nil?
36
+ redirect_location = uri.scheme + '://' + uri.host + redirect_location
37
+ end
38
+ warn "redirected to #{redirect_location}"
39
+ get_final_redirect_url(redirect_location, limit - 1)
40
+ end
41
+ end
42
+
43
+ def self.url_string_from_uri(uri)
44
+ url_str = "#{uri.scheme}://#{uri.host}#{uri.request_uri}"
45
+ if uri.fragment
46
+ url_str = url_str + "##{uri.fragment}"
47
+ end
48
+ url_str
49
+ end
50
+ end
@@ -4,7 +4,7 @@ module WebStat
4
4
  attr_accessor :natto_mecab, :article
5
5
 
6
6
  def initialize(article, userdic: nil)
7
- @natto_mecab = Natto::MeCab.new(userdic: userdic)
7
+ @natto_mecab = Natto::MeCab.new(userdic: userdic)
8
8
  @article = article
9
9
  end
10
10
 
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.11"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -1,12 +1,18 @@
1
1
  require 'rspec/expectations'
2
2
  require "bundler/setup"
3
3
  require 'pry'
4
+ require 'pry-byebug'
4
5
  require "web_stat"
5
6
 
6
7
  require 'webmock'
7
8
  include WebMock::API
8
9
  WebMock.enable!
9
10
 
11
+ WebMock.disable_net_connect!({
12
+ allow_localhost: true,
13
+ allow: 'chromedriver.storage.googleapis.com'
14
+ })
15
+
10
16
  RSpec.configure do |config|
11
17
  # Enable flags like --only-failures and --next-failure
12
18
  config.example_status_persistence_file_path = ".rspec_status"
@@ -27,9 +27,11 @@ Gem::Specification.new do |spec|
27
27
  spec.add_runtime_dependency "natto", ">= 1.1.2"
28
28
  spec.add_runtime_dependency "sanitize", ">= 5.0.0"
29
29
  spec.add_runtime_dependency "cld", ">= 0.8.0"
30
+ spec.add_runtime_dependency "selenium-webdriver", "= 3.142.7"
30
31
 
31
32
  spec.add_development_dependency "rake", ">= 10.0"
32
33
  spec.add_development_dependency "rspec", ">= 3.0"
33
- spec.add_development_dependency "pry", ">= 0.12.2"
34
+ spec.add_development_dependency "pry", ">= 0.13.1"
34
35
  spec.add_development_dependency "webmock", ">= 3.6.0"
36
+ spec.add_development_dependency "pry-byebug", "3.9.0"
35
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.11
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-09 00:00:00.000000000 Z
11
+ date: 2020-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: 0.8.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: selenium-webdriver
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '='
116
+ - !ruby/object:Gem::Version
117
+ version: 3.142.7
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '='
123
+ - !ruby/object:Gem::Version
124
+ version: 3.142.7
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rake
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +156,14 @@ dependencies:
142
156
  requirements:
143
157
  - - ">="
144
158
  - !ruby/object:Gem::Version
145
- version: 0.12.2
159
+ version: 0.13.1
146
160
  type: :development
147
161
  prerelease: false
148
162
  version_requirements: !ruby/object:Gem::Requirement
149
163
  requirements:
150
164
  - - ">="
151
165
  - !ruby/object:Gem::Version
152
- version: 0.12.2
166
+ version: 0.13.1
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: webmock
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - ">="
165
179
  - !ruby/object:Gem::Version
166
180
  version: 3.6.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: pry-byebug
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - '='
186
+ - !ruby/object:Gem::Version
187
+ version: 3.9.0
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - '='
193
+ - !ruby/object:Gem::Version
194
+ version: 3.9.0
167
195
  description: Fetch the web pages and stat.
168
196
  email:
169
197
  - yube@newsdict.jp
@@ -177,12 +205,17 @@ files:
177
205
  - ".ruby-version"
178
206
  - ".travis.yml"
179
207
  - CODE_OF_CONDUCT.md
208
+ - Dockerfile
180
209
  - Gemfile
181
210
  - Gemfile.lock
182
211
  - LICENSE.txt
183
212
  - README.md
184
213
  - Rakefile
185
214
  - bin/fetch_as_html
215
+ - docker-compose.yml
216
+ - docker/exec
217
+ - docker/start
218
+ - lib/helpers/web_drive_helper.rb
186
219
  - lib/web_stat.rb
187
220
  - lib/web_stat/categorize.rb
188
221
  - lib/web_stat/config/web_stat.yml
@@ -191,6 +224,7 @@ files:
191
224
  - lib/web_stat/fetch.rb
192
225
  - lib/web_stat/fetch/fetch_as_html.rb
193
226
  - lib/web_stat/fetch/fetch_as_web.rb
227
+ - lib/web_stat/final_redirect_url.rb
194
228
  - lib/web_stat/tag.rb
195
229
  - lib/web_stat/tasks/install.rake
196
230
  - lib/web_stat/version.rb
@@ -224,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
258
  - !ruby/object:Gem::Version
225
259
  version: '0'
226
260
  requirements: []
227
- rubygems_version: 3.0.3
261
+ rubygems_version: 3.1.2
228
262
  signing_key:
229
263
  specification_version: 4
230
264
  summary: Get the status of the web pages.