web_stat 0.2.11 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bcfaeb202076ea30cae6205877d0b0ad9060eb84116b84ef7bd3580cc4349aac
4
- data.tar.gz: 98e586440c8f3aed29e38a003f6d1afc96b26b2ae4c21e43d47e1dfaf6aaa16a
3
+ metadata.gz: 0f62b7ce2720dcd1851a3c0620af8da6e00313e1b5de918e8b1e23ceeeacee89
4
+ data.tar.gz: 1a52579415bf2a4f96efd39931ef41849751f693658f5cfbcd88202c680a446c
5
5
  SHA512:
6
- metadata.gz: dfbe6264256f08550ebb42244d92bd81c976dd5dd72a6a4dabffdf6a1366a8f010e8a20527d0bbe7a8334cdb028e86ecae0c63d6cc4368741abe815f1fcb3092
7
- data.tar.gz: 9098d904f26dfdfe14c87352cb47ca3f0333f5424ef26d2cd8232c088c2cf8d7dcbeb07012a9294a5c4e4949b51fe71e94d0995465400d87cf7c037fd09ba978
6
+ metadata.gz: 51b5d4fbc2e0969913cec3c1ae4b745cca8a790b4f9638d9a920d844840500424c69f18e7d3f58d2d870b8391e0817f06781699b302f36b2a68d81467608a867
7
+ data.tar.gz: 2d4ba4bfac9c5d5c76fcc48ba0202b7dbf223b4f42b7d6a649cce22c0742d6b7605c3386c93be2cb29487308714fc94fd2bf1063faf78bdcd5b7b6c5ab060d8b
data/.gitignore CHANGED
@@ -6,6 +6,7 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ /.bundle
9
10
 
10
11
  # rspec failure tracking
11
12
  .rspec_status
@@ -1 +1 @@
1
- 2.7.0
1
+ 2.7.1
@@ -0,0 +1,25 @@
# Define base image, you can use --build-arg
ARG base_image="newsdict/rails:ubuntu20.10_nvmv0.35.2_nodev14.3.0_rubyv2.7.1_sasscv2.3.0_ffiv1.13.1_chromedriver"
FROM $base_image

# Set locale and ask nokogiri to use the system libraries.
ENV LANG=C.UTF-8 \
    NOKOGIRI_USE_SYSTEM_LIBRARIES=YES

# Application directory (WORKDIR creates it when it does not exist).
WORKDIR /var/www/docker

# Set up application
COPY . .

# Init gems
# RubyGems 3 no longer accepts --no-rdoc/--no-ri; --no-document replaces both.
RUN echo "gem: --no-document" > ~/.gemrc
RUN . /etc/profile.d/rvm.sh && \
    bundle config --global with 'development test' && \
    bundle config --global system true && \
    bundle config --global jobs 10 && \
    bundle config --global build.nokogiri --use-system-libraries && \
    bundle install

CMD ["bash"]
data/Gemfile CHANGED
@@ -1,6 +1,4 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- gem "final_redirect_url", :git => "git@github.com:yubele/final_redirect_url"
4
-
5
3
  # Specify your gem's dependencies in web_stat.gemspec
6
4
  gemspec
@@ -1,13 +1,7 @@
1
- GIT
2
- remote: git@github.com:yubele/final_redirect_url
3
- revision: 45df878ec9495ebbfa06dc0a60cc5043c2519e16
4
- specs:
5
- final_redirect_url (0.1.1)
6
-
7
1
  PATH
8
2
  remote: .
9
3
  specs:
10
- web_stat (0.2.11)
4
+ web_stat (0.3.0)
11
5
  bundler (>= 2.0.2)
12
6
  cld (>= 0.8.0)
13
7
  mechanize (>= 2.7)
@@ -15,23 +9,26 @@ PATH
15
9
  nokogiri (>= 1.10.4)
16
10
  ruby-readability (>= 0.7)
17
11
  sanitize (>= 5.0.0)
12
+ selenium-webdriver (= 3.142.7)
18
13
 
19
14
  GEM
20
15
  remote: https://rubygems.org/
21
16
  specs:
22
17
  addressable (2.7.0)
23
18
  public_suffix (>= 2.0.2, < 5.0)
19
+ byebug (11.1.3)
20
+ childprocess (3.0.0)
24
21
  cld (0.8.0)
25
22
  ffi
26
- coderay (1.1.2)
27
- connection_pool (2.2.2)
23
+ coderay (1.1.3)
24
+ connection_pool (2.2.3)
28
25
  crack (0.4.3)
29
26
  safe_yaml (~> 1.0.0)
30
27
  crass (1.0.6)
31
28
  diff-lcs (1.3)
32
29
  domain_name (0.5.20190701)
33
30
  unf (>= 0.0.5, < 1.0.0)
34
- ffi (1.12.2)
31
+ ffi (1.13.1)
35
32
  guess_html_encoding (0.0.11)
36
33
  hashdiff (1.0.1)
37
34
  http-cookie (1.0.3)
@@ -48,7 +45,7 @@ GEM
48
45
  method_source (1.0.0)
49
46
  mime-types (3.3.1)
50
47
  mime-types-data (~> 3.2015)
51
- mime-types-data (3.2020.0425)
48
+ mime-types-data (3.2020.0512)
52
49
  mini_portile2 (2.4.0)
53
50
  natto (1.2.0)
54
51
  ffi (>= 1.9.0)
@@ -63,7 +60,10 @@ GEM
63
60
  pry (0.13.1)
64
61
  coderay (~> 1.1)
65
62
  method_source (~> 1.0)
66
- public_suffix (4.0.4)
63
+ pry-byebug (3.9.0)
64
+ byebug (~> 11.0)
65
+ pry (~> 0.13.0)
66
+ public_suffix (4.0.5)
67
67
  rake (13.0.1)
68
68
  rspec (3.9.0)
69
69
  rspec-core (~> 3.9.0)
@@ -71,7 +71,7 @@ GEM
71
71
  rspec-mocks (~> 3.9.0)
72
72
  rspec-core (3.9.2)
73
73
  rspec-support (~> 3.9.3)
74
- rspec-expectations (3.9.1)
74
+ rspec-expectations (3.9.2)
75
75
  diff-lcs (>= 1.2.0, < 2.0)
76
76
  rspec-support (~> 3.9.0)
77
77
  rspec-mocks (3.9.1)
@@ -81,11 +81,15 @@ GEM
81
81
  ruby-readability (0.7.0)
82
82
  guess_html_encoding (>= 0.0.4)
83
83
  nokogiri (>= 1.6.0)
84
+ rubyzip (2.3.0)
84
85
  safe_yaml (1.0.5)
85
- sanitize (5.1.0)
86
+ sanitize (5.2.0)
86
87
  crass (~> 1.0.2)
87
88
  nokogiri (>= 1.8.0)
88
89
  nokogumbo (~> 2.0)
90
+ selenium-webdriver (3.142.7)
91
+ childprocess (>= 0.5, < 4.0)
92
+ rubyzip (>= 1.2.2)
89
93
  unf (0.1.4)
90
94
  unf_ext
91
95
  unf_ext (0.0.7.7)
@@ -99,8 +103,8 @@ PLATFORMS
99
103
  ruby
100
104
 
101
105
  DEPENDENCIES
102
- final_redirect_url!
103
- pry (>= 0.12.2)
106
+ pry (>= 0.13.1)
107
+ pry-byebug (= 3.9.0)
104
108
  rake (>= 10.0)
105
109
  rspec (>= 3.0)
106
110
  web_stat!
@@ -0,0 +1,18 @@
---
# Compose definition for the web_stat container.
version: "3.8"

networks:
  app-tier:
    driver: bridge

services:
  web_stat:
    container_name: web_stat
    build:
      context: .
      dockerfile: Dockerfile
    working_dir: /var/www/docker
    volumes:
      # Bind-mount the directory holding this file at the working directory.
      - "./:/var/www/docker:cached"
    networks:
      - app-tier
    # Keep stdin open with a TTY attached so the bash command stays interactive.
    stdin_open: true
    tty: true
    command: bash
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the given command line inside the web_stat service container, after
# sourcing the rvm and nvm profile scripts.

compose_exec=(docker-compose exec)

# When stdin is not a terminal, pass -T so no pseudo-TTY is requested.
if ! tty >/dev/null; then
  compose_exec+=(-T)
fi

"${compose_exec[@]}" web_stat /bin/bash -c ". /etc/profile.d/rvm.sh && . /root/.nvm/nvm.sh && $*"
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Restart the compose services.
#
# Usage:
#   start [rm] [<docker-compose up args>...]
#   start [rm] attach <container>
#
#   rm      also remove the service containers before bringing them up again
#   attach  start detached, then attach to the named container
set -e

remove_containers=false
if [ "$1" = "rm" ]; then
  remove_containers=true
  shift
fi

docker-compose stop

# `docker-compose rm` only removes stopped containers, so it has to run
# after the stop above to have any effect on a running stack.
if [ "$remove_containers" = true ]; then
  docker-compose rm -f
fi

if [ "$1" = "attach" ]; then
  if [ -z "$2" ]; then
    echo "usage: $0 [rm] attach <container>" >&2
    exit 1
  fi
  docker-compose up -d
  docker attach "$2"
else
  # Quote "$@" so arguments containing spaces reach docker-compose intact.
  docker-compose up "$@"
fi
@@ -0,0 +1,28 @@
# Resolves URLs through a headless Chrome session driven by selenium-webdriver.
class WebDriverHelper
  class << self
    # Get last url: open +url+ in headless Chrome and report the address the
    # browser ends up on.
    #
    # Selenium logging is redirected to /tmp/selenium.log at :info level on
    # every call.
    #
    # @param [String, #to_s] url address to open (converted with #to_s)
    # @param [Integer] delay seconds to wait after loading before reading the
    #   current URL; any non-Integer value (including nil) means no wait
    # @return [String] the browser's current URL after loading
    def get_last_url(url, delay=nil)
      Selenium::WebDriver.logger.output = File.join("/tmp", "selenium.log")
      Selenium::WebDriver.logger.level = :info
      options = Selenium::WebDriver::Chrome::Options.new(args: [
        'headless',
        'no-sandbox',
        'disable-gpu',
        'start-maximized',
        'window-size=1920,1080'
      ])
      driver = Selenium::WebDriver.for(:chrome, options: options)
      begin
        driver.manage.timeouts.implicit_wait = 10
        driver.get(url.to_s)
        sleep delay if delay.is_a?(Integer)
        driver.current_url
      ensure
        # Always shut the browser down, even when navigation raises, so no
        # Chrome/chromedriver process is left behind.
        driver.quit
      end
    end
  end
end
@@ -1,14 +1,18 @@
1
1
  require "bundler"
2
2
 
3
+ require 'cld'
3
4
  require 'uri'
4
5
  require 'digest'
6
+ require 'logger'
5
7
  require 'sanitize'
6
8
  require 'nokogiri'
7
9
  require 'open-uri'
10
+ require 'net/http'
8
11
  require 'ruby-readability'
9
- require 'final_redirect_url'
10
- require 'cld'
12
+ require 'selenium-webdriver'
11
13
 
14
+ require "helpers/web_drive_helper"
15
+ require "web_stat/final_redirect_url"
12
16
  require "web_stat/categorize"
13
17
  require "web_stat/configure"
14
18
  require "web_stat/errors"
@@ -19,7 +19,6 @@ module WebStat
19
19
  title.strip
20
20
  end
21
21
  end
22
-
23
22
  # Get name of domain
24
23
  def site_name
25
24
  begin
@@ -33,7 +32,6 @@ module WebStat
33
32
  site_name.strip
34
33
  end
35
34
  end
36
- []
37
35
  # Get main section
38
36
  def content
39
37
  Sanitize.clean(Readability::Document.new(@nokogiri.at('body')).content)
@@ -0,0 +1,50 @@
# Adapted from https://github.com/indyarocks/final_redirect_url
#
# Customization: once a URL answers with 200 OK, it is opened through
# WebDriverHelper.get_last_url and the address the browser lands on is used
# as the final URL.
module FinalRedirectUrl

  # Follow the redirects of +url+ and return the final address.
  #
  # @param [String] url an http/https URL
  # @param [Hash] options :depth - maximum number of redirects to follow
  #   (defaults to 10 when missing or not positive)
  # @return [String] the final URL, or '' when +url+ is not an http(s) URL
  #   or the lookup fails
  def self.final_redirect_url(url, options={})
    return '' unless is_valid_url?(url)

    begin
      depth = options[:depth].to_i
      depth = 10 unless depth > 0
      url_string_from_uri(get_final_redirect_url(url, depth))
    rescue StandardError
      # Best effort: any lookup failure yields an empty result. Only
      # StandardError is rescued so Interrupt/SystemExit still propagate.
      ''
    end
  end

  # NOTE: the helpers below are internal. A bare `private` has no effect on
  # `def self.` methods, so they have always been callable from outside; they
  # are left public here to stay compatible with any such caller.

  # @param [#to_s] url
  # @return [Boolean] true when +url+ contains an http or https URL
  def self.is_valid_url?(url)
    url.to_s.match? URI::regexp(['http', 'https'])
  end

  # Request +url+ and follow Location headers, at most +limit+ times.
  #
  # @param [String] url
  # @param [Integer] limit remaining number of requests allowed
  # @return [URI] the last URI reached
  # @raise [URI::InvalidURIError] when a non-200 response has no Location
  def self.get_final_redirect_url(url, limit = 10)
    uri = URI.parse(url)
    # Return a URI (not the raw String) so callers always get the same type.
    return uri if limit <= 0

    response = ::Net::HTTP.get_response(uri)
    if response.is_a?(Net::HTTPOK)
      return URI.parse(WebDriverHelper.get_last_url(uri.to_s))
    end

    redirect_location = response['location']
    if redirect_location.nil?
      raise URI::InvalidURIError,
            "no Location header in #{response.code} response from #{url}"
    end

    # Resolve relative and scheme-relative locations against the current
    # URI; this keeps the port and handles paths without a leading slash.
    if URI.parse(redirect_location).relative?
      redirect_location = URI.join(uri.to_s, redirect_location).to_s
    end
    warn "redirected to #{redirect_location}"
    get_final_redirect_url(redirect_location, limit - 1)
  end

  # Build the URL string from scheme, host, port, request URI and fragment
  # (user info is not included).
  #
  # @param [URI] uri
  # @return [String]
  def self.url_string_from_uri(uri)
    authority = uri.host.to_s
    # Keep an explicit port; omit it when it is the scheme's default.
    authority += ":#{uri.port}" if uri.port && uri.port != uri.default_port
    url_str = "#{uri.scheme}://#{authority}#{uri.request_uri}"
    url_str += "##{uri.fragment}" if uri.fragment
    url_str
  end
end
@@ -4,7 +4,7 @@ module WebStat
4
4
  attr_accessor :natto_mecab, :article
5
5
 
6
6
  def initialize(article, userdic: nil)
7
- @natto_mecab = Natto::MeCab.new(userdic: userdic)
7
+ @natto_mecab = Natto::MeCab.new(userdic: userdic)
8
8
  @article = article
9
9
  end
10
10
 
@@ -1,3 +1,3 @@
1
1
  module WebStat
2
- VERSION = "0.2.11"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -1,12 +1,18 @@
1
1
  require 'rspec/expectations'
2
2
  require "bundler/setup"
3
3
  require 'pry'
4
+ require 'pry-byebug'
4
5
  require "web_stat"
5
6
 
6
7
  require 'webmock'
7
8
  include WebMock::API
8
9
  WebMock.enable!
9
10
 
11
+ WebMock.disable_net_connect!({
12
+ allow_localhost: true,
13
+ allow: 'chromedriver.storage.googleapis.com'
14
+ })
15
+
10
16
  RSpec.configure do |config|
11
17
  # Enable flags like --only-failures and --next-failure
12
18
  config.example_status_persistence_file_path = ".rspec_status"
@@ -27,9 +27,11 @@ Gem::Specification.new do |spec|
27
27
  spec.add_runtime_dependency "natto", ">= 1.1.2"
28
28
  spec.add_runtime_dependency "sanitize", ">= 5.0.0"
29
29
  spec.add_runtime_dependency "cld", ">= 0.8.0"
30
+ spec.add_runtime_dependency "selenium-webdriver", "= 3.142.7"
30
31
 
31
32
  spec.add_development_dependency "rake", ">= 10.0"
32
33
  spec.add_development_dependency "rspec", ">= 3.0"
33
- spec.add_development_dependency "pry", ">= 0.12.2"
34
+ spec.add_development_dependency "pry", ">= 0.13.1"
34
35
  spec.add_development_dependency "webmock", ">= 3.6.0"
36
+ spec.add_development_dependency "pry-byebug", "3.9.0"
35
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_stat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.11
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yusuke abe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-09 00:00:00.000000000 Z
11
+ date: 2020-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: 0.8.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: selenium-webdriver
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '='
116
+ - !ruby/object:Gem::Version
117
+ version: 3.142.7
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '='
123
+ - !ruby/object:Gem::Version
124
+ version: 3.142.7
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rake
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +156,14 @@ dependencies:
142
156
  requirements:
143
157
  - - ">="
144
158
  - !ruby/object:Gem::Version
145
- version: 0.12.2
159
+ version: 0.13.1
146
160
  type: :development
147
161
  prerelease: false
148
162
  version_requirements: !ruby/object:Gem::Requirement
149
163
  requirements:
150
164
  - - ">="
151
165
  - !ruby/object:Gem::Version
152
- version: 0.12.2
166
+ version: 0.13.1
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: webmock
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - ">="
165
179
  - !ruby/object:Gem::Version
166
180
  version: 3.6.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: pry-byebug
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - '='
186
+ - !ruby/object:Gem::Version
187
+ version: 3.9.0
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - '='
193
+ - !ruby/object:Gem::Version
194
+ version: 3.9.0
167
195
  description: Fetch the web pages and stat.
168
196
  email:
169
197
  - yube@newsdict.jp
@@ -177,12 +205,17 @@ files:
177
205
  - ".ruby-version"
178
206
  - ".travis.yml"
179
207
  - CODE_OF_CONDUCT.md
208
+ - Dockerfile
180
209
  - Gemfile
181
210
  - Gemfile.lock
182
211
  - LICENSE.txt
183
212
  - README.md
184
213
  - Rakefile
185
214
  - bin/fetch_as_html
215
+ - docker-compose.yml
216
+ - docker/exec
217
+ - docker/start
218
+ - lib/helpers/web_drive_helper.rb
186
219
  - lib/web_stat.rb
187
220
  - lib/web_stat/categorize.rb
188
221
  - lib/web_stat/config/web_stat.yml
@@ -191,6 +224,7 @@ files:
191
224
  - lib/web_stat/fetch.rb
192
225
  - lib/web_stat/fetch/fetch_as_html.rb
193
226
  - lib/web_stat/fetch/fetch_as_web.rb
227
+ - lib/web_stat/final_redirect_url.rb
194
228
  - lib/web_stat/tag.rb
195
229
  - lib/web_stat/tasks/install.rake
196
230
  - lib/web_stat/version.rb
@@ -224,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
258
  - !ruby/object:Gem::Version
225
259
  version: '0'
226
260
  requirements: []
227
- rubygems_version: 3.0.3
261
+ rubygems_version: 3.1.2
228
262
  signing_key:
229
263
  specification_version: 4
230
264
  summary: Get the status of the web pages.