ruby-link-checker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/CONTRIBUTING.md +125 -0
- data/Dangerfile +4 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +162 -0
- data/LICENSE.md +22 -0
- data/README.md +279 -0
- data/RELEASING.md +60 -0
- data/Rakefile +18 -0
- data/benchmarks/check.rb +31 -0
- data/benchmarks/data/opensearch.org/links.txt +4270 -0
- data/benchmarks/data/opensearch.org/small.txt +100 -0
- data/coverage/assets/0.12.3/DataTables-1.10.20/images/sort_asc.png +0 -0
- data/coverage/assets/0.12.3/DataTables-1.10.20/images/sort_asc_disabled.png +0 -0
- data/coverage/assets/0.12.3/DataTables-1.10.20/images/sort_both.png +0 -0
- data/coverage/assets/0.12.3/DataTables-1.10.20/images/sort_desc.png +0 -0
- data/coverage/assets/0.12.3/DataTables-1.10.20/images/sort_desc_disabled.png +0 -0
- data/coverage/assets/0.12.3/application.css +1 -0
- data/coverage/assets/0.12.3/application.js +7 -0
- data/coverage/assets/0.12.3/colorbox/border.png +0 -0
- data/coverage/assets/0.12.3/colorbox/controls.png +0 -0
- data/coverage/assets/0.12.3/colorbox/loading.gif +0 -0
- data/coverage/assets/0.12.3/colorbox/loading_background.png +0 -0
- data/coverage/assets/0.12.3/favicon_green.png +0 -0
- data/coverage/assets/0.12.3/favicon_red.png +0 -0
- data/coverage/assets/0.12.3/favicon_yellow.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/coverage/assets/0.12.3/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/coverage/assets/0.12.3/images/ui-icons_222222_256x240.png +0 -0
- data/coverage/assets/0.12.3/images/ui-icons_2e83ff_256x240.png +0 -0
- data/coverage/assets/0.12.3/images/ui-icons_454545_256x240.png +0 -0
- data/coverage/assets/0.12.3/images/ui-icons_888888_256x240.png +0 -0
- data/coverage/assets/0.12.3/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/coverage/assets/0.12.3/loading.gif +0 -0
- data/coverage/assets/0.12.3/magnify.png +0 -0
- data/coverage/index.html +14555 -0
- data/lib/ruby-link-checker/callbacks.rb +50 -0
- data/lib/ruby-link-checker/checker.rb +42 -0
- data/lib/ruby-link-checker/config.rb +41 -0
- data/lib/ruby-link-checker/errors/base_error.rb +8 -0
- data/lib/ruby-link-checker/errors/redirect_loop_error.rb +18 -0
- data/lib/ruby-link-checker/errors.rb +2 -0
- data/lib/ruby-link-checker/logger.rb +14 -0
- data/lib/ruby-link-checker/net/http/checker.rb +31 -0
- data/lib/ruby-link-checker/net/http/config.rb +35 -0
- data/lib/ruby-link-checker/net/http/result.rb +53 -0
- data/lib/ruby-link-checker/net/http.rb +3 -0
- data/lib/ruby-link-checker/result.rb +74 -0
- data/lib/ruby-link-checker/task.rb +20 -0
- data/lib/ruby-link-checker/tasks.rb +99 -0
- data/lib/ruby-link-checker/typhoeus/hydra/checker.rb +52 -0
- data/lib/ruby-link-checker/typhoeus/hydra/config.rb +35 -0
- data/lib/ruby-link-checker/typhoeus/hydra/result.rb +53 -0
- data/lib/ruby-link-checker/typhoeus/hydra.rb +3 -0
- data/lib/ruby-link-checker/version.rb +5 -0
- data/lib/ruby-link-checker.rb +13 -0
- data/lib/ruby_link_checker.rb +3 -0
- data/pkg/ruby-link-checker-0.1.0.gem +0 -0
- data/ruby-link-checker.gemspec +21 -0
- data/spec/fixtures/200.yml +65 -0
- data/spec/fixtures/301_200.yml +102 -0
- data/spec/fixtures/301_301.yml +114 -0
- data/spec/fixtures/301_400_301_200.yml +185 -0
- data/spec/fixtures/301_403.yml +78 -0
- data/spec/fixtures/404.yml +57 -0
- data/spec/fixtures/404_200.yml +89 -0
- data/spec/fixtures/404_404.yml +81 -0
- data/spec/fixtures/429_200.yml +135 -0
- data/spec/fixtures/429_429_200.yml +201 -0
- data/spec/ruby-link-checker/callbacks_spec.rb +43 -0
- data/spec/ruby-link-checker/checker_spec.rb +10 -0
- data/spec/ruby-link-checker/config_spec.rb +39 -0
- data/spec/ruby-link-checker/net/http/checker_spec.rb +61 -0
- data/spec/ruby-link-checker/typhoeus/hydra/checker_spec.rb +67 -0
- data/spec/ruby-link-checker/version_spec.rb +9 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/config.rb +8 -0
- data/spec/support/link_checker.rb +322 -0
- data/spec/support/vcr.rb +13 -0
- data/spec/support/with_result.rb +15 -0
- data/spec/support/with_url.rb +14 -0
- metadata +144 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkChecker
  # Mixin providing a minimal event system.
  #
  # Handlers are registered with {#on}. Events are fired by calling a method
  # named after the event with a trailing bang, e.g. +success!(result)+ fires
  # the +success+ event. The host object must respond to +logger+, which is
  # used to report errors raised by handlers.
  module Callbacks
    # @return [Hash{String => Array<Proc>}] handlers keyed by event name; a
    #   missing key is created with an empty list on first access
    def callbacks
      @callbacks ||= Hash.new { |h, k| h[k] = [] }
    end

    # @return [Array<Proc>] catch-all handlers, invoked for every event with
    #   the event name as their first argument
    def delegates
      @delegates ||= []
    end

    # Registers +block+ for each of the given events, or as a catch-all
    # delegate when no (truthy) event name is given.
    #
    # @param events [Array<#to_s>] event names
    # @yield handler; receives the event data (delegates also receive the
    #   event name first)
    def on(*events, &block)
      if events.any?
        events.each do |event|
          callbacks[event.to_s] << block
        end
      else
        delegates << block
      end
    end

    # Turns any call to +name!+ into firing the event +name+.
    #
    # @return [Boolean] the result of the private +callback+ dispatcher
    # @raise [NoMethodError] for unknown methods not ending in +!+
    def method_missing(name, *args, &block)
      event = name.to_s
      return super unless event.end_with?('!')

      callback(event.delete_suffix('!').to_sym, *args)
    end

    # Keeps +respond_to?+ / +method+ consistent with {#method_missing}: every
    # bang method is answered as an event trigger.
    def respond_to_missing?(name, include_private = false)
      name.to_s.end_with?('!') || super
    end

    private

    # Dispatches +event+ to the delegates first, then to the handlers
    # registered for that event name.
    #
    # @param event [Symbol, #to_s] event name
    # @param data [Array] payload forwarded to the handlers
    # @return [Boolean] true when dispatch completed, false when a handler
    #   raised (the error is logged and dispatch stops at that handler)
    def callback(event, *data)
      delegates.each do |delegate|
        delegate.call(event, *data)
      end

      # The registry's default block always yields an array, so there is no
      # "no handlers" case to special-case here.
      callbacks[event.to_s].each do |handler|
        handler.call(*data)
      end
      true
    rescue StandardError => e
      logger.error("#{self}##{__method__}") { e }
      false
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkChecker
  # Base class for link checkers. Holds the per-instance settings, collects
  # results and fans events out to registered callbacks. The transport is
  # supplied by a sibling +Task+ class in the same namespace as the concrete
  # checker (see #task_klass).
  class Checker
    include LinkChecker::Config
    include LinkChecker::Callbacks

    # @return [Hash{Symbol => Array}, nil] results bucketed under :error,
    #   :failure and :success, or nil when collection was disabled
    attr_reader :results

    # Plain accessors for every setting except +retries+: its writer is the
    # validating one inherited from LinkChecker::Config. Redefining it here
    # with attr_accessor would shadow that validation.
    attr_accessor(*(Config::ATTRIBUTES - %i[retries]))
    attr_reader :retries

    # @param options [Hash] per-instance overrides for any Config attribute;
    #   +results: false+ (or nil) disables result collection
    # @raise [ArgumentError] when no HTTP methods are configured or +retries+
    #   is not a non-negative Integer
    def initialize(options = {})
      LinkChecker::Config::ATTRIBUTES.each do |key|
        send("#{key}=", options[key] || LinkChecker.config.send(key))
      end
      # nil must be rejected too: Tasks cannot make progress without a list.
      raise ArgumentError, 'Missing methods.' if methods.nil? || methods.none?

      @logger ||= options[:logger] || LinkChecker::Config.logger || LinkChecker::Logger.default
      @results = { error: [], failure: [], success: [] } unless options.key?(:results) && !options[:results]
    end

    # Resolves the Task class living next to the concrete checker class,
    # e.g. Foo::Bar::Checker uses Foo::Bar::Task.
    #
    # @return [Class]
    def task_klass
      @task_klass ||= begin
        module_name = self.class.name.split('::')[...-1].join('::')
        Object.const_get("#{module_name}::Task")
      end
    end

    # Checks +uri+ with the configured methods. Every event emitted by the
    # underlying Tasks object is re-emitted on this checker; :error, :failure
    # and :success payloads are also appended to #results when enabled.
    #
    # @param uri [String, URI]
    # @param options [Hash] opaque options passed through to tasks and results
    def check(uri, options = {})
      tasks = Tasks.new(
        self,
        task_klass,
        uri,
        methods,
        options
      )
      tasks.on do |event, *args|
        results[event] << args.first if @results && %i[error failure success].include?(event)
        callback event, *args
      end
      tasks.execute!
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkChecker
  # Library-wide default settings. The module extends itself, so the
  # accessors below work directly on +LinkChecker::Config+.
  module Config
    extend self

    # Names of all supported settings.
    ATTRIBUTES = %i[methods user_agent logger retries].freeze

    attr_accessor(*ATTRIBUTES)

    # Sets the number of retries.
    #
    # @param value [Integer] must be zero or positive
    # @raise [ArgumentError] for anything else
    def retries=(value)
      acceptable = value.is_a?(Integer) && value >= 0
      raise ArgumentError, "Invalid number of retries: #{value}" unless acceptable

      @retries = value
    end

    # Restores every setting to its built-in default.
    def reset
      self.retries = 0
      self.logger = nil
      self.user_agent = "Ruby Link Checker/#{LinkChecker::VERSION}"
      self.methods = %w[HEAD GET]
    end
  end

  # Yields the configuration module to the block and returns the block's
  # value; without a block, returns the configuration module itself.
  def self.configure
    return Config unless block_given?

    yield(Config)
  end

  # @return [Module] the configuration module
  def self.config
    Config
  end
end
|
|
40
|
+
|
|
41
|
+
# Populate the library-wide defaults as soon as this file is loaded.
LinkChecker::Config.reset
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkChecker
  module Errors
    # Raised when following redirects arrives at a URL that was already
    # visited. Carries the whole chain of URLs, offending one last.
    class RedirectLoopError < BaseError
      # @return [Array] the redirect chain in visit order
      attr_accessor :urls

      # @param urls [Array] the redirect chain; must respond to +join+
      def initialize(urls)
        @urls = urls
        chain = urls.join(' -> ')
        super("Redirect loop: #{chain}.")
      end

      # @return [Object] the last URL of the chain
      def url
        urls.last
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module LinkChecker
  module Net
    module HTTP
      # Performs one request with Ruby's Net::HTTP and reports the outcome
      # through the +result+ event.
      class Task < ::LinkChecker::Task
        # Opens a connection to the task's URI, sends a single request with
        # the task's HTTP method and emits a Result for the response.
        # Exceptions are not rescued here and propagate to the caller.
        def run!
          # Timeouts are passed to Net::HTTP.start because the connection is
          # already open by the time the block runs, so assigning
          # open_timeout inside the block has no effect. nil entries are
          # dropped so Net::HTTP keeps its own defaults for them.
          connection_options = {
            use_ssl: uri.scheme == 'https', # TLS only for https:// links
            read_timeout: checker.read_timeout,
            open_timeout: checker.open_timeout
          }.compact

          ::Net::HTTP.start(uri.host, uri.port, connection_options) do |http|
            # A HEAD response never carries a body; telling Net::HTTP to
            # expect one would make it wait for bytes that never arrive.
            expects_body = !method.to_s.casecmp?('HEAD')
            request = ::Net::HTTPGenericRequest.new(method, false, expects_body, uri)
            request['User-Agent'] = checker.user_agent
            response = http.request(request)
            logger.debug "#{method} #{uri}: #{response.code}"
            result! Result.new(uri, method, original_uri, request, response, options)
          end
        end
      end

      # Link checker that runs each request with Net::HTTP.
      class Checker < LinkChecker::Checker
        extend ::LinkChecker::Net::HTTP::Config
        attr_accessor(*LinkChecker::Net::HTTP::Config::ATTRIBUTES)

        # @param options [Hash] overrides for the Net::HTTP specific settings
        #   (read_timeout, open_timeout) plus anything the base Checker takes
        def initialize(options = {})
          LinkChecker::Net::HTTP::Config::ATTRIBUTES.each do |key|
            send("#{key}=", options[key] || LinkChecker::Net::HTTP::Config.send(key))
          end
          super options
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkChecker
  module Net
    module HTTP
      # Default settings specific to the Net::HTTP based checker. The module
      # extends itself, so the accessors work directly on +Config+.
      module Config
        extend self

        # Names of all supported settings.
        ATTRIBUTES = %i[read_timeout open_timeout].freeze

        attr_accessor(*ATTRIBUTES)

        # Clears both timeouts (sets them to nil).
        def reset
          self.read_timeout = nil
          self.open_timeout = nil
        end
      end

      # Yields the configuration module to the block and returns the block's
      # value; without a block, returns the configuration module itself.
      def self.configure
        return Config unless block_given?

        yield(Config)
      end

      # @return [Module] the configuration module
      def self.config
        Config
      end
    end
  end
end
|
|
34
|
+
|
|
35
|
+
# Initialise the Net::HTTP settings (both timeouts unset) at load time.
LinkChecker::Net::HTTP::Config.reset
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module LinkChecker
  module Net
    module HTTP
      # Outcome of one Net::HTTP request, classified by response status code.
      class Result < ::LinkChecker::Result
        attr_accessor :request, :response

        # @param uri [Object] URI that was actually requested
        # @param method [String] HTTP method used
        # @param original_uri [Object] URI the check started from
        # @param request [Object] the request that was sent
        # @param response [Object, nil] the response received
        # @param options [Hash] opaque options carried along with the result
        def initialize(uri, method, original_uri, request, response, options)
          super(uri, method, original_uri, options)
          @request = request
          @response = response
        end

        # A result backed by a response is never an error.
        def error?
          false
        end

        # @return [Boolean] true when the response is neither 2xx nor one of
        #   the recognised redirects
        def failure?
          !(success? || redirect?)
        end

        # @return [Integer] numeric status code, memoised; -1 when it cannot
        #   be read from the response
        def code
          return @code if @code

          @code = begin
            response.code.to_i
          rescue StandardError
            -1
          end
        end

        # @return [Object] the request object itself
        def request_headers
          request
        end

        # @return [String, nil] value of the Location header, nil without a
        #   response
        def redirect_to
          response ? response['Location'] : nil
        end

        # @return [Boolean] true for 301, 302, 303, 307 and 308
        def redirect?
          return false unless response

          case code
          when 301, 302, 303, 307, 308 then true
          else false
          end
        end

        # @return [Boolean] true for any 2xx status
        def success?
          response ? code.between?(200, 299) : false
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module LinkChecker
  # Neutral base result: every predicate is false and every value empty.
  # Transport-specific subclasses override what they can answer.
  class Result
    attr_accessor :uri, :result_uri, :method, :options, :checker

    # @param current_uri [Object] URI that was actually requested; exposed
    #   as #result_uri
    # @param method [String] HTTP method used
    # @param original_uri [Object] URI the check started from; exposed as #uri
    # @param options [Hash] opaque options carried along with the result
    def initialize(current_uri, method, original_uri, options = {})
      @method = method
      @options = options
      @result_uri = current_uri
      @uri = original_uri
    end

    def success?
      false
    end

    def failure?
      false
    end

    def error?
      false
    end

    def redirect?
      false
    end

    def redirect_to
      nil
    end

    def request_headers
      {}
    end

    def code
      nil
    end

    def error
      nil
    end

    # Renders "<method> <uri>[ (<result_uri>)]: <STATUS> (<code>)", where the
    # parenthesised result URI appears only when it differs from #uri and
    # STATUS is OK, FAIL, REDIRECT or ERROR (checked in that order).
    def to_s
      label =
        case
        when success? then 'OK'
        when failure? then 'FAIL'
        when redirect? then 'REDIRECT'
        else 'ERROR'
        end
      target = result_uri == uri ? '' : " (#{result_uri})"
      "#{method} #{uri}#{target}: #{label} (#{code})"
    end
  end

  # Result produced when a check raised instead of yielding a response.
  class ResultError < Result
    attr_accessor :error

    # @param uri [Object] URI that was being requested
    # @param method [String] HTTP method used
    # @param original_uri [Object] URI the check started from
    # @param error [Exception] the error that occurred
    # @param options [Hash] opaque options carried along with the result
    def initialize(uri, method, original_uri, error, options = {})
      super(uri, method, original_uri, options)
      @error = error
    end

    def error?
      true
    end

    # @return [String] class name of the wrapped error
    def code
      error.class.name
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
module LinkChecker
  # Abstract unit of work: one request for one URI with one HTTP method.
  # Concrete transports subclass it and implement #run!.
  class Task
    include LinkChecker::Callbacks

    attr_reader :uri, :original_uri, :method, :logger, :options, :checker

    # @param checker [Object] owning checker; its +logger+ is reused here
    # @param uri [Object] URI to request
    # @param method [String] HTTP method to use
    # @param original_uri [Object, nil] URI the check started from; falls
    #   back to +uri+ when nil
    # @param options [Hash] opaque options carried along
    def initialize(checker, uri, method, original_uri, options = {})
      @checker = checker
      @logger = checker.logger
      @method = method
      @options = options
      @uri = uri
      @original_uri = original_uri || uri
    end

    # Performs the request. Must be provided by subclasses.
    #
    # @raise [NotImplementedError] always, in this base class
    def run!
      raise NotImplementedError
    end
  end
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
module LinkChecker
  # Drives the full check of a single URI: tries the configured HTTP methods
  # in order, follows redirects, retries, and finally emits one of the
  # +success+, +failure+ or +error+ events. Intermediate +result+,
  # +redirect+ and +retry+ events are emitted along the way.
  class Tasks
    include LinkChecker::Callbacks

    attr_reader :result, :uri, :original_uri

    # @param checker [Object] owning checker; supplies +retries+ and +logger+
    # @param task_klass [Class] strict subclass of LinkChecker::Task used to
    #   run each request
    # @param uri [String, URI] URI to check
    # @param methods [Array<String>] HTTP methods to try, in order
    # @param options [Hash] opaque options passed to tasks and results
    # @raise [ArgumentError] when +task_klass+ is not a LinkChecker::Task
    #   subclass
    def initialize(checker, task_klass, uri, methods, options = {})
      @uri = uri
      @retries_left = checker.retries
      @methods_left = methods.dup
      @methods = methods.dup
      @task_klass = task_klass
      @checker = checker
      @logger = checker.logger
      @redirects = [uri]
      @options = options
      raise ArgumentError, :tasks_klass unless @task_klass && @task_klass < ::LinkChecker::Task
    end

    # Builds a task of the configured class.
    def new_task(uri, method, original_uri, options)
      task_klass.new(checker, uri, method, original_uri, options)
    end

    # Runs the next step of the check: a retry of the current method while
    # retries remain, otherwise the next untried method, otherwise the final
    # +error+ / +failure+ event for the last result. Any exception is turned
    # into a ResultError and handled like a regular result.
    def execute!
      if retry?
        @retries_left -= 1
        retry! @result
        # A retry starts again from the first URL, so the redirect trail must
        # start over too; otherwise revisiting the same hops would be
        # reported as a redirect loop.
        @redirects = [uri]
        _queue_task(uri, method, original_uri || uri, options)
      elsif methods_left.any?
        # Consume the method first so a failure below still makes progress.
        @method = methods_left.shift
        # Normalise before seeding the trail: redirect targets are URI
        # objects and would never compare equal to a String seed.
        @uri = URI(@uri) unless @uri.is_a?(URI)
        @redirects = [uri]
        _queue_task(uri, method, original_uri || uri, options)
      elsif @result && result.error?
        error! @result
      else
        failure! @result
      end
    rescue StandardError => e
      logger.error("#{self}##{__method__}") { e }
      _handle_result ResultError.new(uri, method, original_uri || uri, e, options)
    end

    private

    attr_reader :logger, :methods_left, :options, :task_klass, :redirects, :checker, :method

    def retries
      checker.retries
    end

    # True until the first HTTP method has been picked.
    def first_time?
      method.nil?
    end

    def retries_left
      @retries_left ||= retries
    end

    def retry?
      !first_time? && retries_left > 0
    end

    # Creates a task, routes its +result+ event to #_handle_result and runs
    # it. Exceptions raised while doing so become a ResultError.
    def _queue_task(uri, method, original_uri, options = {})
      task = new_task(uri, method, original_uri, options)
      task.on :result do |result|
        _handle_result result
      end
      task.run!
    rescue StandardError => e
      logger.error("#{self}##{__method__}") { e }
      _handle_result ResultError.new(uri, method, original_uri, e, options)
    end

    # Records +result+, emits +result+, then either follows a redirect,
    # emits +success+, or hands over to #execute! for the next attempt.
    def _handle_result(result)
      @result = result
      logger.info "#{' ' * (redirects.count - 1)}#{result}"
      result! result
      if result.redirect?
        redirect! result
        # A relative Location is relative to the URL that answered with it,
        # i.e. the current hop, not the URL the check started from.
        redirected_to_uri = URI.join(result.result_uri, result.redirect_to)
        if redirects.include?(redirected_to_uri)
          raise LinkChecker::Errors::RedirectLoopError,
                redirects.push(redirected_to_uri)
        end

        redirects << redirected_to_uri
        _queue_task(redirected_to_uri, result.method, uri, options)
      elsif result.success?
        success! result
      else
        execute!
      end
    rescue StandardError => e
      logger.error("#{self}##{__method__}") { e }
      # ResultError takes (current URI, method, original URI): result_uri is
      # the hop that failed, uri is where the check started.
      _handle_result ResultError.new(result.result_uri, result.method, result.uri, e, options)
    end
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
module LinkChecker
  module Typhoeus
    module Hydra
      # Builds one Typhoeus request and queues it on the checker's hydra.
      # The outcome is reported through the +result+ event when the request
      # completes.
      class Task < ::LinkChecker::Task
        # Queues the request; nothing is sent until the checker's hydra runs.
        def run!
          request_options = {
            method: method,
            followlocation: false,
            timeout: checker.timeout,
            connecttimeout: checker.connecttimeout,
            headers: { 'User-Agent' => checker.user_agent }
          }
          request = ::Typhoeus::Request.new(uri, request_options)

          request.on_complete do |response|
            timed_out = response.timed_out?
            # Timeouts are logged with the return code, everything else with
            # the response code.
            status = timed_out ? response.return_code : response.code
            logger.debug "#{method} #{uri}: #{status}"

            outcome =
              if timed_out
                ResultError.new(uri, method, original_uri, Timeout::Error.new, options)
              else
                Result.new(uri, method, original_uri, request, response, options)
              end
            result! outcome
          end

          checker._queue(request)
        end
      end

      # Link checker that batches requests on a Typhoeus::Hydra.
      class Checker < LinkChecker::Checker
        extend ::LinkChecker::Typhoeus::Hydra::Config
        attr_accessor(*LinkChecker::Typhoeus::Hydra::Config::ATTRIBUTES)

        # @param options [Hash] overrides for timeout / connecttimeout, an
        #   optional +:hydra+ options hash (defaults to a concurrency of 10),
        #   plus anything the base Checker takes
        def initialize(options = {})
          defaults = LinkChecker::Typhoeus::Hydra::Config
          defaults::ATTRIBUTES.each do |name|
            value = options[name] || defaults.send(name)
            send("#{name}=", value)
          end
          hydra_options = options[:hydra] || { max_concurrency: 10 }
          @hydra = ::Typhoeus::Hydra.new(hydra_options)
          super(options)
        end

        # Runs all queued requests.
        def run
          @hydra.run
        end

        # Adds a request to the hydra queue (called by Task#run!).
        def _queue(request)
          @hydra.queue(request)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LinkChecker
  module Typhoeus
    module Hydra
      # Default settings specific to the Typhoeus based checker. The module
      # extends itself, so the accessors work directly on +Config+.
      module Config
        extend self

        # Names of all supported settings.
        ATTRIBUTES = %i[timeout connecttimeout].freeze

        attr_accessor(*ATTRIBUTES)

        # Restores the built-in defaults: timeout 60, connecttimeout 10.
        def reset
          self.timeout = 60
          self.connecttimeout = 10
        end
      end

      # Yields the configuration module to the block and returns the block's
      # value; without a block, returns the configuration module itself.
      def self.configure
        return Config unless block_given?

        yield(Config)
      end

      # @return [Module] the configuration module
      def self.config
        Config
      end
    end
  end
end
|
|
34
|
+
|
|
35
|
+
# Initialise the Typhoeus settings (timeout 60, connecttimeout 10) at load time.
LinkChecker::Typhoeus::Hydra::Config.reset
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module LinkChecker
  module Typhoeus
    module Hydra
      # Outcome of one Typhoeus request, classified by response status code.
      class Result < ::LinkChecker::Result
        attr_accessor :request, :response

        # @param uri [Object] URI that was actually requested
        # @param method [String] HTTP method used
        # @param original_uri [Object] URI the check started from
        # @param request [Object] the request that was sent
        # @param response [Object, nil] the response received
        # @param options [Hash] opaque options carried along with the result
        def initialize(uri, method, original_uri, request, response, options)
          super(uri, method, original_uri, options)
          @request = request
          @response = response
        end

        # A result backed by a response is never an error.
        def error?
          false
        end

        # @return [Boolean] true when the result is not a success, not a
        #   redirect and not an error
        def failure?
          !(success? || redirect? || error?)
        end

        # @return [Integer] numeric status code, memoised; -1 when it cannot
        #   be read from the response
        def code
          return @code if @code

          @code = begin
            response.code.to_i
          rescue StandardError
            -1
          end
        end

        # @return [Object] the +:headers+ entry of the request options
        def request_headers
          request.options[:headers]
        end

        # @return [String, nil] value of the Location header, nil without a
        #   response
        def redirect_to
          response ? response.headers['Location'] : nil
        end

        # @return [Boolean] true for 301, 302, 303, 307 and 308
        def redirect?
          return false unless response

          case code
          when 301, 302, 303, 307, 308 then true
          else false
          end
        end

        # @return [Boolean] true for any 2xx status
        def success?
          response ? code.between?(200, 299) : false
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'ruby-link-checker/version'
|
|
4
|
+
require_relative 'ruby-link-checker/errors'
|
|
5
|
+
require_relative 'ruby-link-checker/config'
|
|
6
|
+
require_relative 'ruby-link-checker/callbacks'
|
|
7
|
+
require_relative 'ruby-link-checker/logger'
|
|
8
|
+
require_relative 'ruby-link-checker/task'
|
|
9
|
+
require_relative 'ruby-link-checker/tasks'
|
|
10
|
+
require_relative 'ruby-link-checker/checker'
|
|
11
|
+
require_relative 'ruby-link-checker/result'
|
|
12
|
+
require_relative 'ruby-link-checker/net/http'
|
|
13
|
+
require_relative 'ruby-link-checker/typhoeus/hydra'
|
|
Binary file
|