sla 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/sla +10 -2
- data/lib/sla.rb +9 -3
- data/lib/sla/checker.rb +43 -25
- data/lib/sla/command.rb +70 -0
- data/lib/sla/formatters/base.rb +34 -0
- data/lib/sla/formatters/simple.rb +26 -0
- data/lib/sla/formatters/tty.rb +43 -0
- data/lib/sla/formatters/verbose.rb +30 -0
- data/lib/sla/page.rb +83 -0
- data/lib/sla/version.rb +1 -1
- metadata +21 -12
- data/lib/sla/command_line.rb +0 -83
- data/lib/sla/docopt.txt +0 -43
- data/lib/sla/link.rb +0 -84
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 408ec43b2728bd5d2a1461b747b487506976e020894684d8270154a5e9b807a1
|
4
|
+
data.tar.gz: 8620e841cde6cc02ba87e2c35a32d278c197f65363e85fad6fd4adec3adcbf1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7b55730a4502b09df9c59ca8a3f0b49aa2986a2b6b548f8f26e8a8e5650075497acaef75f020e93354eb2f33d0538c6176ad3d3d070e11b0600e8ca6eddab34
|
7
|
+
data.tar.gz: 94eeaa6c32a22a8f894bf6c65b0b74084120a071897028d4888952ac7d7dd28d10dcfe289a5230bd053c06b2a307ee32ddcf96e50f8bfc900c94eaa03b60a4ca
|
data/bin/sla
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
2
|
require 'sla'
|
3
|
+
include Colsole
|
4
|
+
|
5
|
+
router = MisterBin::Runner.new handler: SLA::Command
|
4
6
|
|
5
7
|
begin
|
6
|
-
|
8
|
+
exit router.run ARGV
|
9
|
+
|
7
10
|
rescue SLA::BrokenLinks => e
|
8
11
|
puts "(exit code 1)"
|
9
12
|
exit 1
|
13
|
+
|
14
|
+
rescue Interrupt
|
15
|
+
puts "\nGoodbye"
|
16
|
+
exit 1
|
17
|
+
|
10
18
|
end
|
data/lib/sla.rb
CHANGED
@@ -2,14 +2,20 @@ require 'webcache'
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'colsole'
|
4
4
|
require 'singleton'
|
5
|
-
require '
|
5
|
+
require 'mister_bin'
|
6
6
|
require 'uri'
|
7
7
|
|
8
8
|
require 'sla/version'
|
9
9
|
require 'sla/exceptions'
|
10
|
-
require 'sla/
|
10
|
+
require 'sla/page'
|
11
11
|
require 'sla/checker'
|
12
|
-
|
12
|
+
|
13
|
+
require 'sla/formatters/base'
|
14
|
+
require 'sla/formatters/verbose'
|
15
|
+
require 'sla/formatters/simple'
|
16
|
+
require 'sla/formatters/tty'
|
17
|
+
|
18
|
+
require 'sla/command'
|
13
19
|
|
14
20
|
if ENV['BYEBUG']
|
15
21
|
# :nocov:
|
data/lib/sla/checker.rb
CHANGED
@@ -1,42 +1,60 @@
|
|
1
1
|
module SLA
|
2
2
|
class Checker
|
3
|
-
|
3
|
+
attr_reader :max_depth, :ignore, :check_external
|
4
4
|
|
5
|
-
|
5
|
+
def initialize(max_depth: 5, ignore: nil, check_external: false)
|
6
|
+
@max_depth = max_depth
|
7
|
+
@ignore = ignore
|
8
|
+
@check_external = check_external
|
9
|
+
end
|
6
10
|
|
7
|
-
def
|
8
|
-
@
|
9
|
-
@checked_links = []
|
10
|
-
@check_external = false
|
11
|
-
@ignore = []
|
11
|
+
def deeply_checked
|
12
|
+
@deeply_checked ||= {}
|
12
13
|
end
|
13
14
|
|
14
|
-
def
|
15
|
-
|
15
|
+
def checked
|
16
|
+
@checked ||= {}
|
16
17
|
end
|
17
18
|
|
18
|
-
def check(
|
19
|
-
|
19
|
+
def check(page, &block)
|
20
|
+
return if ignore? page
|
21
|
+
return if page.depth >= max_depth
|
22
|
+
return unless page.valid?
|
23
|
+
|
24
|
+
yield [:source, page] if block_given?
|
20
25
|
|
21
|
-
|
22
|
-
|
23
|
-
|
26
|
+
pages = page.pages
|
27
|
+
pages.reject! { |page| page.external? } if !check_external
|
28
|
+
|
29
|
+
pages.each do |page|
|
30
|
+
if checked.has_key? page.url or ignore? page
|
31
|
+
yield [:skip, page] if block_given?
|
32
|
+
else
|
33
|
+
checked[page.url] = true
|
34
|
+
yield [:check, page] if block_given?
|
35
|
+
end
|
24
36
|
end
|
25
37
|
|
26
|
-
|
27
|
-
|
38
|
+
pages.each do |page|
|
39
|
+
next if deeply_checked.has_key? page.url
|
40
|
+
deeply_checked[page.url] = true
|
41
|
+
next if page.external?
|
42
|
+
check page, &block
|
43
|
+
end
|
44
|
+
end
|
28
45
|
|
29
|
-
|
46
|
+
private
|
30
47
|
|
31
|
-
|
48
|
+
def ignore?(page)
|
49
|
+
return false unless ignore
|
32
50
|
|
33
|
-
|
34
|
-
|
35
|
-
return if depth >= max_depth
|
36
|
-
|
37
|
-
link.sublinks.each do |sublink|
|
38
|
-
check sublink, depth+1, &block
|
51
|
+
ignore.each do |text|
|
52
|
+
return true if page.url.include? text
|
39
53
|
end
|
54
|
+
|
55
|
+
false
|
40
56
|
end
|
57
|
+
|
41
58
|
end
|
42
|
-
end
|
59
|
+
end
|
60
|
+
|
data/lib/sla/command.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module SLA
|
2
|
+
class Command < MisterBin::Command
|
3
|
+
include Colsole
|
4
|
+
|
5
|
+
help "Site Link Analyzer"
|
6
|
+
|
7
|
+
version VERSION
|
8
|
+
|
9
|
+
usage "sla URL [options]"
|
10
|
+
usage "sla --help | -h | --version"
|
11
|
+
|
12
|
+
param "URL", "URL to scan"
|
13
|
+
|
14
|
+
option "--verbose, -v", "Show detailed output"
|
15
|
+
option "--simple, -s", "Show simple output of errors only"
|
16
|
+
option "--depth, -d DEPTH", "Set crawling depth [default: 5]"
|
17
|
+
option "--external, -x", "Also check external links"
|
18
|
+
option "--ignore, -i URLS", "Specify a list of space delimited patterns to skip\nURLs that contain any of the strings in this list will be skipped"
|
19
|
+
option "--cache, -c LIFE", "Set cache life [default: 1d]. LIFE can be in any of the following formats:\n 10 = 10 seconds\n 20s = 20 seconds\n 10m = 10 minutes\n 10h = 10 hours\n 10d = 10 days"
|
20
|
+
option "--cache-dir DIR", "Set the cache directory"
|
21
|
+
|
22
|
+
example "sla example.com"
|
23
|
+
example "sla example.com -c10m -d10"
|
24
|
+
example "sla example.com --cache-dir my_cache"
|
25
|
+
example "sla example.com --depth 10"
|
26
|
+
example "sla example.com --cache 30d --external"
|
27
|
+
example "sla example.com --simple > out.log"
|
28
|
+
example "sla example.com --ignore \"/admin /customer/login\""
|
29
|
+
|
30
|
+
environment "SLA_SLEEP", "Set number of seconds to sleep between calls (for debugging purposes)"
|
31
|
+
|
32
|
+
def run
|
33
|
+
WebCache.life = args['--cache']
|
34
|
+
WebCache.dir = args['--cache-dir'] if args['--cache-dir']
|
35
|
+
|
36
|
+
max_depth = args['--depth'].to_i
|
37
|
+
url = args['URL']
|
38
|
+
ignore = args['--ignore']
|
39
|
+
ignore = ignore.split " " if ignore
|
40
|
+
check_external = args['--external']
|
41
|
+
|
42
|
+
checker = Checker.new max_depth: max_depth,
|
43
|
+
ignore: ignore, check_external: check_external
|
44
|
+
|
45
|
+
formatter = if args['--verbose']
|
46
|
+
Formatters::Verbose.new
|
47
|
+
elsif args['--simple']
|
48
|
+
Formatters::Simple.new
|
49
|
+
else
|
50
|
+
Formatters::TTY.new
|
51
|
+
end
|
52
|
+
|
53
|
+
execute url, checker, formatter
|
54
|
+
end
|
55
|
+
|
56
|
+
def execute(url, checker, formatter)
|
57
|
+
page = Page.new url
|
58
|
+
checker.check page do |action, page|
|
59
|
+
success = formatter.handle action, page
|
60
|
+
sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
|
61
|
+
end
|
62
|
+
|
63
|
+
formatter.footer
|
64
|
+
|
65
|
+
unless formatter.success? or ENV['SLA_ALLOW_FAILS']
|
66
|
+
raise BrokenLinks
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class Base
|
4
|
+
include Colsole
|
5
|
+
|
6
|
+
attr_accessor :count, :failed
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@count = 0
|
10
|
+
@failed = 0
|
11
|
+
end
|
12
|
+
|
13
|
+
def success?
|
14
|
+
failed == 0
|
15
|
+
end
|
16
|
+
|
17
|
+
def handle(action, page)
|
18
|
+
# :nocov:
|
19
|
+
raise NotImplementedError
|
20
|
+
# :nocov:
|
21
|
+
end
|
22
|
+
|
23
|
+
def footer_prefix
|
24
|
+
"\n"
|
25
|
+
end
|
26
|
+
|
27
|
+
def footer
|
28
|
+
color = success? ? '!txtgrn!' : '!txtred!'
|
29
|
+
say "#{footer_prefix}#{color}Checked #{count} pages with #{failed} failures"
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class Simple < Base
|
4
|
+
attr_reader :last_source
|
5
|
+
|
6
|
+
def handle(action, page)
|
7
|
+
@last_source = page.url if action == :source
|
8
|
+
|
9
|
+
return unless action == :check
|
10
|
+
@count += 1
|
11
|
+
|
12
|
+
return if page.valid?
|
13
|
+
|
14
|
+
@failed += 1
|
15
|
+
|
16
|
+
if last_source
|
17
|
+
say "!txtpur!SOURCE #{last_source}"
|
18
|
+
@last_source = nil
|
19
|
+
end
|
20
|
+
|
21
|
+
say " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class TTY < Base
|
4
|
+
attr_reader :last_source
|
5
|
+
|
6
|
+
def handle(action, page)
|
7
|
+
screen_width = terminal_width
|
8
|
+
|
9
|
+
@last_source = page.url if action == :source
|
10
|
+
|
11
|
+
return unless action == :check
|
12
|
+
@count += 1
|
13
|
+
|
14
|
+
if page.valid?
|
15
|
+
status = "PASS"
|
16
|
+
color = "!txtgrn!"
|
17
|
+
else
|
18
|
+
@failed += 1
|
19
|
+
status = "FAIL"
|
20
|
+
color = "!txtred!"
|
21
|
+
|
22
|
+
if last_source
|
23
|
+
resay "!txtpur!SOURCE #{last_source}"
|
24
|
+
@last_source = nil
|
25
|
+
end
|
26
|
+
|
27
|
+
resay " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
|
28
|
+
end
|
29
|
+
|
30
|
+
message = "[#{failed}/#{count} @ #{page.depth}] #{status}"
|
31
|
+
remaining_width = screen_width - message.size - 4
|
32
|
+
url = page.url[0..remaining_width]
|
33
|
+
resay "[#{failed}/#{count} @ #{page.depth}] #{color}#{status}!txtrst! #{url} "
|
34
|
+
end
|
35
|
+
|
36
|
+
def footer_prefix
|
37
|
+
terminal? ? "\033[2K\n" : "\n"
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class Verbose < Base
|
4
|
+
def handle(action, page)
|
5
|
+
case action
|
6
|
+
when :source
|
7
|
+
say "\n!txtpur!SOURCE #{page.url}"
|
8
|
+
|
9
|
+
when :check
|
10
|
+
@count += 1
|
11
|
+
|
12
|
+
if page.valid?
|
13
|
+
status = "PASS"
|
14
|
+
color = "!txtgrn!"
|
15
|
+
else
|
16
|
+
@failed += 1
|
17
|
+
status = "FAIL"
|
18
|
+
color = "!txtred!"
|
19
|
+
end
|
20
|
+
|
21
|
+
say " #{color}#{status}!txtrst! #{page.depth} #{page.url}"
|
22
|
+
|
23
|
+
when :skip
|
24
|
+
say " !txtblu!SKIP!txtrst! #{page.depth} #{page.url}"
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/sla/page.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
class Page
|
2
|
+
attr_reader :uri, :parent, :depth
|
3
|
+
|
4
|
+
def initialize(uri, parent: nil, depth: 0)
|
5
|
+
if uri.is_a? String
|
6
|
+
uri = "http://#{uri}" unless uri.start_with? 'http'
|
7
|
+
uri = URI.parse uri
|
8
|
+
uri.fragment = false
|
9
|
+
end
|
10
|
+
|
11
|
+
@uri, @parent, @depth = uri, parent, depth
|
12
|
+
end
|
13
|
+
|
14
|
+
def error
|
15
|
+
response.error
|
16
|
+
end
|
17
|
+
|
18
|
+
def external?
|
19
|
+
byebug unless uri.respond_to? :host
|
20
|
+
uri.host != parent.uri.host
|
21
|
+
end
|
22
|
+
|
23
|
+
def inspect
|
24
|
+
"#<Page url: #{url}, depth: #{depth}>"
|
25
|
+
end
|
26
|
+
|
27
|
+
def pages
|
28
|
+
@pages ||= pages!
|
29
|
+
end
|
30
|
+
|
31
|
+
def url
|
32
|
+
uri.to_s
|
33
|
+
end
|
34
|
+
|
35
|
+
def valid?
|
36
|
+
!response.error
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def anchors
|
42
|
+
@anchors ||= dom.css('a[href]')
|
43
|
+
end
|
44
|
+
|
45
|
+
def content
|
46
|
+
@content ||= response.content
|
47
|
+
end
|
48
|
+
|
49
|
+
def dom
|
50
|
+
@dom ||= Nokogiri::HTML content
|
51
|
+
end
|
52
|
+
|
53
|
+
def normalize_url(new_url)
|
54
|
+
new_url = URI.parse new_url
|
55
|
+
new_url.fragment = false
|
56
|
+
|
57
|
+
result = new_url.absolute? ? new_url : URI.join(url, new_url)
|
58
|
+
|
59
|
+
result.scheme =~ /^http/ ? result.to_s : nil
|
60
|
+
end
|
61
|
+
|
62
|
+
def pages!
|
63
|
+
result = {}
|
64
|
+
anchors.each do |a|
|
65
|
+
url = normalize_url a['href']
|
66
|
+
next unless url
|
67
|
+
page = Page.new url, parent: self, depth: depth+1
|
68
|
+
result[url] = page
|
69
|
+
end
|
70
|
+
result.values
|
71
|
+
end
|
72
|
+
|
73
|
+
def response
|
74
|
+
@response ||= response!
|
75
|
+
end
|
76
|
+
|
77
|
+
def response!
|
78
|
+
response = WebCache.get url
|
79
|
+
@uri = response.base_uri
|
80
|
+
response
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
data/lib/sla/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sla
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colsole
|
@@ -17,6 +17,9 @@ dependencies:
|
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0.7'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.7.1
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -24,48 +27,51 @@ dependencies:
|
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0.7'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.7.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
34
|
+
name: mister_bin
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
39
|
+
version: '0.7'
|
34
40
|
type: :runtime
|
35
41
|
prerelease: false
|
36
42
|
version_requirements: !ruby/object:Gem::Requirement
|
37
43
|
requirements:
|
38
44
|
- - "~>"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
46
|
+
version: '0.7'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: webcache
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
51
|
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
53
|
+
version: '0.6'
|
48
54
|
type: :runtime
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
60
|
+
version: '0.6'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: nokogiri
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
58
64
|
requirements:
|
59
65
|
- - "~>"
|
60
66
|
- !ruby/object:Gem::Version
|
61
|
-
version: '1.
|
67
|
+
version: '1.10'
|
62
68
|
type: :runtime
|
63
69
|
prerelease: false
|
64
70
|
version_requirements: !ruby/object:Gem::Requirement
|
65
71
|
requirements:
|
66
72
|
- - "~>"
|
67
73
|
- !ruby/object:Gem::Version
|
68
|
-
version: '1.
|
74
|
+
version: '1.10'
|
69
75
|
description: Check for broken links on a website
|
70
76
|
email: db@dannyben.com
|
71
77
|
executables:
|
@@ -77,10 +83,13 @@ files:
|
|
77
83
|
- bin/sla
|
78
84
|
- lib/sla.rb
|
79
85
|
- lib/sla/checker.rb
|
80
|
-
- lib/sla/
|
81
|
-
- lib/sla/docopt.txt
|
86
|
+
- lib/sla/command.rb
|
82
87
|
- lib/sla/exceptions.rb
|
83
|
-
- lib/sla/
|
88
|
+
- lib/sla/formatters/base.rb
|
89
|
+
- lib/sla/formatters/simple.rb
|
90
|
+
- lib/sla/formatters/tty.rb
|
91
|
+
- lib/sla/formatters/verbose.rb
|
92
|
+
- lib/sla/page.rb
|
84
93
|
- lib/sla/version.rb
|
85
94
|
homepage: https://github.com/DannyBen/sla
|
86
95
|
licenses:
|
data/lib/sla/command_line.rb
DELETED
@@ -1,83 +0,0 @@
|
|
1
|
-
module SLA
|
2
|
-
class CommandLine < SuperDocopt::Base
|
3
|
-
include Colsole
|
4
|
-
|
5
|
-
version VERSION
|
6
|
-
docopt File.expand_path 'docopt.txt', __dir__
|
7
|
-
subcommands ['check']
|
8
|
-
|
9
|
-
def before_execute
|
10
|
-
WebCache.life = args['--cache']
|
11
|
-
WebCache.dir = args['--cache-dir'] if args['--cache-dir']
|
12
|
-
end
|
13
|
-
|
14
|
-
def check
|
15
|
-
checker = Checker.new
|
16
|
-
checker.max_depth = args['--depth'].to_i
|
17
|
-
logfile = args['--log']
|
18
|
-
start_url = args['DOMAIN']
|
19
|
-
ignore = args['--ignore']
|
20
|
-
ignore = ignore.split " " if ignore
|
21
|
-
screen_width = terminal_width
|
22
|
-
|
23
|
-
checker.check_external = args['--external']
|
24
|
-
checker.ignore = ignore if ignore
|
25
|
-
|
26
|
-
start_url = "http://#{start_url}" unless start_url[0..3] == 'http'
|
27
|
-
|
28
|
-
File.unlink logfile if logfile and File.exist? logfile
|
29
|
-
|
30
|
-
count = 1
|
31
|
-
failed = 0
|
32
|
-
|
33
|
-
log = []
|
34
|
-
|
35
|
-
checker.check start_url do |link|
|
36
|
-
status = link.status
|
37
|
-
colored_status = color_status status
|
38
|
-
if status != '200'
|
39
|
-
failed +=1
|
40
|
-
resay "#{colored_status} #{link.ident}"
|
41
|
-
log.push "#{status} #{link.ident}" if logfile
|
42
|
-
end
|
43
|
-
|
44
|
-
message = "[#{failed}/#{count} @ #{link.depth}] #{status}"
|
45
|
-
remaining_width = screen_width - message.size - 4
|
46
|
-
trimmed_link = link.ident[0..remaining_width]
|
47
|
-
|
48
|
-
resay "[#{failed}/#{count} @ #{link.depth}] #{colored_status} #{trimmed_link} "
|
49
|
-
count += 1
|
50
|
-
|
51
|
-
sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
|
52
|
-
end
|
53
|
-
|
54
|
-
color = failed > 0 ? '!txtred!' : '!txtgrn!'
|
55
|
-
resay "#{color}Done checking #{count} links with #{failed} failures"
|
56
|
-
|
57
|
-
if logfile
|
58
|
-
logstring = log.join("\n") + "\n"
|
59
|
-
File.write logfile, logstring
|
60
|
-
end
|
61
|
-
|
62
|
-
if failed > 0 and !ENV['SLA_ALLOW_FAILS']
|
63
|
-
raise BrokenLinks
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
private
|
68
|
-
|
69
|
-
def color_status(status)
|
70
|
-
case status
|
71
|
-
when '200'
|
72
|
-
'!txtgrn!200!txtrst!'
|
73
|
-
when '404'
|
74
|
-
'!txtred!404!txtrst!'
|
75
|
-
else
|
76
|
-
# :nocov:
|
77
|
-
status
|
78
|
-
# :nocov:
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
end
|
83
|
-
end
|
data/lib/sla/docopt.txt
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
SLA
|
2
|
-
|
3
|
-
Usage:
|
4
|
-
sla check DOMAIN [options]
|
5
|
-
sla (-h|--help|--version)
|
6
|
-
|
7
|
-
Commands:
|
8
|
-
check
|
9
|
-
Start checking for broken links on a given domain.
|
10
|
-
|
11
|
-
Options:
|
12
|
-
--depth, -d DEPTH
|
13
|
-
Set crawling depth [default: 5].
|
14
|
-
|
15
|
-
--cache, -c LIFE
|
16
|
-
Set cache life [default: 1d]. LIFE can be in any of the
|
17
|
-
following formats:
|
18
|
-
10 = 10 seconds
|
19
|
-
20s = 20 seconds
|
20
|
-
10m = 10 minutes
|
21
|
-
10h = 10 hours
|
22
|
-
10d = 10 days
|
23
|
-
|
24
|
-
--cache-dir DIR
|
25
|
-
Set the cache directory.
|
26
|
-
|
27
|
-
--external, -x
|
28
|
-
Also check external links.
|
29
|
-
|
30
|
-
--log, -l LOGFILE
|
31
|
-
Save errors to log file.
|
32
|
-
|
33
|
-
--ignore, -i URLS
|
34
|
-
Specify a list of space delimited URLs to skip.
|
35
|
-
URLs that start with the strings in this list will be skipped.
|
36
|
-
|
37
|
-
Examples:
|
38
|
-
sla check example.com
|
39
|
-
sla check example.com -c10m -d10
|
40
|
-
sla check example.com --cache-dir my_cache
|
41
|
-
sla check example.com --depth 10 --log my_log.log
|
42
|
-
sla check example.com --cache 30d
|
43
|
-
sla check example.com --ignore "/admin /customer/login"
|
data/lib/sla/link.rb
DELETED
@@ -1,84 +0,0 @@
|
|
1
|
-
module SLA
|
2
|
-
class Link
|
3
|
-
attr_accessor :text, :href, :status, :depth, :real_uri
|
4
|
-
attr_reader :parent
|
5
|
-
|
6
|
-
def initialize(href, opts={})
|
7
|
-
@href = href
|
8
|
-
@text = opts[:text]
|
9
|
-
@depth = opts[:depth] || 1
|
10
|
-
self.parent = opts[:parent] || @href
|
11
|
-
end
|
12
|
-
|
13
|
-
def valid?
|
14
|
-
validate
|
15
|
-
status == '200'
|
16
|
-
end
|
17
|
-
|
18
|
-
def validate
|
19
|
-
content
|
20
|
-
end
|
21
|
-
|
22
|
-
def content
|
23
|
-
@content ||= content!
|
24
|
-
end
|
25
|
-
|
26
|
-
def content!
|
27
|
-
response = WebCache.get url
|
28
|
-
@status = response.error ? '404' : '200'
|
29
|
-
@real_uri = response.base_uri
|
30
|
-
response.content
|
31
|
-
end
|
32
|
-
|
33
|
-
def ident
|
34
|
-
external? ? full_uri.to_s : full_uri.request_uri
|
35
|
-
end
|
36
|
-
|
37
|
-
def url
|
38
|
-
full_uri.to_s
|
39
|
-
end
|
40
|
-
|
41
|
-
def doc
|
42
|
-
@doc ||= Nokogiri::HTML content
|
43
|
-
end
|
44
|
-
|
45
|
-
def sublinks
|
46
|
-
@sublinks ||= sublinks!
|
47
|
-
end
|
48
|
-
|
49
|
-
def sublinks!
|
50
|
-
anchors = doc.css('a[href]')
|
51
|
-
result = []
|
52
|
-
anchors.each do |a|
|
53
|
-
link = Link.new a['href'], text: a.text, parent: real_uri, depth: depth+1
|
54
|
-
result.push link if link.relevant?
|
55
|
-
end
|
56
|
-
result
|
57
|
-
end
|
58
|
-
|
59
|
-
def uri
|
60
|
-
@uri ||= URI.parse href
|
61
|
-
end
|
62
|
-
|
63
|
-
def parent=(url)
|
64
|
-
@parent = url.is_a?(String) ? URI.parse(url) : url
|
65
|
-
end
|
66
|
-
|
67
|
-
def path
|
68
|
-
uri.request_uri
|
69
|
-
end
|
70
|
-
|
71
|
-
def full_uri
|
72
|
-
return uri if uri.absolute? || !parent.absolute?
|
73
|
-
URI.join parent, href
|
74
|
-
end
|
75
|
-
|
76
|
-
def external?
|
77
|
-
parent.host != full_uri.host
|
78
|
-
end
|
79
|
-
|
80
|
-
def relevant?
|
81
|
-
full_uri.scheme =~ /^http/
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|