sla 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/sla +10 -2
- data/lib/sla.rb +9 -3
- data/lib/sla/checker.rb +43 -25
- data/lib/sla/command.rb +70 -0
- data/lib/sla/formatters/base.rb +34 -0
- data/lib/sla/formatters/simple.rb +26 -0
- data/lib/sla/formatters/tty.rb +43 -0
- data/lib/sla/formatters/verbose.rb +30 -0
- data/lib/sla/page.rb +83 -0
- data/lib/sla/version.rb +1 -1
- metadata +21 -12
- data/lib/sla/command_line.rb +0 -83
- data/lib/sla/docopt.txt +0 -43
- data/lib/sla/link.rb +0 -84
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 408ec43b2728bd5d2a1461b747b487506976e020894684d8270154a5e9b807a1
|
4
|
+
data.tar.gz: 8620e841cde6cc02ba87e2c35a32d278c197f65363e85fad6fd4adec3adcbf1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7b55730a4502b09df9c59ca8a3f0b49aa2986a2b6b548f8f26e8a8e5650075497acaef75f020e93354eb2f33d0538c6176ad3d3d070e11b0600e8ca6eddab34
|
7
|
+
data.tar.gz: 94eeaa6c32a22a8f894bf6c65b0b74084120a071897028d4888952ac7d7dd28d10dcfe289a5230bd053c06b2a307ee32ddcf96e50f8bfc900c94eaa03b60a4ca
|
data/bin/sla
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
2
|
require 'sla'
|
3
|
+
include Colsole
|
4
|
+
|
5
|
+
router = MisterBin::Runner.new handler: SLA::Command
|
4
6
|
|
5
7
|
begin
|
6
|
-
|
8
|
+
exit router.run ARGV
|
9
|
+
|
7
10
|
rescue SLA::BrokenLinks => e
|
8
11
|
puts "(exit code 1)"
|
9
12
|
exit 1
|
13
|
+
|
14
|
+
rescue Interrupt
|
15
|
+
puts "\nGoodbye"
|
16
|
+
exit 1
|
17
|
+
|
10
18
|
end
|
data/lib/sla.rb
CHANGED
@@ -2,14 +2,20 @@ require 'webcache'
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'colsole'
|
4
4
|
require 'singleton'
|
5
|
-
require '
|
5
|
+
require 'mister_bin'
|
6
6
|
require 'uri'
|
7
7
|
|
8
8
|
require 'sla/version'
|
9
9
|
require 'sla/exceptions'
|
10
|
-
require 'sla/
|
10
|
+
require 'sla/page'
|
11
11
|
require 'sla/checker'
|
12
|
-
|
12
|
+
|
13
|
+
require 'sla/formatters/base'
|
14
|
+
require 'sla/formatters/verbose'
|
15
|
+
require 'sla/formatters/simple'
|
16
|
+
require 'sla/formatters/tty'
|
17
|
+
|
18
|
+
require 'sla/command'
|
13
19
|
|
14
20
|
if ENV['BYEBUG']
|
15
21
|
# :nocov:
|
data/lib/sla/checker.rb
CHANGED
@@ -1,42 +1,60 @@
|
|
1
1
|
module SLA
|
2
2
|
class Checker
|
3
|
-
|
3
|
+
attr_reader :max_depth, :ignore, :check_external
|
4
4
|
|
5
|
-
|
5
|
+
def initialize(max_depth: 5, ignore: nil, check_external: false)
|
6
|
+
@max_depth = max_depth
|
7
|
+
@ignore = ignore
|
8
|
+
@check_external = check_external
|
9
|
+
end
|
6
10
|
|
7
|
-
def
|
8
|
-
@
|
9
|
-
@checked_links = []
|
10
|
-
@check_external = false
|
11
|
-
@ignore = []
|
11
|
+
def deeply_checked
|
12
|
+
@deeply_checked ||= {}
|
12
13
|
end
|
13
14
|
|
14
|
-
def
|
15
|
-
|
15
|
+
def checked
|
16
|
+
@checked ||= {}
|
16
17
|
end
|
17
18
|
|
18
|
-
def check(
|
19
|
-
|
19
|
+
def check(page, &block)
|
20
|
+
return if ignore? page
|
21
|
+
return if page.depth >= max_depth
|
22
|
+
return unless page.valid?
|
23
|
+
|
24
|
+
yield [:source, page] if block_given?
|
20
25
|
|
21
|
-
|
22
|
-
|
23
|
-
|
26
|
+
pages = page.pages
|
27
|
+
pages.reject! { |page| page.external? } if !check_external
|
28
|
+
|
29
|
+
pages.each do |page|
|
30
|
+
if checked.has_key? page.url or ignore? page
|
31
|
+
yield [:skip, page] if block_given?
|
32
|
+
else
|
33
|
+
checked[page.url] = true
|
34
|
+
yield [:check, page] if block_given?
|
35
|
+
end
|
24
36
|
end
|
25
37
|
|
26
|
-
|
27
|
-
|
38
|
+
pages.each do |page|
|
39
|
+
next if deeply_checked.has_key? page.url
|
40
|
+
deeply_checked[page.url] = true
|
41
|
+
next if page.external?
|
42
|
+
check page, &block
|
43
|
+
end
|
44
|
+
end
|
28
45
|
|
29
|
-
|
46
|
+
private
|
30
47
|
|
31
|
-
|
48
|
+
def ignore?(page)
|
49
|
+
return false unless ignore
|
32
50
|
|
33
|
-
|
34
|
-
|
35
|
-
return if depth >= max_depth
|
36
|
-
|
37
|
-
link.sublinks.each do |sublink|
|
38
|
-
check sublink, depth+1, &block
|
51
|
+
ignore.each do |text|
|
52
|
+
return true if page.url.include? text
|
39
53
|
end
|
54
|
+
|
55
|
+
false
|
40
56
|
end
|
57
|
+
|
41
58
|
end
|
42
|
-
end
|
59
|
+
end
|
60
|
+
|
data/lib/sla/command.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module SLA
|
2
|
+
class Command < MisterBin::Command
|
3
|
+
include Colsole
|
4
|
+
|
5
|
+
help "Site Link Analyzer"
|
6
|
+
|
7
|
+
version VERSION
|
8
|
+
|
9
|
+
usage "sla URL [options]"
|
10
|
+
usage "sla --help | -h | --version"
|
11
|
+
|
12
|
+
param "URL", "URL to scan"
|
13
|
+
|
14
|
+
option "--verbose, -v", "Show detailed output"
|
15
|
+
option "--simple, -s", "Show simple output of errors only"
|
16
|
+
option "--depth, -d DEPTH", "Set crawling depth [default: 5]"
|
17
|
+
option "--external, -x", "Also check external links"
|
18
|
+
option "--ignore, -i URLS", "Specify a list of space delimited patterns to skip\nURLs that contain any of the strings in this list will be skipped"
|
19
|
+
option "--cache, -c LIFE", "Set cache life [default: 1d]. LIFE can be in any of the following formats:\n 10 = 10 seconds\n 20s = 20 seconds\n 10m = 10 minutes\n 10h = 10 hours\n 10d = 10 days"
|
20
|
+
option "--cache-dir DIR", "Set the cache directory"
|
21
|
+
|
22
|
+
example "sla example.com"
|
23
|
+
example "sla example.com -c10m -d10"
|
24
|
+
example "sla example.com --cache-dir my_cache"
|
25
|
+
example "sla example.com --depth 10"
|
26
|
+
example "sla example.com --cache 30d --external"
|
27
|
+
example "sla example.com --simple > out.log"
|
28
|
+
example "sla example.com --ignore \"/admin /customer/login\""
|
29
|
+
|
30
|
+
environment "SLA_SLEEP", "Set number of seconds to sleep between calls (for debugging purposes)"
|
31
|
+
|
32
|
+
def run
|
33
|
+
WebCache.life = args['--cache']
|
34
|
+
WebCache.dir = args['--cache-dir'] if args['--cache-dir']
|
35
|
+
|
36
|
+
max_depth = args['--depth'].to_i
|
37
|
+
url = args['URL']
|
38
|
+
ignore = args['--ignore']
|
39
|
+
ignore = ignore.split " " if ignore
|
40
|
+
check_external = args['--external']
|
41
|
+
|
42
|
+
checker = Checker.new max_depth: max_depth,
|
43
|
+
ignore: ignore, check_external: check_external
|
44
|
+
|
45
|
+
formatter = if args['--verbose']
|
46
|
+
Formatters::Verbose.new
|
47
|
+
elsif args['--simple']
|
48
|
+
Formatters::Simple.new
|
49
|
+
else
|
50
|
+
Formatters::TTY.new
|
51
|
+
end
|
52
|
+
|
53
|
+
execute url, checker, formatter
|
54
|
+
end
|
55
|
+
|
56
|
+
def execute(url, checker, formatter)
|
57
|
+
page = Page.new url
|
58
|
+
checker.check page do |action, page|
|
59
|
+
success = formatter.handle action, page
|
60
|
+
sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
|
61
|
+
end
|
62
|
+
|
63
|
+
formatter.footer
|
64
|
+
|
65
|
+
unless formatter.success? or ENV['SLA_ALLOW_FAILS']
|
66
|
+
raise BrokenLinks
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/lib/sla/formatters/base.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class Base
|
4
|
+
include Colsole
|
5
|
+
|
6
|
+
attr_accessor :count, :failed
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@count = 0
|
10
|
+
@failed = 0
|
11
|
+
end
|
12
|
+
|
13
|
+
def success?
|
14
|
+
failed == 0
|
15
|
+
end
|
16
|
+
|
17
|
+
def handle(action, page)
|
18
|
+
# :nocov:
|
19
|
+
raise NotImplementedError
|
20
|
+
# :nocov:
|
21
|
+
end
|
22
|
+
|
23
|
+
def footer_prefix
|
24
|
+
"\n"
|
25
|
+
end
|
26
|
+
|
27
|
+
def footer
|
28
|
+
color = success? ? '!txtgrn!' : '!txtred!'
|
29
|
+
say "#{footer_prefix}#{color}Checked #{count} pages with #{failed} failures"
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/sla/formatters/simple.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class Simple < Base
|
4
|
+
attr_reader :last_source
|
5
|
+
|
6
|
+
def handle(action, page)
|
7
|
+
@last_source = page.url if action == :source
|
8
|
+
|
9
|
+
return unless action == :check
|
10
|
+
@count += 1
|
11
|
+
|
12
|
+
return if page.valid?
|
13
|
+
|
14
|
+
@failed += 1
|
15
|
+
|
16
|
+
if last_source
|
17
|
+
say "!txtpur!SOURCE #{last_source}"
|
18
|
+
@last_source = nil
|
19
|
+
end
|
20
|
+
|
21
|
+
say " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/sla/formatters/tty.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class TTY < Base
|
4
|
+
attr_reader :last_source
|
5
|
+
|
6
|
+
def handle(action, page)
|
7
|
+
screen_width = terminal_width
|
8
|
+
|
9
|
+
@last_source = page.url if action == :source
|
10
|
+
|
11
|
+
return unless action == :check
|
12
|
+
@count += 1
|
13
|
+
|
14
|
+
if page.valid?
|
15
|
+
status = "PASS"
|
16
|
+
color = "!txtgrn!"
|
17
|
+
else
|
18
|
+
@failed += 1
|
19
|
+
status = "FAIL"
|
20
|
+
color = "!txtred!"
|
21
|
+
|
22
|
+
if last_source
|
23
|
+
resay "!txtpur!SOURCE #{last_source}"
|
24
|
+
@last_source = nil
|
25
|
+
end
|
26
|
+
|
27
|
+
resay " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
|
28
|
+
end
|
29
|
+
|
30
|
+
message = "[#{failed}/#{count} @ #{page.depth}] #{status}"
|
31
|
+
remaining_width = screen_width - message.size - 4
|
32
|
+
url = page.url[0..remaining_width]
|
33
|
+
resay "[#{failed}/#{count} @ #{page.depth}] #{color}#{status}!txtrst! #{url} "
|
34
|
+
end
|
35
|
+
|
36
|
+
def footer_prefix
|
37
|
+
terminal? ? "\033[2K\n" : "\n"
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/sla/formatters/verbose.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module SLA
|
2
|
+
module Formatters
|
3
|
+
class Verbose < Base
|
4
|
+
def handle(action, page)
|
5
|
+
case action
|
6
|
+
when :source
|
7
|
+
say "\n!txtpur!SOURCE #{page.url}"
|
8
|
+
|
9
|
+
when :check
|
10
|
+
@count += 1
|
11
|
+
|
12
|
+
if page.valid?
|
13
|
+
status = "PASS"
|
14
|
+
color = "!txtgrn!"
|
15
|
+
else
|
16
|
+
@failed += 1
|
17
|
+
status = "FAIL"
|
18
|
+
color = "!txtred!"
|
19
|
+
end
|
20
|
+
|
21
|
+
say " #{color}#{status}!txtrst! #{page.depth} #{page.url}"
|
22
|
+
|
23
|
+
when :skip
|
24
|
+
say " !txtblu!SKIP!txtrst! #{page.depth} #{page.url}"
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/sla/page.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
class Page
|
2
|
+
attr_reader :uri, :parent, :depth
|
3
|
+
|
4
|
+
def initialize(uri, parent: nil, depth: 0)
|
5
|
+
if uri.is_a? String
|
6
|
+
uri = "http://#{uri}" unless uri.start_with? 'http'
|
7
|
+
uri = URI.parse uri
|
8
|
+
uri.fragment = false
|
9
|
+
end
|
10
|
+
|
11
|
+
@uri, @parent, @depth = uri, parent, depth
|
12
|
+
end
|
13
|
+
|
14
|
+
def error
|
15
|
+
response.error
|
16
|
+
end
|
17
|
+
|
18
|
+
def external?
|
19
|
+
byebug unless uri.respond_to? :host
|
20
|
+
uri.host != parent.uri.host
|
21
|
+
end
|
22
|
+
|
23
|
+
def inspect
|
24
|
+
"#<Page url: #{url}, depth: #{depth}>"
|
25
|
+
end
|
26
|
+
|
27
|
+
def pages
|
28
|
+
@pages ||= pages!
|
29
|
+
end
|
30
|
+
|
31
|
+
def url
|
32
|
+
uri.to_s
|
33
|
+
end
|
34
|
+
|
35
|
+
def valid?
|
36
|
+
!response.error
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def anchors
|
42
|
+
@anchors ||= dom.css('a[href]')
|
43
|
+
end
|
44
|
+
|
45
|
+
def content
|
46
|
+
@content ||= response.content
|
47
|
+
end
|
48
|
+
|
49
|
+
def dom
|
50
|
+
@dom ||= Nokogiri::HTML content
|
51
|
+
end
|
52
|
+
|
53
|
+
def normalize_url(new_url)
|
54
|
+
new_url = URI.parse new_url
|
55
|
+
new_url.fragment = false
|
56
|
+
|
57
|
+
result = new_url.absolute? ? new_url : URI.join(url, new_url)
|
58
|
+
|
59
|
+
result.scheme =~ /^http/ ? result.to_s : nil
|
60
|
+
end
|
61
|
+
|
62
|
+
def pages!
|
63
|
+
result = {}
|
64
|
+
anchors.each do |a|
|
65
|
+
url = normalize_url a['href']
|
66
|
+
next unless url
|
67
|
+
page = Page.new url, parent: self, depth: depth+1
|
68
|
+
result[url] = page
|
69
|
+
end
|
70
|
+
result.values
|
71
|
+
end
|
72
|
+
|
73
|
+
def response
|
74
|
+
@response ||= response!
|
75
|
+
end
|
76
|
+
|
77
|
+
def response!
|
78
|
+
response = WebCache.get url
|
79
|
+
@uri = response.base_uri
|
80
|
+
response
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
data/lib/sla/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sla
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colsole
|
@@ -17,6 +17,9 @@ dependencies:
|
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0.7'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.7.1
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -24,48 +27,51 @@ dependencies:
|
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0.7'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.7.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
34
|
+
name: mister_bin
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
39
|
+
version: '0.7'
|
34
40
|
type: :runtime
|
35
41
|
prerelease: false
|
36
42
|
version_requirements: !ruby/object:Gem::Requirement
|
37
43
|
requirements:
|
38
44
|
- - "~>"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
46
|
+
version: '0.7'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
48
|
name: webcache
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
51
|
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
53
|
+
version: '0.6'
|
48
54
|
type: :runtime
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
60
|
+
version: '0.6'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: nokogiri
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
58
64
|
requirements:
|
59
65
|
- - "~>"
|
60
66
|
- !ruby/object:Gem::Version
|
61
|
-
version: '1.
|
67
|
+
version: '1.10'
|
62
68
|
type: :runtime
|
63
69
|
prerelease: false
|
64
70
|
version_requirements: !ruby/object:Gem::Requirement
|
65
71
|
requirements:
|
66
72
|
- - "~>"
|
67
73
|
- !ruby/object:Gem::Version
|
68
|
-
version: '1.
|
74
|
+
version: '1.10'
|
69
75
|
description: Check for broken links on a website
|
70
76
|
email: db@dannyben.com
|
71
77
|
executables:
|
@@ -77,10 +83,13 @@ files:
|
|
77
83
|
- bin/sla
|
78
84
|
- lib/sla.rb
|
79
85
|
- lib/sla/checker.rb
|
80
|
-
- lib/sla/command_line.rb
|
81
|
-
- lib/sla/docopt.txt
|
86
|
+
- lib/sla/command.rb
|
82
87
|
- lib/sla/exceptions.rb
|
83
|
-
- lib/sla/link.rb
|
88
|
+
- lib/sla/formatters/base.rb
|
89
|
+
- lib/sla/formatters/simple.rb
|
90
|
+
- lib/sla/formatters/tty.rb
|
91
|
+
- lib/sla/formatters/verbose.rb
|
92
|
+
- lib/sla/page.rb
|
84
93
|
- lib/sla/version.rb
|
85
94
|
homepage: https://github.com/DannyBen/sla
|
86
95
|
licenses:
|
data/lib/sla/command_line.rb
DELETED
@@ -1,83 +0,0 @@
|
|
1
|
-
module SLA
|
2
|
-
class CommandLine < SuperDocopt::Base
|
3
|
-
include Colsole
|
4
|
-
|
5
|
-
version VERSION
|
6
|
-
docopt File.expand_path 'docopt.txt', __dir__
|
7
|
-
subcommands ['check']
|
8
|
-
|
9
|
-
def before_execute
|
10
|
-
WebCache.life = args['--cache']
|
11
|
-
WebCache.dir = args['--cache-dir'] if args['--cache-dir']
|
12
|
-
end
|
13
|
-
|
14
|
-
def check
|
15
|
-
checker = Checker.new
|
16
|
-
checker.max_depth = args['--depth'].to_i
|
17
|
-
logfile = args['--log']
|
18
|
-
start_url = args['DOMAIN']
|
19
|
-
ignore = args['--ignore']
|
20
|
-
ignore = ignore.split " " if ignore
|
21
|
-
screen_width = terminal_width
|
22
|
-
|
23
|
-
checker.check_external = args['--external']
|
24
|
-
checker.ignore = ignore if ignore
|
25
|
-
|
26
|
-
start_url = "http://#{start_url}" unless start_url[0..3] == 'http'
|
27
|
-
|
28
|
-
File.unlink logfile if logfile and File.exist? logfile
|
29
|
-
|
30
|
-
count = 1
|
31
|
-
failed = 0
|
32
|
-
|
33
|
-
log = []
|
34
|
-
|
35
|
-
checker.check start_url do |link|
|
36
|
-
status = link.status
|
37
|
-
colored_status = color_status status
|
38
|
-
if status != '200'
|
39
|
-
failed +=1
|
40
|
-
resay "#{colored_status} #{link.ident}"
|
41
|
-
log.push "#{status} #{link.ident}" if logfile
|
42
|
-
end
|
43
|
-
|
44
|
-
message = "[#{failed}/#{count} @ #{link.depth}] #{status}"
|
45
|
-
remaining_width = screen_width - message.size - 4
|
46
|
-
trimmed_link = link.ident[0..remaining_width]
|
47
|
-
|
48
|
-
resay "[#{failed}/#{count} @ #{link.depth}] #{colored_status} #{trimmed_link} "
|
49
|
-
count += 1
|
50
|
-
|
51
|
-
sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
|
52
|
-
end
|
53
|
-
|
54
|
-
color = failed > 0 ? '!txtred!' : '!txtgrn!'
|
55
|
-
resay "#{color}Done checking #{count} links with #{failed} failures"
|
56
|
-
|
57
|
-
if logfile
|
58
|
-
logstring = log.join("\n") + "\n"
|
59
|
-
File.write logfile, logstring
|
60
|
-
end
|
61
|
-
|
62
|
-
if failed > 0 and !ENV['SLA_ALLOW_FAILS']
|
63
|
-
raise BrokenLinks
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
private
|
68
|
-
|
69
|
-
def color_status(status)
|
70
|
-
case status
|
71
|
-
when '200'
|
72
|
-
'!txtgrn!200!txtrst!'
|
73
|
-
when '404'
|
74
|
-
'!txtred!404!txtrst!'
|
75
|
-
else
|
76
|
-
# :nocov:
|
77
|
-
status
|
78
|
-
# :nocov:
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
end
|
83
|
-
end
|
data/lib/sla/docopt.txt
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
SLA
|
2
|
-
|
3
|
-
Usage:
|
4
|
-
sla check DOMAIN [options]
|
5
|
-
sla (-h|--help|--version)
|
6
|
-
|
7
|
-
Commands:
|
8
|
-
check
|
9
|
-
Start checking for broken links on a given domain.
|
10
|
-
|
11
|
-
Options:
|
12
|
-
--depth, -d DEPTH
|
13
|
-
Set crawling depth [default: 5].
|
14
|
-
|
15
|
-
--cache, -c LIFE
|
16
|
-
Set cache life [default: 1d]. LIFE can be in any of the
|
17
|
-
following formats:
|
18
|
-
10 = 10 seconds
|
19
|
-
20s = 20 seconds
|
20
|
-
10m = 10 minutes
|
21
|
-
10h = 10 hours
|
22
|
-
10d = 10 days
|
23
|
-
|
24
|
-
--cache-dir DIR
|
25
|
-
Set the cache directory.
|
26
|
-
|
27
|
-
--external, -x
|
28
|
-
Also check external links.
|
29
|
-
|
30
|
-
--log, -l LOGFILE
|
31
|
-
Save errors to log file.
|
32
|
-
|
33
|
-
--ignore, -i URLS
|
34
|
-
Specify a list of space delimited URLs to skip.
|
35
|
-
URLs that start with the strings in this list will be skipped.
|
36
|
-
|
37
|
-
Examples:
|
38
|
-
sla check example.com
|
39
|
-
sla check example.com -c10m -d10
|
40
|
-
sla check example.com --cache-dir my_cache
|
41
|
-
sla check example.com --depth 10 --log my_log.log
|
42
|
-
sla check example.com --cache 30d
|
43
|
-
sla check example.com --ignore "/admin /customer/login"
|
data/lib/sla/link.rb
DELETED
@@ -1,84 +0,0 @@
|
|
1
|
-
module SLA
|
2
|
-
class Link
|
3
|
-
attr_accessor :text, :href, :status, :depth, :real_uri
|
4
|
-
attr_reader :parent
|
5
|
-
|
6
|
-
def initialize(href, opts={})
|
7
|
-
@href = href
|
8
|
-
@text = opts[:text]
|
9
|
-
@depth = opts[:depth] || 1
|
10
|
-
self.parent = opts[:parent] || @href
|
11
|
-
end
|
12
|
-
|
13
|
-
def valid?
|
14
|
-
validate
|
15
|
-
status == '200'
|
16
|
-
end
|
17
|
-
|
18
|
-
def validate
|
19
|
-
content
|
20
|
-
end
|
21
|
-
|
22
|
-
def content
|
23
|
-
@content ||= content!
|
24
|
-
end
|
25
|
-
|
26
|
-
def content!
|
27
|
-
response = WebCache.get url
|
28
|
-
@status = response.error ? '404' : '200'
|
29
|
-
@real_uri = response.base_uri
|
30
|
-
response.content
|
31
|
-
end
|
32
|
-
|
33
|
-
def ident
|
34
|
-
external? ? full_uri.to_s : full_uri.request_uri
|
35
|
-
end
|
36
|
-
|
37
|
-
def url
|
38
|
-
full_uri.to_s
|
39
|
-
end
|
40
|
-
|
41
|
-
def doc
|
42
|
-
@doc ||= Nokogiri::HTML content
|
43
|
-
end
|
44
|
-
|
45
|
-
def sublinks
|
46
|
-
@sublinks ||= sublinks!
|
47
|
-
end
|
48
|
-
|
49
|
-
def sublinks!
|
50
|
-
anchors = doc.css('a[href]')
|
51
|
-
result = []
|
52
|
-
anchors.each do |a|
|
53
|
-
link = Link.new a['href'], text: a.text, parent: real_uri, depth: depth+1
|
54
|
-
result.push link if link.relevant?
|
55
|
-
end
|
56
|
-
result
|
57
|
-
end
|
58
|
-
|
59
|
-
def uri
|
60
|
-
@uri ||= URI.parse href
|
61
|
-
end
|
62
|
-
|
63
|
-
def parent=(url)
|
64
|
-
@parent = url.is_a?(String) ? URI.parse(url) : url
|
65
|
-
end
|
66
|
-
|
67
|
-
def path
|
68
|
-
uri.request_uri
|
69
|
-
end
|
70
|
-
|
71
|
-
def full_uri
|
72
|
-
return uri if uri.absolute? || !parent.absolute?
|
73
|
-
URI.join parent, href
|
74
|
-
end
|
75
|
-
|
76
|
-
def external?
|
77
|
-
parent.host != full_uri.host
|
78
|
-
end
|
79
|
-
|
80
|
-
def relevant?
|
81
|
-
full_uri.scheme =~ /^http/
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|