sla 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 20e5f6a05020d9aecc2ad6754a7a22cee70e18f45e3bc5f679daabb2042c749a
4
- data.tar.gz: 2d5b0290dd9dd25169bbb89ff20df4aea66db616ea65585a56a982866579f568
3
+ metadata.gz: 408ec43b2728bd5d2a1461b747b487506976e020894684d8270154a5e9b807a1
4
+ data.tar.gz: 8620e841cde6cc02ba87e2c35a32d278c197f65363e85fad6fd4adec3adcbf1e
5
5
  SHA512:
6
- metadata.gz: cc586125c98e82786f0c747fcdf9d21f45d933564e4aba416036d21a39a70c72bdb974665c8e6a6f5c5b17a4ad323a52bb896205f4e5bb7e49002dd4370b1d84
7
- data.tar.gz: 10fadfccf9da8a7b427f6512bfb6365f890dc70cc01646280a5de51aeebee38026942dc78b30e1c5dac9196015e3e9565a804bf13e8c9c3c7c2bed00c9a5ca49
6
+ metadata.gz: d7b55730a4502b09df9c59ca8a3f0b49aa2986a2b6b548f8f26e8a8e5650075497acaef75f020e93354eb2f33d0538c6176ad3d3d070e11b0600e8ca6eddab34
7
+ data.tar.gz: 94eeaa6c32a22a8f894bf6c65b0b74084120a071897028d4888952ac7d7dd28d10dcfe289a5230bd053c06b2a307ee32ddcf96e50f8bfc900c94eaa03b60a4ca
data/bin/sla CHANGED
@@ -1,10 +1,18 @@
1
1
  #!/usr/bin/env ruby
2
-
3
2
  require 'sla'
3
+ include Colsole
4
+
5
+ router = MisterBin::Runner.new handler: SLA::Command
4
6
 
5
7
  begin
6
- SLA::CommandLine.execute ARGV
8
+ exit router.run ARGV
9
+
7
10
  rescue SLA::BrokenLinks => e
8
11
  puts "(exit code 1)"
9
12
  exit 1
13
+
14
+ rescue Interrupt
15
+ puts "\nGoodbye"
16
+ exit 1
17
+
10
18
  end
data/lib/sla.rb CHANGED
@@ -2,14 +2,20 @@ require 'webcache'
2
2
  require 'nokogiri'
3
3
  require 'colsole'
4
4
  require 'singleton'
5
- require 'super_docopt'
5
+ require 'mister_bin'
6
6
  require 'uri'
7
7
 
8
8
  require 'sla/version'
9
9
  require 'sla/exceptions'
10
- require 'sla/link'
10
+ require 'sla/page'
11
11
  require 'sla/checker'
12
- require 'sla/command_line'
12
+
13
+ require 'sla/formatters/base'
14
+ require 'sla/formatters/verbose'
15
+ require 'sla/formatters/simple'
16
+ require 'sla/formatters/tty'
17
+
18
+ require 'sla/command'
13
19
 
14
20
  if ENV['BYEBUG']
15
21
  # :nocov:
data/lib/sla/checker.rb CHANGED
@@ -1,42 +1,60 @@
1
1
  module SLA
2
2
  class Checker
3
- include Colsole
3
+ attr_reader :max_depth, :ignore, :check_external
4
4
 
5
- attr_accessor :max_depth, :checked_links, :check_external, :ignore
5
+ def initialize(max_depth: 5, ignore: nil, check_external: false)
6
+ @max_depth = max_depth
7
+ @ignore = ignore
8
+ @check_external = check_external
9
+ end
6
10
 
7
- def initialize
8
- @max_depth = 10
9
- @checked_links = []
10
- @check_external = false
11
- @ignore = []
11
+ def deeply_checked
12
+ @deeply_checked ||= {}
12
13
  end
13
14
 
14
- def count
15
- checked_links.count
15
+ def checked
16
+ @checked ||= {}
16
17
  end
17
18
 
18
- def check(link, depth=1, &block)
19
- link = Link.new link, depth: depth if link.is_a? String
19
+ def check(page, &block)
20
+ return if ignore? page
21
+ return if page.depth >= max_depth
22
+ return unless page.valid?
23
+
24
+ yield [:source, page] if block_given?
20
25
 
21
- return if link.external? && !check_external
22
- ignore.each do |ignored|
23
- return if link.ident.start_with? ignored
26
+ pages = page.pages
27
+ pages.reject! { |page| page.external? } if !check_external
28
+
29
+ pages.each do |page|
30
+ if checked.has_key? page.url or ignore? page
31
+ yield [:skip, page] if block_given?
32
+ else
33
+ checked[page.url] = true
34
+ yield [:check, page] if block_given?
35
+ end
24
36
  end
25
37
 
26
- link.validate
27
- yield link if block_given?
38
+ pages.each do |page|
39
+ next if deeply_checked.has_key? page.url
40
+ deeply_checked[page.url] = true
41
+ next if page.external?
42
+ check page, &block
43
+ end
44
+ end
28
45
 
29
- return if checked_links.include? link.url
46
+ private
30
47
 
31
- checked_links.push link.url
48
+ def ignore?(page)
49
+ return false unless ignore
32
50
 
33
- return if link.external?
34
- return unless link.valid?
35
- return if depth >= max_depth
36
-
37
- link.sublinks.each do |sublink|
38
- check sublink, depth+1, &block
51
+ ignore.each do |text|
52
+ return true if page.url.include? text
39
53
  end
54
+
55
+ false
40
56
  end
57
+
41
58
  end
42
- end
59
+ end
60
+
@@ -0,0 +1,70 @@
1
+ module SLA
2
+ class Command < MisterBin::Command
3
+ include Colsole
4
+
5
+ help "Site Link Analyzer"
6
+
7
+ version VERSION
8
+
9
+ usage "sla URL [options]"
10
+ usage "sla --help | -h | --version"
11
+
12
+ param "URL", "URL to scan"
13
+
14
+ option "--verbose, -v", "Show detailed output"
15
+ option "--simple, -s", "Show simple output of errors only"
16
+ option "--depth, -d DEPTH", "Set crawling depth [default: 5]"
17
+ option "--external, -x", "Also check external links"
18
+ option "--ignore, -i URLS", "Specify a list of space delimited patterns to skip\nURLs that contain any of the strings in this list will be skipped"
19
+ option "--cache, -c LIFE", "Set cache life [default: 1d]. LIFE can be in any of the following formats:\n 10 = 10 seconds\n 20s = 20 seconds\n 10m = 10 minutes\n 10h = 10 hours\n 10d = 10 days"
20
+ option "--cache-dir DIR", "Set the cache directory"
21
+
22
+ example "sla example.com"
23
+ example "sla example.com -c10m -d10"
24
+ example "sla example.com --cache-dir my_cache"
25
+ example "sla example.com --depth 10"
26
+ example "sla example.com --cache 30d --external"
27
+ example "sla example.com --simple > out.log"
28
+ example "sla example.com --ignore \"/admin /customer/login\""
29
+
30
+ environment "SLA_SLEEP", "Set number of seconds to sleep between calls (for debugging purposes)"
31
+
32
+ def run
33
+ WebCache.life = args['--cache']
34
+ WebCache.dir = args['--cache-dir'] if args['--cache-dir']
35
+
36
+ max_depth = args['--depth'].to_i
37
+ url = args['URL']
38
+ ignore = args['--ignore']
39
+ ignore = ignore.split " " if ignore
40
+ check_external = args['--external']
41
+
42
+ checker = Checker.new max_depth: max_depth,
43
+ ignore: ignore, check_external: check_external
44
+
45
+ formatter = if args['--verbose']
46
+ Formatters::Verbose.new
47
+ elsif args['--simple']
48
+ Formatters::Simple.new
49
+ else
50
+ Formatters::TTY.new
51
+ end
52
+
53
+ execute url, checker, formatter
54
+ end
55
+
56
+ def execute(url, checker, formatter)
57
+ page = Page.new url
58
+ checker.check page do |action, page|
59
+ success = formatter.handle action, page
60
+ sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
61
+ end
62
+
63
+ formatter.footer
64
+
65
+ unless formatter.success? or ENV['SLA_ALLOW_FAILS']
66
+ raise BrokenLinks
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,34 @@
1
+ module SLA
2
+ module Formatters
3
+ class Base
4
+ include Colsole
5
+
6
+ attr_accessor :count, :failed
7
+
8
+ def initialize
9
+ @count = 0
10
+ @failed = 0
11
+ end
12
+
13
+ def success?
14
+ failed == 0
15
+ end
16
+
17
+ def handle(action, page)
18
+ # :nocov:
19
+ raise NotImplementedError
20
+ # :nocov:
21
+ end
22
+
23
+ def footer_prefix
24
+ "\n"
25
+ end
26
+
27
+ def footer
28
+ color = success? ? '!txtgrn!' : '!txtred!'
29
+ say "#{footer_prefix}#{color}Checked #{count} pages with #{failed} failures"
30
+ end
31
+
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,26 @@
1
+ module SLA
2
+ module Formatters
3
+ class Simple < Base
4
+ attr_reader :last_source
5
+
6
+ def handle(action, page)
7
+ @last_source = page.url if action == :source
8
+
9
+ return unless action == :check
10
+ @count += 1
11
+
12
+ return if page.valid?
13
+
14
+ @failed += 1
15
+
16
+ if last_source
17
+ say "!txtpur!SOURCE #{last_source}"
18
+ @last_source = nil
19
+ end
20
+
21
+ say " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
22
+ end
23
+
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,43 @@
1
+ module SLA
2
+ module Formatters
3
+ class TTY < Base
4
+ attr_reader :last_source
5
+
6
+ def handle(action, page)
7
+ screen_width = terminal_width
8
+
9
+ @last_source = page.url if action == :source
10
+
11
+ return unless action == :check
12
+ @count += 1
13
+
14
+ if page.valid?
15
+ status = "PASS"
16
+ color = "!txtgrn!"
17
+ else
18
+ @failed += 1
19
+ status = "FAIL"
20
+ color = "!txtred!"
21
+
22
+ if last_source
23
+ resay "!txtpur!SOURCE #{last_source}"
24
+ @last_source = nil
25
+ end
26
+
27
+ resay " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
28
+ end
29
+
30
+ message = "[#{failed}/#{count} @ #{page.depth}] #{status}"
31
+ remaining_width = screen_width - message.size - 4
32
+ url = page.url[0..remaining_width]
33
+ resay "[#{failed}/#{count} @ #{page.depth}] #{color}#{status}!txtrst! #{url} "
34
+ end
35
+
36
+ def footer_prefix
37
+ terminal? ? "\033[2K\n" : "\n"
38
+ end
39
+
40
+
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,30 @@
1
+ module SLA
2
+ module Formatters
3
+ class Verbose < Base
4
+ def handle(action, page)
5
+ case action
6
+ when :source
7
+ say "\n!txtpur!SOURCE #{page.url}"
8
+
9
+ when :check
10
+ @count += 1
11
+
12
+ if page.valid?
13
+ status = "PASS"
14
+ color = "!txtgrn!"
15
+ else
16
+ @failed += 1
17
+ status = "FAIL"
18
+ color = "!txtred!"
19
+ end
20
+
21
+ say " #{color}#{status}!txtrst! #{page.depth} #{page.url}"
22
+
23
+ when :skip
24
+ say " !txtblu!SKIP!txtrst! #{page.depth} #{page.url}"
25
+
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
data/lib/sla/page.rb ADDED
@@ -0,0 +1,83 @@
1
+ class Page
2
+ attr_reader :uri, :parent, :depth
3
+
4
+ def initialize(uri, parent: nil, depth: 0)
5
+ if uri.is_a? String
6
+ uri = "http://#{uri}" unless uri.start_with? 'http'
7
+ uri = URI.parse uri
8
+ uri.fragment = false
9
+ end
10
+
11
+ @uri, @parent, @depth = uri, parent, depth
12
+ end
13
+
14
+ def error
15
+ response.error
16
+ end
17
+
18
+ def external?
19
+ byebug unless uri.respond_to? :host
20
+ uri.host != parent.uri.host
21
+ end
22
+
23
+ def inspect
24
+ "#<Page url: #{url}, depth: #{depth}>"
25
+ end
26
+
27
+ def pages
28
+ @pages ||= pages!
29
+ end
30
+
31
+ def url
32
+ uri.to_s
33
+ end
34
+
35
+ def valid?
36
+ !response.error
37
+ end
38
+
39
+ private
40
+
41
+ def anchors
42
+ @anchors ||= dom.css('a[href]')
43
+ end
44
+
45
+ def content
46
+ @content ||= response.content
47
+ end
48
+
49
+ def dom
50
+ @dom ||= Nokogiri::HTML content
51
+ end
52
+
53
+ def normalize_url(new_url)
54
+ new_url = URI.parse new_url
55
+ new_url.fragment = false
56
+
57
+ result = new_url.absolute? ? new_url : URI.join(url, new_url)
58
+
59
+ result.scheme =~ /^http/ ? result.to_s : nil
60
+ end
61
+
62
+ def pages!
63
+ result = {}
64
+ anchors.each do |a|
65
+ url = normalize_url a['href']
66
+ next unless url
67
+ page = Page.new url, parent: self, depth: depth+1
68
+ result[url] = page
69
+ end
70
+ result.values
71
+ end
72
+
73
+ def response
74
+ @response ||= response!
75
+ end
76
+
77
+ def response!
78
+ response = WebCache.get url
79
+ @uri = response.base_uri
80
+ response
81
+ end
82
+
83
+ end
data/lib/sla/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module SLA
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sla
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-26 00:00:00.000000000 Z
11
+ date: 2019-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colsole
@@ -17,6 +17,9 @@ dependencies:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0.7'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.7.1
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -24,48 +27,51 @@ dependencies:
24
27
  - - "~>"
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0.7'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.7.1
27
33
  - !ruby/object:Gem::Dependency
28
- name: super_docopt
34
+ name: mister_bin
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - "~>"
32
38
  - !ruby/object:Gem::Version
33
- version: '0.1'
39
+ version: '0.7'
34
40
  type: :runtime
35
41
  prerelease: false
36
42
  version_requirements: !ruby/object:Gem::Requirement
37
43
  requirements:
38
44
  - - "~>"
39
45
  - !ruby/object:Gem::Version
40
- version: '0.1'
46
+ version: '0.7'
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: webcache
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
51
  - - "~>"
46
52
  - !ruby/object:Gem::Version
47
- version: '0.4'
53
+ version: '0.6'
48
54
  type: :runtime
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
51
57
  requirements:
52
58
  - - "~>"
53
59
  - !ruby/object:Gem::Version
54
- version: '0.4'
60
+ version: '0.6'
55
61
  - !ruby/object:Gem::Dependency
56
62
  name: nokogiri
57
63
  requirement: !ruby/object:Gem::Requirement
58
64
  requirements:
59
65
  - - "~>"
60
66
  - !ruby/object:Gem::Version
61
- version: '1.8'
67
+ version: '1.10'
62
68
  type: :runtime
63
69
  prerelease: false
64
70
  version_requirements: !ruby/object:Gem::Requirement
65
71
  requirements:
66
72
  - - "~>"
67
73
  - !ruby/object:Gem::Version
68
- version: '1.8'
74
+ version: '1.10'
69
75
  description: Check for broken links on a website
70
76
  email: db@dannyben.com
71
77
  executables:
@@ -77,10 +83,13 @@ files:
77
83
  - bin/sla
78
84
  - lib/sla.rb
79
85
  - lib/sla/checker.rb
80
- - lib/sla/command_line.rb
81
- - lib/sla/docopt.txt
86
+ - lib/sla/command.rb
82
87
  - lib/sla/exceptions.rb
83
- - lib/sla/link.rb
88
+ - lib/sla/formatters/base.rb
89
+ - lib/sla/formatters/simple.rb
90
+ - lib/sla/formatters/tty.rb
91
+ - lib/sla/formatters/verbose.rb
92
+ - lib/sla/page.rb
84
93
  - lib/sla/version.rb
85
94
  homepage: https://github.com/DannyBen/sla
86
95
  licenses:
@@ -1,83 +0,0 @@
1
- module SLA
2
- class CommandLine < SuperDocopt::Base
3
- include Colsole
4
-
5
- version VERSION
6
- docopt File.expand_path 'docopt.txt', __dir__
7
- subcommands ['check']
8
-
9
- def before_execute
10
- WebCache.life = args['--cache']
11
- WebCache.dir = args['--cache-dir'] if args['--cache-dir']
12
- end
13
-
14
- def check
15
- checker = Checker.new
16
- checker.max_depth = args['--depth'].to_i
17
- logfile = args['--log']
18
- start_url = args['DOMAIN']
19
- ignore = args['--ignore']
20
- ignore = ignore.split " " if ignore
21
- screen_width = terminal_width
22
-
23
- checker.check_external = args['--external']
24
- checker.ignore = ignore if ignore
25
-
26
- start_url = "http://#{start_url}" unless start_url[0..3] == 'http'
27
-
28
- File.unlink logfile if logfile and File.exist? logfile
29
-
30
- count = 1
31
- failed = 0
32
-
33
- log = []
34
-
35
- checker.check start_url do |link|
36
- status = link.status
37
- colored_status = color_status status
38
- if status != '200'
39
- failed +=1
40
- resay "#{colored_status} #{link.ident}"
41
- log.push "#{status} #{link.ident}" if logfile
42
- end
43
-
44
- message = "[#{failed}/#{count} @ #{link.depth}] #{status}"
45
- remaining_width = screen_width - message.size - 4
46
- trimmed_link = link.ident[0..remaining_width]
47
-
48
- resay "[#{failed}/#{count} @ #{link.depth}] #{colored_status} #{trimmed_link} "
49
- count += 1
50
-
51
- sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
52
- end
53
-
54
- color = failed > 0 ? '!txtred!' : '!txtgrn!'
55
- resay "#{color}Done checking #{count} links with #{failed} failures"
56
-
57
- if logfile
58
- logstring = log.join("\n") + "\n"
59
- File.write logfile, logstring
60
- end
61
-
62
- if failed > 0 and !ENV['SLA_ALLOW_FAILS']
63
- raise BrokenLinks
64
- end
65
- end
66
-
67
- private
68
-
69
- def color_status(status)
70
- case status
71
- when '200'
72
- '!txtgrn!200!txtrst!'
73
- when '404'
74
- '!txtred!404!txtrst!'
75
- else
76
- # :nocov:
77
- status
78
- # :nocov:
79
- end
80
- end
81
-
82
- end
83
- end
data/lib/sla/docopt.txt DELETED
@@ -1,43 +0,0 @@
1
- SLA
2
-
3
- Usage:
4
- sla check DOMAIN [options]
5
- sla (-h|--help|--version)
6
-
7
- Commands:
8
- check
9
- Start checking for broken links on a given domain.
10
-
11
- Options:
12
- --depth, -d DEPTH
13
- Set crawling depth [default: 5].
14
-
15
- --cache, -c LIFE
16
- Set cache life [default: 1d]. LIFE can be in any of the
17
- following formats:
18
- 10 = 10 seconds
19
- 20s = 20 seconds
20
- 10m = 10 minutes
21
- 10h = 10 hours
22
- 10d = 10 days
23
-
24
- --cache-dir DIR
25
- Set the cache directory.
26
-
27
- --external, -x
28
- Also check external links.
29
-
30
- --log, -l LOGFILE
31
- Save errors to log file.
32
-
33
- --ignore, -i URLS
34
- Specify a list of space delimited URLs to skip.
35
- URLs that start with the strings in this list will be skipped.
36
-
37
- Examples:
38
- sla check example.com
39
- sla check example.com -c10m -d10
40
- sla check example.com --cache-dir my_cache
41
- sla check example.com --depth 10 --log my_log.log
42
- sla check example.com --cache 30d
43
- sla check example.com --ignore "/admin /customer/login"
data/lib/sla/link.rb DELETED
@@ -1,84 +0,0 @@
1
- module SLA
2
- class Link
3
- attr_accessor :text, :href, :status, :depth, :real_uri
4
- attr_reader :parent
5
-
6
- def initialize(href, opts={})
7
- @href = href
8
- @text = opts[:text]
9
- @depth = opts[:depth] || 1
10
- self.parent = opts[:parent] || @href
11
- end
12
-
13
- def valid?
14
- validate
15
- status == '200'
16
- end
17
-
18
- def validate
19
- content
20
- end
21
-
22
- def content
23
- @content ||= content!
24
- end
25
-
26
- def content!
27
- response = WebCache.get url
28
- @status = response.error ? '404' : '200'
29
- @real_uri = response.base_uri
30
- response.content
31
- end
32
-
33
- def ident
34
- external? ? full_uri.to_s : full_uri.request_uri
35
- end
36
-
37
- def url
38
- full_uri.to_s
39
- end
40
-
41
- def doc
42
- @doc ||= Nokogiri::HTML content
43
- end
44
-
45
- def sublinks
46
- @sublinks ||= sublinks!
47
- end
48
-
49
- def sublinks!
50
- anchors = doc.css('a[href]')
51
- result = []
52
- anchors.each do |a|
53
- link = Link.new a['href'], text: a.text, parent: real_uri, depth: depth+1
54
- result.push link if link.relevant?
55
- end
56
- result
57
- end
58
-
59
- def uri
60
- @uri ||= URI.parse href
61
- end
62
-
63
- def parent=(url)
64
- @parent = url.is_a?(String) ? URI.parse(url) : url
65
- end
66
-
67
- def path
68
- uri.request_uri
69
- end
70
-
71
- def full_uri
72
- return uri if uri.absolute? || !parent.absolute?
73
- URI.join parent, href
74
- end
75
-
76
- def external?
77
- parent.host != full_uri.host
78
- end
79
-
80
- def relevant?
81
- full_uri.scheme =~ /^http/
82
- end
83
- end
84
- end