sla 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 20e5f6a05020d9aecc2ad6754a7a22cee70e18f45e3bc5f679daabb2042c749a
4
- data.tar.gz: 2d5b0290dd9dd25169bbb89ff20df4aea66db616ea65585a56a982866579f568
3
+ metadata.gz: 408ec43b2728bd5d2a1461b747b487506976e020894684d8270154a5e9b807a1
4
+ data.tar.gz: 8620e841cde6cc02ba87e2c35a32d278c197f65363e85fad6fd4adec3adcbf1e
5
5
  SHA512:
6
- metadata.gz: cc586125c98e82786f0c747fcdf9d21f45d933564e4aba416036d21a39a70c72bdb974665c8e6a6f5c5b17a4ad323a52bb896205f4e5bb7e49002dd4370b1d84
7
- data.tar.gz: 10fadfccf9da8a7b427f6512bfb6365f890dc70cc01646280a5de51aeebee38026942dc78b30e1c5dac9196015e3e9565a804bf13e8c9c3c7c2bed00c9a5ca49
6
+ metadata.gz: d7b55730a4502b09df9c59ca8a3f0b49aa2986a2b6b548f8f26e8a8e5650075497acaef75f020e93354eb2f33d0538c6176ad3d3d070e11b0600e8ca6eddab34
7
+ data.tar.gz: 94eeaa6c32a22a8f894bf6c65b0b74084120a071897028d4888952ac7d7dd28d10dcfe289a5230bd053c06b2a307ee32ddcf96e50f8bfc900c94eaa03b60a4ca
data/bin/sla CHANGED
@@ -1,10 +1,18 @@
1
1
  #!/usr/bin/env ruby
2
-
3
2
  require 'sla'
3
+ include Colsole
4
+
5
+ router = MisterBin::Runner.new handler: SLA::Command
4
6
 
5
7
  begin
6
- SLA::CommandLine.execute ARGV
8
+ exit router.run ARGV
9
+
7
10
  rescue SLA::BrokenLinks => e
8
11
  puts "(exit code 1)"
9
12
  exit 1
13
+
14
+ rescue Interrupt
15
+ puts "\nGoodbye"
16
+ exit 1
17
+
10
18
  end
data/lib/sla.rb CHANGED
@@ -2,14 +2,20 @@ require 'webcache'
2
2
  require 'nokogiri'
3
3
  require 'colsole'
4
4
  require 'singleton'
5
- require 'super_docopt'
5
+ require 'mister_bin'
6
6
  require 'uri'
7
7
 
8
8
  require 'sla/version'
9
9
  require 'sla/exceptions'
10
- require 'sla/link'
10
+ require 'sla/page'
11
11
  require 'sla/checker'
12
- require 'sla/command_line'
12
+
13
+ require 'sla/formatters/base'
14
+ require 'sla/formatters/verbose'
15
+ require 'sla/formatters/simple'
16
+ require 'sla/formatters/tty'
17
+
18
+ require 'sla/command'
13
19
 
14
20
  if ENV['BYEBUG']
15
21
  # :nocov:
data/lib/sla/checker.rb CHANGED
@@ -1,42 +1,60 @@
1
1
  module SLA
2
2
  class Checker
3
- include Colsole
3
+ attr_reader :max_depth, :ignore, :check_external
4
4
 
5
- attr_accessor :max_depth, :checked_links, :check_external, :ignore
5
+ def initialize(max_depth: 5, ignore: nil, check_external: false)
6
+ @max_depth = max_depth
7
+ @ignore = ignore
8
+ @check_external = check_external
9
+ end
6
10
 
7
- def initialize
8
- @max_depth = 10
9
- @checked_links = []
10
- @check_external = false
11
- @ignore = []
11
+ def deeply_checked
12
+ @deeply_checked ||= {}
12
13
  end
13
14
 
14
- def count
15
- checked_links.count
15
+ def checked
16
+ @checked ||= {}
16
17
  end
17
18
 
18
- def check(link, depth=1, &block)
19
- link = Link.new link, depth: depth if link.is_a? String
19
+ def check(page, &block)
20
+ return if ignore? page
21
+ return if page.depth >= max_depth
22
+ return unless page.valid?
23
+
24
+ yield [:source, page] if block_given?
20
25
 
21
- return if link.external? && !check_external
22
- ignore.each do |ignored|
23
- return if link.ident.start_with? ignored
26
+ pages = page.pages
27
+ pages.reject! { |page| page.external? } if !check_external
28
+
29
+ pages.each do |page|
30
+ if checked.has_key? page.url or ignore? page
31
+ yield [:skip, page] if block_given?
32
+ else
33
+ checked[page.url] = true
34
+ yield [:check, page] if block_given?
35
+ end
24
36
  end
25
37
 
26
- link.validate
27
- yield link if block_given?
38
+ pages.each do |page|
39
+ next if deeply_checked.has_key? page.url
40
+ deeply_checked[page.url] = true
41
+ next if page.external?
42
+ check page, &block
43
+ end
44
+ end
28
45
 
29
- return if checked_links.include? link.url
46
+ private
30
47
 
31
- checked_links.push link.url
48
+ def ignore?(page)
49
+ return false unless ignore
32
50
 
33
- return if link.external?
34
- return unless link.valid?
35
- return if depth >= max_depth
36
-
37
- link.sublinks.each do |sublink|
38
- check sublink, depth+1, &block
51
+ ignore.each do |text|
52
+ return true if page.url.include? text
39
53
  end
54
+
55
+ false
40
56
  end
57
+
41
58
  end
42
- end
59
+ end
60
+
@@ -0,0 +1,70 @@
1
+ module SLA
2
+ class Command < MisterBin::Command
3
+ include Colsole
4
+
5
+ help "Site Link Analyzer"
6
+
7
+ version VERSION
8
+
9
+ usage "sla URL [options]"
10
+ usage "sla --help | -h | --version"
11
+
12
+ param "URL", "URL to scan"
13
+
14
+ option "--verbose, -v", "Show detailed output"
15
+ option "--simple, -s", "Show simple output of errors only"
16
+ option "--depth, -d DEPTH", "Set crawling depth [default: 5]"
17
+ option "--external, -x", "Also check external links"
18
+ option "--ignore, -i URLS", "Specify a list of space delimited patterns to skip\nURLs that contain any of the strings in this list will be skipped"
19
+ option "--cache, -c LIFE", "Set cache life [default: 1d]. LIFE can be in any of the following formats:\n 10 = 10 seconds\n 20s = 20 seconds\n 10m = 10 minutes\n 10h = 10 hours\n 10d = 10 days"
20
+ option "--cache-dir DIR", "Set the cache directory"
21
+
22
+ example "sla example.com"
23
+ example "sla example.com -c10m -d10"
24
+ example "sla example.com --cache-dir my_cache"
25
+ example "sla example.com --depth 10"
26
+ example "sla example.com --cache 30d --external"
27
+ example "sla example.com --simple > out.log"
28
+ example "sla example.com --ignore \"/admin /customer/login\""
29
+
30
+ environment "SLA_SLEEP", "Set number of seconds to sleep between calls (for debugging purposes)"
31
+
32
+ def run
33
+ WebCache.life = args['--cache']
34
+ WebCache.dir = args['--cache-dir'] if args['--cache-dir']
35
+
36
+ max_depth = args['--depth'].to_i
37
+ url = args['URL']
38
+ ignore = args['--ignore']
39
+ ignore = ignore.split " " if ignore
40
+ check_external = args['--external']
41
+
42
+ checker = Checker.new max_depth: max_depth,
43
+ ignore: ignore, check_external: check_external
44
+
45
+ formatter = if args['--verbose']
46
+ Formatters::Verbose.new
47
+ elsif args['--simple']
48
+ Formatters::Simple.new
49
+ else
50
+ Formatters::TTY.new
51
+ end
52
+
53
+ execute url, checker, formatter
54
+ end
55
+
56
+ def execute(url, checker, formatter)
57
+ page = Page.new url
58
+ checker.check page do |action, page|
59
+ success = formatter.handle action, page
60
+ sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
61
+ end
62
+
63
+ formatter.footer
64
+
65
+ unless formatter.success? or ENV['SLA_ALLOW_FAILS']
66
+ raise BrokenLinks
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,34 @@
1
+ module SLA
2
+ module Formatters
3
+ class Base
4
+ include Colsole
5
+
6
+ attr_accessor :count, :failed
7
+
8
+ def initialize
9
+ @count = 0
10
+ @failed = 0
11
+ end
12
+
13
+ def success?
14
+ failed == 0
15
+ end
16
+
17
+ def handle(action, page)
18
+ # :nocov:
19
+ raise NotImplementedError
20
+ # :nocov:
21
+ end
22
+
23
+ def footer_prefix
24
+ "\n"
25
+ end
26
+
27
+ def footer
28
+ color = success? ? '!txtgrn!' : '!txtred!'
29
+ say "#{footer_prefix}#{color}Checked #{count} pages with #{failed} failures"
30
+ end
31
+
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,26 @@
1
+ module SLA
2
+ module Formatters
3
+ class Simple < Base
4
+ attr_reader :last_source
5
+
6
+ def handle(action, page)
7
+ @last_source = page.url if action == :source
8
+
9
+ return unless action == :check
10
+ @count += 1
11
+
12
+ return if page.valid?
13
+
14
+ @failed += 1
15
+
16
+ if last_source
17
+ say "!txtpur!SOURCE #{last_source}"
18
+ @last_source = nil
19
+ end
20
+
21
+ say " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
22
+ end
23
+
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,43 @@
1
+ module SLA
2
+ module Formatters
3
+ class TTY < Base
4
+ attr_reader :last_source
5
+
6
+ def handle(action, page)
7
+ screen_width = terminal_width
8
+
9
+ @last_source = page.url if action == :source
10
+
11
+ return unless action == :check
12
+ @count += 1
13
+
14
+ if page.valid?
15
+ status = "PASS"
16
+ color = "!txtgrn!"
17
+ else
18
+ @failed += 1
19
+ status = "FAIL"
20
+ color = "!txtred!"
21
+
22
+ if last_source
23
+ resay "!txtpur!SOURCE #{last_source}"
24
+ @last_source = nil
25
+ end
26
+
27
+ resay " !txtred!FAIL!txtrst! #{page.depth} #{page.url}"
28
+ end
29
+
30
+ message = "[#{failed}/#{count} @ #{page.depth}] #{status}"
31
+ remaining_width = screen_width - message.size - 4
32
+ url = page.url[0..remaining_width]
33
+ resay "[#{failed}/#{count} @ #{page.depth}] #{color}#{status}!txtrst! #{url} "
34
+ end
35
+
36
+ def footer_prefix
37
+ terminal? ? "\033[2K\n" : "\n"
38
+ end
39
+
40
+
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,30 @@
1
+ module SLA
2
+ module Formatters
3
+ class Verbose < Base
4
+ def handle(action, page)
5
+ case action
6
+ when :source
7
+ say "\n!txtpur!SOURCE #{page.url}"
8
+
9
+ when :check
10
+ @count += 1
11
+
12
+ if page.valid?
13
+ status = "PASS"
14
+ color = "!txtgrn!"
15
+ else
16
+ @failed += 1
17
+ status = "FAIL"
18
+ color = "!txtred!"
19
+ end
20
+
21
+ say " #{color}#{status}!txtrst! #{page.depth} #{page.url}"
22
+
23
+ when :skip
24
+ say " !txtblu!SKIP!txtrst! #{page.depth} #{page.url}"
25
+
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
data/lib/sla/page.rb ADDED
@@ -0,0 +1,83 @@
1
+ class Page
2
+ attr_reader :uri, :parent, :depth
3
+
4
+ def initialize(uri, parent: nil, depth: 0)
5
+ if uri.is_a? String
6
+ uri = "http://#{uri}" unless uri.start_with? 'http'
7
+ uri = URI.parse uri
8
+ uri.fragment = false
9
+ end
10
+
11
+ @uri, @parent, @depth = uri, parent, depth
12
+ end
13
+
14
+ def error
15
+ response.error
16
+ end
17
+
18
+ def external?
19
+ byebug unless uri.respond_to? :host
20
+ uri.host != parent.uri.host
21
+ end
22
+
23
+ def inspect
24
+ "#<Page url: #{url}, depth: #{depth}>"
25
+ end
26
+
27
+ def pages
28
+ @pages ||= pages!
29
+ end
30
+
31
+ def url
32
+ uri.to_s
33
+ end
34
+
35
+ def valid?
36
+ !response.error
37
+ end
38
+
39
+ private
40
+
41
+ def anchors
42
+ @anchors ||= dom.css('a[href]')
43
+ end
44
+
45
+ def content
46
+ @content ||= response.content
47
+ end
48
+
49
+ def dom
50
+ @dom ||= Nokogiri::HTML content
51
+ end
52
+
53
+ def normalize_url(new_url)
54
+ new_url = URI.parse new_url
55
+ new_url.fragment = false
56
+
57
+ result = new_url.absolute? ? new_url : URI.join(url, new_url)
58
+
59
+ result.scheme =~ /^http/ ? result.to_s : nil
60
+ end
61
+
62
+ def pages!
63
+ result = {}
64
+ anchors.each do |a|
65
+ url = normalize_url a['href']
66
+ next unless url
67
+ page = Page.new url, parent: self, depth: depth+1
68
+ result[url] = page
69
+ end
70
+ result.values
71
+ end
72
+
73
+ def response
74
+ @response ||= response!
75
+ end
76
+
77
+ def response!
78
+ response = WebCache.get url
79
+ @uri = response.base_uri
80
+ response
81
+ end
82
+
83
+ end
data/lib/sla/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module SLA
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sla
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-26 00:00:00.000000000 Z
11
+ date: 2019-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colsole
@@ -17,6 +17,9 @@ dependencies:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0.7'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.7.1
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -24,48 +27,51 @@ dependencies:
24
27
  - - "~>"
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0.7'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.7.1
27
33
  - !ruby/object:Gem::Dependency
28
- name: super_docopt
34
+ name: mister_bin
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - "~>"
32
38
  - !ruby/object:Gem::Version
33
- version: '0.1'
39
+ version: '0.7'
34
40
  type: :runtime
35
41
  prerelease: false
36
42
  version_requirements: !ruby/object:Gem::Requirement
37
43
  requirements:
38
44
  - - "~>"
39
45
  - !ruby/object:Gem::Version
40
- version: '0.1'
46
+ version: '0.7'
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: webcache
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
51
  - - "~>"
46
52
  - !ruby/object:Gem::Version
47
- version: '0.4'
53
+ version: '0.6'
48
54
  type: :runtime
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
51
57
  requirements:
52
58
  - - "~>"
53
59
  - !ruby/object:Gem::Version
54
- version: '0.4'
60
+ version: '0.6'
55
61
  - !ruby/object:Gem::Dependency
56
62
  name: nokogiri
57
63
  requirement: !ruby/object:Gem::Requirement
58
64
  requirements:
59
65
  - - "~>"
60
66
  - !ruby/object:Gem::Version
61
- version: '1.8'
67
+ version: '1.10'
62
68
  type: :runtime
63
69
  prerelease: false
64
70
  version_requirements: !ruby/object:Gem::Requirement
65
71
  requirements:
66
72
  - - "~>"
67
73
  - !ruby/object:Gem::Version
68
- version: '1.8'
74
+ version: '1.10'
69
75
  description: Check for broken links on a website
70
76
  email: db@dannyben.com
71
77
  executables:
@@ -77,10 +83,13 @@ files:
77
83
  - bin/sla
78
84
  - lib/sla.rb
79
85
  - lib/sla/checker.rb
80
- - lib/sla/command_line.rb
81
- - lib/sla/docopt.txt
86
+ - lib/sla/command.rb
82
87
  - lib/sla/exceptions.rb
83
- - lib/sla/link.rb
88
+ - lib/sla/formatters/base.rb
89
+ - lib/sla/formatters/simple.rb
90
+ - lib/sla/formatters/tty.rb
91
+ - lib/sla/formatters/verbose.rb
92
+ - lib/sla/page.rb
84
93
  - lib/sla/version.rb
85
94
  homepage: https://github.com/DannyBen/sla
86
95
  licenses:
@@ -1,83 +0,0 @@
1
- module SLA
2
- class CommandLine < SuperDocopt::Base
3
- include Colsole
4
-
5
- version VERSION
6
- docopt File.expand_path 'docopt.txt', __dir__
7
- subcommands ['check']
8
-
9
- def before_execute
10
- WebCache.life = args['--cache']
11
- WebCache.dir = args['--cache-dir'] if args['--cache-dir']
12
- end
13
-
14
- def check
15
- checker = Checker.new
16
- checker.max_depth = args['--depth'].to_i
17
- logfile = args['--log']
18
- start_url = args['DOMAIN']
19
- ignore = args['--ignore']
20
- ignore = ignore.split " " if ignore
21
- screen_width = terminal_width
22
-
23
- checker.check_external = args['--external']
24
- checker.ignore = ignore if ignore
25
-
26
- start_url = "http://#{start_url}" unless start_url[0..3] == 'http'
27
-
28
- File.unlink logfile if logfile and File.exist? logfile
29
-
30
- count = 1
31
- failed = 0
32
-
33
- log = []
34
-
35
- checker.check start_url do |link|
36
- status = link.status
37
- colored_status = color_status status
38
- if status != '200'
39
- failed +=1
40
- resay "#{colored_status} #{link.ident}"
41
- log.push "#{status} #{link.ident}" if logfile
42
- end
43
-
44
- message = "[#{failed}/#{count} @ #{link.depth}] #{status}"
45
- remaining_width = screen_width - message.size - 4
46
- trimmed_link = link.ident[0..remaining_width]
47
-
48
- resay "[#{failed}/#{count} @ #{link.depth}] #{colored_status} #{trimmed_link} "
49
- count += 1
50
-
51
- sleep ENV['SLA_SLEEP'].to_f if ENV['SLA_SLEEP']
52
- end
53
-
54
- color = failed > 0 ? '!txtred!' : '!txtgrn!'
55
- resay "#{color}Done checking #{count} links with #{failed} failures"
56
-
57
- if logfile
58
- logstring = log.join("\n") + "\n"
59
- File.write logfile, logstring
60
- end
61
-
62
- if failed > 0 and !ENV['SLA_ALLOW_FAILS']
63
- raise BrokenLinks
64
- end
65
- end
66
-
67
- private
68
-
69
- def color_status(status)
70
- case status
71
- when '200'
72
- '!txtgrn!200!txtrst!'
73
- when '404'
74
- '!txtred!404!txtrst!'
75
- else
76
- # :nocov:
77
- status
78
- # :nocov:
79
- end
80
- end
81
-
82
- end
83
- end
data/lib/sla/docopt.txt DELETED
@@ -1,43 +0,0 @@
1
- SLA
2
-
3
- Usage:
4
- sla check DOMAIN [options]
5
- sla (-h|--help|--version)
6
-
7
- Commands:
8
- check
9
- Start checking for broken links on a given domain.
10
-
11
- Options:
12
- --depth, -d DEPTH
13
- Set crawling depth [default: 5].
14
-
15
- --cache, -c LIFE
16
- Set cache life [default: 1d]. LIFE can be in any of the
17
- following formats:
18
- 10 = 10 seconds
19
- 20s = 20 seconds
20
- 10m = 10 minutes
21
- 10h = 10 hours
22
- 10d = 10 days
23
-
24
- --cache-dir DIR
25
- Set the cache directory.
26
-
27
- --external, -x
28
- Also check external links.
29
-
30
- --log, -l LOGFILE
31
- Save errors to log file.
32
-
33
- --ignore, -i URLS
34
- Specify a list of space delimited URLs to skip.
35
- URLs that start with the strings in this list will be skipped.
36
-
37
- Examples:
38
- sla check example.com
39
- sla check example.com -c10m -d10
40
- sla check example.com --cache-dir my_cache
41
- sla check example.com --depth 10 --log my_log.log
42
- sla check example.com --cache 30d
43
- sla check example.com --ignore "/admin /customer/login"
data/lib/sla/link.rb DELETED
@@ -1,84 +0,0 @@
1
- module SLA
2
- class Link
3
- attr_accessor :text, :href, :status, :depth, :real_uri
4
- attr_reader :parent
5
-
6
- def initialize(href, opts={})
7
- @href = href
8
- @text = opts[:text]
9
- @depth = opts[:depth] || 1
10
- self.parent = opts[:parent] || @href
11
- end
12
-
13
- def valid?
14
- validate
15
- status == '200'
16
- end
17
-
18
- def validate
19
- content
20
- end
21
-
22
- def content
23
- @content ||= content!
24
- end
25
-
26
- def content!
27
- response = WebCache.get url
28
- @status = response.error ? '404' : '200'
29
- @real_uri = response.base_uri
30
- response.content
31
- end
32
-
33
- def ident
34
- external? ? full_uri.to_s : full_uri.request_uri
35
- end
36
-
37
- def url
38
- full_uri.to_s
39
- end
40
-
41
- def doc
42
- @doc ||= Nokogiri::HTML content
43
- end
44
-
45
- def sublinks
46
- @sublinks ||= sublinks!
47
- end
48
-
49
- def sublinks!
50
- anchors = doc.css('a[href]')
51
- result = []
52
- anchors.each do |a|
53
- link = Link.new a['href'], text: a.text, parent: real_uri, depth: depth+1
54
- result.push link if link.relevant?
55
- end
56
- result
57
- end
58
-
59
- def uri
60
- @uri ||= URI.parse href
61
- end
62
-
63
- def parent=(url)
64
- @parent = url.is_a?(String) ? URI.parse(url) : url
65
- end
66
-
67
- def path
68
- uri.request_uri
69
- end
70
-
71
- def full_uri
72
- return uri if uri.absolute? || !parent.absolute?
73
- URI.join parent, href
74
- end
75
-
76
- def external?
77
- parent.host != full_uri.host
78
- end
79
-
80
- def relevant?
81
- full_uri.scheme =~ /^http/
82
- end
83
- end
84
- end