sla 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5fdee1ea51728193de0bfeaa1b97b21c024f023f
4
+ data.tar.gz: 5cf1689f5408018346cd9cc3248e40cb7825ee5e
5
+ SHA512:
6
+ metadata.gz: e812c02cd97d3b351f0f8ff988b9e6163d8d0c569ecd88048d5a36412bd8acc211bd9c24db3ad750b9ee345f3fe3d5fbbc1f859aab1858a45109941acea255cd
7
+ data.tar.gz: f37e16e97dd8997e1a51f460d85b8fcd3775902550156263d5955bf78956e6069548d7650cba2916d15791ae9c298b4a47e305a351229b3e9580d5d3d8d85846
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ Site Link Analyzer
2
+ ==================================================
3
+
data/lib/sla/base.rb ADDED
@@ -0,0 +1,17 @@
1
module SLA
  # Common ancestor of the SLA classes. Gives every subclass access to the
  # shared cache object and to the shared UrlManager.
  class Base
    attr_accessor :domain

    # Returns the cache object exposed by the Cache singleton, looking it
    # up only on first use.
    def cache
      @cache = Cache.instance.cache unless @cache
      @cache
    end

    # Returns the UrlManager singleton, looking it up only on first use.
    def url_manager
      @url_manager = UrlManager.instance unless @url_manager
      @url_manager
    end

    # Returns the base URL held by the URL manager, converted to a String.
    def base_url
      root = url_manager.base_url
      root.to_s
    end
  end
end
data/lib/sla/cache.rb ADDED
@@ -0,0 +1,15 @@
1
module SLA
  # Singleton that owns the WebCache object shared by the rest of the gem.
  class Cache
    include Singleton

    # Returns the shared WebCache, building it on first access.
    def cache
      @cache ||= cache!
    end

    # Builds a brand new WebCache whose life is set to 24 hours
    # (expressed in seconds). Does not touch the memoised instance.
    def cache!
      WebCache.new.tap { |store| store.life = 24 * 60 * 60 }
    end
  end
end
@@ -0,0 +1,45 @@
1
module SLA
  # Crawls pages starting from the base URL and hands every visited Page to
  # the caller's block. Each page is checked and then the links first seen
  # on it are followed, up to +max_depth+ levels deep.
  class Checker < Base
    include Colsole

    attr_accessor :checked_links, :results, :max_depth, :next_check

    def initialize
      @max_depth = 10
      @checked_links = []
      @next_check = []
    end

    # Number of distinct URLs registered so far (seed URLs included).
    def count
      checked_links.count
    end

    # Checks every URL in +urls+ (defaults to the base URL) and recurses
    # into the links discovered on each page while +depth+ is below
    # +max_depth+. The block receives each Page after it was validated.
    def on_check(urls=nil, depth=1, &block)
      urls ||= [base_url]
      self.next_check = []

      # Register the URLs we are about to visit. Without this, the start
      # page was never recorded and got crawled again as soon as any page
      # linked back to it.
      urls.each { |url| remember url }

      urls.each do |url|
        # Give each page its own bucket so that only the links found on
        # this page are handed to the recursive call below.
        self.next_check = []
        check_url url, depth, &block
        on_check next_check, depth + 1, &block if depth < max_depth
      end
    end

    # Validates a single URL, yields its Page (when a block was given) and
    # queues the not-yet-seen links of that page in +next_check+. Links
    # are not collected once +max_depth+ has been reached.
    def check_url(url, depth, &block)
      page = Page.new url, depth: depth, base_url: base_url
      page.validate

      yield page if block_given?
      return if depth >= max_depth

      page.links.each do |link|
        next_check.push link.url if remember link.url
      end
    end

    private

    # Records +url+ as seen. Returns true when it was new, false when it
    # had been registered before.
    def remember(url)
      return false if checked_links.include? url

      checked_links.push url
      true
    end
  end
end
@@ -0,0 +1,68 @@
1
module SLA
  # Command line front end: parses arguments with Docopt and runs the
  # requested command.
  class CommandLine < Base
    include Singleton
    include Colsole

    # Parses +argv+ against the usage text stored in docopt.txt (next to
    # this file) and dispatches the result. When Docopt exits (help,
    # version or bad usage), its message is printed instead.
    def execute(argv=[])
      usage = File.read "#{File.dirname(__FILE__)}/docopt.txt"
      handle Docopt::docopt(usage, argv: argv, version: VERSION)
    rescue Docopt::Exit => e
      puts e.message
    end

    # Runs the command selected in the parsed +args+ hash. Only the
    # 'check' command is handled here.
    def handle(args)
      return unless args['check']

      @domain     = args['DOMAIN']
      @max_depth  = args['--depth'].to_i
      @cache_life = args['--cache'].to_i
      check_domain
    end

    # Crawls the configured domain, printing one line per checked page
    # and writing the same information (without colors) to log.log.
    def check_domain
      checker = Checker.new
      checker.max_depth = @max_depth
      checker.cache.life = @cache_life
      url_manager.base_url = @domain

      # Start every run with a fresh log file.
      File.unlink 'log.log' if File.exist? 'log.log'

      index = 1
      failures = 0

      File.open('log.log', 'a') do |log|
        checker.on_check do |page|
          marker = '-' * page.depth
          status = page.status
          # Anything other than '200' counts as a failure.
          failures += 1 unless status == '200'

          say "#{index} #{color_status status} #{marker} #{page.name}"
          log.puts "#{index} #{status} #{marker} #{page.name}"
          index += 1
        end

        summary_color = failures > 0 ? '!txtred!' : '!txtgrn!'
        say "#{summary_color}Done with #{failures} failures"
        log.puts "Done with #{failures} failures"
      end
    end

    # Wraps the known status codes in color markers for +say+; any other
    # status is returned untouched.
    def color_status(status)
      decorated = {
        '200' => '!txtgrn!200!txtrst!',
        '404' => '!txtred!404!txtrst!'
      }
      decorated.fetch(status, status)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ SLA
2
+
3
+ Usage:
4
+ sla check DOMAIN [options]
5
+ sla (-h|--help|--version)
6
+
7
+ Commands:
8
+ check
9
+ Start checking for broken links on a given domain
10
+
11
+ Options:
12
+ --depth, -d DEPTH
13
+ Set crawling depth [default: 5]
14
+
15
+ --cache, -c LIFE
16
+ Set cache life in seconds [default: 86400]
17
+
18
+ Examples:
19
+ sla check example.com
20
+ sla check example.com -c360 -d10
data/lib/sla/link.rb ADDED
@@ -0,0 +1,15 @@
1
module SLA
  # A hyperlink found on a page: its anchor text and its target URL.
  class Link
    attr_accessor :text, :url

    def initialize(text, url)
      @text, @url = text, url
    end

    # Returns the request part of the URL (path plus query string, if
    # any), as computed by URI.
    def path
      URI.parse(url).request_uri
    end
  end
end
data/lib/sla/page.rb ADDED
@@ -0,0 +1,75 @@
1
module SLA
  # A single page of the crawled site. Content is fetched lazily through
  # the shared cache; the status, parsed document and outgoing links are
  # derived from it and memoised.
  class Page < Base
    attr_accessor :depth, :status, :base_uri
    attr_reader :url

    # url  - the address of the page.
    # opts - only :depth is read here; any other key is ignored.
    def initialize(url, opts={})
      @url = url
      @base_uri = url
      @status = '000' # placeholder until the page has been fetched
      self.depth = opts[:depth] if opts[:depth]
    end

    # Fetches the page if needed and tells whether its status is '200'.
    def valid?
      content
      status == '200'
    end

    # Fetches the page if needed so that +status+ gets populated.
    def validate
      content
    end

    # Display name of the page, memoised.
    def name
      @name ||= name!
    end

    # The request URI of the page, or the full URL when the request URI
    # is empty or just '/'.
    def name!
      uri = URI.parse url
      if uri.request_uri.empty? || uri.request_uri == '/'
        url
      else
        uri.request_uri
      end
    end

    # URL scheme of the page, memoised.
    def protocol
      @protocol ||= protocol!
    end

    def protocol!
      uri = URI.parse url
      uri.scheme
    end

    # Body of the page, fetched once.
    def content
      @content ||= content!
    end

    # Fetches the page through the cache and updates +status+ and
    # +base_uri+ from the response. Any error reported by the response is
    # recorded as '404'.
    def content!
      response = cache.get url
      self.status = response.error ? '404' : '200'
      self.base_uri = response.base_uri
      response.content
    end

    # Parsed HTML document of the page, memoised.
    def doc
      @doc ||= Nokogiri::HTML content
    end

    # Links found on the page, memoised.
    def links
      @links ||= links!
    end

    # Collects the <a> elements of the document as Link objects with
    # absolute URLs. Anchors without an href attribute and targets
    # rejected by the URL manager are skipped.
    def links!
      doc.css('a').each_with_object([]) do |anchor, found|
        target = anchor['href']
        # <a name="..."> style anchors have no href; there is nothing to
        # follow and the URL manager cannot resolve nil.
        next if target.nil?

        href = url_manager.absolute target, base_uri
        next unless href

        found.push Link.new(anchor.text, href)
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module SLA
  # Singleton that holds the base URL of the crawled site and converts
  # hrefs found on pages into absolute URLs on that site.
  class UrlManager < Base
    include Singleton

    attr_reader :uri, :base_url

    # Sets the base URL. A missing scheme defaults to http://. The URL is
    # requested through the cache and the base URI reported by the
    # response is what gets stored.
    def base_url=(url)
      # Require a real scheme prefix; comparing only the first four
      # characters with 'http' misread hosts such as "httpbin.org".
      url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
      response = cache.get url
      @base_url = response.base_uri
    end

    # Resolves +relative+ against +base+ (defaults to the base URL).
    #
    # Returns the absolute URL as a String, or false when the link should
    # not be followed: no href, tel:/mailto: links, in-page anchors,
    # targets on another host, or hrefs that cannot be parsed as a URI.
    def absolute(relative, base=nil)
      return false if relative.nil?
      # Match the scheme including its colon so that ordinary relative
      # paths such as "television.html" are not discarded.
      return false if relative =~ /\A(?:(?:tel|mailto):|#)/i

      base ||= base_url
      # URI.encode was removed in Ruby 3.0; the parser's escape method is
      # the same implementation on the Ruby versions that had URI.encode.
      relative = URI::DEFAULT_PARSER.escape relative
      result = URI.join(base, relative)

      return false unless result.host == base_url.host
      result.to_s
    rescue URI::Error
      # A malformed href on one page should not abort the whole crawl.
      false
    end
  end
end
@@ -0,0 +1,3 @@
1
module SLA
  # Version of the gem; also passed to Docopt for the --version output.
  VERSION = '0.0.1'
end
data/lib/sla.rb ADDED
@@ -0,0 +1,20 @@
1
+ require 'webcache'
2
+ require 'nokogiri'
3
+ require 'colsole'
4
+ require 'singleton'
5
+ require 'docopt'
6
+ require 'uri'
7
+
8
+ require 'sla/version'
9
+ require 'sla/base'
10
+ require 'sla/page'
11
+ require 'sla/link'
12
+ require 'sla/checker'
13
+ require 'sla/command_line'
14
+ require 'sla/cache'
15
+ require 'sla/url_manager'
16
+
17
# Debugging helpers (remove before deploy). byebug and awesome_print are
# only declared as development dependencies of the gem, so they may not be
# installed; load each one on a best-effort basis instead of failing with
# LoadError when the library is required.
%w[byebug awesome_print pp].each do |debug_lib|
  begin
    require debug_lib
  rescue LoadError
    # Not installed in this environment; the gem works without it.
    nil
  end
end
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sla
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Danny Ben Shitrit
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: runfile
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.8'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.8'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colsole
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: docopt
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.5'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: webcache
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.6'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.6'
83
+ - !ruby/object:Gem::Dependency
84
+ name: runfile
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.8'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.8'
97
+ - !ruby/object:Gem::Dependency
98
+ name: adsf
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.2'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.2'
111
+ - !ruby/object:Gem::Dependency
112
+ name: runfile-tasks
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.4'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.4'
125
+ - !ruby/object:Gem::Dependency
126
+ name: byebug
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '9.0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '9.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: awesome_print
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '1.7'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '1.7'
153
+ description: Check for broken links on a website
154
+ email: db@dannyben.com
155
+ executables: []
156
+ extensions: []
157
+ extra_rdoc_files: []
158
+ files:
159
+ - README.md
160
+ - lib/sla.rb
161
+ - lib/sla/base.rb
162
+ - lib/sla/cache.rb
163
+ - lib/sla/checker.rb
164
+ - lib/sla/command_line.rb
165
+ - lib/sla/docopt.txt
166
+ - lib/sla/link.rb
167
+ - lib/sla/page.rb
168
+ - lib/sla/url_manager.rb
169
+ - lib/sla/version.rb
170
+ homepage: https://github.com/DannyBen/sla
171
+ licenses:
172
+ - MIT
173
+ metadata: {}
174
+ post_install_message:
175
+ rdoc_options: []
176
+ require_paths:
177
+ - lib
178
+ required_ruby_version: !ruby/object:Gem::Requirement
179
+ requirements:
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ version: 2.0.0
183
+ required_rubygems_version: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ requirements: []
189
+ rubyforge_project:
190
+ rubygems_version: 2.5.1
191
+ signing_key:
192
+ specification_version: 4
193
+ summary: Site Link Analyzer
194
+ test_files: []