sla 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5fdee1ea51728193de0bfeaa1b97b21c024f023f
4
+ data.tar.gz: 5cf1689f5408018346cd9cc3248e40cb7825ee5e
5
+ SHA512:
6
+ metadata.gz: e812c02cd97d3b351f0f8ff988b9e6163d8d0c569ecd88048d5a36412bd8acc211bd9c24db3ad750b9ee345f3fe3d5fbbc1f859aab1858a45109941acea255cd
7
+ data.tar.gz: f37e16e97dd8997e1a51f460d85b8fcd3775902550156263d5955bf78956e6069548d7650cba2916d15791ae9c298b4a47e305a351229b3e9580d5d3d8d85846
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ Site Link Analyzer
2
+ ==================================================
3
+
data/lib/sla/base.rb ADDED
@@ -0,0 +1,17 @@
1
module SLA
  # Shared ancestor of the SLA classes. Gives every subclass lazy access to
  # the process-wide Cache and UrlManager singletons.
  class Base
    attr_accessor :domain

    # Cache object exposed by the Cache singleton; looked up once per
    # instance and remembered afterwards.
    def cache
      @cache ||= Cache.instance.cache
    end

    # The UrlManager singleton; looked up once per instance.
    def url_manager
      @url_manager ||= UrlManager.instance
    end

    # String form of whatever base URL the UrlManager currently holds.
    def base_url
      current = url_manager.base_url
      current.to_s
    end
  end
end
data/lib/sla/cache.rb ADDED
@@ -0,0 +1,15 @@
1
module SLA
  # Singleton that owns the WebCache instance used for page fetches.
  class Cache
    include Singleton

    # The memoized WebCache; built by #cache! on first use.
    def cache
      @cache ||= cache!
    end

    # Builds a brand-new WebCache whose life is set to 24 hours
    # (expressed in seconds). Does not touch the memoized instance.
    def cache!
      WebCache.new.tap do |fresh|
        fresh.life = 60 * 60 * 24 # 24 hours
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module SLA
  # Walks a site depth-first, starting from the base URL, and yields every
  # visited Page to the caller's block.
  class Checker < Base
    include Colsole

    attr_accessor :checked_links, :results, :max_depth, :next_check

    def initialize
      @max_depth = 10
      @checked_links = []
      @next_check = []
    end

    # Number of URLs recorded as checked so far.
    def count
      checked_links.count
    end

    # Checks each URL in +urls+ (defaults to the base URL) and then, while
    # +depth+ is below +max_depth+, recurses into the links that page
    # introduced. The block receives every Page after it was validated.
    #
    # @param urls [Array<String>, nil] URLs to check at this level
    # @param depth [Integer] depth assigned to the pages at this level
    def on_check(urls = nil, depth = 1, &block)
      urls ||= [base_url]

      urls.each do |url|
        # Seed URLs were previously never recorded, so a page linking back
        # to the start page caused it to be checked and reported again.
        checked_links.push url unless checked_links.include? url

        # Collect only the links discovered by this URL; they are crawled
        # before moving on to the next sibling.
        self.next_check = []
        check_url url, depth, &block

        discovered = next_check
        next if depth >= max_depth || discovered.empty?

        on_check discovered, depth + 1, &block
      end
    end

    # Validates a single URL, yields its Page, and queues every link on it
    # that has not been seen yet into +next_check+ (unless +max_depth+ has
    # been reached).
    #
    # @param url [String] URL to check
    # @param depth [Integer] depth recorded on the resulting Page
    def check_url(url, depth, &block)
      page = Page.new url, depth: depth, base_url: base_url
      page.validate

      yield page
      return if depth >= max_depth

      page.links.each do |link|
        next if checked_links.include? link.url

        checked_links.push link.url
        next_check.push link.url
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
module SLA
  # Command line front end. Parses arguments with Docopt using the usage
  # text in docopt.txt, runs the check, and reports each page both on the
  # terminal and in log.log in the current directory.
  class CommandLine < Base
    include Singleton
    include Colsole

    # Parses +argv+ and dispatches to #handle. Docopt exits (help, version,
    # usage errors) are printed instead of propagating.
    #
    # @param argv [Array<String>] command line arguments
    def execute(argv = [])
      doc = File.read File.join(File.dirname(__FILE__), 'docopt.txt')
      begin
        args = Docopt.docopt(doc, argv: argv, version: VERSION)
        handle args
      rescue Docopt::Exit => e
        puts e.message
      end
    end

    # Stores the options of the `check` command and starts the check.
    #
    # @param args [Hash] the Docopt result
    def handle(args)
      return unless args['check']

      @domain = args['DOMAIN']
      @max_depth = args['--depth'].to_i
      @cache_life = args['--cache'].to_i
      check_domain
    end

    # Crawls the configured domain and prints/logs one line per page,
    # followed by a summary of the number of failures.
    def check_domain
      checker = Checker.new
      checker.max_depth = @max_depth
      checker.cache.life = @cache_life
      url_manager.base_url = @domain

      count = 1
      failed = 0

      # File.open (not Kernel#open) in 'w' mode: starts each run with an
      # empty log without a separate unlink step.
      File.open('log.log', 'w') do |f|
        checker.on_check do |page|
          indent = '-' * page.depth

          status = page.status
          colored_status = color_status status
          failed += 1 unless status == '200'

          say "#{count} #{colored_status} #{indent} #{page.name}"
          f.puts "#{count} #{status} #{indent} #{page.name}"
          count += 1
        end

        color = failed > 0 ? '!txtred!' : '!txtgrn!'
        say "#{color}Done with #{failed} failures"
        f.puts "Done with #{failed} failures"
      end
    end

    # Wraps the known status strings in Colsole color markers; any other
    # status is returned unchanged.
    #
    # @param status [String] status string as reported by a Page
    # @return [String]
    def color_status(status)
      case status
      when '200'
        '!txtgrn!200!txtrst!'
      when '404'
        '!txtred!404!txtrst!'
      else
        status
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ SLA
2
+
3
+ Usage:
4
+ sla check DOMAIN [options]
5
+ sla (-h|--help|--version)
6
+
7
+ Commands:
8
+ check
9
+ Start checking for broken links on a given domain
10
+
11
+ Options:
12
+ --depth, -d DEPTH
13
+ Set crawling depth [default: 5]
14
+
15
+ --cache, -c LIFE
16
+ Set cache life in seconds [default: 86400]
17
+
18
+ Examples:
19
+ sla check example.com
20
+ sla check example.com -c360 -d10
data/lib/sla/link.rb ADDED
@@ -0,0 +1,15 @@
1
module SLA
  # A hyperlink found on a page: its anchor text and its target URL.
  class Link
    attr_accessor :text, :url

    # @param text [String] anchor text
    # @param url [String] link target
    def initialize(text, url)
      @text = text
      @url = url
    end

    # Path plus query string of the link target.
    #
    # HTTP(S) URLs use URI::HTTP#request_uri. Other URIs (for example a
    # relative "/about?x=1", which parses as URI::Generic and has no
    # request_uri) previously raised NoMethodError; they now fall back to
    # the parsed path and query.
    #
    # @return [String]
    # @raise [URI::InvalidURIError] when the URL cannot be parsed
    def path
      uri = URI.parse url
      return uri.request_uri if uri.respond_to? :request_uri

      query = uri.query ? "?#{uri.query}" : ''
      "#{uri.path}#{query}"
    end
  end
end
data/lib/sla/page.rb ADDED
@@ -0,0 +1,75 @@
1
module SLA
  # A single fetched page: its status, its content and the links on it.
  class Page < Base
    attr_accessor :depth, :status, :base_uri
    attr_reader :url

    # @param url [String] URL of the page
    # @param opts [Hash] only +:depth+ is read here
    def initialize(url, opts={})
      @url = url
      @base_uri = url
      @status = '000' # placeholder until the page has been fetched
      self.depth = opts[:depth] if opts[:depth]
    end

    # Fetches the page if needed and tells whether its status is '200'.
    def valid?
      content
      status == '200'
    end

    # Fetches the page if needed so that +status+ is populated.
    def validate
      content
    end

    # Display name, memoized.
    def name
      @name ||= name!
    end

    # The request URI of the page, or the full URL when the request URI is
    # empty or just '/'.
    def name!
      request_uri = URI.parse(url).request_uri
      return url if request_uri.empty? || request_uri == '/'

      request_uri
    end

    # URL scheme, memoized.
    def protocol
      @protocol ||= protocol!
    end

    # Scheme parsed from the page URL.
    def protocol!
      URI.parse(url).scheme
    end

    # Page body, memoized once it is truthy.
    def content
      @content ||= content!
    end

    # Fetches the URL through the cache and records +status+ and
    # +base_uri+ from the response.
    # NOTE(review): every response with a truthy +error+ is reported as
    # '404', whatever the actual failure was.
    def content!
      response = cache.get url
      self.status = response.error ? '404' : '200'
      self.base_uri = response.base_uri
      response.content
    end

    # Parsed HTML document, memoized.
    def doc
      @doc ||= Nokogiri::HTML content
    end

    # Links on the page, memoized.
    def links
      @links ||= links!
    end

    # Builds a Link for every anchor whose href the UrlManager resolves to
    # an absolute URL. Anchors without an href attribute (for example
    # <a name="top">) are skipped; handing their nil href to the
    # UrlManager used to abort the whole crawl.
    #
    # @return [Array<Link>]
    def links!
      doc.css('a').each_with_object([]) do |anchor, found|
        target = anchor['href']
        next if target.nil?

        href = url_manager.absolute target, base_uri
        next unless href

        found.push Link.new(anchor.text, href)
      end
    end
  end

end
@@ -0,0 +1,23 @@
1
module SLA
  # Singleton that holds the base URL of the site being checked and turns
  # hrefs found on pages into absolute, same-host URLs.
  class UrlManager < Base
    include Singleton

    attr_reader :uri, :base_url

    # Characters that get percent-encoded before joining: everything
    # outside the URI character set, plus a '%' that does not start a
    # valid escape. Existing %XX escapes and the '#' fragment delimiter
    # are left alone so links are not double-encoded or mangled.
    UNSAFE_CHARS = /%(?![0-9a-fA-F]{2})|[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%#]/.freeze

    # Fetches +url+ through the cache and stores the response's base_uri
    # as the base URL. "http://" is prepended unless the value already
    # starts with an http/https scheme; a bare domain such as
    # "httpbin.org" therefore also gets the prefix.
    #
    # @param url [String] domain or URL
    def base_url=(url)
      url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
      response = cache.get url
      @base_url = response.base_uri
    end

    # Resolves +relative+ against +base+ (defaults to the base URL).
    #
    # @param relative [String, nil] href as found in the document
    # @param base [String, URI, nil] URL the href is relative to
    # @return [String, false] the absolute URL, or false when the href is
    #   missing, is a tel:/mailto: link or a bare fragment, cannot be
    #   parsed, or points at a host other than the base URL's host
    def absolute(relative, base = nil)
      return false if relative.nil?
      # Require the ':' so relative paths such as "television.html" are
      # not mistaken for tel: links.
      return false if relative =~ /\A(?:(?:tel|mailto):|#)/i

      base ||= base_url
      result = URI.join(base, escape(relative))

      return false unless result.host == base_url.host
      result.to_s
    rescue URI::InvalidURIError
      # A malformed href must not abort the whole crawl.
      false
    end

    private

    # Percent-encodes every UNSAFE_CHARS match in +text+, byte by byte.
    # Replaces URI.encode, which is no longer available in Ruby 3.
    def escape(text)
      text.gsub(UNSAFE_CHARS) do |char|
        char.bytes.map { |byte| format('%%%02X', byte) }.join
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module SLA
  # Gem version. Frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/sla.rb ADDED
@@ -0,0 +1,20 @@
1
require 'webcache'
require 'nokogiri'
require 'colsole'
require 'singleton'
require 'docopt'
require 'uri'

require 'sla/version'
require 'sla/base'
require 'sla/page'
require 'sla/link'
require 'sla/checker'
require 'sla/command_line'
require 'sla/cache'
require 'sla/url_manager'

# Debugging aids. byebug and awesome_print are development-only
# dependencies of the gem, so they are loaded only when available;
# requiring them unconditionally made `require 'sla'` fail with LoadError
# on a regular install.
# TODO: remove before deploy
begin
  require 'byebug'
  require 'awesome_print'
rescue LoadError
  # Not installed outside of a development environment.
end
require 'pp'
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sla
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Danny Ben Shitrit
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: runfile
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.8'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.8'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colsole
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: docopt
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.5'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: webcache
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.6'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.6'
83
+ - !ruby/object:Gem::Dependency
84
+ name: runfile
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.8'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.8'
97
+ - !ruby/object:Gem::Dependency
98
+ name: adsf
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.2'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.2'
111
+ - !ruby/object:Gem::Dependency
112
+ name: runfile-tasks
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.4'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.4'
125
+ - !ruby/object:Gem::Dependency
126
+ name: byebug
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '9.0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '9.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: awesome_print
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '1.7'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '1.7'
153
+ description: Check for broken links on a website
154
+ email: db@dannyben.com
155
+ executables: []
156
+ extensions: []
157
+ extra_rdoc_files: []
158
+ files:
159
+ - README.md
160
+ - lib/sla.rb
161
+ - lib/sla/base.rb
162
+ - lib/sla/cache.rb
163
+ - lib/sla/checker.rb
164
+ - lib/sla/command_line.rb
165
+ - lib/sla/docopt.txt
166
+ - lib/sla/link.rb
167
+ - lib/sla/page.rb
168
+ - lib/sla/url_manager.rb
169
+ - lib/sla/version.rb
170
+ homepage: https://github.com/DannyBen/sla
171
+ licenses:
172
+ - MIT
173
+ metadata: {}
174
+ post_install_message:
175
+ rdoc_options: []
176
+ require_paths:
177
+ - lib
178
+ required_ruby_version: !ruby/object:Gem::Requirement
179
+ requirements:
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ version: 2.0.0
183
+ required_rubygems_version: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ requirements: []
189
+ rubyforge_project:
190
+ rubygems_version: 2.5.1
191
+ signing_key:
192
+ specification_version: 4
193
+ summary: Site Link Analyzer
194
+ test_files: []