driller 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/bin/driller +38 -7
- data/lib/driller.rb +38 -1
- data/lib/driller/version.rb +1 -1
- metadata +1 -2
- data/lib/driller/crawler.rb +0 -44
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
Zjc5YTFiOGZhN2E3OTYwYjVmNjQ1ZTcyZjU1MmQzYzRjZDY2NzhkNw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZWU1Y2Q2NWQyNDFhNTVjODdjMjczZjA4NjkxNGM5MzdiNTc4M2E1ZA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MjM2MGM5OTAyYWVmNDZhNDA1NTE2YTNiZjM4ZGNmY2U0YzFjZDQ5MjNlMzQy
|
10
|
+
MzE0OGI0ZjJmOTY1OGZhMGNmZDFkNDg5MjNjZjc0ZDUzYjUwNzI5YzQzOTEw
|
11
|
+
MTdhYzkwYzkwMGM2YTcwMGZkODRiMGJlYjZmYzRhMGQwMjE0MGI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzcyZjU4MDBkMDVkOTJlMmRmY2NhYWNkNWZhNWFhMDUyMzEwZTVlMGZjYzQ1
|
14
|
+
YmNjZmViOGRjNDk3ZGU1MDM1YmRhYmQ5Y2NmODlmZjNiMzA1Y2JhZTYzMWEw
|
15
|
+
ODU3OTFhOTljMWZlM2QzZjg1M2Q1MzBkNzUxNjM3NDRjMjg2ZGU=
|
data/bin/driller
CHANGED
@@ -1,13 +1,49 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'driller/version'
|
3
|
+
require 'driller/uri_helper'
|
3
4
|
require 'fileutils'
|
4
5
|
require "rubygems"
|
6
|
+
require "anemone"
|
5
7
|
|
8
|
+
URL = ARGV[0]
|
6
9
|
|
7
|
-
|
8
|
-
|
10
|
+
Anemone.crawl(URL) do |anemone|
|
11
|
+
|
12
|
+
anemone.depth_limit = 2
|
13
|
+
|
14
|
+
anemone.focus_crawl do |page|
|
15
|
+
page.links.select { |url| url.starts_with? URL }
|
9
16
|
end
|
10
17
|
|
18
|
+
file = File.new('valid_pages.html', 'w')
|
19
|
+
file = File.new('broken.html', 'w')
|
20
|
+
file = File.new('slow_pages.html', 'w')
|
21
|
+
|
22
|
+
anemone.on_every_page do |page|
|
23
|
+
|
24
|
+
if page.code = 200 && page.code = 301
|
25
|
+
file = File.open('valid_pages.html', 'a')
|
26
|
+
file.puts page.url
|
27
|
+
end
|
28
|
+
|
29
|
+
if page.code != 200 && page.code != 301
|
30
|
+
puts "=======broken======\n"
|
31
|
+
puts page.url
|
32
|
+
file = File.open('broken.html', 'a')
|
33
|
+
file.puts page.url
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
if page.response_time > 5000
|
38
|
+
puts "=======Slow Page======\n"
|
39
|
+
puts page.url
|
40
|
+
file = File.open('slow_pages.html', 'a')
|
41
|
+
file.puts page.url
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
|
11
47
|
def print_usage
|
12
48
|
puts <<EOF
|
13
49
|
|
@@ -33,14 +69,11 @@ def print_help
|
|
33
69
|
|
34
70
|
<command-name> can be one of
|
35
71
|
help
|
36
|
-
url
|
37
72
|
version
|
38
73
|
|
39
74
|
Commands:
|
40
75
|
help : prints more detailed help information.
|
41
76
|
|
42
|
-
url : crawl the url and reports error pages and slow paged.
|
43
|
-
|
44
77
|
version : prints the gem version
|
45
78
|
|
46
79
|
<Options>
|
@@ -55,8 +88,6 @@ else
|
|
55
88
|
cmd = ARGV.shift
|
56
89
|
if cmd == "help"
|
57
90
|
print_help
|
58
|
-
elsif cmd == "url"
|
59
|
-
drill_website(ARGV[1])
|
60
91
|
elsif cmd == "version"
|
61
92
|
puts Driller::VERSION
|
62
93
|
else
|
data/lib/driller.rb
CHANGED
@@ -4,5 +4,42 @@ require "driller/crawler"
|
|
4
4
|
require "anemone"
|
5
5
|
|
6
6
|
module Driller
|
7
|
-
|
7
|
+
URL = ARGV[0]
|
8
|
+
Anemone.crawl(URL) do |anemone|
|
9
|
+
|
10
|
+
anemone.depth_limit = 2
|
11
|
+
|
12
|
+
anemone.focus_crawl do |page|
|
13
|
+
page.links.select { |url| url.starts_with? URL }
|
14
|
+
end
|
15
|
+
|
16
|
+
file = File.new('valid_pages.html', 'w')
|
17
|
+
file = File.new('broken.html', 'w')
|
18
|
+
file = File.new('slow_pages.html', 'w')
|
19
|
+
|
20
|
+
anemone.on_every_page do |page|
|
21
|
+
|
22
|
+
if page.code = 200 && page.code = 301
|
23
|
+
file = File.open('valid_pages.html', 'a')
|
24
|
+
file.puts page.url
|
25
|
+
end
|
26
|
+
|
27
|
+
if page.code != 200 && page.code != 301
|
28
|
+
puts "=======broken======\n"
|
29
|
+
puts page.url
|
30
|
+
file = File.open('broken.html', 'a')
|
31
|
+
file.puts page.url
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
if page.response_time > 5000
|
36
|
+
puts "=======Slow Page======\n"
|
37
|
+
puts page.url
|
38
|
+
file = File.open('slow_pages.html', 'a')
|
39
|
+
file.puts page.url
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
8
45
|
end
|
data/lib/driller/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: driller
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shashikant86
|
@@ -68,7 +68,6 @@ files:
|
|
68
68
|
- bin/driller
|
69
69
|
- driller.gemspec
|
70
70
|
- lib/driller.rb
|
71
|
-
- lib/driller/crawler.rb
|
72
71
|
- lib/driller/uri_helper.rb
|
73
72
|
- lib/driller/version.rb
|
74
73
|
homepage: ''
|
data/lib/driller/crawler.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
module Driller
|
2
|
-
|
3
|
-
class Crawler
|
4
|
-
|
5
|
-
URL = ARGV[0]
|
6
|
-
Anemone.crawl(URL) do |anemone|
|
7
|
-
|
8
|
-
anemone.depth_limit = 2
|
9
|
-
|
10
|
-
anemone.focus_crawl do |page|
|
11
|
-
page.links.select { |url| url.starts_with? URL }
|
12
|
-
end
|
13
|
-
|
14
|
-
file = File.new('valid_pages.html', 'w')
|
15
|
-
file = File.new('broken.html', 'w')
|
16
|
-
file = File.new('slow_pages.html', 'w')
|
17
|
-
|
18
|
-
anemone.on_every_page do |page|
|
19
|
-
|
20
|
-
if page.code = 200 && page.code = 301
|
21
|
-
file = File.open('valid_pages.html', 'a')
|
22
|
-
file.puts page.url
|
23
|
-
end
|
24
|
-
|
25
|
-
if page.code != 200 && page.code != 301
|
26
|
-
puts "=======broken======\n"
|
27
|
-
puts page.url
|
28
|
-
file = File.open('broken.html', 'a')
|
29
|
-
file.puts page.url
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
if page.response_time > 5000
|
34
|
-
puts "=======Slow Page======\n"
|
35
|
-
puts page.url
|
36
|
-
file = File.open('slow_pages.html', 'a')
|
37
|
-
file.puts page.url
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|