driller 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +23 -2
- data/bin/driller +2 -53
- data/driller.gemspec +2 -2
- data/lib/driller/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OTk5MWY5OWY0MjU2OTM3MTYyYjhkM2U1ZDc0Y2IzNjVmZjJiOTc3MQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
Yjg2ODI0ZDNjNTg1MjUyMTJhMTgxN2Q2NDJlNzA2YWMwYmE1Zjc3NA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YzJmYTE3ZTkyNWM1ZjY0MTI5YTJkYzNkZWRhYjI0YTBiMmZlYzUxNGNmZjFj
|
10
|
+
ZDdjYzAzNjUzNjVlNGVlN2E5NzhkMzhmYjRhOTFjZDBjMWFkMzMwZWQ1NWY3
|
11
|
+
YjFkZDgwNWZmZjNjMTI0N2MxYjUzYzVmMGNjZTU0OGJjNzY2YTQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
N2ZlODY2OTg4ZWU1ZGY1ZjU1ZjM5NjZlNDQxOTQwZWYyOWM4NjFhZmY4YTdh
|
14
|
+
ZWYzOWQ0YWQ4YTkwYzE1ZGIxZjVjNWU4ZDU1YzRiNGFhZjU0NGQ2Mzg2M2Nk
|
15
|
+
MzU1ZTMwYmFiZDVlMmE0MTFkYTczNDhlNGI3NzNmNTBjMGEwZTg=
|
data/README.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
# Driller
|
2
2
|
|
3
|
-
|
3
|
+
Driller is a command line Ruby based web crawler based on Anemone. Driller can
|
4
|
+
|
5
|
+
* Crawl a website and report error pages, i.e. pages whose response code is not 200 or 301. All other HTTP status codes are reported.
|
6
|
+
* Driller will report slow pages, i.e. pages whose response time is > 5000
|
7
|
+
* This will create three HTML files: valid_urls.html, listing URLs that returned a 200 response; broken.html, listing URLs that did not return 200; and slow_pages.html, listing URLs whose response time was > 5000.
|
8
|
+
|
9
|
+
|
4
10
|
|
5
11
|
## Installation
|
6
12
|
|
@@ -20,7 +26,22 @@ Or install it yourself as:
|
|
20
26
|
|
21
27
|
## Usage
|
22
28
|
|
23
|
-
|
29
|
+
Driller takes two arguments
|
30
|
+
|
31
|
+
* URL of the page to be crawled
|
32
|
+
* Depth of the crawling
|
33
|
+
|
34
|
+
|
35
|
+
$ driller http://www.example.com 2
|
36
|
+
|
37
|
+
If you have installed it via Bundler, then run
|
38
|
+
|
39
|
+
$ bundle exec driller http://www.example.com 2
|
40
|
+
|
41
|
+
|
42
|
+
This will crawl the website up to level 2. You can increase the depth as needed. This will create three HTML files: valid_urls.html, listing URLs that returned a 200 response; broken.html, listing URLs that did not return 200; and slow_pages.html, listing URLs whose response time was > 5000.
|
43
|
+
|
44
|
+
You can display these HTML files on a CI server.
|
24
45
|
|
25
46
|
## Contributing
|
26
47
|
|
data/bin/driller
CHANGED
@@ -6,10 +6,11 @@ require "rubygems"
|
|
6
6
|
require "anemone"
|
7
7
|
|
8
8
|
URL = ARGV[0]
|
9
|
+
depth = ARGV[1]
|
9
10
|
|
10
11
|
Anemone.crawl(URL) do |anemone|
|
11
12
|
|
12
|
-
anemone.depth_limit =
|
13
|
+
anemone.depth_limit = depth.to_i
|
13
14
|
|
14
15
|
anemone.focus_crawl do |page|
|
15
16
|
page.links.select { |url| url.starts_with? URL }
|
@@ -42,55 +43,3 @@ anemone.on_every_page do |page|
|
|
42
43
|
end
|
43
44
|
end
|
44
45
|
end
|
45
|
-
|
46
|
-
|
47
|
-
def print_usage
|
48
|
-
puts <<EOF
|
49
|
-
|
50
|
-
Usage: driller <command-name> [parameters] [options]
|
51
|
-
|
52
|
-
<command-name> can be one of
|
53
|
-
help
|
54
|
-
prints more detailed help information.
|
55
|
-
url and depth
|
56
|
-
crawl the url and reports error pages and slow paged upto depth level specified
|
57
|
-
version
|
58
|
-
prints the gem version
|
59
|
-
|
60
|
-
<options> can be
|
61
|
-
-v, --verbose Turns on verbose logging
|
62
|
-
EOF
|
63
|
-
end
|
64
|
-
|
65
|
-
def print_help
|
66
|
-
puts <<EOF
|
67
|
-
|
68
|
-
Usage: driller <command-name>
|
69
|
-
|
70
|
-
<command-name> can be one of
|
71
|
-
help
|
72
|
-
version
|
73
|
-
|
74
|
-
Commands:
|
75
|
-
help : prints more detailed help information.
|
76
|
-
|
77
|
-
version : prints the gem version
|
78
|
-
|
79
|
-
<Options>
|
80
|
-
-v, --verbose Turns on verbose logging
|
81
|
-
EOF
|
82
|
-
end
|
83
|
-
|
84
|
-
|
85
|
-
if (ARGV.length == 0)
|
86
|
-
print_usage
|
87
|
-
else
|
88
|
-
cmd = ARGV.shift
|
89
|
-
if cmd == "help"
|
90
|
-
print_help
|
91
|
-
elsif cmd == "version"
|
92
|
-
puts Driller::VERSION
|
93
|
-
else
|
94
|
-
print_usage
|
95
|
-
end
|
96
|
-
end
|
data/driller.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Shashikant86"]
|
10
10
|
spec.email = ["shashikant.jagtap@aol.co.uk"]
|
11
11
|
spec.summary = %q{Drill your website for error and slow pages}
|
12
|
-
spec.description = %q{
|
13
|
-
spec.homepage = ""
|
12
|
+
spec.description = %q{Driller is a command line Ruby based web crawler based on Anemone. Driller can crawl website and reports error pages which are not 200 or 301.}
|
13
|
+
spec.homepage = "https://github.com/Shashikant86/driller"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0")
|
data/lib/driller/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: driller
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shashikant86
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,7 +52,8 @@ dependencies:
|
|
52
52
|
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.7.2
|
55
|
-
description:
|
55
|
+
description: Driller is a command line Ruby based web crawler based on Anemone. Driller
|
56
|
+
can crawl website and reports error pages which are not 200 or 301.
|
56
57
|
email:
|
57
58
|
- shashikant.jagtap@aol.co.uk
|
58
59
|
executables:
|
@@ -70,7 +71,7 @@ files:
|
|
70
71
|
- lib/driller.rb
|
71
72
|
- lib/driller/uri_helper.rb
|
72
73
|
- lib/driller/version.rb
|
73
|
-
homepage:
|
74
|
+
homepage: https://github.com/Shashikant86/driller
|
74
75
|
licenses:
|
75
76
|
- MIT
|
76
77
|
metadata: {}
|