driller 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWQzNGUxOWVjMmE4ZThmMjVmMTYwMTA2OTk4ZTYxZjk0ZGIzNDUzOQ==
4
+ YzMzYWYyZGM4MzA2OTk1NDZlNjQ4ZTQyOWFkN2UzNmNiMjYxNDQyMQ==
5
5
  data.tar.gz: !binary |-
6
- ZjFkNmFlOWE5Njg0YzhlODlmODcxMWI5NGI4MTZkYzM4NzQyZmFlZg==
6
+ NGM4YWIwZTU3YTY4MDIxMzhiM2UyZWM0ZGQ2M2Q5ZDlmMDBhNDAyNA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NGM5ZjcyMDk1MjU2NzZmYzZhZWVkNmFjODk0M2Y5Nzg3Y2VjOWFiYmU0ZWEz
10
- NDM5MDFlMDhjZDhjZjQ0NjBiNTg4NzAyNjVlNzYzY2ZmNzY5MDUyMTMwYWUx
11
- NzY4MDkxNDU2ZTk2NjRlYTI0NWVhMjI3YjlmNGViOGNlMDQ4YjA=
9
+ Mzk4NDU2ZDlkNTEyMDNlZDgwMjQxNDMwNjM0MDJiYzA4ZTA5Y2JkMDRkODlk
10
+ ZmFlOTI0ZDU4MDRkNTBmNjk1OWY4YTllNTAyNDEwOTA1ODRjMDVlODcxY2Rj
11
+ NDE2ODcyYTcwODhlYjY0NWUwMDJlNzE5MDA2MDdhNzc1NDQ3ZjI=
12
12
  data.tar.gz: !binary |-
13
- NzAxNmE4Y2VmZDkyZTM0MjVkNTM3Y2QxOGM1NmRlODhmZDQyYTViOWQ3Mjg0
14
- NjQyMzUyYTNjMDViODg0Y2M3YzU1OWYzZjE0MjU2YTQ3YzZhMzZlOWUyZGFk
15
- MzEyODVkODQyN2I0OTAyYjk1ZGJhNmM2OTg4OWJhZmJhZDE4ZjM=
13
+ NDNhZWQ5YWYwNWY1MDE5NzBhYzJjZDgwMTJmMWU1ZmE4NDliMzE5NzdhYTM1
14
+ OWViOTFlYWZjOTg2M2JlY2E0MzQ2NGI3ZDlhZWYxYWI1ZmZhM2ZhNjA4NWQ1
15
+ ZDA2NTdiYmU2NDlmZjE2NzdlMjU0Y2IwYzgxYWFkNzA4NDMxNjE=
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Shashikant86
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # Driller
2
+
3
+ Driller is a command line Ruby based web crawler based on Anemone. Driller can
4
+
5
+ * Crawl a website and report error pages, i.e. pages whose response is not 200 or 301. All other HTTP codes are reported.
6
+ * Report slow pages, i.e. pages whose response time is > 5000
7
+ * Create three HTML files: valid_urls.html for pages with a 200 response, broken.html for pages which are not 200, and slow_pages.html for pages whose response time is > 5000
8
+
9
+
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'driller'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install driller
26
+
27
+ ## Usage
28
+
29
+ Driller takes two required arguments and two optional ones
30
+
31
+ * URL of the page to be crawled
32
+ * Depth of the crawling
33
+ * Proxy Hostname (optional) [default: nil]
34
+ * Proxy Port (optional) [default: 80]
35
+
36
+ $ driller <webpage> <depth> [<proxy_host>] [<proxy_port>]
37
+ $ driller http://www.example.com 2
38
+ $ driller http://www.example.com 2 'www-proxy.domain.co.uk' 80
39
+
40
+ If you have installed it with Bundler, then run
41
+
42
+ $ bundle exec driller http://www.example.com 2
43
+
44
+ This will crawl the website up to level 2. You can increase the depth as per your need. This will create three HTML files: valid_urls.html for pages with a 200 response, broken.html for pages which are not 200, and slow_pages.html for pages whose response time is > 5000
45
+
46
+ You can display these HTML files on a CI server.
47
+
48
+ ## Contributing
49
+
50
+ 1. Fork it ( https://github.com/[my-github-username]/driller/fork )
51
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
52
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
53
+ 4. Push to the branch (`git push origin my-new-feature`)
54
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/driller ADDED
@@ -0,0 +1,21 @@
1
#!/usr/bin/env ruby
# Command line entry point for Driller.
#
# Usage: driller <webpage> <depth> [<proxy_host>] [<proxy_port>]
#
# Reads the positional arguments, checks that the two required ones are
# present, then hands everything to Main, which performs the crawl.
require_relative '../lib/driller/main'

# The first two arguments are required; the proxy settings are optional and
# are nil when not supplied.
webpage, depth, proxy_host, proxy_port = ARGV

# Fall back to port 80 when no proxy port was given.
proxy_port ||= 80

if webpage.nil? || depth.nil?
  puts "Error: Invalid number of arguments supplied."
  # abort prints the usage line to stderr and exits with a failure status.
  abort("Usage: driller <webpage> <depth> [<proxy_host>] [<proxy_port>]")
end

Main.new(webpage, depth, proxy_host, proxy_port).execute
data/driller.gemspec ADDED
@@ -0,0 +1,22 @@
1
# coding: utf-8
# Gem specification for driller.

# Put the gem's lib directory on the load path. File.expand_path resolves the
# first argument relative to the second, and __FILE__ is this gemspec itself,
# so '../lib' is needed to reach the lib directory sitting next to it
# ('./lib' would point at a path *underneath* the gemspec file).
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
# require_relative './lib/driller/version'

Gem::Specification.new do |spec|
  spec.name = "driller"
  # NOTE(review): duplicated by hand from Driller::VERSION in lib/driller/version.rb;
  # keep the two in sync when releasing.
  spec.version = "0.1.2"
  spec.authors = ["Shashikant86", "Qambar"]
  spec.email = ["shashikant.jagtap@aol.co.uk"]
  spec.summary = %q{Drill your website for error and slow pages}
  spec.description = %q{Driller is a command line Ruby based web crawler based on Anemone. Driller can crawl website and reports error pages which are not 200 or 301.}
  spec.homepage = "https://github.com/Shashikant86/driller"
  spec.license = "MIT"

  # Package every file tracked by git; executables and test files are derived
  # from that list by path prefix.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # NOTE(review): bundler and rake look like development-time tools rather than
  # runtime requirements of the crawler -- confirm before changing.
  spec.add_runtime_dependency "bundler", "~> 1.0"
  spec.add_runtime_dependency "rake"
  spec.add_runtime_dependency "anemone"
end
@@ -0,0 +1,15 @@
1
# Writes report content to "<filename>.html".
#
# The write happens as a side effect of construction, so callers only need
# File_Handler.new(name, data).
class File_Handler
  # @param filename [String] target path without the ".html" extension
  # @param data [Object] content handed to File#puts
  def initialize(filename, data)
    write(filename, data)
  end

  # Writes +data+ (via File#puts, so a trailing newline is added) to
  # "<filename>.html", replacing any existing file.
  #
  # Failures are reported on stdout instead of being raised, so a failed
  # report never aborts the caller.
  #
  # @param filename [String] target path without the ".html" extension
  # @param data [Object] content handed to File#puts
  # @return [nil]
  def write(filename, data)
    # The block form of File.open closes the handle even when the write
    # itself raises, so no descriptor is leaked on failure.
    File.open(filename + '.html', 'w') { |file| file.puts data }
  rescue StandardError => e
    puts "Unable to write to file, please check your permissions"
    # Include the underlying reason: the cause is not always a permission problem.
    puts "(#{e.class}: #{e.message})"
  end
end
@@ -0,0 +1,57 @@
1
require "erb"

# Builds a minimal HTML page containing a single table for one report.
#
# Each row is expected to respond to #each yielding key/value pairs (the
# callers in this gem pass Hashes). The keys of the first usable row become
# the column headings; the values of every row become the cells.
class Html_Report
  # @param reportName [String] report title, also used in the "no rows" message
  # @param rows [Array] rows to render; nil entries are skipped
  def initialize(reportName, rows)
    @reportName = reportName
    @html = ""
    createTable(rows)
  end

  # Appends the table body for +rows+ to the report.
  #
  # An empty collection, or one containing only nil entries, produces a single
  # "There are no <report>." row. A non-empty argument that is not an Array
  # aborts the program.
  def createTable(rows)
    unless rows.length == 0 || rows.kind_of?(Array)
      abort("Rows should be an array in Html_Report")
    end

    # Drop nil rows up front so the heading is taken from a real row.
    populated = rows.length == 0 ? [] : rows.compact

    if populated.empty?
      @html << "<tr><td>There are no #{escape(@reportName)}.</td></tr>"
      return
    end

    addHeading(populated.first)
    populated.each { |row| addRow(row) }
  end

  # Appends a heading row built from the capitalized keys of +row+.
  def addHeading(row)
    @html << "<tr>"
    row.each do |key, _value|
      @html << "<th> #{escape(key.to_s.capitalize)}</th>"
    end
    @html << "</tr>"
  end

  # Appends one table row built from the values of +row+.
  def addRow(row)
    @html << "<tr>"
    row.each do |_key, value|
      @html << "<td>#{escape(value)}</td>"
    end
    @html << "</tr>"
  end

  # @return [String] the complete HTML document wrapping the table built so far
  def getReport
    "<!DOCTYPE html><html><head><title>Report</title></head><body>" \
      "<h1>#{escape(@reportName.capitalize)}</h1>" \
      "<table class='table'>#{@html}</table>" \
      "</body></html>"
  end

  private

  # HTML-escapes +value+ so that report data (for example crawled URLs
  # containing "&" or "<") cannot break or inject markup.
  def escape(value)
    ERB::Util.html_escape(value)
  end
end
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative "version"
3
+ require_relative "uri_helper"
4
+ require_relative "file_handler"
5
+ require_relative "html_report"
6
+
7
+ require "fileutils"
8
+ require "rubygems"
9
+ require "anemone"
10
+
11
+ class Main
12
+ attr_accessor :valid, :invalid, :slow, :extremely_slow
13
+
14
+ def initialize(webpage, depth, proxy_host, proxy_port)
15
+ @webpage = webpage
16
+ @depth = depth
17
+ @proxy_port = proxy_port
18
+ @proxy_host = proxy_host
19
+
20
+ @pageCount = 0
21
+
22
+ # initializing symbols
23
+ end
24
+
25
+ def execute
26
+ puts "Webpage : " + @webpage
27
+ puts "Depth : " + @depth
28
+
29
+ result = Hash.new
30
+ Anemone.crawl(@webpage) do |anemone|
31
+
32
+ unless @proxy_host.nil?
33
+ anemone.proxy_host = @proxy_host
34
+ anemone.proxy_port = @proxy_port
35
+ end
36
+
37
+ anemone.depth_limit = @depth.to_i
38
+ puts "============= Driller is now checking your website links. If any of the link returned non 200, it will be displayed here========="
39
+ anemone.focus_crawl do |page|
40
+ page.links.select { |url| url.starts_with? @webpage }
41
+ end
42
+
43
+
44
+ result[:valid] = Array.new
45
+ result[:invalid] = Array.new
46
+ result[:slow] = Array.new
47
+ result[:extremely_slow] = Array.new
48
+
49
+ anemone.on_every_page do |page|
50
+
51
+ pageObject = getPageObject(page)
52
+
53
+ if page.code == 200
54
+ result[:valid].push(pageObject)
55
+
56
+ # Check Response Time
57
+ # -------------------
58
+ # We only check response time
59
+ # for pages which are 200
60
+ # because there is no point of
61
+ # optimizing error pages.
62
+
63
+ if page.response_time > 10000
64
+ result[:slow].push(pageObject)
65
+
66
+ puts "=======Slow Page======\n"
67
+ puts "Time: #{page.response_time} - #{page.url}"
68
+ elsif page.response_time > 5000
69
+ result[:extremely_slow].push(pageObject)
70
+
71
+ puts "=======Very Slow Page======\n"
72
+ puts "Time: #{page.response_time} - #{page.url}"
73
+ end
74
+ #End checking response time.
75
+
76
+ else #404, 301, 500
77
+ result[:invalid].push(pageObject)
78
+
79
+ puts "======= NON-200 Page ======\n"
80
+ puts "#{page.code} Response from : #{page.url}"
81
+ puts
82
+
83
+ end
84
+
85
+ end
86
+
87
+ @pageCount = @pageCount + 1
88
+ puts "Checked #{@pageCount} pages" if @pageCount % 100 == 0
89
+ end
90
+
91
+ generateReport('valid_pages', result[:valid])
92
+ generateReport('broken', result[:invalid])
93
+ generateReport('slow_pages', result[:slow])
94
+ generateReport('extremely_slow_pages', result[:extremely_slow])
95
+
96
+ end
97
+
98
+ def getPageObject(page)
99
+ # This means we never got an access to internet
100
+ if page.code.nil?
101
+ abort("Error: Either you are offline or behind proxy.")
102
+ end
103
+
104
+ obj = Hash.new
105
+
106
+ obj['code'] = page.code
107
+ obj['url'] = (page.url).to_s
108
+ obj['response_time'] = page.response_time
109
+
110
+ return obj
111
+ end
112
+
113
+ def generateReport(reportName, data)
114
+ report = Html_Report.new(reportName, data)
115
+ File_Handler.new(reportName, report.getReport)
116
+ end
117
+ end
@@ -0,0 +1,8 @@
1
# Reopens URI::Generic to add a string-prefix test on the URI's text form.
module URI
  class Generic
    # Reports whether the string form of this URI begins with +prefix+.
    #
    # @param prefix [#to_s] the leading text to look for (converted with #to_s)
    # @return [Boolean]
    def starts_with?(prefix)
      to_s.start_with?(prefix.to_s)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the driller gem.
module Driller
  # Version string of this release.
  VERSION = '0.1.2'
end
data/lib/driller.rb ADDED
@@ -0,0 +1,8 @@
1
# Library entry point: loads every file shipped under lib/driller.
#
# The gem ships no lib/driller/crawler.rb, so requiring it raised LoadError;
# the crawler lives in driller/main, which is loaded here together with its
# helpers.
require "driller/version"
require "driller/uri_helper"
require "driller/file_handler"
require "driller/html_report"
require "driller/main"
require "anemone"

# Top-level namespace of the gem; Driller::VERSION is defined in driller/version.
module Driller

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: driller
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shashikant86
@@ -57,10 +57,24 @@ description: Driller is a command line Ruby based web crawler based on Anemone.
57
57
  can crawl website and reports error pages which are not 200 or 301.
58
58
  email:
59
59
  - shashikant.jagtap@aol.co.uk
60
- executables: []
60
+ executables:
61
+ - driller
61
62
  extensions: []
62
63
  extra_rdoc_files: []
63
- files: []
64
+ files:
65
+ - .gitignore
66
+ - Gemfile
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - bin/driller
71
+ - driller.gemspec
72
+ - lib/driller.rb
73
+ - lib/driller/file_handler.rb
74
+ - lib/driller/html_report.rb
75
+ - lib/driller/main.rb
76
+ - lib/driller/uri_helper.rb
77
+ - lib/driller/version.rb
64
78
  homepage: https://github.com/Shashikant86/driller
65
79
  licenses:
66
80
  - MIT