driller 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWQzNGUxOWVjMmE4ZThmMjVmMTYwMTA2OTk4ZTYxZjk0ZGIzNDUzOQ==
4
+ YzMzYWYyZGM4MzA2OTk1NDZlNjQ4ZTQyOWFkN2UzNmNiMjYxNDQyMQ==
5
5
  data.tar.gz: !binary |-
6
- ZjFkNmFlOWE5Njg0YzhlODlmODcxMWI5NGI4MTZkYzM4NzQyZmFlZg==
6
+ NGM4YWIwZTU3YTY4MDIxMzhiM2UyZWM0ZGQ2M2Q5ZDlmMDBhNDAyNA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NGM5ZjcyMDk1MjU2NzZmYzZhZWVkNmFjODk0M2Y5Nzg3Y2VjOWFiYmU0ZWEz
10
- NDM5MDFlMDhjZDhjZjQ0NjBiNTg4NzAyNjVlNzYzY2ZmNzY5MDUyMTMwYWUx
11
- NzY4MDkxNDU2ZTk2NjRlYTI0NWVhMjI3YjlmNGViOGNlMDQ4YjA=
9
+ Mzk4NDU2ZDlkNTEyMDNlZDgwMjQxNDMwNjM0MDJiYzA4ZTA5Y2JkMDRkODlk
10
+ ZmFlOTI0ZDU4MDRkNTBmNjk1OWY4YTllNTAyNDEwOTA1ODRjMDVlODcxY2Rj
11
+ NDE2ODcyYTcwODhlYjY0NWUwMDJlNzE5MDA2MDdhNzc1NDQ3ZjI=
12
12
  data.tar.gz: !binary |-
13
- NzAxNmE4Y2VmZDkyZTM0MjVkNTM3Y2QxOGM1NmRlODhmZDQyYTViOWQ3Mjg0
14
- NjQyMzUyYTNjMDViODg0Y2M3YzU1OWYzZjE0MjU2YTQ3YzZhMzZlOWUyZGFk
15
- MzEyODVkODQyN2I0OTAyYjk1ZGJhNmM2OTg4OWJhZmJhZDE4ZjM=
13
+ NDNhZWQ5YWYwNWY1MDE5NzBhYzJjZDgwMTJmMWU1ZmE4NDliMzE5NzdhYTM1
14
+ OWViOTFlYWZjOTg2M2JlY2E0MzQ2NGI3ZDlhZWYxYWI1ZmZhM2ZhNjA4NWQ1
15
+ ZDA2NTdiYmU2NDlmZjE2NzdlMjU0Y2IwYzgxYWFkNzA4NDMxNjE=
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Shashikant86
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # Driller
2
+
3
+ Driller is a command-line web crawler written in Ruby and built on Anemone. Driller can:
4
+
5
+ * Crawl a website and report error pages, i.e. pages which are not 200 or 301. All other HTTP codes are reported.
6
+ * Report slow pages, i.e. pages whose response time is > 5000.
7
+ * Create four HTML files: valid_pages.html, listing the pages which returned a 200 response; broken.html, listing the pages which did not return 200; and slow_pages.html and extremely_slow_pages.html, listing the pages whose response time was > 5000.
8
+
9
+
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'driller'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install driller
26
+
27
+ ## Usage
28
+
29
+ Driller takes two required arguments and two optional arguments:
30
+
31
+ * URL of the page to be crawled
32
+ * Depth of the crawling
33
+ * Proxy Hostname (optional) [default: nil]
34
+ * Proxy Port (optional) [default: 80]
35
+
36
+ $ driller <webpage> <depth> [<proxy_host>] [<proxy_port>]
37
+ $ driller http://www.example.com 2
38
+ $ driller http://www.example.com 2 'www-proxy.domain.co.uk' 80
39
+
40
+ If you have installed it with Bundler, then run:
41
+
42
+ $ bundle exec driller http://www.example.com 2
43
+
44
+ This will crawl the website up to level 2. You can increase the depth as per your need. This will create four HTML files: valid_pages.html, listing the pages which returned a 200 response; broken.html, listing the pages which did not return 200; and slow_pages.html and extremely_slow_pages.html, listing the pages whose response time was > 5000.
45
+
46
+ You can display these HTML files on a CI server.
47
+
48
+ ## Contributing
49
+
50
+ 1. Fork it ( https://github.com/[my-github-username]/driller/fork )
51
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
52
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
53
+ 4. Push to the branch (`git push origin my-new-feature`)
54
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/driller ADDED
@@ -0,0 +1,21 @@
1
#!/usr/bin/env ruby
# Command-line entry point for Driller.
#
# Usage: driller <webpage> <depth> [<proxy_host>] [<proxy_port>]
#
# Reads the start URL and the crawl depth from ARGV (both required) plus an
# optional proxy host and proxy port, then hands them to Main and runs the
# crawl.
require_relative '../lib/driller/main'

# Missing trailing arguments simply come back as nil.
webpage, depth, proxy_host, proxy_port = ARGV

# Fail fast on a bad invocation before any crawling is set up.
if webpage.nil? || depth.nil?
  puts "Error: Invalid number of arguments supplied."
  abort("Usage: driller <webpage> <depth> [<proxy_host>] [<proxy_port>]")
end

# The proxy port falls back to 80 when it is not given on the command line.
proxy_port ||= 80

Main.new(webpage, depth, proxy_host, proxy_port).execute
data/driller.gemspec ADDED
@@ -0,0 +1,22 @@
1
# coding: utf-8
# Gem specification for driller.
#
# The lib directory is resolved relative to this file: expand_path treats
# __FILE__ as a path component, so '..' steps out of the file name into the
# directory that contains the gemspec ('./lib' would instead point at the
# nonexistent "<this file>/lib").
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
# require_relative './lib/driller/version'

Gem::Specification.new do |spec|
  spec.name          = "driller"
  # The version is hard-coded here; keep it in step with Driller::VERSION in
  # lib/driller/version.rb.
  spec.version       = "0.1.2"
  spec.authors       = ["Shashikant86", "Qambar"]
  spec.email         = ["shashikant.jagtap@aol.co.uk"]
  spec.summary       = %q{Drill your website for error and slow pages}
  spec.description   = %q{Driller is a command line Ruby based web crawler based on Anemone. Driller can crawl website and reports error pages which are not 200 or 301.}
  spec.homepage      = "https://github.com/Shashikant86/driller"
  spec.license       = "MIT"

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "bundler", "~> 1.0"
  spec.add_runtime_dependency "rake"
  spec.add_runtime_dependency "anemone"
end
@@ -0,0 +1,15 @@
1
# Writes report content to "<filename>.html".
#
# Constructing an instance performs the write immediately; #write can also be
# called again on an existing instance.
class File_Handler
  # @param filename [String] target path without the ".html" extension
  # @param data [Object] content handed to IO#puts
  def initialize(filename, data)
    write(filename, data)
  end

  # Writes +data+ to "<filename>.html", replacing any existing file.
  #
  # The block form of File.open closes the handle even when the write itself
  # raises, so no descriptor is leaked on failure.
  #
  # @param filename [String] target path without the ".html" extension
  # @param data [Object] content handed to IO#puts
  # @return [Boolean] true when the file was written, false when writing
  #   failed (a message is printed instead of raising)
  def write(filename, data)
    File.open(filename + '.html', 'w') { |file| file.puts data }
    true
  rescue StandardError
    puts "Unable to write to file, please check your permissions"
    false
  end
end
@@ -0,0 +1,57 @@
1
require 'cgi'

# Builds a minimal standalone HTML page containing one table.
#
# The table body is rendered once, at construction time; #getReport wraps it
# in the surrounding document each time it is called.
class Html_Report
  # @param reportName [String] used for the <h1> heading and for the
  #   "There are no ..." message shown when there is nothing to list
  # @param rows [Array<Hash, nil>] one hash per table row; nil entries are
  #   skipped. The keys of the first non-nil row become the column headings.
  def initialize(reportName, rows)
    @reportName = reportName
    @html = String.new
    createTable(rows)
  end

  # Renders the heading row and one table row per entry of +rows+ into the
  # internal buffer. Aborts the process when +rows+ is not an Array.
  def createTable(rows)
    # Check the type before touching rows, otherwise a non-array argument
    # fails with an unrelated NoMethodError instead of this message.
    abort("Rows should be an array in Html_Report") unless rows.kind_of?(Array)

    # nil entries carry no data; dropping them up front also guarantees the
    # heading is taken from a real row.
    present = rows.compact
    if present.empty?
      @html << "<tr><td>There are no #{@reportName}.</td></tr>"
      return
    end

    addHeading(present.first)
    present.each { |row| addRow(row) }
  end

  # Appends a <tr> of <th> cells, one per key of +row+, capitalized.
  def addHeading(row)
    @html << "<tr>"
    row.each do |key, _value|
      @html << "<th> #{CGI.escapeHTML(key.to_s.capitalize)}</th>"
    end
    @html << "</tr>"
  end

  # Appends a <tr> of <td> cells, one per value of +row+.
  #
  # Values are HTML-escaped because they originate from crawled pages (for
  # example URLs containing "&" or "<") and must not be read as markup.
  def addRow(row)
    @html << "<tr>"
    row.each do |_key, value|
      @html << "<td>#{CGI.escapeHTML(value.to_s)}</td>"
    end
    @html << "</tr>"
  end

  # @return [String] the complete HTML document for this report
  def getReport
    "<!DOCTYPE html><html><head><title>Report</title></head><body>" \
      "<h1>#{@reportName.capitalize}</h1>" \
      "<table class='table'>#{@html}</table>" \
      "</body></html>"
  end
end
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative "version"
3
+ require_relative "uri_helper"
4
+ require_relative "file_handler"
5
+ require_relative "html_report"
6
+
7
+ require "fileutils"
8
+ require "rubygems"
9
+ require "anemone"
10
+
11
+ class Main
12
+ attr_accessor :valid, :invalid, :slow, :extremely_slow
13
+
14
+ def initialize(webpage, depth, proxy_host, proxy_port)
15
+ @webpage = webpage
16
+ @depth = depth
17
+ @proxy_port = proxy_port
18
+ @proxy_host = proxy_host
19
+
20
+ @pageCount = 0
21
+
22
+ # initializing symbols
23
+ end
24
+
25
+ def execute
26
+ puts "Webpage : " + @webpage
27
+ puts "Depth : " + @depth
28
+
29
+ result = Hash.new
30
+ Anemone.crawl(@webpage) do |anemone|
31
+
32
+ unless @proxy_host.nil?
33
+ anemone.proxy_host = @proxy_host
34
+ anemone.proxy_port = @proxy_port
35
+ end
36
+
37
+ anemone.depth_limit = @depth.to_i
38
+ puts "============= Driller is now checking your website links. If any of the link returned non 200, it will be displayed here========="
39
+ anemone.focus_crawl do |page|
40
+ page.links.select { |url| url.starts_with? @webpage }
41
+ end
42
+
43
+
44
+ result[:valid] = Array.new
45
+ result[:invalid] = Array.new
46
+ result[:slow] = Array.new
47
+ result[:extremely_slow] = Array.new
48
+
49
+ anemone.on_every_page do |page|
50
+
51
+ pageObject = getPageObject(page)
52
+
53
+ if page.code == 200
54
+ result[:valid].push(pageObject)
55
+
56
+ # Check Response Time
57
+ # -------------------
58
+ # We only check response time
59
+ # for pages which are 200
60
+ # because there is no point of
61
+ # optimizing error pages.
62
+
63
+ if page.response_time > 10000
64
+ result[:slow].push(pageObject)
65
+
66
+ puts "=======Slow Page======\n"
67
+ puts "Time: #{page.response_time} - #{page.url}"
68
+ elsif page.response_time > 5000
69
+ result[:extremely_slow].push(pageObject)
70
+
71
+ puts "=======Very Slow Page======\n"
72
+ puts "Time: #{page.response_time} - #{page.url}"
73
+ end
74
+ #End checking response time.
75
+
76
+ else #404, 301, 500
77
+ result[:invalid].push(pageObject)
78
+
79
+ puts "======= NON-200 Page ======\n"
80
+ puts "#{page.code} Response from : #{page.url}"
81
+ puts
82
+
83
+ end
84
+
85
+ end
86
+
87
+ @pageCount = @pageCount + 1
88
+ puts "Checked #{@pageCount} pages" if @pageCount % 100 == 0
89
+ end
90
+
91
+ generateReport('valid_pages', result[:valid])
92
+ generateReport('broken', result[:invalid])
93
+ generateReport('slow_pages', result[:slow])
94
+ generateReport('extremely_slow_pages', result[:extremely_slow])
95
+
96
+ end
97
+
98
+ def getPageObject(page)
99
+ # This means we never got an access to internet
100
+ if page.code.nil?
101
+ abort("Error: Either you are offline or behind proxy.")
102
+ end
103
+
104
+ obj = Hash.new
105
+
106
+ obj['code'] = page.code
107
+ obj['url'] = (page.url).to_s
108
+ obj['response_time'] = page.response_time
109
+
110
+ return obj
111
+ end
112
+
113
+ def generateReport(reportName, data)
114
+ report = Html_Report.new(reportName, data)
115
+ File_Handler.new(reportName, report.getReport)
116
+ end
117
+ end
@@ -0,0 +1,8 @@
1
# Load the standard library first so the method below is added to the real
# URI::Generic instead of defining a bare class of the same name.
require 'uri'

module URI
  class Generic
    # Reports whether the string form of this URI begins with +prefix+.
    #
    # This is a plain textual comparison: "http://example.com" is also a
    # prefix of "http://example.com.other.org/".
    #
    # @param prefix [#to_s] text (or another URI) to compare against
    # @return [Boolean]
    def starts_with?(prefix)
      to_s.start_with?(prefix.to_s)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the Driller gem.
module Driller
  # Version of the gem. Frozen so the shared constant cannot be mutated in
  # place by accident.
  VERSION = "0.1.2".freeze
end
data/lib/driller.rb ADDED
@@ -0,0 +1,8 @@
1
# Library entry point: loads every component shipped with the Driller gem.
#
# The packaged files are version, uri_helper, file_handler, html_report and
# main; there is no driller/crawler file, so requiring it would raise
# LoadError as soon as the library is loaded.
require "driller/version"
require "driller/uri_helper"
require "driller/file_handler"
require "driller/html_report"
require "driller/main"
require "anemone"

# Top-level namespace of the gem (currently only holds Driller::VERSION).
module Driller

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: driller
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shashikant86
@@ -57,10 +57,24 @@ description: Driller is a command line Ruby based web crawler based on Anemone.
57
57
  can crawl website and reports error pages which are not 200 or 301.
58
58
  email:
59
59
  - shashikant.jagtap@aol.co.uk
60
- executables: []
60
+ executables:
61
+ - driller
61
62
  extensions: []
62
63
  extra_rdoc_files: []
63
- files: []
64
+ files:
65
+ - .gitignore
66
+ - Gemfile
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - bin/driller
71
+ - driller.gemspec
72
+ - lib/driller.rb
73
+ - lib/driller/file_handler.rb
74
+ - lib/driller/html_report.rb
75
+ - lib/driller/main.rb
76
+ - lib/driller/uri_helper.rb
77
+ - lib/driller/version.rb
64
78
  homepage: https://github.com/Shashikant86/driller
65
79
  licenses:
66
80
  - MIT