codesake_links 0.50 → 0.71

Sign up to get free protection for your applications and to get access to all the features.
data/bin/links CHANGED
@@ -64,52 +64,55 @@ target = ARGV[0]
64
64
  logger.helo "#{APPNAME} v#{Codesake::Links::VERSION} (C) 2013 - paolo@armoredcode.com is starting up"
65
65
 
66
66
 
67
- list<<target if list.empty?
67
+ # list<<target if list.empty?
68
68
 
69
- logger.die("links: missing target") if list[0].nil?
69
+ logger.die("missing target") if target.nil?
70
+ logger.die("no -b or -r option specified") unless bulk or robots
70
71
 
71
72
  if robots
72
- list = Codesake::Links::Api.robots(target)
73
- logger.err "#{target}: no robots.txt found\n" if list.empty?
73
+ res = Codesake::Links::Api.robots(target)
74
+ list = res[:disallow_list]
75
+ logger.err "#{target}: no robots.txt found (#{res[:error]})\n" if res[:status] == :KO
76
+ logger.ok "no disallowed entries to test on #{target}" if list.empty?
77
+ logger.ok "found #{list.size} disallowed url(s) on #{target}" unless list.empty?
78
+ list.each do |l|
79
+ logger.ok "#{l} - #{Codesake::Links::Api.code(target+l, nil)}"
80
+ end
81
+ logger.helo "leaving"
82
+ Kernel.exit(0)
74
83
  end
75
84
 
85
+
76
86
  list.each do |l|
77
- if robots or bulk
78
- if ! l.start_with? '/'
79
- l = '/'+l.chomp
80
- end
81
- if ! target.start_with? 'http://' and ! target.start_with? 'https://'
82
- #defaulting to HTTP when no protocol has been supplied
83
- target = "http://"+target
84
- end
87
+ unless l.start_with? "#"
85
88
 
86
- logger.log "#{target}#{l}:"
87
- start = Time.now
88
- code = Codesake::Links::Api.code(target+l, proxy)
89
- stop = Time.now
90
- else
91
- logger.log "#{l}:"
89
+ l = l.chomp if l.end_with? "\n"
90
+ l = '/'+l unless l.start_with? '/'
91
+
92
+ url = target + l
92
93
  start = Time.now
93
- code = Codesake::Links::Api.code(l, proxy)
94
+ code = Codesake::Links::Api.code(url, nil)
94
95
  stop = Time.now
95
- end
96
96
 
97
- str=Codesake::Links::Api.human(code)
97
+ str=Codesake::Links::Api.human(code)
98
98
 
99
- Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
100
- Codesake::Links::Utils.print_code(logger, str, code, start, stop) if show_code
101
-
99
+ if code == "200"
100
+ Codesake::Links::Utils.print_str(url, logger, str, start, stop) unless show_code
101
+ Codesake::Links::Utils.print_code(url, logger, code, start, stop) if show_code
102
+ end
102
103
 
103
- if code == 301 or code == 302
104
- start = Time.now
105
- new_link = Codesake::Links::Api.follow(l, proxy)
106
- stop = Time.now
107
- logger.log "following from #{l} to #{new_link}\n"
108
- str=Codesake::Links::Api.human(code)
109
104
 
110
- Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
111
- Codesake::Links::Utils.print_code(logger, str, code, start, stop) if show_code
105
+ if code == 301 or code == 302
106
+ start = Time.now
107
+ new_link = Codesake::Links::Api.follow(l, proxy)
108
+ stop = Time.now
109
+ logger.log "following from #{l} to #{new_link}\n"
110
+ str=Codesake::Links::Api.human(code)
111
+
112
+ Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
113
+ Codesake::Links::Utils.print_code(logger, code, start, stop) if show_code
112
114
 
115
+ end
113
116
  end
114
117
 
115
118
  end
@@ -22,9 +22,7 @@ Gem::Specification.new do |s|
22
22
  # specify any dependencies here; for example:
23
23
  s.add_development_dependency "rake"
24
24
  s.add_development_dependency "rspec"
25
- s.add_development_dependency "rainbow"
26
- # s.add_runtime_dependency "rest-client"
27
- s.add_runtime_dependency "rainbow"
25
+ s.add_development_dependency 'webmock'
28
26
 
29
27
  s.add_dependency "nokogiri"
30
28
  s.add_dependency "mechanize"
@@ -32,35 +32,30 @@ module Codesake
32
32
  end
33
33
 
34
34
  # TESTING: SPIDERS, ROBOTS, AND CRAWLERS (OWASP-IG-001)
35
- def self.robots(site, only_disallow=true)
35
+ def self.robots(site)
36
36
 
37
- if (! site.start_with? 'http://') and (! site.start_with? 'https://')
38
- site = 'http://'+site
39
- end
37
+ site = 'http://'+site unless site.start_with? 'http://' or site.start_with? 'https://'
38
+
39
+
40
+ allow_list = []
41
+ disallow_list = []
40
42
 
41
- list = []
42
43
  begin
43
44
  res=Net::HTTP.get_response(URI(site+'/robots.txt'))
44
- if (res.code != "200")
45
- return []
46
- end
45
+ return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>"robots.txt response code was #{res.code}"} if (res.code != "200")
46
+
47
47
 
48
48
  res.body.split("\n").each do |line|
49
- if only_disallow
50
- if (line.downcase.start_with?('disallow'))
51
- list << line.split(":")[1].strip.chomp
52
- end
53
- else
54
- if (line.downcase.start_with?('allow') or line.downcase.start_with?('disallow'))
55
- list << line.split(":")[1].strip.chomp
56
- end
57
- end
49
+
50
+ disallow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('disallow'))
51
+ allow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('allow'))
52
+
58
53
  end
59
- rescue
60
- return []
54
+ rescue Exception => e
55
+ return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>e.message}
61
56
  end
62
57
 
63
- list
58
+ {:status=>:OK, :allow_list=>allow_list, :disallow_list=>disallow_list, :error=>""}
64
59
  end
65
60
 
66
61
  def self.follow(url, proxy)
@@ -97,14 +92,18 @@ module Codesake
97
92
  begin
98
93
  uri = URI(url)
99
94
  if uri.scheme == 'http'
100
- Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) {|http|
101
- if (method == :get)
102
- res = http.get(uri.request_uri)
103
- else
104
- res = http.head(uri.request_uri)
105
- end
106
- return res
107
- }
95
+ unless proxy.nil?
96
+ Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) {|http|
97
+ if (method == :get)
98
+ res = http.get(uri.request_uri)
99
+ else
100
+ res = http.head(uri.request_uri)
101
+ end
102
+ return res
103
+ }
104
+ else
105
+ res = Net::HTTP.get_response(URI(url))
106
+ end
108
107
  # res = Net::HTTP.get_response(URI(url))
109
108
  else
110
109
  request=Net::HTTP.new(uri.host, uri.port)
@@ -3,18 +3,16 @@ module Codesake
3
3
  module Links
4
4
  class Utils
5
5
 
6
- def self.print_str(logger, str, start, stop)
7
- logger.ok "#{str} (#{((stop-start) * 1000).round} msec)\n" if str == "Open"
8
- logger.err " #{str} (#{((stop-start) * 1000).round} msec)\n" if (str == "Closed" or str == "Non existent")
9
- logger.warn " #{str} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
6
+ def self.print_str(url, logger, str, start, stop)
7
+ logger.ok "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" if str == "Open"
8
+ logger.log "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" unless str == "Open"
10
9
 
11
10
  return
12
11
  end
13
12
 
14
- def self.print_code(logger, str, code, start, stop)
15
- logger.ok "#{code} (#{((stop-start) * 1000).round} msec)\n" if str == "Open"
16
- logger.err " #{code} (#{((stop-start) * 1000).round} msec)\n" if (str == "Closed" or str == "Non existent")
17
- logger.warn " #{code} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
13
+ def self.print_code(url, logger, code, start, stop)
14
+ logger.ok "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" if code == "200"
15
+ logger.warn "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" unless code == "200"
18
16
 
19
17
  return
20
18
  end
@@ -1,5 +1,5 @@
1
1
  module Codesake
2
2
  module Links
3
- VERSION = "0.50"
3
+ VERSION = "0.71"
4
4
  end
5
5
  end
@@ -0,0 +1,60 @@
1
+ require 'spec_helper'
2
+ include WebMock::API
3
+
4
+ describe "The API for Codesake Links" do
5
+ it "returns an array with a single / if the robots.txt contains only Allow: /" do
6
+ stub_request(:get, "http://www.test.com/robots.txt").
7
+ with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
8
+ to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
9
+
10
+ ret = Codesake::Links::Api.robots("http://www.test.com")
11
+ ret[:status].should == :OK
12
+ ret[:allow_list].size.should == 1
13
+ ret[:allow_list].should == [ '/' ]
14
+ end
15
+
16
+ it "returns an array with a single / if the robots.txt contains only Allow: / for an HTTPS site" do
17
+ stub_request(:get, "http://www.test.com:443/robots.txt").
18
+ with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
19
+ to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
20
+
21
+ ret = Codesake::Links::Api.robots("https://www.test.com")
22
+ ret[:status].should == :OK
23
+ ret[:allow_list].size.should == 1
24
+ ret[:allow_list].should == [ '/' ]
25
+ end
26
+
27
+
28
+ it "can handle an input without the protocol if target talks HTTP" do
29
+ stub_request(:get, "http://www.test.com/robots.txt").
30
+ with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
31
+ to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
32
+
33
+ ret = Codesake::Links::Api.robots("www.test.com")
34
+ ret[:status].should == :OK
35
+ ret[:allow_list].size.should == 1
36
+ ret[:allow_list].should == [ '/' ]
37
+ end
38
+
39
+
40
+ it "can't handle an input without the protocol if target talks *only* HTTPS" do
41
+ stub_request(:get, "http://www.test.com:443/robots.txt").
42
+ with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
43
+ to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
44
+
45
+ ret = Codesake::Links::Api.robots("www.test.com")
46
+ ret[:status].should == :KO
47
+ end
48
+
49
+ it "returns a list of disallowed URLs" do
50
+ stub_request(:get, "http://www.test.com/robots.txt").
51
+ with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
52
+ to_return(:status=>200, :body=>"User-agent: *\nAllow: /\nDisallow: /private\nDisallow: /cgi-bin\nDisallow: /a-secret-dir", :headers=>{})
53
+
54
+ ret = Codesake::Links::Api.robots("www.test.com")
55
+ ret[:disallow_list].size.should == 3
56
+ ret[:disallow_list].should == [ '/private', '/cgi-bin', '/a-secret-dir' ]
57
+
58
+ end
59
+
60
+ end
@@ -1 +1,2 @@
1
- require 'links'
1
+ require 'codesake_links'
2
+ require 'webmock/rspec'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: codesake_links
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.50'
4
+ version: '0.71'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-11 00:00:00.000000000 Z
12
+ date: 2013-04-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -44,7 +44,7 @@ dependencies:
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
46
  - !ruby/object:Gem::Dependency
47
- name: rainbow
47
+ name: webmock
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  none: false
50
50
  requirements:
@@ -59,22 +59,6 @@ dependencies:
59
59
  - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
- - !ruby/object:Gem::Dependency
63
- name: rainbow
64
- requirement: !ruby/object:Gem::Requirement
65
- none: false
66
- requirements:
67
- - - ! '>='
68
- - !ruby/object:Gem::Version
69
- version: '0'
70
- type: :runtime
71
- prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
- requirements:
75
- - - ! '>='
76
- - !ruby/object:Gem::Version
77
- version: '0'
78
62
  - !ruby/object:Gem::Dependency
79
63
  name: nokogiri
80
64
  requirement: !ruby/object:Gem::Requirement
@@ -145,6 +129,7 @@ files:
145
129
  - lib/codesake/links/utils.rb
146
130
  - lib/codesake/links/version.rb
147
131
  - lib/codesake_links.rb
132
+ - spec/codesake_links_api_spec.rb
148
133
  - spec/spec_helper.rb
149
134
  homepage: http://codesake.com
150
135
  licenses:
@@ -161,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
161
146
  version: '0'
162
147
  segments:
163
148
  - 0
164
- hash: -1757904362167703742
149
+ hash: 2079895705737470252
165
150
  required_rubygems_version: !ruby/object:Gem::Requirement
166
151
  none: false
167
152
  requirements:
@@ -170,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
170
155
  version: '0'
171
156
  segments:
172
157
  - 0
173
- hash: -1757904362167703742
158
+ hash: 2079895705737470252
174
159
  requirements: []
175
160
  rubyforge_project: links
176
161
  rubygems_version: 1.8.24
@@ -178,4 +163,5 @@ signing_key:
178
163
  specification_version: 3
179
164
  summary: Fetch, discover and crawl what's available in a website.
180
165
  test_files:
166
+ - spec/codesake_links_api_spec.rb
181
167
  - spec/spec_helper.rb