codesake_links 0.50 → 0.71
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/links +35 -32
- data/codesake_links.gemspec +1 -3
- data/lib/codesake/links/api.rb +27 -28
- data/lib/codesake/links/utils.rb +6 -8
- data/lib/codesake/links/version.rb +1 -1
- data/spec/codesake_links_api_spec.rb +60 -0
- data/spec/spec_helper.rb +2 -1
- metadata +7 -21
data/bin/links
CHANGED
@@ -64,52 +64,55 @@ target = ARGV[0]
|
|
64
64
|
logger.helo "#{APPNAME} v#{Codesake::Links::VERSION} (C) 2013 - paolo@armoredcode.com is starting up"
|
65
65
|
|
66
66
|
|
67
|
-
list<<target if list.empty?
|
67
|
+
# list<<target if list.empty?
|
68
68
|
|
69
|
-
logger.die("
|
69
|
+
logger.die("missing target") if target.nil?
|
70
|
+
logger.die("no -b or -r option specified") unless bulk or robots
|
70
71
|
|
71
72
|
if robots
|
72
|
-
|
73
|
-
|
73
|
+
res = Codesake::Links::Api.robots(target)
|
74
|
+
list = res[:disallow_list]
|
75
|
+
logger.err "#{target}: no robots.txt found (#{res[:error]})\n" if res[:status] == :KO
|
76
|
+
logger.ok "no disallowed entries to test on #{target}" if list.empty?
|
77
|
+
logger.ok "found #{list.size} disallowed url(s) on #{target}" unless list.empty?
|
78
|
+
list.each do |l|
|
79
|
+
logger.ok "#{l} - #{Codesake::Links::Api.code(target+l, nil)}"
|
80
|
+
end
|
81
|
+
logger.helo "leaving"
|
82
|
+
Kernel.exit(0)
|
74
83
|
end
|
75
84
|
|
85
|
+
|
76
86
|
list.each do |l|
|
77
|
-
|
78
|
-
if ! l.start_with? '/'
|
79
|
-
l = '/'+l.chomp
|
80
|
-
end
|
81
|
-
if ! target.start_with? 'http://' and ! target.start_with? 'https://'
|
82
|
-
#defaulting to HTTP when no protocol has been supplied
|
83
|
-
target = "http://"+target
|
84
|
-
end
|
87
|
+
unless l.start_with? "#"
|
85
88
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
logger.log "#{l}:"
|
89
|
+
l = l.chomp if l.end_with? "\n"
|
90
|
+
l = '/'+l unless l.start_with? '/'
|
91
|
+
|
92
|
+
url = target + l
|
92
93
|
start = Time.now
|
93
|
-
code = Codesake::Links::Api.code(
|
94
|
+
code = Codesake::Links::Api.code(url, nil)
|
94
95
|
stop = Time.now
|
95
|
-
end
|
96
96
|
|
97
|
-
|
97
|
+
str=Codesake::Links::Api.human(code)
|
98
98
|
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
if code == "200"
|
100
|
+
Codesake::Links::Utils.print_str(url, logger, str, start, stop) unless show_code
|
101
|
+
Codesake::Links::Utils.print_code(url, logger, code, start, stop) if show_code
|
102
|
+
end
|
102
103
|
|
103
|
-
if code == 301 or code == 302
|
104
|
-
start = Time.now
|
105
|
-
new_link = Codesake::Links::Api.follow(l, proxy)
|
106
|
-
stop = Time.now
|
107
|
-
logger.log "following from #{l} to #{new_link}\n"
|
108
|
-
str=Codesake::Links::Api.human(code)
|
109
104
|
|
110
|
-
|
111
|
-
|
105
|
+
if code == 301 or code == 302
|
106
|
+
start = Time.now
|
107
|
+
new_link = Codesake::Links::Api.follow(l, proxy)
|
108
|
+
stop = Time.now
|
109
|
+
logger.log "following from #{l} to #{new_link}\n"
|
110
|
+
str=Codesake::Links::Api.human(code)
|
111
|
+
|
112
|
+
Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
|
113
|
+
Codesake::Links::Utils.print_code(logger, code, start, stop) if show_code
|
112
114
|
|
115
|
+
end
|
113
116
|
end
|
114
117
|
|
115
118
|
end
|
data/codesake_links.gemspec
CHANGED
@@ -22,9 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
# specify any dependencies here; for example:
|
23
23
|
s.add_development_dependency "rake"
|
24
24
|
s.add_development_dependency "rspec"
|
25
|
-
s.add_development_dependency
|
26
|
-
# s.add_runtime_dependency "rest-client"
|
27
|
-
s.add_runtime_dependency "rainbow"
|
25
|
+
s.add_development_dependency 'webmock'
|
28
26
|
|
29
27
|
s.add_dependency "nokogiri"
|
30
28
|
s.add_dependency "mechanize"
|
data/lib/codesake/links/api.rb
CHANGED
@@ -32,35 +32,30 @@ module Codesake
|
|
32
32
|
end
|
33
33
|
|
34
34
|
# TESTING: SPIDERS, ROBOTS, AND CRAWLERS (OWASP-IG-001)
|
35
|
-
def self.robots(site
|
35
|
+
def self.robots(site)
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
37
|
+
site = 'http://'+site unless site.start_with? 'http://' or site.start_with? 'https://'
|
38
|
+
|
39
|
+
|
40
|
+
allow_list = []
|
41
|
+
disallow_list = []
|
40
42
|
|
41
|
-
list = []
|
42
43
|
begin
|
43
44
|
res=Net::HTTP.get_response(URI(site+'/robots.txt'))
|
44
|
-
if (res.code != "200")
|
45
|
-
|
46
|
-
end
|
45
|
+
return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>"robots.txt response code was #{res.code}"} if (res.code != "200")
|
46
|
+
|
47
47
|
|
48
48
|
res.body.split("\n").each do |line|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
else
|
54
|
-
if (line.downcase.start_with?('allow') or line.downcase.start_with?('disallow'))
|
55
|
-
list << line.split(":")[1].strip.chomp
|
56
|
-
end
|
57
|
-
end
|
49
|
+
|
50
|
+
disallow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('disallow'))
|
51
|
+
allow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('allow'))
|
52
|
+
|
58
53
|
end
|
59
|
-
rescue
|
60
|
-
return []
|
54
|
+
rescue Exception => e
|
55
|
+
return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>e.message}
|
61
56
|
end
|
62
57
|
|
63
|
-
|
58
|
+
{:status=>:OK, :allow_list=>allow_list, :disallow_list=>disallow_list, :error=>""}
|
64
59
|
end
|
65
60
|
|
66
61
|
def self.follow(url, proxy)
|
@@ -97,14 +92,18 @@ module Codesake
|
|
97
92
|
begin
|
98
93
|
uri = URI(url)
|
99
94
|
if uri.scheme == 'http'
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
95
|
+
unless proxy.nil?
|
96
|
+
Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) {|http|
|
97
|
+
if (method == :get)
|
98
|
+
res = http.get(uri.request_uri)
|
99
|
+
else
|
100
|
+
res = http.head(uri.request_uri)
|
101
|
+
end
|
102
|
+
return res
|
103
|
+
}
|
104
|
+
else
|
105
|
+
res = Net::HTTP.get_response(URI(url))
|
106
|
+
end
|
108
107
|
# res = Net::HTTP.get_response(URI(url))
|
109
108
|
else
|
110
109
|
request=Net::HTTP.new(uri.host, uri.port)
|
data/lib/codesake/links/utils.rb
CHANGED
@@ -3,18 +3,16 @@ module Codesake
|
|
3
3
|
module Links
|
4
4
|
class Utils
|
5
5
|
|
6
|
-
def self.print_str(logger, str, start, stop)
|
7
|
-
logger.ok "#{str} (#{((stop-start) * 1000).round} msec)
|
8
|
-
logger.
|
9
|
-
logger.warn " #{str} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
|
6
|
+
def self.print_str(url, logger, str, start, stop)
|
7
|
+
logger.ok "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" if str == "Open"
|
8
|
+
logger.log "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" unless str == "Open"
|
10
9
|
|
11
10
|
return
|
12
11
|
end
|
13
12
|
|
14
|
-
def self.print_code(
|
15
|
-
logger.ok "#{code} (#{((stop-start) * 1000).round} msec)
|
16
|
-
logger.
|
17
|
-
logger.warn " #{code} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
|
13
|
+
def self.print_code(url, logger, code, start, stop)
|
14
|
+
logger.ok "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" if code == "200"
|
15
|
+
logger.warn "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" unless code == "200"
|
18
16
|
|
19
17
|
return
|
20
18
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
include WebMock::API
|
3
|
+
|
4
|
+
describe "The API for Codesake Links" do
|
5
|
+
it "returns an array with a single / if the robots.txt contains only Allow: /" do
|
6
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
7
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
8
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
9
|
+
|
10
|
+
ret = Codesake::Links::Api.robots("http://www.test.com")
|
11
|
+
ret[:status].should == :OK
|
12
|
+
ret[:allow_list].size.should == 1
|
13
|
+
ret[:allow_list].should == [ '/' ]
|
14
|
+
end
|
15
|
+
|
16
|
+
it "returns an array with a single / if the robots.txt contains only Allow: / for an HTTPS site" do
|
17
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
18
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
19
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
20
|
+
|
21
|
+
ret = Codesake::Links::Api.robots("https://www.test.com")
|
22
|
+
ret[:status].should == :OK
|
23
|
+
ret[:allow_list].size.should == 1
|
24
|
+
ret[:allow_list].should == [ '/' ]
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
it "can handle an input without the protocol if target talks HTTP" do
|
29
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
30
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
31
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
32
|
+
|
33
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
34
|
+
ret[:status].should == :OK
|
35
|
+
ret[:allow_list].size.should == 1
|
36
|
+
ret[:allow_list].should == [ '/' ]
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
it "can't handle an input without the protocol if target talks *only* HTTPS" do
|
41
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
42
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
43
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
44
|
+
|
45
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
46
|
+
ret[:status].should == :KO
|
47
|
+
end
|
48
|
+
|
49
|
+
it "returns a list of disallowed URLs" do
|
50
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
51
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
52
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /\nDisallow: /private\nDisallow: /cgi-bin\nDisallow: /a-secret-dir", :headers=>{})
|
53
|
+
|
54
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
55
|
+
ret[:disallow_list].size.should == 3
|
56
|
+
ret[:disallow_list].should == [ '/private', '/cgi-bin', '/a-secret-dir' ]
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
require '
|
1
|
+
require 'codesake_links'
|
2
|
+
require 'webmock/rspec'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: codesake_links
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.71'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
47
|
+
name: webmock
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
49
|
none: false
|
50
50
|
requirements:
|
@@ -59,22 +59,6 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rainbow
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
|
-
requirements:
|
67
|
-
- - ! '>='
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
70
|
-
type: :runtime
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
|
-
requirements:
|
75
|
-
- - ! '>='
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
version: '0'
|
78
62
|
- !ruby/object:Gem::Dependency
|
79
63
|
name: nokogiri
|
80
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,6 +129,7 @@ files:
|
|
145
129
|
- lib/codesake/links/utils.rb
|
146
130
|
- lib/codesake/links/version.rb
|
147
131
|
- lib/codesake_links.rb
|
132
|
+
- spec/codesake_links_api_spec.rb
|
148
133
|
- spec/spec_helper.rb
|
149
134
|
homepage: http://codesake.com
|
150
135
|
licenses:
|
@@ -161,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
161
146
|
version: '0'
|
162
147
|
segments:
|
163
148
|
- 0
|
164
|
-
hash:
|
149
|
+
hash: 2079895705737470252
|
165
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
166
151
|
none: false
|
167
152
|
requirements:
|
@@ -170,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
155
|
version: '0'
|
171
156
|
segments:
|
172
157
|
- 0
|
173
|
-
hash:
|
158
|
+
hash: 2079895705737470252
|
174
159
|
requirements: []
|
175
160
|
rubyforge_project: links
|
176
161
|
rubygems_version: 1.8.24
|
@@ -178,4 +163,5 @@ signing_key:
|
|
178
163
|
specification_version: 3
|
179
164
|
summary: Fetch, discover and crawl what's available in a website.
|
180
165
|
test_files:
|
166
|
+
- spec/codesake_links_api_spec.rb
|
181
167
|
- spec/spec_helper.rb
|