codesake_links 0.50 → 0.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/links +35 -32
- data/codesake_links.gemspec +1 -3
- data/lib/codesake/links/api.rb +27 -28
- data/lib/codesake/links/utils.rb +6 -8
- data/lib/codesake/links/version.rb +1 -1
- data/spec/codesake_links_api_spec.rb +60 -0
- data/spec/spec_helper.rb +2 -1
- metadata +7 -21
data/bin/links
CHANGED
@@ -64,52 +64,55 @@ target = ARGV[0]
|
|
64
64
|
logger.helo "#{APPNAME} v#{Codesake::Links::VERSION} (C) 2013 - paolo@armoredcode.com is starting up"
|
65
65
|
|
66
66
|
|
67
|
-
list<<target if list.empty?
|
67
|
+
# list<<target if list.empty?
|
68
68
|
|
69
|
-
logger.die("
|
69
|
+
logger.die("missing target") if target.nil?
|
70
|
+
logger.die("no -b or -r option specified") unless bulk or robots
|
70
71
|
|
71
72
|
if robots
|
72
|
-
|
73
|
-
|
73
|
+
res = Codesake::Links::Api.robots(target)
|
74
|
+
list = res[:disallow_list]
|
75
|
+
logger.err "#{target}: no robots.txt found (#{res[:error]})\n" if res[:status] == :KO
|
76
|
+
logger.ok "no disallowed entries to test on #{target}" if list.empty?
|
77
|
+
logger.ok "found #{list.size} disallowed url(s) on #{target}" unless list.empty?
|
78
|
+
list.each do |l|
|
79
|
+
logger.ok "#{l} - #{Codesake::Links::Api.code(target+l, nil)}"
|
80
|
+
end
|
81
|
+
logger.helo "leaving"
|
82
|
+
Kernel.exit(0)
|
74
83
|
end
|
75
84
|
|
85
|
+
|
76
86
|
list.each do |l|
|
77
|
-
|
78
|
-
if ! l.start_with? '/'
|
79
|
-
l = '/'+l.chomp
|
80
|
-
end
|
81
|
-
if ! target.start_with? 'http://' and ! target.start_with? 'https://'
|
82
|
-
#defaulting to HTTP when no protocol has been supplied
|
83
|
-
target = "http://"+target
|
84
|
-
end
|
87
|
+
unless l.start_with? "#"
|
85
88
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
logger.log "#{l}:"
|
89
|
+
l = l.chomp if l.end_with? "\n"
|
90
|
+
l = '/'+l unless l.start_with? '/'
|
91
|
+
|
92
|
+
url = target + l
|
92
93
|
start = Time.now
|
93
|
-
code = Codesake::Links::Api.code(
|
94
|
+
code = Codesake::Links::Api.code(url, nil)
|
94
95
|
stop = Time.now
|
95
|
-
end
|
96
96
|
|
97
|
-
|
97
|
+
str=Codesake::Links::Api.human(code)
|
98
98
|
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
if code == "200"
|
100
|
+
Codesake::Links::Utils.print_str(url, logger, str, start, stop) unless show_code
|
101
|
+
Codesake::Links::Utils.print_code(url, logger, code, start, stop) if show_code
|
102
|
+
end
|
102
103
|
|
103
|
-
if code == 301 or code == 302
|
104
|
-
start = Time.now
|
105
|
-
new_link = Codesake::Links::Api.follow(l, proxy)
|
106
|
-
stop = Time.now
|
107
|
-
logger.log "following from #{l} to #{new_link}\n"
|
108
|
-
str=Codesake::Links::Api.human(code)
|
109
104
|
|
110
|
-
|
111
|
-
|
105
|
+
if code == 301 or code == 302
|
106
|
+
start = Time.now
|
107
|
+
new_link = Codesake::Links::Api.follow(l, proxy)
|
108
|
+
stop = Time.now
|
109
|
+
logger.log "following from #{l} to #{new_link}\n"
|
110
|
+
str=Codesake::Links::Api.human(code)
|
111
|
+
|
112
|
+
Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
|
113
|
+
Codesake::Links::Utils.print_code(logger, code, start, stop) if show_code
|
112
114
|
|
115
|
+
end
|
113
116
|
end
|
114
117
|
|
115
118
|
end
|
data/codesake_links.gemspec
CHANGED
@@ -22,9 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
# specify any dependencies here; for example:
|
23
23
|
s.add_development_dependency "rake"
|
24
24
|
s.add_development_dependency "rspec"
|
25
|
-
s.add_development_dependency
|
26
|
-
# s.add_runtime_dependency "rest-client"
|
27
|
-
s.add_runtime_dependency "rainbow"
|
25
|
+
s.add_development_dependency 'webmock'
|
28
26
|
|
29
27
|
s.add_dependency "nokogiri"
|
30
28
|
s.add_dependency "mechanize"
|
data/lib/codesake/links/api.rb
CHANGED
@@ -32,35 +32,30 @@ module Codesake
|
|
32
32
|
end
|
33
33
|
|
34
34
|
# TESTING: SPIDERS, ROBOTS, AND CRAWLERS (OWASP-IG-001)
|
35
|
-
def self.robots(site
|
35
|
+
def self.robots(site)
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
37
|
+
site = 'http://'+site unless site.start_with? 'http://' or site.start_with? 'https://'
|
38
|
+
|
39
|
+
|
40
|
+
allow_list = []
|
41
|
+
disallow_list = []
|
40
42
|
|
41
|
-
list = []
|
42
43
|
begin
|
43
44
|
res=Net::HTTP.get_response(URI(site+'/robots.txt'))
|
44
|
-
if (res.code != "200")
|
45
|
-
|
46
|
-
end
|
45
|
+
return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>"robots.txt response code was #{res.code}"} if (res.code != "200")
|
46
|
+
|
47
47
|
|
48
48
|
res.body.split("\n").each do |line|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
else
|
54
|
-
if (line.downcase.start_with?('allow') or line.downcase.start_with?('disallow'))
|
55
|
-
list << line.split(":")[1].strip.chomp
|
56
|
-
end
|
57
|
-
end
|
49
|
+
|
50
|
+
disallow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('disallow'))
|
51
|
+
allow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('allow'))
|
52
|
+
|
58
53
|
end
|
59
|
-
rescue
|
60
|
-
return []
|
54
|
+
rescue Exception => e
|
55
|
+
return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>e.message}
|
61
56
|
end
|
62
57
|
|
63
|
-
|
58
|
+
{:status=>:OK, :allow_list=>allow_list, :disallow_list=>disallow_list, :error=>""}
|
64
59
|
end
|
65
60
|
|
66
61
|
def self.follow(url, proxy)
|
@@ -97,14 +92,18 @@ module Codesake
|
|
97
92
|
begin
|
98
93
|
uri = URI(url)
|
99
94
|
if uri.scheme == 'http'
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
95
|
+
unless proxy.nil?
|
96
|
+
Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) {|http|
|
97
|
+
if (method == :get)
|
98
|
+
res = http.get(uri.request_uri)
|
99
|
+
else
|
100
|
+
res = http.head(uri.request_uri)
|
101
|
+
end
|
102
|
+
return res
|
103
|
+
}
|
104
|
+
else
|
105
|
+
res = Net::HTTP.get_response(URI(url))
|
106
|
+
end
|
108
107
|
# res = Net::HTTP.get_response(URI(url))
|
109
108
|
else
|
110
109
|
request=Net::HTTP.new(uri.host, uri.port)
|
data/lib/codesake/links/utils.rb
CHANGED
@@ -3,18 +3,16 @@ module Codesake
|
|
3
3
|
module Links
|
4
4
|
class Utils
|
5
5
|
|
6
|
-
def self.print_str(logger, str, start, stop)
|
7
|
-
logger.ok "#{str} (#{((stop-start) * 1000).round} msec)
|
8
|
-
logger.
|
9
|
-
logger.warn " #{str} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
|
6
|
+
def self.print_str(url, logger, str, start, stop)
|
7
|
+
logger.ok "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" if str == "Open"
|
8
|
+
logger.log "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" unless str == "Open"
|
10
9
|
|
11
10
|
return
|
12
11
|
end
|
13
12
|
|
14
|
-
def self.print_code(
|
15
|
-
logger.ok "#{code} (#{((stop-start) * 1000).round} msec)
|
16
|
-
logger.
|
17
|
-
logger.warn " #{code} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
|
13
|
+
def self.print_code(url, logger, code, start, stop)
|
14
|
+
logger.ok "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" if code == "200"
|
15
|
+
logger.warn "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" unless code == "200"
|
18
16
|
|
19
17
|
return
|
20
18
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
include WebMock::API
|
3
|
+
|
4
|
+
describe "The API for Codesake Links" do
|
5
|
+
it "returns an array with a single / if the robots.txt contains only Allow: /" do
|
6
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
7
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
8
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
9
|
+
|
10
|
+
ret = Codesake::Links::Api.robots("http://www.test.com")
|
11
|
+
ret[:status].should == :OK
|
12
|
+
ret[:allow_list].size.should == 1
|
13
|
+
ret[:allow_list].should == [ '/' ]
|
14
|
+
end
|
15
|
+
|
16
|
+
it "returns an array with a single / if the robots.txt contains only Allow: / for an HTTPS site" do
|
17
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
18
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
19
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
20
|
+
|
21
|
+
ret = Codesake::Links::Api.robots("https://www.test.com")
|
22
|
+
ret[:status].should == :OK
|
23
|
+
ret[:allow_list].size.should == 1
|
24
|
+
ret[:allow_list].should == [ '/' ]
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
it "can handle an input without the protocol if target talks HTTP" do
|
29
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
30
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
31
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
32
|
+
|
33
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
34
|
+
ret[:status].should == :OK
|
35
|
+
ret[:allow_list].size.should == 1
|
36
|
+
ret[:allow_list].should == [ '/' ]
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
it "can't handle an input without the protocol if target talks *only* HTTPS" do
|
41
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
42
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
43
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
44
|
+
|
45
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
46
|
+
ret[:status].should == :KO
|
47
|
+
end
|
48
|
+
|
49
|
+
it "returns a list of disallowed URLs" do
|
50
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
51
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
52
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /\nDisallow: /private\nDisallow: /cgi-bin\nDisallow: /a-secret-dir", :headers=>{})
|
53
|
+
|
54
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
55
|
+
ret[:disallow_list].size.should == 3
|
56
|
+
ret[:disallow_list].should == [ '/private', '/cgi-bin', '/a-secret-dir' ]
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
require '
|
1
|
+
require 'codesake_links'
|
2
|
+
require 'webmock/rspec'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: codesake_links
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.71'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
47
|
+
name: webmock
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
49
|
none: false
|
50
50
|
requirements:
|
@@ -59,22 +59,6 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rainbow
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
|
-
requirements:
|
67
|
-
- - ! '>='
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
70
|
-
type: :runtime
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
|
-
requirements:
|
75
|
-
- - ! '>='
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
version: '0'
|
78
62
|
- !ruby/object:Gem::Dependency
|
79
63
|
name: nokogiri
|
80
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,6 +129,7 @@ files:
|
|
145
129
|
- lib/codesake/links/utils.rb
|
146
130
|
- lib/codesake/links/version.rb
|
147
131
|
- lib/codesake_links.rb
|
132
|
+
- spec/codesake_links_api_spec.rb
|
148
133
|
- spec/spec_helper.rb
|
149
134
|
homepage: http://codesake.com
|
150
135
|
licenses:
|
@@ -161,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
161
146
|
version: '0'
|
162
147
|
segments:
|
163
148
|
- 0
|
164
|
-
hash:
|
149
|
+
hash: 2079895705737470252
|
165
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
166
151
|
none: false
|
167
152
|
requirements:
|
@@ -170,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
155
|
version: '0'
|
171
156
|
segments:
|
172
157
|
- 0
|
173
|
-
hash:
|
158
|
+
hash: 2079895705737470252
|
174
159
|
requirements: []
|
175
160
|
rubyforge_project: links
|
176
161
|
rubygems_version: 1.8.24
|
@@ -178,4 +163,5 @@ signing_key:
|
|
178
163
|
specification_version: 3
|
179
164
|
summary: Fetch, discover and crawl what's available in a website.
|
180
165
|
test_files:
|
166
|
+
- spec/codesake_links_api_spec.rb
|
181
167
|
- spec/spec_helper.rb
|