links 0.30.0 → 0.75
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/README.md +1 -1
- data/bin/links +167 -54
- data/lib/codesake/links/api.rb +129 -0
- data/lib/codesake/links/google.rb +47 -0
- data/lib/codesake/links/utils.rb +24 -0
- data/lib/codesake/links/version.rb +5 -0
- data/lib/links.rb +4 -2
- data/links.gemspec +13 -9
- data/spec/codesake_links_api_spec.rb +60 -0
- data/spec/spec_helper.rb +2 -1
- metadata +103 -43
- data/lib/links/api.rb +0 -96
- data/lib/links/version.rb +0 -16
- data/spec/w3ping_spec.rb +0 -8
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c727c4e55bc5c6ebd300a23d9dcf183d7814d774
|
4
|
+
data.tar.gz: 9d5c4b2fac1b72bd0cfb4ea92064642a143490f2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0b74f49cb283654eda4214df8ecbe7cd2bda931bc640bec4880df4ce4a458e6b90c557c010e6a544ef22c1cfd65428dde339b3ec2e28cf76d728b70844e5bd13
|
7
|
+
data.tar.gz: 2d3f8355d3c75e3b3b50caa04bec8a6c9160616b71e4f330510e0c807d8482c52dcda73f0cc3ecfc647d36afaac7b8ffa7aae6a81632803cbfadf6646d73500e
|
data/.gitignore
CHANGED
data/README.md
CHANGED
data/bin/links
CHANGED
@@ -1,19 +1,71 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
require "links"
|
3
2
|
require "rainbow"
|
4
3
|
require 'getoptlong'
|
5
4
|
|
5
|
+
require 'anemone'
|
6
|
+
require "codesake-commons"
|
7
|
+
require "links"
|
8
|
+
|
9
|
+
require 'data_mapper'
|
10
|
+
require 'dm-sqlite-adapter'
|
11
|
+
|
12
|
+
# DataMapper model describing one scan: the scanned base, the tool that ran
# it and that tool's version, plus creation/update timestamps.
class Scan
  include DataMapper::Resource

  property :id, Serial
  property :base, String, :length=>256, :required => true
  property :tool, String
  property :version, String
  # The defaults are lambdas so the timestamp is taken when a record is
  # built. A plain `DateTime.now` here is evaluated once, while the class
  # body is loaded, and every record would share that single instant.
  property :created_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }
  property :updated_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }

end
|
23
|
+
|
24
|
+
# DataMapper model for one URL found on the target: its path, its query
# string (if any) and whether it has already been tested.
class Link
  include DataMapper::Resource

  property :id, Serial
  property :path, String, :length=>256, :required => true
  property :q, String, :length=>256
  property :tested, Boolean, :default=>false
  # The defaults are lambdas so the timestamp is taken when a record is
  # built. A plain `DateTime.now` here is evaluated once, while the class
  # body is loaded, and every record would share that single instant.
  property :created_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }
  property :updated_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }

  # Returns every stored link that carries a query string (q is not nil).
  def self.all_dynamic
    Link.all(:q.not => nil)
  end
end
|
38
|
+
|
39
|
+
POST_WITHOUT_SLASH = %r[\d{4}\/\d{2}\/[^\/]+$]
|
40
|
+
POST_WITH_SLASH = %r[\d{4}\/\d{2}\/[\w-]+\/$]
|
41
|
+
ANY_POST = Regexp.union POST_WITHOUT_SLASH, POST_WITH_SLASH
|
42
|
+
ANY_PAGE = %r[page\/\d+]
|
43
|
+
ANY_PATTERN = Regexp.union ANY_PAGE, ANY_POST
|
44
|
+
|
45
|
+
APPNAME = File.basename($0)
|
46
|
+
|
47
|
+
logger = Codesake::Commons::Logging.instance
|
48
|
+
logger.toggle_syslog
|
49
|
+
|
6
50
|
opts = GetoptLong.new(
|
7
51
|
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
8
52
|
[ '--version', '-v', GetoptLong::NO_ARGUMENT ],
|
53
|
+
[ '--dynamic', '-d', GetoptLong::NO_ARGUMENT ],
|
9
54
|
[ '--bulk', '-b', GetoptLong::REQUIRED_ARGUMENT ],
|
55
|
+
[ '--proxy', '-P', GetoptLong::REQUIRED_ARGUMENT ],
|
56
|
+
[ '--crawl', '-c', GetoptLong::NO_ARGUMENT ],
|
10
57
|
[ '--robots', '-r', GetoptLong::NO_ARGUMENT ]
|
11
58
|
)
|
59
|
+
trap("INT") { logger.die("[INTERRUPTED]") }
|
12
60
|
|
13
|
-
|
14
|
-
list=[]
|
15
|
-
robots=false
|
16
|
-
bulk=false
|
61
|
+
|
62
|
+
list = []
|
63
|
+
robots = false
|
64
|
+
bulk = false
|
65
|
+
show_code = false
|
66
|
+
crawl = false
|
67
|
+
dynamic = false
|
68
|
+
proxy = {:host=>nil, :port=>-1}
|
17
69
|
|
18
70
|
opts.each do |opt, arg|
|
19
71
|
case opt
|
@@ -21,12 +73,21 @@ opts.each do |opt, arg|
|
|
21
73
|
puts "usage: links [-bvh] [filename]"
|
22
74
|
puts " -b filename: loads the url list from a plain text file"
|
23
75
|
puts " -r : parse robots.txt and make requests to disallowed urls"
|
76
|
+
puts " -c : shows the return code instead of human readable answer"
|
77
|
+
puts " -P host:port : connect using a proxy server. Useful in combination with Paros, Owasp Zap and other"
|
24
78
|
puts " -v : shows version information"
|
25
79
|
puts " -h : shows this help"
|
26
80
|
exit 0
|
81
|
+
when '--dynamic'
|
82
|
+
dynamic = true
|
27
83
|
when '--version'
|
28
|
-
puts "
|
84
|
+
puts "#{Codesake::Links::VERSION}"
|
29
85
|
exit 0
|
86
|
+
when '--crawl'
|
87
|
+
crawl= true
|
88
|
+
when '--proxy'
|
89
|
+
proxy[:host]=arg.split(':')[0]
|
90
|
+
proxy[:port]=arg.split(':')[1].to_i
|
30
91
|
when '--robots'
|
31
92
|
robots=true
|
32
93
|
when '--bulk'
|
@@ -43,65 +104,117 @@ opts.each do |opt, arg|
|
|
43
104
|
end
|
44
105
|
end
|
45
106
|
|
46
|
-
target = ARGV
|
107
|
+
target = ARGV.shift
|
108
|
+
logger.helo APPNAME, Codesake::Links::VERSION
|
47
109
|
|
48
|
-
|
49
|
-
|
50
|
-
|
110
|
+
db_name = URI.parse(target).host.gsub('.','_')
|
111
|
+
DataMapper.setup(:default, "sqlite3://#{File.join(Dir.pwd, db_name)}.db")
|
112
|
+
DataMapper.finalize
|
113
|
+
DataMapper.auto_upgrade!
|
51
114
|
|
52
|
-
if list
|
53
|
-
|
54
|
-
|
55
|
-
|
115
|
+
# list<<target if list.empty?
|
116
|
+
|
117
|
+
logger.die("missing target") if target.nil?
|
118
|
+
# logger.die("no -b or -r option specified") unless bulk or robots
|
56
119
|
|
57
120
|
if robots
|
58
|
-
|
121
|
+
res = Codesake::Links::Api.robots(target)
|
122
|
+
list = res[:disallow_list]
|
123
|
+
logger.err "#{target}: no robots.txt found (#{res[:error]})\n" if res[:status] == :KO
|
124
|
+
logger.ok "no disallowed entries to test on #{target}" if list.empty?
|
125
|
+
logger.ok "found #{list.size} disallowed url(s) on #{target}" unless list.empty?
|
126
|
+
list.each do |l|
|
127
|
+
logger.ok "#{l} - #{Codesake::Links::Api.code(target+l, nil)}"
|
128
|
+
end
|
129
|
+
logger.bye
|
130
|
+
Kernel.exit(0)
|
59
131
|
end
|
60
132
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
133
|
+
if bulk
|
134
|
+
|
135
|
+
list.each do |l|
|
136
|
+
unless l.start_with? "#"
|
137
|
+
|
138
|
+
l = l.chomp if l.end_with? "\n"
|
139
|
+
l = '/'+l unless l.start_with? '/'
|
140
|
+
|
141
|
+
url = target + l
|
142
|
+
start = Time.now
|
143
|
+
code = Codesake::Links::Api.code(url, nil)
|
144
|
+
stop = Time.now
|
145
|
+
|
146
|
+
str=Codesake::Links::Api.human(code)
|
147
|
+
|
148
|
+
if code == "200"
|
149
|
+
Codesake::Links::Utils.print_str(url, logger, str, start, stop) unless show_code
|
150
|
+
Codesake::Links::Utils.print_code(url, logger, code, start, stop) if show_code
|
151
|
+
end
|
152
|
+
|
153
|
+
|
154
|
+
if code == 301 or code == 302
|
155
|
+
start = Time.now
|
156
|
+
new_link = Codesake::Links::Api.follow(l, proxy)
|
157
|
+
stop = Time.now
|
158
|
+
logger.log "following from #{l} to #{new_link}\n"
|
159
|
+
str=Codesake::Links::Api.human(code)
|
160
|
+
|
161
|
+
Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
|
162
|
+
Codesake::Links::Utils.print_code(logger, code, start, stop) if show_code
|
163
|
+
|
164
|
+
end
|
69
165
|
end
|
70
166
|
|
71
|
-
#puts "T: #{target}"
|
72
|
-
#puts "L: #{l}"
|
73
|
-
print "#{target}#{l}:".color(:white)
|
74
|
-
code = Links::Api.human(target+l)
|
75
|
-
else
|
76
|
-
print "#{l}:".color(:white)
|
77
|
-
code = Links::Api.human(l)
|
78
|
-
end
|
79
|
-
case code
|
80
|
-
when "Open"
|
81
|
-
print " #{code}\n".color(:green)
|
82
|
-
when "Non existent"
|
83
|
-
print " #{code}\n".color(:red)
|
84
|
-
when "Closed"
|
85
|
-
print " #{code}\n".color(:red)
|
86
|
-
else
|
87
|
-
print " #{code}\n".color(:yellow)
|
88
167
|
end
|
168
|
+
end
|
89
169
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
when "Non existent"
|
98
|
-
print " #{code}\n".color(:red)
|
99
|
-
when "Closed"
|
100
|
-
print " #{code}\n".color(:red)
|
101
|
-
else
|
102
|
-
print " #{code}\n".color(:yellow)
|
103
|
-
end
|
170
|
+
if dynamic
|
171
|
+
list = Link.all_dynamic
|
172
|
+
logger.log "#{list.size} dynamic urls found during last crawl"
|
173
|
+
list.each do |l|
|
174
|
+
logger.ok "#{l.path}/#{l.q}"
|
175
|
+
end
|
176
|
+
end
|
104
177
|
|
178
|
+
if crawl
  # Record the scan only when this target has no Scan row yet. The previous
  # `unless s.nil?` did the opposite: nothing was saved on the first run and
  # a duplicate row was created on every later one.
  s = Scan.first(:base=>target)
  if s.nil?
    s = Scan.new
    s.base = target
    s.tool = APPNAME
    # Fully qualified, like every other use of the version in this script.
    s.version = Codesake::Links::VERSION
    s.save
  end

  logger.log "start crawling #{target}"

  Anemone.crawl(target, :redirect_limit=>2, :depth_limit => 5) do |anemone|
    anemone.on_every_page do |page|
      # Pages are de-duplicated by path only; a path that is already stored
      # is skipped even when its query string differs.
      l = Link.first(:path=>page.url.path)
      if l.nil?
        l = Link.new
        l.path = page.url.path
        l.q = page.url.query
        saved = l.save
        logger.ok "adding #{page.url.path}" if saved
        logger.err "error saving #{page.url.path}: #{l.errors.inspect}" unless saved
      else
        logger.warn "skipping #{page.url.path}"
      end
    end
  end
end
|
206
|
+
|
207
|
+
# Anemone.crawl(target) do |anemone|
|
208
|
+
# anemone.focus_crawl do |page|
|
209
|
+
# page.links.keep_if { |link| link.to_s.match(ANY_PATTERN) } # crawl only links that are pages or blog posts
|
210
|
+
# end
|
211
|
+
# anemone.on_pages_like(POST_WITH_SLASH) do |page|
|
212
|
+
# title = page.doc.at_xpath("//div[@role='main']/header/h1").text rescue nil
|
213
|
+
# tag = page.doc.at_xpath("//header/div[@class='post-data']/p/a").text rescue nil
|
214
|
+
|
215
|
+
# if title and tag
|
216
|
+
# post = {title: title, tag: tag}
|
217
|
+
# logger.log "Inserting #{post.inspect}"
|
218
|
+
# end
|
219
|
+
# end
|
220
|
+
# end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
module Codesake

  module Links

    # HTTP helpers used to probe a target: fetch a URL, read its status code,
    # list the anchors of a page and parse robots.txt.
    module Api

      # include Links::Google

      # Labels returned by .human for the status codes it knows about.
      # -1 is the sentinel .code returns when no response was obtained.
      HUMAN_LABELS = {
        200 => "Open",
        301 => "Moved",
        404 => "Non existent",
        401 => "Closed",
        403 => "Forbidden",
        -1  => "No answer"
      }.freeze

      # Performs a GET on url.
      #
      # url   - String URL to request.
      # proxy - Hash with :host and :port, or nil for a direct connection.
      #
      # Returns the Net::HTTPResponse, or nil when the request failed.
      def self.get(url, proxy = nil)
        request({:url=>url, :proxy=>proxy, :method=>:get})
      end

      # Performs a HEAD on url. Same arguments and return value as .get.
      def self.head(url, proxy = nil)
        request({:url=>url, :proxy=>proxy, :method=>:head})
      end

      # Returns the response code of a GET on url as reported by Net::HTTP
      # (a String such as "200"), or the Integer -1 when there was no answer.
      def self.code(url, proxy = nil)
        res = get(url, proxy)
        res.nil? ? -1 : res.code
      end

      # Returns the href attribute of every <a> element found in the page at
      # url, or an empty Array when the page could not be fetched.
      def self.links(url, proxy = nil)
        res = get(url, proxy)
        return [] if res.nil?

        Nokogiri::HTML.parse(res.body).css('a').map { |link| link['href'] }
      end

      # TESTING: SPIDERS, ROBOTS, AND CRAWLERS (OWASP-IG-001)
      #
      # Fetches <site>/robots.txt and collects its Allow and Disallow entries.
      #
      # site - String host or URL; "http://" is prepended when no http(s)
      #        scheme is present.
      #
      # Returns a Hash with :status (:OK or :KO), :allow_list, :disallow_list
      # and :error (empty String on success).
      def self.robots(site)
        site = 'http://' + site unless site.start_with?('http://', 'https://')

        begin
          res = Net::HTTP.get_response(URI(site + '/robots.txt'))
          if res.code != "200"
            return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>"robots.txt response code was #{res.code}"}
          end

          allow_list, disallow_list = parse_robots(res.body)
        rescue SignalException, SystemExit, NoMemoryError
          # Never turn Ctrl-C, exit or memory exhaustion into a :KO result.
          raise
        rescue Exception => e
          # Deliberately broad, as before: any other failure is reported to
          # the caller as :KO instead of aborting the run.
          # NOTE(review): the accompanying spec expects :KO when the request
          # is refused by the stubbing layer, which may raise outside
          # StandardError - confirm before narrowing this further.
          return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>e.message}
        end

        {:status=>:OK, :allow_list=>allow_list, :disallow_list=>disallow_list, :error=>""}
      end

      # Extracts the Allow / Disallow values from the text of a robots.txt.
      #
      # Each line is split on the first ':' only, so values that contain a
      # colon are kept whole. '#' comments are dropped, and directives with
      # an empty value (e.g. a bare "Disallow:") are skipped instead of
      # raising.
      #
      # body - String content of robots.txt (nil is treated as empty).
      #
      # Returns a two element Array: [allow_list, disallow_list].
      def self.parse_robots(body)
        allow_list = []
        disallow_list = []

        body.to_s.each_line do |line|
          directive, value = line.sub(/#.*/, '').split(':', 2)
          next if value.nil?

          value = value.strip
          next if value.empty?

          case directive.strip.downcase
          when 'disallow' then disallow_list << value
          when 'allow'    then allow_list << value
          end
        end

        [allow_list, disallow_list]
      end

      # Returns the first link found in the page at url, or nil when the page
      # has none or could not be fetched.
      def self.follow(url, proxy = nil)
        # .links needs the proxy too; calling it with the url alone raised
        # ArgumentError.
        links(url, proxy).first
      end

      # Translates a status code (Integer or String) into a short label.
      # Codes without a dedicated label are reported as "Broken".
      def self.human(code)
        HUMAN_LABELS.fetch(code.to_i, "Broken")
      end

      # Internal helper behind .get and .head. It stays publicly callable:
      # the bare `private` that used to precede it has no effect on a method
      # defined with `def self.`.
      #
      # options - Hash with :url (String), :proxy (Hash with :host/:port, or
      #           nil) and :method (:get; any other value performs a HEAD).
      #
      # Returns the Net::HTTPResponse, or nil when anything goes wrong
      # (best effort: callers treat nil as "no answer").
      def self.request(options)
        uri    = URI(options[:url])
        proxy  = options[:proxy]
        method = options[:method]

        # A proxy hash without a host means "no proxy".
        http = if proxy && proxy[:host]
                 Net::HTTP.new(uri.host, uri.port, proxy[:host], proxy[:port])
               else
                 Net::HTTP.new(uri.host, uri.port)
               end

        # As before, every scheme other than plain http is spoken over TLS.
        unless uri.scheme == 'http'
          http.use_ssl = true
          # Certificates are intentionally not verified.
          http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end

        method == :get ? http.get(uri.request_uri) : http.head(uri.request_uri)
      rescue StandardError
        nil
      end

    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
module Codesake

  module Links

    # Public: implements the check described in SEARCH ENGINE
    # DISCOVERY/RECONNAISSANCE (OWASP-IG-002).
    #
    # The underlying idea is to use a search engine such as Google as a tool
    # to discover entry points, web applications or anything else about the
    # domain under test.
    #
    # Please bear in mind that you **must be authorized** by the system owner
    # before doing any sort of security test.
    #
    # Be ethical.
    #
    # Usage
    #   Codesake::Links::Google.search('somedomain.org').each do |res|
    #     puts "Discovered #{res}"
    #   end
    module Google

      attr_reader :results

      # Submits a search for domain through Google's home page form.
      #
      # domain - String domain to look up.
      #
      # Returns an Array with the text of every link on the result page.
      #
      # NOTE(review): the previous body did not parse (`def ... do`). It
      # searched for the fixed string 'Hello world', printed each link text
      # and always returned []. Confirm that a "site:" query and returning
      # the link texts is the intended behaviour.
      def self.search(domain)
        agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }
        found = []

        agent.get('http://google.com/') do |page|
          search_result = page.form_with(:name => 'f') do |search|
            search.q = "site:#{domain}"
          end.submit

          search_result.links.each { |link| found << link.text }
        end

        found
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Codesake

  module Links

    # Reporting helpers: write the outcome of one request to a logger,
    # together with the time the request took.
    class Utils

      # Reports a human readable outcome.
      #
      # url    - String URL that was requested.
      # logger - object responding to #ok and #log.
      # str    - String label; "Open" goes to logger.ok, anything else to
      #          logger.log.
      # start  - value taken before the request; stop - start must give
      # stop     seconds (e.g. two Time objects).
      #
      # Returns nil.
      def self.print_str(url, logger, str, start, stop)
        message = "#{url}: #{str} (#{elapsed_msec(start, stop)} msec)"
        if str == "Open"
          logger.ok message
        else
          logger.log message
        end

        nil
      end

      # Reports the raw status code.
      #
      # code - status code as String or Integer; 200 goes to logger.ok, any
      #        other value to logger.warn. Both forms are accepted so the
      #        check agrees with Api.human, which also takes either.
      #
      # The other arguments are as for .print_str (logger must respond to
      # #ok and #warn). Returns nil.
      def self.print_code(url, logger, code, start, stop)
        message = "#{url}: #{code} (#{elapsed_msec(start, stop)} msec)"
        if code.to_s == "200"
          logger.ok message
        else
          logger.warn message
        end

        nil
      end

      # Returns the time between start and stop in whole milliseconds.
      def self.elapsed_msec(start, stop)
        ((stop - start) * 1000).round
      end

    end
  end
end
|
24
|
+
|
data/lib/links.rb
CHANGED
data/links.gemspec
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
$:.push File.expand_path("../lib", __FILE__)
|
3
|
-
require "links/version"
|
3
|
+
require "codesake/links/version"
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "links"
|
7
|
-
s.version = Links::
|
7
|
+
s.version = Codesake::Links::VERSION
|
8
8
|
s.authors = ["Paolo Perego"]
|
9
|
-
s.email = ["
|
10
|
-
s.homepage = ""
|
9
|
+
s.email = ["paolo@armoredcode.com"]
|
10
|
+
s.homepage = "http://codesake.com"
|
11
11
|
s.summary = %q{Fetch, discover and crawl what's available in a website.}
|
12
12
|
s.description = %q{During the first stage of a security test, it's useful to enumerate website urls without making too much noise. Links can help in this using robots.txt or link in a web page telling you the website contents.}
|
13
13
|
s.license = "BSD"
|
@@ -22,9 +22,13 @@ Gem::Specification.new do |s|
|
|
22
22
|
# specify any dependencies here; for example:
|
23
23
|
s.add_development_dependency "rake"
|
24
24
|
s.add_development_dependency "rspec"
|
25
|
-
s.add_development_dependency
|
26
|
-
|
27
|
-
|
28
|
-
s.
|
29
|
-
s.
|
25
|
+
s.add_development_dependency 'webmock'
|
26
|
+
|
27
|
+
s.add_dependency 'anemone'
|
28
|
+
s.add_dependency 'data_mapper'
|
29
|
+
s.add_dependency 'dm-sqlite-adapter'
|
30
|
+
s.add_dependency "nokogiri"
|
31
|
+
s.add_dependency "mechanize"
|
32
|
+
|
33
|
+
s.add_dependency "codesake-commons"
|
30
34
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
include WebMock::API
|
3
|
+
|
4
|
+
describe "The API for Codesake Links" do
|
5
|
+
it "returns an array with a single / if the robots.txt contains only Allow: /" do
|
6
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
7
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
8
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
9
|
+
|
10
|
+
ret = Codesake::Links::Api.robots("http://www.test.com")
|
11
|
+
ret[:status].should == :OK
|
12
|
+
ret[:allow_list].size.should == 1
|
13
|
+
ret[:allow_list].should == [ '/' ]
|
14
|
+
end
|
15
|
+
|
16
|
+
it "returns an array with a single / if the robots.txt contains only Allow: / for an HTTPS site" do
|
17
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
18
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
19
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
20
|
+
|
21
|
+
ret = Codesake::Links::Api.robots("https://www.test.com")
|
22
|
+
ret[:status].should == :OK
|
23
|
+
ret[:allow_list].size.should == 1
|
24
|
+
ret[:allow_list].should == [ '/' ]
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
it "can handle an input without the protocol if target talks HTTP" do
|
29
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
30
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
31
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
32
|
+
|
33
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
34
|
+
ret[:status].should == :OK
|
35
|
+
ret[:allow_list].size.should == 1
|
36
|
+
ret[:allow_list].should == [ '/' ]
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
it "can't handle an input without the protocol if target talks *only* HTTPS" do
|
41
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
42
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
43
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
44
|
+
|
45
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
46
|
+
ret[:status].should == :KO
|
47
|
+
end
|
48
|
+
|
49
|
+
it "returns a list of disallowed URLs" do
|
50
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
51
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
52
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /\nDisallow: /private\nDisallow: /cgi-bin\nDisallow: /a-secret-dir", :headers=>{})
|
53
|
+
|
54
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
55
|
+
ret[:disallow_list].size.should == 3
|
56
|
+
ret[:disallow_list].should == [ '/private', '/cgi-bin', '/a-secret-dir' ]
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
require '
|
1
|
+
require 'links'
|
2
|
+
require 'webmock/rspec'
|
metadata
CHANGED
@@ -1,87 +1,146 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: links
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: '0.75'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Paolo Perego
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-11 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rake
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rspec
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
38
|
-
requirement:
|
39
|
-
none: false
|
42
|
+
name: webmock
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
49
|
-
requirement:
|
50
|
-
none: false
|
56
|
+
name: anemone
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
|
-
type: :
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: data_mapper
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: dm-sqlite-adapter
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
56
91
|
prerelease: false
|
57
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
58
97
|
- !ruby/object:Gem::Dependency
|
59
98
|
name: nokogiri
|
60
|
-
requirement:
|
61
|
-
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: mechanize
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
62
114
|
requirements:
|
63
|
-
- -
|
115
|
+
- - '>='
|
64
116
|
- !ruby/object:Gem::Version
|
65
117
|
version: '0'
|
66
118
|
type: :runtime
|
67
119
|
prerelease: false
|
68
|
-
version_requirements:
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
69
125
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
71
|
-
requirement:
|
72
|
-
none: false
|
126
|
+
name: codesake-commons
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
73
128
|
requirements:
|
74
|
-
- -
|
129
|
+
- - '>='
|
75
130
|
- !ruby/object:Gem::Version
|
76
131
|
version: '0'
|
77
132
|
type: :runtime
|
78
133
|
prerelease: false
|
79
|
-
version_requirements:
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
80
139
|
description: During the first stage of a security test, it's useful to enumerate website
|
81
140
|
urls without making too much noise. Links can help in this using robots.txt or link
|
82
141
|
in a web page telling you the website contents.
|
83
142
|
email:
|
84
|
-
-
|
143
|
+
- paolo@armoredcode.com
|
85
144
|
executables:
|
86
145
|
- links
|
87
146
|
extensions: []
|
@@ -93,37 +152,38 @@ files:
|
|
93
152
|
- README.md
|
94
153
|
- Rakefile
|
95
154
|
- bin/links
|
155
|
+
- lib/codesake/links/api.rb
|
156
|
+
- lib/codesake/links/google.rb
|
157
|
+
- lib/codesake/links/utils.rb
|
158
|
+
- lib/codesake/links/version.rb
|
96
159
|
- lib/links.rb
|
97
|
-
- lib/links/api.rb
|
98
|
-
- lib/links/version.rb
|
99
160
|
- links.gemspec
|
161
|
+
- spec/codesake_links_api_spec.rb
|
100
162
|
- spec/spec_helper.rb
|
101
|
-
|
102
|
-
homepage: ''
|
163
|
+
homepage: http://codesake.com
|
103
164
|
licenses:
|
104
165
|
- BSD
|
166
|
+
metadata: {}
|
105
167
|
post_install_message:
|
106
168
|
rdoc_options: []
|
107
169
|
require_paths:
|
108
170
|
- lib
|
109
171
|
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
-
none: false
|
111
172
|
requirements:
|
112
|
-
- -
|
173
|
+
- - '>='
|
113
174
|
- !ruby/object:Gem::Version
|
114
175
|
version: '0'
|
115
176
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
-
none: false
|
117
177
|
requirements:
|
118
|
-
- -
|
178
|
+
- - '>='
|
119
179
|
- !ruby/object:Gem::Version
|
120
180
|
version: '0'
|
121
181
|
requirements: []
|
122
182
|
rubyforge_project: links
|
123
|
-
rubygems_version:
|
183
|
+
rubygems_version: 2.2.1
|
124
184
|
signing_key:
|
125
|
-
specification_version:
|
185
|
+
specification_version: 4
|
126
186
|
summary: Fetch, discover and crawl what's available in a website.
|
127
187
|
test_files:
|
188
|
+
- spec/codesake_links_api_spec.rb
|
128
189
|
- spec/spec_helper.rb
|
129
|
-
- spec/w3ping_spec.rb
|
data/lib/links/api.rb
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
require "net/http"
|
2
|
-
require "nokogiri"
|
3
|
-
|
4
|
-
module Links
|
5
|
-
class Api
|
6
|
-
|
7
|
-
def self.code(url)
|
8
|
-
res = Links::Api.get(url)
|
9
|
-
(res.nil?)? -1 : res.code
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.links(url)
|
13
|
-
res = Links::Api.get(url)
|
14
|
-
if res.nil?
|
15
|
-
return []
|
16
|
-
end
|
17
|
-
doc = Nokogiri::HTML.parse(res.body)
|
18
|
-
l = doc.css('a').map { |link| link['href'] }
|
19
|
-
l
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.robots(site, only_disallow=true)
|
23
|
-
|
24
|
-
if (! site.start_with? 'http://') and (! site.start_with? 'https://')
|
25
|
-
site = 'http://'+site
|
26
|
-
end
|
27
|
-
|
28
|
-
list = []
|
29
|
-
begin
|
30
|
-
res=Net::HTTP.get_response(URI(site+'/robots.txt'))
|
31
|
-
if (res.code != "200")
|
32
|
-
return []
|
33
|
-
end
|
34
|
-
|
35
|
-
res.body.split("\n").each do |line|
|
36
|
-
if only_disallow
|
37
|
-
if (line.start_with?('Disallow'))
|
38
|
-
list << line.split(":")[1].strip.chomp
|
39
|
-
end
|
40
|
-
else
|
41
|
-
if (line.start_with?('Allow') or line.start_with?('Disallow'))
|
42
|
-
list << line.split(":")[1].strip.chomp
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
rescue
|
47
|
-
return []
|
48
|
-
end
|
49
|
-
|
50
|
-
list
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.follow(url)
|
54
|
-
l = Links::Api.links(url)
|
55
|
-
l[0]
|
56
|
-
end
|
57
|
-
|
58
|
-
def self.human(url)
|
59
|
-
case self.code(url).to_i
|
60
|
-
when 200
|
61
|
-
return "Open"
|
62
|
-
when 301
|
63
|
-
return "Moved"
|
64
|
-
when 404
|
65
|
-
return "Non existent"
|
66
|
-
when 401
|
67
|
-
return "Closed"
|
68
|
-
when 403
|
69
|
-
return "Forbidden"
|
70
|
-
when -1
|
71
|
-
return "No answer"
|
72
|
-
else
|
73
|
-
return "Broken"
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
private
|
78
|
-
def self.get(url)
|
79
|
-
begin
|
80
|
-
uri = URI(url)
|
81
|
-
if uri.scheme == 'http'
|
82
|
-
res = Net::HTTP.get_response(URI(url))
|
83
|
-
else
|
84
|
-
request=Net::HTTP.new(uri.host, uri.port)
|
85
|
-
request.use_ssl=true
|
86
|
-
request.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
87
|
-
res = request.get(uri.request_uri)
|
88
|
-
end
|
89
|
-
return res
|
90
|
-
rescue
|
91
|
-
return nil
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
end
|
96
|
-
end
|
data/lib/links/version.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
module Links
|
2
|
-
module Version
|
3
|
-
MAJOR = 0
|
4
|
-
MINOR = 30
|
5
|
-
PATCH = 0
|
6
|
-
BUILD = ''
|
7
|
-
|
8
|
-
def self.version
|
9
|
-
if BUILD.empty?
|
10
|
-
return [MAJOR, MINOR, PATCH].compact.join('.')
|
11
|
-
else
|
12
|
-
return [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|