links 0.30.0 → 0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/README.md +1 -1
- data/bin/links +167 -54
- data/lib/codesake/links/api.rb +129 -0
- data/lib/codesake/links/google.rb +47 -0
- data/lib/codesake/links/utils.rb +24 -0
- data/lib/codesake/links/version.rb +5 -0
- data/lib/links.rb +4 -2
- data/links.gemspec +13 -9
- data/spec/codesake_links_api_spec.rb +60 -0
- data/spec/spec_helper.rb +2 -1
- metadata +103 -43
- data/lib/links/api.rb +0 -96
- data/lib/links/version.rb +0 -16
- data/spec/w3ping_spec.rb +0 -8
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c727c4e55bc5c6ebd300a23d9dcf183d7814d774
|
4
|
+
data.tar.gz: 9d5c4b2fac1b72bd0cfb4ea92064642a143490f2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0b74f49cb283654eda4214df8ecbe7cd2bda931bc640bec4880df4ce4a458e6b90c557c010e6a544ef22c1cfd65428dde339b3ec2e28cf76d728b70844e5bd13
|
7
|
+
data.tar.gz: 2d3f8355d3c75e3b3b50caa04bec8a6c9160616b71e4f330510e0c807d8482c52dcda73f0cc3ecfc647d36afaac7b8ffa7aae6a81632803cbfadf6646d73500e
|
data/.gitignore
CHANGED
data/README.md
CHANGED
data/bin/links
CHANGED
@@ -1,19 +1,71 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
require "links"
|
3
2
|
require "rainbow"
|
4
3
|
require 'getoptlong'
|
5
4
|
|
5
|
+
require 'anemone'
|
6
|
+
require "codesake-commons"
|
7
|
+
require "links"
|
8
|
+
|
9
|
+
require 'data_mapper'
|
10
|
+
require 'dm-sqlite-adapter'
|
11
|
+
|
12
|
+
# One scan run against a target site, persisted through DataMapper.
#
# Properties:
#   base    - the target the scan was started against (required, max 256 chars)
#   tool    - name of the tool that performed the scan
#   version - version string of that tool
class Scan
  include DataMapper::Resource

  property :id,         Serial
  property :base,       String, :length => 256, :required => true
  property :tool,       String
  property :version,    String
  # The defaults are callables so the timestamp is taken when a record is
  # built. A plain `DateTime.now` is evaluated once, while this class body is
  # loaded, and every row would then share the script start time.
  # NOTE(review): nothing in this file refreshes updated_at on later saves.
  property :created_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }
  property :updated_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }
end
|
23
|
+
|
24
|
+
# A URL discovered while crawling, persisted through DataMapper.
#
# Properties:
#   path   - path component of the URL (required, max 256 chars)
#   q      - query string of the URL, nil when the URL has none
#   tested - flag, false by default
class Link
  include DataMapper::Resource

  property :id,         Serial
  property :path,       String, :length => 256, :required => true
  property :q,          String, :length => 256
  property :tested,     Boolean, :default => false
  # Callable defaults: evaluated per record instead of once at class load,
  # otherwise every link found during a long crawl gets the same timestamp.
  property :created_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }
  property :updated_at, DateTime, :default => lambda { |_resource, _property| DateTime.now }

  # Returns the stored links that carry a query string (q is not nil).
  def self.all_dynamic
    all(:q.not => nil)
  end
end
|
38
|
+
|
39
|
+
POST_WITHOUT_SLASH = %r[\d{4}\/\d{2}\/[^\/]+$]
|
40
|
+
POST_WITH_SLASH = %r[\d{4}\/\d{2}\/[\w-]+\/$]
|
41
|
+
ANY_POST = Regexp.union POST_WITHOUT_SLASH, POST_WITH_SLASH
|
42
|
+
ANY_PAGE = %r[page\/\d+]
|
43
|
+
ANY_PATTERN = Regexp.union ANY_PAGE, ANY_POST
|
44
|
+
|
45
|
+
APPNAME = File.basename($0)
|
46
|
+
|
47
|
+
logger = Codesake::Commons::Logging.instance
|
48
|
+
logger.toggle_syslog
|
49
|
+
|
6
50
|
opts = GetoptLong.new(
|
7
51
|
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
8
52
|
[ '--version', '-v', GetoptLong::NO_ARGUMENT ],
|
53
|
+
[ '--dynamic', '-d', GetoptLong::NO_ARGUMENT ],
|
9
54
|
[ '--bulk', '-b', GetoptLong::REQUIRED_ARGUMENT ],
|
55
|
+
[ '--proxy', '-P', GetoptLong::REQUIRED_ARGUMENT ],
|
56
|
+
[ '--crawl', '-c', GetoptLong::NO_ARGUMENT ],
|
10
57
|
[ '--robots', '-r', GetoptLong::NO_ARGUMENT ]
|
11
58
|
)
|
59
|
+
trap("INT") { logger.die("[INTERRUPTED]") }
|
12
60
|
|
13
|
-
|
14
|
-
list=[]
|
15
|
-
robots=false
|
16
|
-
bulk=false
|
61
|
+
|
62
|
+
list = []
|
63
|
+
robots = false
|
64
|
+
bulk = false
|
65
|
+
show_code = false
|
66
|
+
crawl = false
|
67
|
+
dynamic = false
|
68
|
+
proxy = {:host=>nil, :port=>-1}
|
17
69
|
|
18
70
|
opts.each do |opt, arg|
|
19
71
|
case opt
|
@@ -21,12 +73,21 @@ opts.each do |opt, arg|
|
|
21
73
|
puts "usage: links [-bvh] [filename]"
|
22
74
|
puts " -b filename: loads the url list from a plain text file"
|
23
75
|
puts " -r : parse robots.txt and make requests to disallowed urls"
|
76
|
+
puts " -c : shows the return code instead of human readable answer"
|
77
|
+
puts " -P host:port : connect using a proxy server. Useful in combination with Paros, Owasp Zap and other"
|
24
78
|
puts " -v : shows version information"
|
25
79
|
puts " -h : shows this help"
|
26
80
|
exit 0
|
81
|
+
when '--dynamic'
|
82
|
+
dynamic = true
|
27
83
|
when '--version'
|
28
|
-
puts "
|
84
|
+
puts "#{Codesake::Links::VERSION}"
|
29
85
|
exit 0
|
86
|
+
when '--crawl'
|
87
|
+
crawl= true
|
88
|
+
when '--proxy'
|
89
|
+
proxy[:host]=arg.split(':')[0]
|
90
|
+
proxy[:port]=arg.split(':')[1].to_i
|
30
91
|
when '--robots'
|
31
92
|
robots=true
|
32
93
|
when '--bulk'
|
@@ -43,65 +104,117 @@ opts.each do |opt, arg|
|
|
43
104
|
end
|
44
105
|
end
|
45
106
|
|
46
|
-
target = ARGV
|
107
|
+
target = ARGV.shift
|
108
|
+
logger.helo APPNAME, Codesake::Links::VERSION
|
47
109
|
|
48
|
-
|
49
|
-
|
50
|
-
|
110
|
+
# Check for a missing target before parsing it: URI.parse(nil) raises.
logger.die("missing target") if target.nil?
# A target given without a scheme ("www.example.com") parses with a nil host,
# so retry with an explicit http:// prefix before deriving the database name.
target_host = URI.parse(target).host || URI.parse("http://#{target}").host
logger.die("invalid target: #{target}") if target_host.nil?
# The database file is named after the host, with dots turned into underscores.
db_name = target_host.gsub('.','_')
|
111
|
+
DataMapper.setup(:default, "sqlite3://#{File.join(Dir.pwd, db_name)}.db")
|
112
|
+
DataMapper.finalize
|
113
|
+
DataMapper.auto_upgrade!
|
51
114
|
|
52
|
-
if list
|
53
|
-
|
54
|
-
|
55
|
-
|
115
|
+
# list<<target if list.empty?
|
116
|
+
|
117
|
+
logger.die("missing target") if target.nil?
|
118
|
+
# logger.die("no -b or -r option specified") unless bulk or robots
|
56
119
|
|
57
120
|
if robots
|
58
|
-
|
121
|
+
res = Codesake::Links::Api.robots(target)
|
122
|
+
list = res[:disallow_list]
|
123
|
+
logger.err "#{target}: no robots.txt found (#{res[:error]})\n" if res[:status] == :KO
|
124
|
+
logger.ok "no disallowed entries to test on #{target}" if list.empty?
|
125
|
+
logger.ok "found #{list.size} disallowed url(s) on #{target}" unless list.empty?
|
126
|
+
list.each do |l|
|
127
|
+
logger.ok "#{l} - #{Codesake::Links::Api.code(target+l, nil)}"
|
128
|
+
end
|
129
|
+
logger.bye
|
130
|
+
Kernel.exit(0)
|
59
131
|
end
|
60
132
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
133
|
+
if bulk
|
134
|
+
|
135
|
+
list.each do |l|
|
136
|
+
unless l.start_with? "#"
|
137
|
+
|
138
|
+
l = l.chomp if l.end_with? "\n"
|
139
|
+
l = '/'+l unless l.start_with? '/'
|
140
|
+
|
141
|
+
url = target + l
|
142
|
+
start = Time.now
|
143
|
+
code = Codesake::Links::Api.code(url, nil)
|
144
|
+
stop = Time.now
|
145
|
+
|
146
|
+
str=Codesake::Links::Api.human(code)
|
147
|
+
|
148
|
+
if code == "200"
|
149
|
+
Codesake::Links::Utils.print_str(url, logger, str, start, stop) unless show_code
|
150
|
+
Codesake::Links::Utils.print_code(url, logger, code, start, stop) if show_code
|
151
|
+
end
|
152
|
+
|
153
|
+
|
154
|
+
# Api.code returns the status as a String ("301"), or the Integer -1 when
# there was no answer, so the comparison is done on the string form; comparing
# against the Integers 301/302 never matched.
if %w[301 302].include?(code.to_s)
  start = Time.now
  # Fetch the full URL (not just the path `l`) and take the first anchor of
  # the returned page as the redirect destination.
  new_link = Codesake::Links::Api.links(url, proxy).first
  stop = Time.now
  logger.log "following from #{l} to #{new_link}\n"
  str = Codesake::Links::Api.human(code)

  # Both printers take the url as their first argument.
  if show_code
    Codesake::Links::Utils.print_code(url, logger, code, start, stop)
  else
    Codesake::Links::Utils.print_str(url, logger, str, start, stop)
  end
end
|
69
165
|
end
|
70
166
|
|
71
|
-
#puts "T: #{target}"
|
72
|
-
#puts "L: #{l}"
|
73
|
-
print "#{target}#{l}:".color(:white)
|
74
|
-
code = Links::Api.human(target+l)
|
75
|
-
else
|
76
|
-
print "#{l}:".color(:white)
|
77
|
-
code = Links::Api.human(l)
|
78
|
-
end
|
79
|
-
case code
|
80
|
-
when "Open"
|
81
|
-
print " #{code}\n".color(:green)
|
82
|
-
when "Non existent"
|
83
|
-
print " #{code}\n".color(:red)
|
84
|
-
when "Closed"
|
85
|
-
print " #{code}\n".color(:red)
|
86
|
-
else
|
87
|
-
print " #{code}\n".color(:yellow)
|
88
167
|
end
|
168
|
+
end
|
89
169
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
when "Non existent"
|
98
|
-
print " #{code}\n".color(:red)
|
99
|
-
when "Closed"
|
100
|
-
print " #{code}\n".color(:red)
|
101
|
-
else
|
102
|
-
print " #{code}\n".color(:yellow)
|
103
|
-
end
|
170
|
+
if dynamic
|
171
|
+
list = Link.all_dynamic
|
172
|
+
logger.log "#{list.size} dynamic urls found during last crawl"
|
173
|
+
list.each do |l|
|
174
|
+
logger.ok "#{l.path}/#{l.q}"
|
175
|
+
end
|
176
|
+
end
|
104
177
|
|
178
|
+
# --crawl: walk the target with Anemone and store every new path as a Link.
if crawl
  # Register the scan only when this target has no record yet. The previous
  # `unless scan.nil?` did the opposite: it skipped new targets and added a
  # duplicate row for targets that were already known.
  scan = Scan.first(:base => target)
  if scan.nil?
    scan = Scan.new
    scan.base    = target
    scan.tool    = APPNAME
    # Fully qualified: there is no top-level VERSION constant in this script.
    scan.version = Codesake::Links::VERSION
    scan.save
  end

  logger.log "start crawling #{target}"

  Anemone.crawl(target, :redirect_limit => 2, :depth_limit => 5) do |anemone|
    anemone.on_every_page do |page|
      # NOTE(review): links are de-duplicated by path only, so a path first
      # seen without a query string is skipped when it shows up again with one.
      known = Link.first(:path => page.url.path)
      if known.nil?
        link = Link.new
        link.path = page.url.path
        link.q    = page.url.query
        if link.save
          logger.ok "adding #{page.url.path}"
        else
          logger.err "error saving #{page.url.path}: #{link.errors.inspect}"
        end
      else
        logger.warn "skipping #{page.url.path}"
      end
    end
  end
end
|
206
|
+
|
207
|
+
# Anemone.crawl(target) do |anemone|
|
208
|
+
# anemone.focus_crawl do |page|
|
209
|
+
# page.links.keep_if { |link| link.to_s.match(ANY_PATTERN) } # crawl only links that are pages or blog posts
|
210
|
+
# end
|
211
|
+
# anemone.on_pages_like(POST_WITH_SLASH) do |page|
|
212
|
+
# title = page.doc.at_xpath("//div[@role='main']/header/h1").text rescue nil
|
213
|
+
# tag = page.doc.at_xpath("//header/div[@class='post-data']/p/a").text rescue nil
|
214
|
+
|
215
|
+
# if title and tag
|
216
|
+
# post = {title: title, tag: tag}
|
217
|
+
# logger.log "Inserting #{post.inspect}"
|
218
|
+
# end
|
219
|
+
# end
|
220
|
+
# end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
module Codesake
  module Links
    # HTTP helpers used to probe a target: status codes, page links and
    # robots.txt entries.
    #
    # The +proxy+ argument accepted by several methods is either nil or a Hash
    # with :host and :port keys. A nil proxy, or a proxy whose :host is nil,
    # means "no explicit proxy". It defaults to nil so the methods can also be
    # called with the url alone.
    module Api

      # Issues a GET request.
      #
      # Returns the Net::HTTP response, or nil when the request failed.
      def self.get(url, proxy = nil)
        request(:url => url, :proxy => proxy, :method => :get)
      end

      # Issues a HEAD request.
      #
      # Returns the Net::HTTP response, or nil when the request failed.
      def self.head(url, proxy = nil)
        request(:url => url, :proxy => proxy, :method => :head)
      end

      # Returns the HTTP status of a GET on +url+ as a String (e.g. "200"),
      # or the Integer -1 when no response was obtained.
      def self.code(url, proxy = nil)
        res = get(url, proxy)
        res.nil? ? -1 : res.code
      end

      # Returns the href values of the anchors found in the page at +url+.
      # Anchors without an href are left out. Returns [] when the page could
      # not be fetched.
      def self.links(url, proxy = nil)
        res = get(url, proxy)
        return [] if res.nil?

        Nokogiri::HTML.parse(res.body).css('a').map { |anchor| anchor['href'] }.compact
      end

      # TESTING: SPIDERS, ROBOTS, AND CRAWLERS (OWASP-IG-001)
      #
      # Fetches <site>/robots.txt and collects its Allow / Disallow values.
      # +site+ may omit the scheme, in which case http:// is assumed.
      #
      # Returns a Hash with the keys :status (:OK or :KO), :allow_list,
      # :disallow_list and :error (empty String on success).
      def self.robots(site)
        site = "http://#{site}" unless site.start_with?('http://', 'https://')

        # chomp('/') avoids requesting "//robots.txt" for "http://host/".
        res = Net::HTTP.get_response(URI("#{site.chomp('/')}/robots.txt"))
        unless res.code == "200"
          return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>"robots.txt response code was #{res.code}"}
        end

        allow_list, disallow_list = parse_robots(res.body)
        {:status=>:OK, :allow_list=>allow_list, :disallow_list=>disallow_list, :error=>""}
      rescue StandardError => e
        # StandardError rather than Exception, so that signals (Ctrl-C) and
        # exit requests are not turned into a :KO result.
        {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>e.message}
      end

      # Returns the first link found in the page at +url+, or nil when the
      # page has no links or could not be fetched.
      def self.follow(url, proxy = nil)
        links(url, proxy).first
      end

      # Translates an HTTP status code (String or Integer) into a short label.
      # -1 is the "no response" value produced by Api.code.
      def self.human(code)
        case code.to_i
        when 200      then "Open"
        when 301, 302 then "Moved"
        when 404      then "Non existent"
        when 401      then "Closed"
        when 403      then "Forbidden"
        when -1       then "No answer"
        else               "Broken"
        end
      end

      # Performs the HTTP request described by +options+:
      #   :url    - the URL to request
      #   :proxy  - nil or {:host=>..., :port=>...}
      #   :method - :get for GET, anything else sends HEAD
      #
      # Any scheme other than "http" is handled as TLS.
      # Returns the response, or nil when anything goes wrong.
      #
      # This method is public: a bare `private` does not apply to `def self.`
      # methods, and callers may invoke it with an explicit receiver.
      def self.request(options)
        uri   = URI(options[:url])
        proxy = options[:proxy]

        http = if proxy && proxy[:host]
                 Net::HTTP.new(uri.host, uri.port, proxy[:host], proxy[:port])
               else
                 Net::HTTP.new(uri.host, uri.port)
               end

        unless uri.scheme == 'http'
          http.use_ssl = true
          # NOTE(review): certificate verification is disabled, as in the
          # previous implementation; presumably so that targets with
          # self-signed certificates can still be probed. Confirm.
          http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end

        if options[:method] == :get
          http.get(uri.request_uri)
        else
          http.head(uri.request_uri)
        end
      rescue StandardError
        nil
      end

      # Splits a robots.txt body into [allow_list, disallow_list].
      #
      # Text after '#' is treated as a comment, and directives with an empty
      # value (e.g. a bare "Disallow:") are skipped. Only the first ':'
      # separates directive from value, so values may themselves contain ':'.
      def self.parse_robots(body)
        allow_list    = []
        disallow_list = []

        body.to_s.each_line do |raw|
          directive, value = raw.sub(/#.*/, '').split(':', 2)
          next if value.nil?

          value = value.strip
          next if value.empty?

          case directive.strip.downcase
          when 'allow'    then allow_list << value
          when 'disallow' then disallow_list << value
          end
        end

        [allow_list, disallow_list]
      end
      private_class_method :parse_robots

    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
module Codesake

  module Links

    # Public: implements the check described in SEARCH ENGINE
    # DISCOVERY/RECONNAISSANCE (OWASP-IG-002).
    #
    # The underlying idea is to use a search engine such as Google as a tool
    # to discover entry points, web applications or anything else about the
    # domain under test.
    #
    # Please bear in mind that you **must be authorized** by the system owner
    # before doing any sort of security test.
    #
    # Be ethical.
    #
    # Usage
    #   Codesake::Links::Google.search('somedomain.org').each do |res|
    #     puts "Discovered #{res}"
    #   end
    module Google

      # NOTE(review): kept for classes that include this module; nothing in
      # this module assigns @results.
      attr_reader :results

      # Runs a "site:<domain>" query through the Google home page form.
      #
      # domain - the domain to look for, e.g. 'somedomain.org'
      #
      # Returns an Array with the text of every link on the result page, or an
      # empty Array when the search form cannot be found on the home page.
      def self.search(domain)
        agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }

        home = agent.get('http://google.com/')
        form = home.form_with(:name => 'f')
        return [] if form.nil?

        form.q = "site:#{domain}"
        form.submit.links.map { |link| link.text }
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Codesake

  module Links
    # Output helpers that report the outcome of a probe through a logger.
    #
    # The logger only needs to respond to #ok, #log and #warn, each taking the
    # message String.
    class Utils

      # Reports a human readable outcome for +url+.
      #
      # url    - the probed URL
      # logger - receives #ok when str is "Open", #log otherwise
      # str    - the outcome label (e.g. "Open", "Forbidden")
      # start  - Time the probe started
      # stop   - Time the probe ended
      #
      # Returns nil.
      def self.print_str(url, logger, str, start, stop)
        line = "#{url}: #{str} (#{elapsed_msec(start, stop)} msec)"
        if str == "Open"
          logger.ok line
        else
          logger.log line
        end

        nil
      end

      # Reports the raw status code for +url+.
      #
      # url    - the probed URL
      # logger - receives #ok for a 200, #warn otherwise
      # code   - the status code, as a String ("200") or as an Integer (200)
      # start  - Time the probe started
      # stop   - Time the probe ended
      #
      # Returns nil.
      def self.print_code(url, logger, code, start, stop)
        line = "#{url}: #{code} (#{elapsed_msec(start, stop)} msec)"
        # to_s so that an Integer 200 is treated like the String "200".
        if code.to_s == "200"
          logger.ok line
        else
          logger.warn line
        end

        nil
      end

      # Elapsed time between two Time values, in whole milliseconds.
      def self.elapsed_msec(start, stop)
        ((stop - start) * 1000).round
      end
      private_class_method :elapsed_msec

    end
  end
end
|
24
|
+
|
data/lib/links.rb
CHANGED
data/links.gemspec
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
$:.push File.expand_path("../lib", __FILE__)
|
3
|
-
require "links/version"
|
3
|
+
require "codesake/links/version"
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "links"
|
7
|
-
s.version = Links::
|
7
|
+
s.version = Codesake::Links::VERSION
|
8
8
|
s.authors = ["Paolo Perego"]
|
9
|
-
s.email = ["
|
10
|
-
s.homepage = ""
|
9
|
+
s.email = ["paolo@armoredcode.com"]
|
10
|
+
s.homepage = "http://codesake.com"
|
11
11
|
s.summary = %q{Fetch, discover and crawl what's available in a website.}
|
12
12
|
s.description = %q{During the first stage of a security test, it's useful to enumerate website urls without making too much noise. Links can help in this using robots.txt or link in a web page telling you the website contents.}
|
13
13
|
s.license = "BSD"
|
@@ -22,9 +22,13 @@ Gem::Specification.new do |s|
|
|
22
22
|
# specify any dependencies here; for example:
|
23
23
|
s.add_development_dependency "rake"
|
24
24
|
s.add_development_dependency "rspec"
|
25
|
-
s.add_development_dependency
|
26
|
-
|
27
|
-
|
28
|
-
s.
|
29
|
-
s.
|
25
|
+
s.add_development_dependency 'webmock'
|
26
|
+
|
27
|
+
s.add_dependency 'anemone'
|
28
|
+
s.add_dependency 'data_mapper'
|
29
|
+
s.add_dependency 'dm-sqlite-adapter'
|
30
|
+
s.add_dependency "nokogiri"
|
31
|
+
s.add_dependency "mechanize"
|
32
|
+
|
33
|
+
s.add_dependency "codesake-commons"
|
30
34
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
include WebMock::API
|
3
|
+
|
4
|
+
describe "The API for Codesake Links" do
|
5
|
+
it "returns an array with a single / if the robots.txt contains only Allow: /" do
|
6
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
7
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
8
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
9
|
+
|
10
|
+
ret = Codesake::Links::Api.robots("http://www.test.com")
|
11
|
+
ret[:status].should == :OK
|
12
|
+
ret[:allow_list].size.should == 1
|
13
|
+
ret[:allow_list].should == [ '/' ]
|
14
|
+
end
|
15
|
+
|
16
|
+
it "returns an array with a single / if the robots.txt contains only Allow: / for an HTTPS site" do
|
17
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
18
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
19
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
20
|
+
|
21
|
+
ret = Codesake::Links::Api.robots("https://www.test.com")
|
22
|
+
ret[:status].should == :OK
|
23
|
+
ret[:allow_list].size.should == 1
|
24
|
+
ret[:allow_list].should == [ '/' ]
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
it "can handle an input without the protocol if target talks HTTP" do
|
29
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
30
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
31
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
32
|
+
|
33
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
34
|
+
ret[:status].should == :OK
|
35
|
+
ret[:allow_list].size.should == 1
|
36
|
+
ret[:allow_list].should == [ '/' ]
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
it "can't handle an input without the protocol if target talks *only* HTTPS" do
|
41
|
+
stub_request(:get, "http://www.test.com:443/robots.txt").
|
42
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
43
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
|
44
|
+
|
45
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
46
|
+
ret[:status].should == :KO
|
47
|
+
end
|
48
|
+
|
49
|
+
it "returns a list of disallowed URLs" do
|
50
|
+
stub_request(:get, "http://www.test.com/robots.txt").
|
51
|
+
with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
|
52
|
+
to_return(:status=>200, :body=>"User-agent: *\nAllow: /\nDisallow: /private\nDisallow: /cgi-bin\nDisallow: /a-secret-dir", :headers=>{})
|
53
|
+
|
54
|
+
ret = Codesake::Links::Api.robots("www.test.com")
|
55
|
+
ret[:disallow_list].size.should == 3
|
56
|
+
ret[:disallow_list].should == [ '/private', '/cgi-bin', '/a-secret-dir' ]
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
require '
|
1
|
+
require 'links'
|
2
|
+
require 'webmock/rspec'
|
metadata
CHANGED
@@ -1,87 +1,146 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: links
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: '0.75'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Paolo Perego
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-09-11 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rake
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rspec
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
38
|
-
requirement:
|
39
|
-
none: false
|
42
|
+
name: webmock
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - '>='
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
49
|
-
requirement:
|
50
|
-
none: false
|
56
|
+
name: anemone
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- -
|
59
|
+
- - '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
|
-
type: :
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: data_mapper
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: dm-sqlite-adapter
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
56
91
|
prerelease: false
|
57
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
58
97
|
- !ruby/object:Gem::Dependency
|
59
98
|
name: nokogiri
|
60
|
-
requirement:
|
61
|
-
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: mechanize
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
62
114
|
requirements:
|
63
|
-
- -
|
115
|
+
- - '>='
|
64
116
|
- !ruby/object:Gem::Version
|
65
117
|
version: '0'
|
66
118
|
type: :runtime
|
67
119
|
prerelease: false
|
68
|
-
version_requirements:
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
69
125
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
71
|
-
requirement:
|
72
|
-
none: false
|
126
|
+
name: codesake-commons
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
73
128
|
requirements:
|
74
|
-
- -
|
129
|
+
- - '>='
|
75
130
|
- !ruby/object:Gem::Version
|
76
131
|
version: '0'
|
77
132
|
type: :runtime
|
78
133
|
prerelease: false
|
79
|
-
version_requirements:
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
80
139
|
description: During the first stage of a security test, it's useful to enumerate website
|
81
140
|
urls without making too much noise. Links can help in this using robots.txt or link
|
82
141
|
in a web page telling you the website contents.
|
83
142
|
email:
|
84
|
-
-
|
143
|
+
- paolo@armoredcode.com
|
85
144
|
executables:
|
86
145
|
- links
|
87
146
|
extensions: []
|
@@ -93,37 +152,38 @@ files:
|
|
93
152
|
- README.md
|
94
153
|
- Rakefile
|
95
154
|
- bin/links
|
155
|
+
- lib/codesake/links/api.rb
|
156
|
+
- lib/codesake/links/google.rb
|
157
|
+
- lib/codesake/links/utils.rb
|
158
|
+
- lib/codesake/links/version.rb
|
96
159
|
- lib/links.rb
|
97
|
-
- lib/links/api.rb
|
98
|
-
- lib/links/version.rb
|
99
160
|
- links.gemspec
|
161
|
+
- spec/codesake_links_api_spec.rb
|
100
162
|
- spec/spec_helper.rb
|
101
|
-
|
102
|
-
homepage: ''
|
163
|
+
homepage: http://codesake.com
|
103
164
|
licenses:
|
104
165
|
- BSD
|
166
|
+
metadata: {}
|
105
167
|
post_install_message:
|
106
168
|
rdoc_options: []
|
107
169
|
require_paths:
|
108
170
|
- lib
|
109
171
|
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
-
none: false
|
111
172
|
requirements:
|
112
|
-
- -
|
173
|
+
- - '>='
|
113
174
|
- !ruby/object:Gem::Version
|
114
175
|
version: '0'
|
115
176
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
-
none: false
|
117
177
|
requirements:
|
118
|
-
- -
|
178
|
+
- - '>='
|
119
179
|
- !ruby/object:Gem::Version
|
120
180
|
version: '0'
|
121
181
|
requirements: []
|
122
182
|
rubyforge_project: links
|
123
|
-
rubygems_version:
|
183
|
+
rubygems_version: 2.2.1
|
124
184
|
signing_key:
|
125
|
-
specification_version:
|
185
|
+
specification_version: 4
|
126
186
|
summary: Fetch, discover and crawl what's available in a website.
|
127
187
|
test_files:
|
188
|
+
- spec/codesake_links_api_spec.rb
|
128
189
|
- spec/spec_helper.rb
|
129
|
-
- spec/w3ping_spec.rb
|
data/lib/links/api.rb
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
require "net/http"
|
2
|
-
require "nokogiri"
|
3
|
-
|
4
|
-
module Links
|
5
|
-
class Api
|
6
|
-
|
7
|
-
def self.code(url)
|
8
|
-
res = Links::Api.get(url)
|
9
|
-
(res.nil?)? -1 : res.code
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.links(url)
|
13
|
-
res = Links::Api.get(url)
|
14
|
-
if res.nil?
|
15
|
-
return []
|
16
|
-
end
|
17
|
-
doc = Nokogiri::HTML.parse(res.body)
|
18
|
-
l = doc.css('a').map { |link| link['href'] }
|
19
|
-
l
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.robots(site, only_disallow=true)
|
23
|
-
|
24
|
-
if (! site.start_with? 'http://') and (! site.start_with? 'https://')
|
25
|
-
site = 'http://'+site
|
26
|
-
end
|
27
|
-
|
28
|
-
list = []
|
29
|
-
begin
|
30
|
-
res=Net::HTTP.get_response(URI(site+'/robots.txt'))
|
31
|
-
if (res.code != "200")
|
32
|
-
return []
|
33
|
-
end
|
34
|
-
|
35
|
-
res.body.split("\n").each do |line|
|
36
|
-
if only_disallow
|
37
|
-
if (line.start_with?('Disallow'))
|
38
|
-
list << line.split(":")[1].strip.chomp
|
39
|
-
end
|
40
|
-
else
|
41
|
-
if (line.start_with?('Allow') or line.start_with?('Disallow'))
|
42
|
-
list << line.split(":")[1].strip.chomp
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
rescue
|
47
|
-
return []
|
48
|
-
end
|
49
|
-
|
50
|
-
list
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.follow(url)
|
54
|
-
l = Links::Api.links(url)
|
55
|
-
l[0]
|
56
|
-
end
|
57
|
-
|
58
|
-
def self.human(url)
|
59
|
-
case self.code(url).to_i
|
60
|
-
when 200
|
61
|
-
return "Open"
|
62
|
-
when 301
|
63
|
-
return "Moved"
|
64
|
-
when 404
|
65
|
-
return "Non existent"
|
66
|
-
when 401
|
67
|
-
return "Closed"
|
68
|
-
when 403
|
69
|
-
return "Forbidden"
|
70
|
-
when -1
|
71
|
-
return "No answer"
|
72
|
-
else
|
73
|
-
return "Broken"
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
private
|
78
|
-
def self.get(url)
|
79
|
-
begin
|
80
|
-
uri = URI(url)
|
81
|
-
if uri.scheme == 'http'
|
82
|
-
res = Net::HTTP.get_response(URI(url))
|
83
|
-
else
|
84
|
-
request=Net::HTTP.new(uri.host, uri.port)
|
85
|
-
request.use_ssl=true
|
86
|
-
request.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
87
|
-
res = request.get(uri.request_uri)
|
88
|
-
end
|
89
|
-
return res
|
90
|
-
rescue
|
91
|
-
return nil
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
end
|
96
|
-
end
|
data/lib/links/version.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
module Links
|
2
|
-
module Version
|
3
|
-
MAJOR = 0
|
4
|
-
MINOR = 30
|
5
|
-
PATCH = 0
|
6
|
-
BUILD = ''
|
7
|
-
|
8
|
-
def self.version
|
9
|
-
if BUILD.empty?
|
10
|
-
return [MAJOR, MINOR, PATCH].compact.join('.')
|
11
|
-
else
|
12
|
-
return [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|