codesake_links 0.50 → 0.71

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in the public registry.
data/bin/links CHANGED
@@ -64,52 +64,55 @@ target = ARGV[0]
  logger.helo "#{APPNAME} v#{Codesake::Links::VERSION} (C) 2013 - paolo@armoredcode.com is starting up"
 
 
- list<<target if list.empty?
+ # list<<target if list.empty?
 
- logger.die("links: missing target") if list[0].nil?
+ logger.die("missing target") if target.nil?
+ logger.die("no -b or -r option specified") unless bulk or robots
 
  if robots
-   list = Codesake::Links::Api.robots(target)
-   logger.err "#{target}: no robots.txt found\n" if list.empty?
+   res = Codesake::Links::Api.robots(target)
+   list = res[:disallow_list]
+   logger.err "#{target}: no robots.txt found (#{res[:error]})\n" if res[:status] == :KO
+   logger.ok "no disallowed entries to test on #{target}" if list.empty?
+   logger.ok "found #{list.size} disallowed url(s) on #{target}" unless list.empty?
+   list.each do |l|
+     logger.ok "#{l} - #{Codesake::Links::Api.code(target+l, nil)}"
+   end
+   logger.helo "leaving"
+   Kernel.exit(0)
  end
 
+
  list.each do |l|
-   if robots or bulk
-     if ! l.start_with? '/'
-       l = '/'+l.chomp
-     end
-     if ! target.start_with? 'http://' and ! target.start_with? 'https://'
-       #defaulting to HTTP when no protocol has been supplied
-       target = "http://"+target
-     end
+   unless l.start_with? "#"
 
-     logger.log "#{target}#{l}:"
-     start = Time.now
-     code = Codesake::Links::Api.code(target+l, proxy)
-     stop = Time.now
-   else
-     logger.log "#{l}:"
+     l = l.chomp if l.end_with? "\n"
+     l = '/'+l unless l.start_with? '/'
+
+     url = target + l
      start = Time.now
-     code = Codesake::Links::Api.code(l, proxy)
+     code = Codesake::Links::Api.code(url, nil)
      stop = Time.now
-   end
 
-   str=Codesake::Links::Api.human(code)
+     str=Codesake::Links::Api.human(code)
 
-   Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
-   Codesake::Links::Utils.print_code(logger, str, code, start, stop) if show_code
-
+     if code == "200"
+       Codesake::Links::Utils.print_str(url, logger, str, start, stop) unless show_code
+       Codesake::Links::Utils.print_code(url, logger, code, start, stop) if show_code
+     end
 
-   if code == 301 or code == 302
-     start = Time.now
-     new_link = Codesake::Links::Api.follow(l, proxy)
-     stop = Time.now
-     logger.log "following from #{l} to #{new_link}\n"
-     str=Codesake::Links::Api.human(code)
 
-     Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
-     Codesake::Links::Utils.print_code(logger, str, code, start, stop) if show_code
+     if code == 301 or code == 302
+       start = Time.now
+       new_link = Codesake::Links::Api.follow(l, proxy)
+       stop = Time.now
+       logger.log "following from #{l} to #{new_link}\n"
+       str=Codesake::Links::Api.human(code)
+
+       Codesake::Links::Utils.print_str(logger, str, start, stop) unless show_code
+       Codesake::Links::Utils.print_code(logger, code, start, stop) if show_code
 
+     end
 
  end
 
  end
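Note: the reworked scan loop boils down to the following standalone sketch; entries and target are illustrative names, while Codesake::Links::Api.code is the call used in the diff (a nil second argument means no proxy). This is a simplified reading of the new behaviour, not the shipped script.

    entries.each do |entry|
      next if entry.start_with?("#")                  # wordlist comment lines are skipped
      entry = entry.chomp                             # drop a trailing newline, if any
      entry = "/" + entry unless entry.start_with?("/")
      url  = target + entry
      code = Codesake::Links::Api.code(url, nil)      # nil = no proxy
      puts "#{url}: #{code}" if code == "200"         # only open URLs are reported in this sketch
    end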
@@ -22,9 +22,7 @@ Gem::Specification.new do |s|
   # specify any dependencies here; for example:
   s.add_development_dependency "rake"
   s.add_development_dependency "rspec"
-  s.add_development_dependency "rainbow"
-  # s.add_runtime_dependency "rest-client"
-  s.add_runtime_dependency "rainbow"
+  s.add_development_dependency 'webmock'
 
   s.add_dependency "nokogiri"
   s.add_dependency "mechanize"
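Note: the gemspec remains the single source of dependencies; a conventional Bundler Gemfile (not part of this diff, shown only as an illustration) simply delegates to it, so webmock is picked up as a development dependency after this change.

    # Gemfile (illustrative)
    source 'https://rubygems.org'
    gemspec   # pulls rake, rspec and webmock (development) plus nokogiri and mechanize (runtime)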
@@ -32,35 +32,30 @@ module Codesake
   end
 
   # TESTING: SPIDERS, ROBOTS, AND CRAWLERS (OWASP-IG-001)
-  def self.robots(site, only_disallow=true)
+  def self.robots(site)
 
-    if (! site.start_with? 'http://') and (! site.start_with? 'https://')
-      site = 'http://'+site
-    end
+    site = 'http://'+site unless site.start_with? 'http://' or site.start_with? 'https://'
+
+
+    allow_list = []
+    disallow_list = []
 
-    list = []
    begin
      res=Net::HTTP.get_response(URI(site+'/robots.txt'))
-      if (res.code != "200")
-        return []
-      end
+      return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>"robots.txt response code was #{res.code}"} if (res.code != "200")
+
 
      res.body.split("\n").each do |line|
-        if only_disallow
-          if (line.downcase.start_with?('disallow'))
-            list << line.split(":")[1].strip.chomp
-          end
-        else
-          if (line.downcase.start_with?('allow') or line.downcase.start_with?('disallow'))
-            list << line.split(":")[1].strip.chomp
-          end
-        end
+
+        disallow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('disallow'))
+        allow_list << line.split(":")[1].strip.chomp if (line.downcase.start_with?('allow'))
+
      end
-    rescue
-      return []
+    rescue Exception => e
+      return {:status=>:KO, :allow_list=>[], :disallow_list=>[], :error=>e.message}
    end
 
-    list
+    {:status=>:OK, :allow_list=>allow_list, :disallow_list=>disallow_list, :error=>""}
  end
 
  def self.follow(url, proxy)
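Note: Api.robots now returns a hash instead of a flat array. A minimal consumer, mirroring how bin/links and the new specs use it (the target URL here is just an example):

    res = Codesake::Links::Api.robots("http://www.example.com")
    if res[:status] == :OK
      puts "allowed entries:    #{res[:allow_list].inspect}"
      puts "disallowed entries: #{res[:disallow_list].inspect}"
    else
      puts "robots.txt lookup failed: #{res[:error]}"
    end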
@@ -97,14 +92,18 @@ module Codesake
    begin
      uri = URI(url)
      if uri.scheme == 'http'
-        Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) {|http|
-          if (method == :get)
-            res = http.get(uri.request_uri)
-          else
-            res = http.head(uri.request_uri)
-          end
-          return res
-        }
+        unless proxy.nil?
+          Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) {|http|
+            if (method == :get)
+              res = http.get(uri.request_uri)
+            else
+              res = http.head(uri.request_uri)
+            end
+            return res
+          }
+        else
+          res = Net::HTTP.get_response(URI(url))
+        end
        # res = Net::HTTP.get_response(URI(url))
      else
        request=Net::HTTP.new(uri.host, uri.port)
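Note: the added guard only routes through Net::HTTP::Proxy when a proxy hash is given. A self-contained sketch of that branch, assuming the {:host, :port} proxy layout used in the diff (the fetch helper name is illustrative, everything else is plain Net::HTTP from the standard library):

    require 'net/http'
    require 'uri'

    def fetch(url, proxy = nil)
      uri = URI(url)
      if proxy.nil?
        Net::HTTP.get_response(uri)                   # direct request
      else
        Net::HTTP::Proxy(proxy[:host], proxy[:port]).start(uri.host) do |http|
          http.get(uri.request_uri)                   # same request, via the proxy
        end
      end
    end

    fetch("http://www.example.com/").code                                  # => e.g. "200"
    fetch("http://www.example.com/", :host => "127.0.0.1", :port => 8080)  # proxied request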
@@ -3,18 +3,16 @@ module Codesake
  module Links
    class Utils
 
-      def self.print_str(logger, str, start, stop)
-        logger.ok "#{str} (#{((stop-start) * 1000).round} msec)\n" if str == "Open"
-        logger.err " #{str} (#{((stop-start) * 1000).round} msec)\n" if (str == "Closed" or str == "Non existent")
-        logger.warn " #{str} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
+      def self.print_str(url, logger, str, start, stop)
+        logger.ok "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" if str == "Open"
+        logger.log "#{url}: #{str} (#{((stop-start) * 1000).round} msec)" unless str == "Open"
 
        return
      end
 
-      def self.print_code(logger, str, code, start, stop)
-        logger.ok "#{code} (#{((stop-start) * 1000).round} msec)\n" if str == "Open"
-        logger.err " #{code} (#{((stop-start) * 1000).round} msec)\n" if (str == "Closed" or str == "Non existent")
-        logger.warn " #{code} (#{((stop-start) * 1000).round} msec)\n" if (str != "Closed" and str != "Non existent" and str != "Open")
+      def self.print_code(url, logger, code, start, stop)
+        logger.ok "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" if code == "200"
+        logger.warn "#{url}: #{code} (#{((stop-start) * 1000).round} msec)" unless code == "200"
 
        return
      end
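Note: both helpers gained url as their first parameter and print_code no longer takes str. Call sites now look like this (logger stands for the CLI logger used in bin/links; the surrounding timing code matches the diff):

    start = Time.now
    code  = Codesake::Links::Api.code(url, nil)
    stop  = Time.now
    str   = Codesake::Links::Api.human(code)

    Codesake::Links::Utils.print_str(url, logger, str, start, stop)    # human-readable outcome
    Codesake::Links::Utils.print_code(url, logger, code, start, stop)  # raw HTTP status code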
@@ -1,5 +1,5 @@
 module Codesake
   module Links
-    VERSION = "0.50"
+    VERSION = "0.71"
   end
 end
@@ -0,0 +1,60 @@
+ require 'spec_helper'
+ include WebMock::API
+
+ describe "The API for Codesake Links" do
+   it "returns an array with a single / if the robots.txt contains only Allow: /" do
+     stub_request(:get, "http://www.test.com/robots.txt").
+       with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
+       to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
+
+     ret = Codesake::Links::Api.robots("http://www.test.com")
+     ret[:status].should == :OK
+     ret[:allow_list].size.should == 1
+     ret[:allow_list].should == [ '/' ]
+   end
+
+   it "returns an array with a single / if the robots.txt contains only Allow: / for an HTTPS site" do
+     stub_request(:get, "http://www.test.com:443/robots.txt").
+       with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
+       to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
+
+     ret = Codesake::Links::Api.robots("https://www.test.com")
+     ret[:status].should == :OK
+     ret[:allow_list].size.should == 1
+     ret[:allow_list].should == [ '/' ]
+   end
+
+
+   it "can handle an input without the protocol if target talks HTTP" do
+     stub_request(:get, "http://www.test.com/robots.txt").
+       with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
+       to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
+
+     ret = Codesake::Links::Api.robots("www.test.com")
+     ret[:status].should == :OK
+     ret[:allow_list].size.should == 1
+     ret[:allow_list].should == [ '/' ]
+   end
+
+
+   it "can't handle an input without the protocol if target talks *only* HTTPS" do
+     stub_request(:get, "http://www.test.com:443/robots.txt").
+       with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
+       to_return(:status=>200, :body=>"User-agent: *\nAllow: /", :headers=>{})
+
+     ret = Codesake::Links::Api.robots("www.test.com")
+     ret[:status].should == :KO
+   end
+
+   it "returns a list of disallowed URLs" do
+     stub_request(:get, "http://www.test.com/robots.txt").
+       with(:headers => {'Accept'=>'*/*', 'User-Agent'=>'Ruby'}).
+       to_return(:status=>200, :body=>"User-agent: *\nAllow: /\nDisallow: /private\nDisallow: /cgi-bin\nDisallow: /a-secret-dir", :headers=>{})
+
+     ret = Codesake::Links::Api.robots("www.test.com")
+     ret[:disallow_list].size.should == 3
+     ret[:disallow_list].should == [ '/private', '/cgi-bin', '/a-secret-dir' ]
+
+   end
+
+ end
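Note: no example in the new suite covers a robots.txt that exists but answers with an error code; that branch of Api.robots could be exercised with one more WebMock stub along these lines (this example is not part of the released spec file):

    it "reports :KO when robots.txt is missing" do
      stub_request(:get, "http://www.test.com/robots.txt").
        to_return(:status => 404, :body => "", :headers => {})

      ret = Codesake::Links::Api.robots("www.test.com")
      ret[:status].should == :KO
      ret[:error].should == "robots.txt response code was 404"
    end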
@@ -1 +1,2 @@
- require 'links'
+ require 'codesake_links'
+ require 'webmock/rspec'
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: codesake_links
 version: !ruby/object:Gem::Version
-   version: '0.50'
+   version: '0.71'
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
- date: 2013-04-11 00:00:00.000000000 Z
+ date: 2013-04-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -44,7 +44,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-   name: rainbow
+   name: webmock
   requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
@@ -59,22 +59,6 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
- - !ruby/object:Gem::Dependency
-   name: rainbow
-   requirement: !ruby/object:Gem::Requirement
-     none: false
-     requirements:
-     - - ! '>='
-       - !ruby/object:Gem::Version
-         version: '0'
-   type: :runtime
-   prerelease: false
-   version_requirements: !ruby/object:Gem::Requirement
-     none: false
-     requirements:
-     - - ! '>='
-       - !ruby/object:Gem::Version
-         version: '0'
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
@@ -145,6 +129,7 @@ files:
 - lib/codesake/links/utils.rb
 - lib/codesake/links/version.rb
 - lib/codesake_links.rb
+ - spec/codesake_links_api_spec.rb
 - spec/spec_helper.rb
 homepage: http://codesake.com
 licenses:
@@ -161,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-       hash: -1757904362167703742
+       hash: 2079895705737470252
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -170,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-       hash: -1757904362167703742
+       hash: 2079895705737470252
 requirements: []
 rubyforge_project: links
 rubygems_version: 1.8.24
@@ -178,4 +163,5 @@ signing_key:
 specification_version: 3
 summary: Fetch, discover and crawl what's available in a website.
 test_files:
+ - spec/codesake_links_api_spec.rb
 - spec/spec_helper.rb