wmap 2.6.6 → 2.6.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c4f814d2c0a04e5aedf5314a05a6de6ea59ff1a9cc6a71296bfcf94c301f9e67
- data.tar.gz: 641c3dab030ff37bd0292f0ab2bfc2750c94d72051ddb46891ce490d917fb3b9
+ metadata.gz: 2e4f2a2dfe9b4b119331eefffc7b9b025d9953c2ce5f7255e4d2a08929a591c3
+ data.tar.gz: 3d018d69469cf4e4551b38397657341661fd95c3f59bebe8bb21405d4e107881
  SHA512:
- metadata.gz: 977683b1f6a166ce1da6036e465b88e71aa9445b20e9c80048a640f6bf1dacee26b3503123529b0e5241c862370b0bc03c108c94ccfdf2e63c4af809af28a5ea
- data.tar.gz: bac9ed1cd639750040f035b88001244751d7a1e627594c750d38962c4cb8625035cd2e2ec64d40b9464989b33d33a8cc70b702d7f5d737e0373d104b3ada443e
+ metadata.gz: 7e7d27b4d4abfc34ab3df0933412b4c99e94af93f71251a2e7a0706b4782ad62a2541dbf7c5f391f47d5a3b9eae9feb45ce1690b2e43fddab4f52a14e7bb334b
+ data.tar.gz: 0117422a9eac9f1c7a66783a0a4ca870711aeaa95252a9df8fe4f9ce2f8f10fd4ba461b2146a681f3b867becb9bace05c61a3e11b284714732432c95e12dc983
data/bin/wmap CHANGED
@@ -6,13 +6,8 @@
  require "wmap"
  require "optparse"

- # program helper
- def print_usage
- abort "Program to perform website asset discovery and tracking. \nUsage: wmap -t <Target Host | URL | IP | CIDR | or a seed file with any of the above combo> -d <Optional Discovery Result Directory>"
- end
-
  # program command line options
- options = {:data_dir => nil, :target => nil}
+ options = {:data_dir => nil, :target => nil, :verbose => false}
  parser = OptionParser.new do|opts|
  opts.banner = Wmap.banner
  opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
@@ -21,9 +16,11 @@ parser = OptionParser.new do|opts|
  opts.on('-t', '--target target', 'Web Mapper target') do |target|
  options[:target] = target;
  end
+ opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
+ options[:verbose] = v;
+ end
  opts.on('-h', '--help', 'Displays Help') do
- print Wmap.banner,"\n"
- print_usage
+ puts opts
  exit 0
  end
  end
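Note on the two hunks above: 2.6.7 drops the hand-rolled print_usage helper in favor of OptionParser's generated help text (puts opts prints the banner plus every registered option) and adds a verbose switch. The --[no-]verbose form is standard OptionParser behavior: -v or --verbose yields true, --no-verbose yields false. A minimal standalone sketch of the same idiom (illustrative, not wmap code):

    require "optparse"

    options = {:verbose => false}
    parser = OptionParser.new do |opts|
      opts.on("-v", "--[no-]verbose", "Run verbosely") { |v| options[:verbose] = v }
    end
    parser.parse!(["--verbose"])    # options[:verbose] => true
    parser.parse!(["--no-verbose"]) # options[:verbose] => false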
@@ -47,7 +44,7 @@ Dir.mkdir(Log_dir) unless Dir.exist?(Log_dir)
  Wmap.wlog("Execute the command: wmap -t #{options[:target]}","wmap",Log_dir.join("wmap.log").to_s)
  urls = Array.new
  # first step - construct the host list
- scanner = Wmap::PortScanner.new(:verbose=>false, :socket_timeout=>600) # default time-out of 600 milliseconds
+ scanner = Wmap::PortScanner.new(:verbose=>options[:verbose], :socket_timeout=>600) # default time-out of 600 milliseconds
  hosts=Array.new
  if File.exist?(options[:target])
  puts "Parsing the discovery seed file: \"#{options[:target]}\" "
@@ -65,18 +62,18 @@ if File.exist?(options[:target])
  cidrs.push(x) if scanner.is_cidr?(x)
  end
  puts "Parsing done. "
- hosts+=Wmap::DnsBruter.new(:verbose=>false).dns_brute_workers(domains.uniq).values.flatten if domains.size > 0
+ hosts+=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_workers(domains.uniq).values.flatten if domains.size > 0
  cidrs.map { |x| hosts+= scanner.cidr_2_ips(x) } if cidrs.size > 0
  elsif scanner.is_url?(options[:target])
  puts "Processing the URL: #{options[:target]}"
  urls.push(options[:target])
  elsif Wmap.domain_known?(options[:target]) or Wmap.sub_domain_known?(options[:target])
  puts "Processing the domain: #{options[:target]}"
- hosts+=Wmap::DnsBruter.new(:verbose=>false).dns_brute_worker(options[:target]).values.flatten
+ hosts+=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_worker(options[:target]).values.flatten
  elsif scanner.is_fqdn?(options[:target])
  puts "Processing the host: #{options[:target]}"
  hosts.push(options[:target])
- my_hosts=Wmap::DnsBruter.new(:verbose=>false).dns_brute_worker(options[:target]).values.flatten if (options[:target].split('.')[0] =~ /\d+/)
+ my_hosts=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_worker(options[:target]).values.flatten if (options[:target].split('.')[0] =~ /\d+/)
  hosts+=my_hosts unless my_hosts.nil?
  elsif scanner.is_cidr?(options[:target])
  puts "Processing the network block: #{options[:target]}"
@@ -102,7 +99,7 @@ if options[:target] && options[:data_dir]
  crawler = Wmap::UrlCrawler.new(:data_dir => options[:data_dir])
  elsif options[:target]
  puts "Fire up the crawler."
- crawler = Wmap::UrlCrawler.new(:verbose=>false)
+ crawler = Wmap::UrlCrawler.new(:verbose=>options[:verbose])
  else
  abort "Error firing up UrlCrawler instance!"
  end
@@ -168,14 +165,14 @@ end
  if options[:target] && options[:data_dir]
  puts "Invoke the HostTracker with optional directory setter."
  host_tracker = Wmap::HostTracker.instance
- host_tracker.verbose=false
+ host_tracker.verbose=options[:verbose]
  host_tracker.data_dir = options[:data_dir]
  host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
  host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
  elsif options[:target]
  puts puts "Invoke the HostTracker."
  host_tracker = Wmap::HostTracker.instance
- host_tracker.verbose=false
+ host_tracker.verbose=options[:verbose]
  else
  abort "Error firing up HostTracker instance!"
  end
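The bin/wmap hunks above thread the new flag through every component that previously hard-coded :verbose=>false, so a single -v on the command line now controls diagnostic output across the whole discovery pipeline. The constructor calls, as they appear in the hunks:

    scanner = Wmap::PortScanner.new(:verbose=>options[:verbose], :socket_timeout=>600)
    bruter  = Wmap::DnsBruter.new(:verbose=>options[:verbose])
    crawler = Wmap::UrlCrawler.new(:verbose=>options[:verbose])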
@@ -57,9 +57,9 @@ class Wmap::HostTracker
  end
  f.close
  return @known_hosts
- #rescue => ee
- # puts "Exception on method #{__method__}: #{ee}"
- # return known_hosts
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}"
+ return known_hosts
  end

  # Save the current local hosts hash table into a (random) data repository file
@@ -282,8 +282,8 @@ class Wmap::SiteTracker
  puts "No new entry added. "
  end
  return results
- #rescue => ee
- #puts "Exception on method #{__method__}: #{ee}" if @verbose
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
  alias_method :adds, :bulk_add
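The two library hunks above re-enable rescue handlers that had been commented out, so load_known_hosts_from_file and bulk_add now log the exception (the latter only when @verbose) instead of letting it propagate. A bare rescue => ee clause catches StandardError and its descendants only, while rescue Exception => ee (used later in UrlCrawler#crawl_workers) also traps interrupts and exit attempts. A standalone sketch of the distinction (illustrative, not wmap code):

    begin
      raise Interrupt            # Interrupt < SignalException < Exception
    rescue => ee
      puts "not reached"         # a bare rescue catches only StandardError
    rescue Exception => ee
      puts "reached"             # rescue Exception traps everything, including interrupts
    end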
 
@@ -12,7 +12,7 @@ module Wmap

  # Class to identify and track adware within the site store
  include Wmap::Utils
- attr_accessor :signature_file, :tag_file, :verbose, :data_dir, :data_store
+ attr_accessor :signature_file, :tag_file, :verbose, :data_dir
  attr_reader :tag_signatures, :tag_store

@@ -26,7 +26,7 @@ module Wmap
  # Set default instance variables
  @signature_file=File.dirname(__FILE__) + '/../../../settings/' + 'tag_signatures'
  file=params.fetch(:signature_file, @signature_file)
- @tag_signatures=load_from_file(file)
+ @tag_signatures=load_sig_from_file(file)
  @tag_file=params.fetch(:tag_file, @data_dir + 'tag_sites')
  File.write(@tag_file, "") unless File.exist?(@tag_file)
  # load the known tag store
@@ -34,9 +34,8 @@ module Wmap
  @landings = Hash.new # cache landing page to reduce redundant browsing
  end

-
  # load the known tag signatures into an instance variable
- def load_from_file (file, lc=true)
+ def load_sig_from_file (file, lc=true)
  puts "Loading data file: #{file}" if @verbose
  data_store=Hash.new
  f = File.open(file, 'r')
@@ -53,7 +52,6 @@ module Wmap
  else
  data_store[entry[0]]=entry[1].strip
  end
-
  end
  f.close
  return data_store
@@ -105,11 +103,11 @@ module Wmap
  end
  alias_method :save!, :save_to_file!

- # add tag entries (from the sitetracker list)
+ # Refresh adware tag store signatures
  def refresh (num=@max_parallel,use_cache=true)
  puts "Add entries to the local cache table from site tracker: " if @verbose
  results = Hash.new
- tags = Wmap::SiteTracker.instance.known_sites.keys
+ tags = @tag_store.keys
  if tags.size > 0
  Parallel.map(tags, :in_processes => num) { |target|
  check_adware(target,use_cache)
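In the adware-tag tracker hunks above, the :data_store accessor is dropped, the signature loader is renamed from load_from_file to load_sig_from_file (presumably to keep it distinct from same-named loaders elsewhere in the gem), and refresh now re-checks only sites already present in the local tag store rather than every site known to Wmap::SiteTracker, matching its updated comment. Condensed from the hunk (all names as they appear in the diff):

    tags = @tag_store.keys   # 2.6.7: re-validate only locally tracked tag sites
    # tags = Wmap::SiteTracker.instance.known_sites.keys   # 2.6.6: scan every known site
    Parallel.map(tags, :in_processes => num) { |target|
      check_adware(target, use_cache)
    }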
@@ -66,210 +66,196 @@ class Wmap::UrlCrawler
  # A web crawler to crawl a known website and search for html links within the same root domain. For example,
  # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
  def crawl(url)
- begin
- puts "Start web crawling on #{url}"
- result=Array.new
- url=url.chomp.strip
- result.push(url_2_site(url))
- raise "Error! Invalid url format: #{urls}" unless is_url?(url)
- # Add logic to profile the web server before crawling; this is used to optimize the crawling speed
- pre_crawl(url)
- status = Timeout::timeout(Crawl_timeout/1000) {
- result+=crawl_worker(url).keys
- }
- puts "Web crawling time-out on #{url}: #{status}" if @verbose
- return result
- rescue => ee
- puts "Exception on method #{__method__} for URL #{url}: #{ee}"
- return result
- end
+ puts "Start web crawling on #{url}"
+ result=Array.new
+ url=url.chomp.strip
+ result.push(url_2_site(url))
+ raise "Error! Invalid url format: #{urls}" unless is_url?(url)
+ # Add logic to profile the web server before crawling; this is used to optimize the crawling speed
+ pre_crawl(url)
+ status = Timeout::timeout(Crawl_timeout/1000) {
+ result+=crawl_worker(url).keys
+ }
+ puts "Web crawling time-out on #{url}: #{status}" if @verbose
+ return result
+ rescue => ee
+ puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+ return result
  end
  alias_method :query, :crawl

  # The worker instance of crawler who perform the labour work
  def crawl_worker(url0)
- begin
- puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
- # Input URL sanity check first
- if is_url?(url0)
- host=url_2_host(url0)
- ip=host_2_ip(host).to_s
- raise "Invalid IP address: #{url0}" if ip.nil?
- port=url_2_port(url0).to_s
- raise "Invalid port number: #{url0}" if port.nil?
- else
- raise "Invalid URL: #{url0}. Please check it out with your browser again."
- end
- log_info=Hash.new
- log_info[1]="Start working on #{url0}"
- url_stores=Hash.new
- url_stores[url0]=true unless url_stores.key?(url0)
- @discovered_urls_by_crawler[url0]=true unless @discovered_urls_by_crawler.key?(url0)
- @crawl_start[url0]=true unless @crawl_start.key?(url0)
+ puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
+ # Input URL sanity check first
+ if is_url?(url0)
+ host=url_2_host(url0)
+ ip=host_2_ip(host).to_s
+ raise "Invalid IP address: #{url0}" if ip.nil?
+ port=url_2_port(url0).to_s
+ raise "Invalid port number: #{url0}" if port.nil?
+ else
+ raise "Invalid URL: #{url0}. Please check it out with your browser again."
+ end
+ log_info=Hash.new
+ log_info[1]="Start working on #{url0}"
+ url_stores=Hash.new
+ url_stores[url0]=true unless url_stores.key?(url0)
+ @discovered_urls_by_crawler[url0]=true unless @discovered_urls_by_crawler.key?(url0)
+ @crawl_start[url0]=true unless @crawl_start.key?(url0)
  # $discovered_urls[url0]=true unless $discovered_urls.key?(url0)
- @crawl_depth.times do
- url_stores.keys.each do |url|
- # 10/01/2013 add logic to avoid unnecessary crawling within the same child instance
- next if @visited_urls_by_crawler.key?(url)
- url_object = open_url(url)
- next if url_object == nil
- url = update_url_if_redirected(url, url_object)
- url_body = read_url(url)
- # Protection code - to avoid parsing failure on the empty or nil object
- next if url_body.nil? or url_body.empty?
- url_stores[url]=true unless url_stores.key?(url)
- @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+ @crawl_depth.times do
+ url_stores.keys.each do |url|
+ # 10/01/2013 add logic to avoid unnecessary crawling within the same child instance
+ next if @visited_urls_by_crawler.key?(url)
+ url_object = open_url(url)
+ next if url_object == nil
+ url = update_url_if_redirected(url, url_object)
+ url_body = read_url(url)
+ # Protection code - to avoid parsing failure on the empty or nil object
+ next if url_body.nil? or url_body.empty?
+ url_stores[url]=true unless url_stores.key?(url)
+ @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
  # $discovered_urls[url]=true unless $discovered_urls.key?(url)
- doc = Nokogiri::HTML(url_body)
- next if doc == nil
- if url_stores.size >= @crawl_page_limit
- #@visited_urls_by_crawler.merge!(url_stores)
- @discovered_urls_by_crawler.merge!(url_stores)
+ doc = Nokogiri::HTML(url_body)
+ next if doc == nil
+ if url_stores.size >= @crawl_page_limit
+ #@visited_urls_by_crawler.merge!(url_stores)
+ @discovered_urls_by_crawler.merge!(url_stores)
  # $discovered_urls.merge!(url_stores)
- puts "Finish web crawling the url: #{url0}"
- return url_stores
- end
- page_urls = find_urls_on_page(doc, url)
- page_urls.uniq!
- page_urls.map do |y|
- y=normalize_url(y)
- url_stores[y]=true unless url_stores.key?(y)
- @discovered_urls_by_crawler[y]=true unless @discovered_urls_by_crawler.key?(y)
+ puts "Finish web crawling the url: #{url0}"
+ return url_stores
+ end
+ page_urls = find_urls_on_page(doc, url)
+ page_urls.uniq!
+ page_urls.map do |y|
+ y=normalize_url(y)
+ url_stores[y]=true unless url_stores.key?(y)
+ @discovered_urls_by_crawler[y]=true unless @discovered_urls_by_crawler.key?(y)
  # $discovered_urls[y]=true unless $discovered_urls.key?(y)
- end
  end
  end
- puts "Finish web crawling on: #{url0}"
- log_info[2]="Finish working on: #{url0}"
- wlog(log_info, "UrlCrawler", @log_file)
- @crawl_done[url0]=true unless @crawl_done.key?(url0)
- return url_stores
- rescue => ee
- puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
- log_info[3]="Exception on #{url0}"
- wlog(log_info,"UrlCrawler",@log_file)
- return url_stores
  end
+ puts "Finish web crawling on: #{url0}"
+ log_info[2]="Finish working on: #{url0}"
+ wlog(log_info, "UrlCrawler", @log_file)
+ @crawl_done[url0]=true unless @crawl_done.key?(url0)
+ return url_stores
+ rescue => ee
+ puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+ log_info[3]="Exception on #{url0}"
+ wlog(log_info,"UrlCrawler",@log_file)
+ return url_stores
  end

  # Fast crawling by utilizing fork manager parallel to spawn numbers of child processes at the same time
  # each child process will continuously work on the target pool until all the works are done
  def crawl_workers (targets,num=@max_parallel)
- begin
- raise "Input error - expecting targets in an array format: #{targets}" unless targets.kind_of? Array
- puts "Sanitize the URL seeds to eliminate the unnecessary duplication(s) ..." if @verbose
- #puts "This could be awhile depending on the list size. Please be patient ..."
- # 09/30/2013 Add additional logic to eliminate the duplicate target site(s) before the crawlers are invoked.
- targets -= ["", nil]
- uniq_sites=Hash.new
- targets.dup.map do |target|
- if is_url?(target)
- host=url_2_host(target)
- ip=host_2_ip(host).to_s
- next if ip.nil?
- port=url_2_port(target).to_s
- next if port.nil?
- site_key=ip+":"+port
- unless uniq_sites.key?(site_key)
- uniq_sites[site_key]=target
- end
+ raise "Input error - expecting targets in an array format: #{targets}" unless targets.kind_of? Array
+ puts "Sanitize the URL seeds to eliminate the unnecessary duplication(s) ..." if @verbose
+ #puts "This could be awhile depending on the list size. Please be patient ..."
+ # 09/30/2013 Add additional logic to eliminate the duplicate target site(s) before the crawlers are invoked.
+ targets -= ["", nil]
+ uniq_sites=Hash.new
+ targets.dup.map do |target|
+ if is_url?(target)
+ host=url_2_host(target)
+ ip=host_2_ip(host).to_s
+ next if ip.nil?
+ port=url_2_port(target).to_s
+ next if port.nil?
+ site_key=ip+":"+port
+ unless uniq_sites.key?(site_key)
+ uniq_sites[site_key]=target
  end
  end
- puts "Sanitization done! " if @verbose
- puts "Start the parallel engine on the normalized crawling list:\n #{targets} "
- puts "Maximum number of web crawling sessions allowed: #{num}" #if @verbose
- raise "Error: target list is empty!" if targets.size < 1
- Parallel.map(uniq_sites.values, :in_processes => num) { |target|
- puts "Working on #{target} ..." if @verbose
- crawl(target)
- }.dup.each do |process|
- puts "process.inspect: #{process}" if @verbose
- urls=process
- urls-=["",nil] unless urls.nil?
- if urls.nil?
- next
- elsif urls.empty?
- next
- #do nothing
- else
- urls.map do |url|
- url.strip!
- @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
- #$discovered_urls[url]=true unless $discovered_urls.key?(url)
- end
+ end
+ puts "Sanitization done! " if @verbose
+ puts "Start the parallel engine on the normalized crawling list:\n #{targets} "
+ puts "Maximum number of web crawling sessions allowed: #{num}" #if @verbose
+ raise "Error: target list is empty!" if targets.size < 1
+ Parallel.map(uniq_sites.values, :in_processes => num) { |target|
+ puts "Working on #{target} ..." if @verbose
+ crawl(target)
+ }.dup.each do |process|
+ puts "process.inspect: #{process}" if @verbose
+ urls=process
+ urls-=["",nil] unless urls.nil?
+ if urls.nil?
+ next
+ elsif urls.empty?
+ next
+ #do nothing
+ else
+ urls.map do |url|
+ url.strip!
+ @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+ #$discovered_urls[url]=true unless $discovered_urls.key?(url)
  end
  end
- #return sites
- return @discovered_urls_by_crawler.keys
- rescue Exception => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
  end
+ #return sites
+ return @discovered_urls_by_crawler.keys
+ rescue Exception => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :crawls, :crawl_workers

  # Fast crawling method - build the target pool from the input file
  def crawl_workers_on_file (file)
- begin
- puts "Web crawl the list of targets from file: #{file}"
- targets=file_2_list(file)
- sites=crawl_workers(targets,num=@max_parallel)
- return sites
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Web crawl the list of targets from file: #{file}"
+ targets=file_2_list(file)
+ sites=crawl_workers(targets,num=@max_parallel)
+ return sites
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :query_file, :crawl_workers_on_file
  alias_method :crawl_file, :crawl_workers_on_file

  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
  def open_url(url)
- begin
- puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
- if url =~ /http\:/i
- # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
- url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
- #url_object = open(url)
- elsif url =~ /https\:/i
- url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
- #url_object = open(url,:ssl_verify_mode => 0)
- else
- raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
- end
- return url_object
- rescue => ee
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
- return nil
- end
+ puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
+ if url =~ /http\:/i
+ # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
+ #url_object = open(url)
+ elsif url =~ /https\:/i
+ url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
+ #url_object = open(url,:ssl_verify_mode => 0)
+ else
+ raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
+ end
+ return url_object
+ rescue => ee
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+ return nil
  end

  # Wrapper to use OpenURI method 'read' to return url body contents
  def read_url(url)
- begin
- puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
- url_object=open_url(url)
- @visited_urls_by_crawler[url]=true unless @visited_urls_by_crawler.key?(url)
- body=url_object.read
- return body
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
+ url_object=open_url(url)
+ @visited_urls_by_crawler[url]=true unless @visited_urls_by_crawler.key?(url)
+ body=url_object.read
+ return body
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end

  # Return the destination url in case of url re-direct
  def update_url_if_redirected(url, url_object)
- begin
- #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
- if url != url_object.base_uri.to_s
- return url_object.base_uri.to_s
- end
- return url
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
+ if url != url_object.base_uri.to_s
+ return url_object.base_uri.to_s
+ end
+ return url
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end

  =begin
@@ -290,90 +276,82 @@ class Wmap::UrlCrawler

  # Search 'current_url' and return found URLs under the same domain
  def find_urls_on_page(doc, current_url)
- begin
- puts "Search and return URLs within the doc: #{doc}" if @verbose
- urls_list = []
- # case 1 - search embedded HTML tag <a href='url'> for the url elements
- links=doc.css('a')
- links.map do |x|
- #puts "x: #{x}"
- new_url = x.attribute('href').to_s
- unless new_url == nil
- if new_url.match("http")
- #if urls_on_same_domain?(new_url,current_url)
- urls_list.push(new_url)
- #end
- else
- new_url = make_absolute(current_url, new_url)
+ puts "Search and return URLs within the doc: #{doc}" if @verbose
+ urls_list = []
+ # case 1 - search embedded HTML tag <a href='url'> for the url elements
+ links=doc.css('a')
+ links.map do |x|
+ #puts "x: #{x}"
+ new_url = x.attribute('href').to_s
+ unless new_url == nil
+ if new_url.match("http")
+ #if urls_on_same_domain?(new_url,current_url)
  urls_list.push(new_url)
- end
+ #end
+ else
+ new_url = make_absolute(current_url, new_url)
+ urls_list.push(new_url)
  end
  end
- # case 2 - search client side redirect - <meta http-equiv="refresh" content="5;URL='http://example.com/'">
- elements=doc.css("meta[http-equiv]")
- unless elements.size == 0
- link=elements.attr("content").value.split(/url\=/i)[1]
- unless link.nil?
- new_url = make_absolute(current_url, link)
- urls_list.push(new_url) unless new_url.nil?
- end
+ end
+ # case 2 - search client side redirect - <meta http-equiv="refresh" content="5;URL='http://example.com/'">
+ elements=doc.css("meta[http-equiv]")
+ unless elements.size == 0
+ link=elements.attr("content").value.split(/url\=/i)[1]
+ unless link.nil?
+ new_url = make_absolute(current_url, link)
+ urls_list.push(new_url) unless new_url.nil?
  end
- #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
- return urls_list.uniq-["",nil]
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
  end
+ #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
+ return urls_list.uniq-["",nil]
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end

  # Method to print out discovery URL result
  def print_discovered_urls_by_crawler
- begin
- puts "Print discovered url by the crawler. " if @verbose
- puts "\nSummary Report of Discovered URLs from the Crawler:"
- @discovered_urls_by_crawler.keys.each do |url|
- puts url
- end
- puts "Total: #{@discovered_urls_by_crawler.keys.size}"
- puts "End of the summary"
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Print discovered url by the crawler. " if @verbose
+ puts "\nSummary Report of Discovered URLs from the Crawler:"
+ @discovered_urls_by_crawler.keys.each do |url|
+ puts url
+ end
+ puts "Total: #{@discovered_urls_by_crawler.keys.size}"
+ puts "End of the summary"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :print, :print_discovered_urls_by_crawler

  # Method to save URL discovery result
  def save_discovered_urls (file)
- begin
- puts "Save discovered urls by the crawler to file: #{file} "
- list_2_file(@discovered_urls_by_crawler.keys, file)
- puts "Done!"
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Save discovered urls by the crawler to file: #{file} "
+ list_2_file(@discovered_urls_by_crawler.keys, file)
+ puts "Done!"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :save, :save_discovered_urls

  # Method to retrieve discovery site result
  def get_discovered_sites_by_crawler
- begin
- puts "Print summary report of discovered sites. " if @verbose
- puts "\nSummary Report of Discovered Sites from the Crawler:"
- sites = Hash.new
- @discovered_urls_by_crawler.keys.each do |url|
- site=url_2_site(url)
- sites[site]=true unless sites.key?(site)
- end
- sites.keys.map { |site| puts site }
- puts "Total: #{sites.size}"
- puts "End of the summary"
- return sites.keys
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Print summary report of discovered sites. " if @verbose
+ puts "\nSummary Report of Discovered Sites from the Crawler:"
+ sites = Hash.new
+ @discovered_urls_by_crawler.keys.each do |url|
+ site=url_2_site(url)
+ sites[site]=true unless sites.key?(site)
+ end
+ sites.keys.map { |site| puts site }
+ puts "Total: #{sites.size}"
+ puts "End of the summary"
+ return sites.keys
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :get_sites, :get_discovered_sites_by_crawler
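The Wmap::UrlCrawler changes above are a mechanical restyle rather than a behavior change: each method's begin ... end wrapper is removed in favor of a method-level rescue clause, which is idiomatic because a Ruby method body already forms an implicit begin block. Schematically (condensed from the crawl hunk):

    # Before (2.6.6): explicit wrapper inside the method
    def crawl(url)
      begin
        # ... crawl work ...
      rescue => ee
        puts "Exception on method #{__method__} for URL #{url}: #{ee}"
        return result
      end
    end

    # After (2.6.7): the method body itself is the begin block
    def crawl(url)
      # ... crawl work ...
    rescue => ee
      puts "Exception on method #{__method__} for URL #{url}: #{ee}"
      return result
    end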
 
data/version.txt CHANGED
@@ -3,8 +3,8 @@
  ###############################################################################
  package = wmap
  # wmap version 2.0 == web_discovery version 1.5.3
- version = 2.6.6
- date = 2019-11-12
+ version = 2.6.7
+ date = 2019-11-19

  author = Sam (Yang) Li
  email = yang.li@owasp.org
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: wmap
  version: !ruby/object:Gem::Version
- version: 2.6.6
+ version: 2.6.7
  platform: ruby
  authors:
  - Sam (Yang) Li
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-11-12 00:00:00.000000000 Z
+ date: 2019-11-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dnsruby