wmap 2.7.9 → 2.8.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 723c7f71bbe80edf1e1c2ed5cad74fe045a8d3e6494b6394921cdd4546d562e7
- data.tar.gz: 072757ff8e19fee784c3387e362ce9feef86bb7869998df86f8c55437dc9b199
+ metadata.gz: 0430d7df339ee9c4f6099e1175b6cdef5ee0a453c2a5340e72e897e92a24ecfd
+ data.tar.gz: bb4aef610f1f71c992a25c41e0bcd1601039a224a250c251a4fc6b64bc2c362c
  SHA512:
- metadata.gz: f783d31ad51063734b4ba54cc683399196b838d482cff57d9a27643bcb81451653475953aa2ae4ece6e277ec3756e3a3db05e250c2a6be9595fd7df993088088
- data.tar.gz: 6502c7c0f5b989440b0dd4f849a1665ddf37cb0cb2d90a673ae31e9928148a1bff164111f46696578b0833ef5ca97a376bfc46793d032bc4596986fd57c1c74b
+ metadata.gz: 371d69e9e72145befa5589a79ca7a7ac541c022881fc4a7ebf2c0ccedd3ab92962557487f7195f216427e1cc3f876efcbcae7cd00adf6ea4e55fec72ec94c5fe
+ data.tar.gz: f3e2245e00a680240b8c0b9f41a2074aff46f5b5ae7cd71ace31be71456a7ee0b909f95223b49dc846cd2f40bdd71cb26f815553943b2b1e78e9b2b567a8008c
data/bin/wmap CHANGED
@@ -29,6 +29,10 @@ parser.parse!
  # print program banner
  puts Wmap.banner
  # print_usage unless options[:target]
+ unless options[:target]
+   puts "Usage: $ wmap -h"
+   exit 1
+ end
 
  # Preparing - check out the working logs directory
  if options[:data_dir]
@@ -17,7 +17,8 @@ require "parallel"
  class Wmap::UrlCrawler
  include Wmap::Utils
 
- attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, :verbose, :data_dir
+ attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, \
+   :verbose, :data_dir, :user_agent
  attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
  # Global variable used to store the combined result of all the forked child processes. Note that class variable
  # would not be able to pass the result due the limitation of IO Pipe communication mechanism used by 'parallel' fork manager
@@ -35,13 +36,16 @@ class Wmap::UrlCrawler
  @crawl_depth=params.fetch(:crawl_depth, 4)
  @crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
  @max_parallel=params.fetch(:max_parallel, 40)
+ @user_agent=params.fetch(:user_agent, "OWASP WMAP Spider")
  # Discovered data store
  @discovered_urls_by_crawler=Hash.new
  @visited_urls_by_crawler=Hash.new
  @crawl_start=Hash.new
  @crawl_done=Hash.new
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
- @log_file=@data_dir + "/../logs/crawler.log"
+ @log_dir=@data_dir + "/../logs/"
+ Dir.mkdir(@log_dir) unless Dir.exist?(@log_dir)
+ @log_file=@log_dir + "crawler.log"
  end
 
  # Pre-crawl profiler, to be used for network profiling to maximum the crawler performance.
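Note: the Wmap::UrlCrawler constructor hunk above makes the crawler's user agent configurable (defaulting to "OWASP WMAP Spider") and now creates the log directory before opening crawler.log. A minimal instantiation sketch, assuming the option keys shown in the params.fetch calls; the concrete values are illustrative only:

    require "wmap"

    # Option keys taken from the params.fetch calls above; values are examples.
    crawler = Wmap::UrlCrawler.new(
      :user_agent       => "OWASP WMAP Spider",
      :crawl_depth      => 3,
      :crawl_page_limit => 500,
      :max_parallel     => 20
    )
    crawler.user_agent   # readable via the new attr_accessor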
@@ -216,14 +220,14 @@ class Wmap::UrlCrawler
  alias_method :crawl_file, :crawl_workers_on_file
 
  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
- def open_url(url)
+ def open_url(url,user_agent=@user_agent)
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
  if url =~ /http\:/i
  # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
- url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
  #url_object = open(url)
  elsif url =~ /https\:/i
- url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
+ url_object = open(url, :ssl_verify_mode=>0, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
  #url_object = open(url,:ssl_verify_mode => 0)
  else
  raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
@@ -258,22 +262,6 @@ class Wmap::UrlCrawler
  return nil
  end
 
- =begin
- # Wrapper for the Nokogiri DOM parser
- def parse_html(html_body)
- begin
- #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
- doc = Nokogiri::HTML(html_body)
- #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
- #puts "doc: #{doc}" if @verbose
- return doc
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
- end
- =end
-
  # Search 'current_url' and return found URLs under the same domain
  def find_urls_on_page(doc, current_url)
  puts "Search and return URLs within the doc: #{doc}" if @verbose
@@ -8,46 +8,43 @@
  module Wmap
- module Utils
+ module Utils
  # Module to log debugging and other messages
- module Logger
+ module Logger
  extend self
  # Append information into the log file for the trouble-shooting purpose
  def wlog (obj, agent, file)
  puts "Writing #{obj} into log file: #{file}" if @verbose
- begin
- return false if obj.nil?
- # 01/27/2015, implementing singleton pattern for the logger
- @@f=File.open(file,'a')
- timestamp=Time.now
- case obj
- when Array
- if obj.size >= 0
- @@f.write "#{timestamp}: #{agent}: \n"
- obj.map { |x| @@f.write " #{x}\n" }
- puts "The list is successfully saved into the log file: #{file} " if @verbose
- end
- when Hash
- if obj.length >= 0
- @@f.write "#{timestamp}: #{agent}: \n"
- obj.each_value { |value| @@f.write " #{value}\n" }
- puts "The hash is successfully saved into the log file: #{file} " if @verbose
- end
- when String
- @@f.write "#{timestamp}: #{agent}: #{obj}\n"
- puts "The string is successfully saved into the log file: #{file} " if @verbose
- else
- #do nothing
- puts "Un-handled exception on: #{obj}" if @verbose
+ return false if obj.nil?
+ @@f=File.open(file,'a')
+ timestamp=Time.now
+ case obj
+ when Array
+ if obj.size >= 0
+ @@f.write "#{timestamp}: #{agent}: \n"
+ obj.map { |x| @@f.write " #{x}\n" }
+ puts "The list is successfully saved into the log file: #{file} " if @verbose
  end
- @@f.close
- return true
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return false
- end
+ when Hash
+ if obj.length >= 0
+ @@f.write "#{timestamp}: #{agent}: \n"
+ obj.each_value { |value| @@f.write " #{value}\n" }
+ puts "The hash is successfully saved into the log file: #{file} " if @verbose
+ end
+ when String
+ @@f.write "#{timestamp}: #{agent}: #{obj}\n"
+ puts "The string is successfully saved into the log file: #{file} " if @verbose
+ else
+ #do nothing
+ puts "Un-handled exception on: #{obj}" if @verbose
+ end
+ @@f.close
+ return true
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return false
  end
-
- end
+
+ end
  end
  end
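Note: the wlog refactor above replaces the inner begin/end block with a method-level rescue; behaviour is otherwise unchanged (Array, Hash and String payloads are appended with a timestamp and agent tag, and the method returns true on success, false if obj is nil or an exception is rescued). A hedged usage sketch, with illustrative paths and messages:

    # Logger extends self, so wlog can be called on the module directly.
    Wmap::Utils::Logger.wlog(["http://a.example/", "http://b.example/"],
                             "crawler", "logs/crawler.log")
    Wmap::Utils::Logger.wlog("Crawl finished", "crawler", "logs/crawler.log")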
@@ -15,6 +15,7 @@ module Wmap
  # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
  Max_http_timeout=15000
+ User_agent = "OWASP WMAP Spider"
 
  # Simple sanity check on a 'claimed' URL string.
  def is_url?(url)
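Note: the new User_agent constant sits alongside Max_http_timeout in the Wmap::Utils mixin, so any including class can reuse it. A small sketch; HeaderHelper is a hypothetical class, not part of wmap:

    require "wmap"

    class HeaderHelper
      include Wmap::Utils

      # Build default request headers from the shared constant.
      def default_headers
        { "User-Agent" => User_agent }   # => {"User-Agent"=>"OWASP WMAP Spider"}
      end
    end

    HeaderHelper.new.default_headers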
@@ -377,7 +378,8 @@ module Wmap
  # Given an URL, open the page, then return the DOM text from a normal user perspective
  def open_page(url)
- args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, read_timeout: Max_http_timeout/1000}
+ args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, \
+   read_timeout: Max_http_timeout/1000, "User-Agent"=>User_agent}
  doc = Nokogiri::HTML(open(url, args))
  if doc.text.include?("Please enable JavaScript to view the page content")
  puts "Invoke headless chrome through webdriver ..." if @verbose
@@ -385,7 +387,7 @@ module Wmap
  #driver = Selenium::WebDriver.for :chrome
  # http://watir.com/guides/chrome/
  args = ['--ignore-certificate-errors', '--disable-popup-blocking', '--disable-translate', '--disk-cache-size 8192']
- browser = Watir::Browser.new :chrome, headless: true, options: {args: args}
+ browser = Watir::Browser.new :chrome, headless: true, switches: %w[--user-agent=OWASP\ WMAP\ Spider]
  browser.goto(url)
  sleep(2) # wait for the loading
  doc = Nokogiri::HTML(browser.html)
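Note: the headless-Chrome fallback now sets the spider's user agent through a Chrome switch rather than the previous options: {args: ...} form. A hedged standalone sketch of the same Watir call; the URL is a placeholder and chromedriver is assumed to be installed:

    require "watir"
    require "nokogiri"

    # Same switches form as the '+' line above.
    browser = Watir::Browser.new :chrome, headless: true,
                                 switches: ["--user-agent=OWASP WMAP Spider"]
    browser.goto("http://www.example.com")
    sleep(2)                             # wait for the page to load
    doc = Nokogiri::HTML(browser.html)
    browser.close
    puts doc.title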
@@ -3,8 +3,8 @@
  ###############################################################################
  package = wmap
  # wmap version 2.0 == web_discovery version 1.5.3
- version = 2.7.9
- date = 2020-03-30
+ version = 2.8.1
+ date = 2020-05-08
 
  author = Sam (Yang) Li
  email = yang.li@owasp.org
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: wmap
  version: !ruby/object:Gem::Version
- version: 2.7.9
+ version: 2.8.1
  platform: ruby
  authors:
  - Sam (Yang) Li
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-03-30 00:00:00.000000000 Z
+ date: 2020-05-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dnsruby