wmap 2.7.9 → 2.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 723c7f71bbe80edf1e1c2ed5cad74fe045a8d3e6494b6394921cdd4546d562e7
4
- data.tar.gz: 072757ff8e19fee784c3387e362ce9feef86bb7869998df86f8c55437dc9b199
3
+ metadata.gz: 0430d7df339ee9c4f6099e1175b6cdef5ee0a453c2a5340e72e897e92a24ecfd
4
+ data.tar.gz: bb4aef610f1f71c992a25c41e0bcd1601039a224a250c251a4fc6b64bc2c362c
5
5
  SHA512:
6
- metadata.gz: f783d31ad51063734b4ba54cc683399196b838d482cff57d9a27643bcb81451653475953aa2ae4ece6e277ec3756e3a3db05e250c2a6be9595fd7df993088088
7
- data.tar.gz: 6502c7c0f5b989440b0dd4f849a1665ddf37cb0cb2d90a673ae31e9928148a1bff164111f46696578b0833ef5ca97a376bfc46793d032bc4596986fd57c1c74b
6
+ metadata.gz: 371d69e9e72145befa5589a79ca7a7ac541c022881fc4a7ebf2c0ccedd3ab92962557487f7195f216427e1cc3f876efcbcae7cd00adf6ea4e55fec72ec94c5fe
7
+ data.tar.gz: f3e2245e00a680240b8c0b9f41a2074aff46f5b5ae7cd71ace31be71456a7ee0b909f95223b49dc846cd2f40bdd71cb26f815553943b2b1e78e9b2b567a8008c
data/bin/wmap CHANGED
@@ -29,6 +29,10 @@ parser.parse!
29
29
  # print program banner
30
30
  puts Wmap.banner
31
31
  # print_usage unless options[:target]
32
+ unless options[:target]
33
+ puts "Usage: $ wmap -h"
34
+ exit 1
35
+ end
32
36
 
33
37
  # Preparing - check out the working logs directory
34
38
  if options[:data_dir]
@@ -17,7 +17,8 @@ require "parallel"
17
17
  class Wmap::UrlCrawler
18
18
  include Wmap::Utils
19
19
 
20
- attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, :verbose, :data_dir
20
+ attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, \
21
+ :verbose, :data_dir, :user_agent
21
22
  attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
22
23
  # Global variable used to store the combined result of all the forked child processes. Note that class variable
23
24
  # would not be able to pass the result due the limitation of IO Pipe communication mechanism used by 'parallel' fork manager
@@ -35,13 +36,16 @@ class Wmap::UrlCrawler
35
36
  @crawl_depth=params.fetch(:crawl_depth, 4)
36
37
  @crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
37
38
  @max_parallel=params.fetch(:max_parallel, 40)
39
+ @user_agent=params.fetch(:user_agent, "OWASP WMAP Spider")
38
40
  # Discovered data store
39
41
  @discovered_urls_by_crawler=Hash.new
40
42
  @visited_urls_by_crawler=Hash.new
41
43
  @crawl_start=Hash.new
42
44
  @crawl_done=Hash.new
43
45
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
44
- @log_file=@data_dir + "/../logs/crawler.log"
46
+ @log_dir=@data_dir + "/../logs/"
47
+ Dir.mkdir(@log_dir) unless Dir.exist?(@log_dir)
48
+ @log_file=@log_dir + "crawler.log"
45
49
  end
46
50
 
47
51
  # Pre-crawl profiler, to be used for network profiling to maximum the crawler performance.
@@ -216,14 +220,14 @@ class Wmap::UrlCrawler
216
220
  alias_method :crawl_file, :crawl_workers_on_file
217
221
 
218
222
  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
219
- def open_url(url)
223
+ def open_url(url,user_agent=@user_agent)
220
224
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
221
225
  if url =~ /http\:/i
222
226
  # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
223
- url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
227
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
224
228
  #url_object = open(url)
225
229
  elsif url =~ /https\:/i
226
- url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
230
+ url_object = open(url, :ssl_verify_mode=>0, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
227
231
  #url_object = open(url,:ssl_verify_mode => 0)
228
232
  else
229
233
  raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
@@ -258,22 +262,6 @@ class Wmap::UrlCrawler
258
262
  return nil
259
263
  end
260
264
 
261
- =begin
262
- # Wrapper for the Nokogiri DOM parser
263
- def parse_html(html_body)
264
- begin
265
- #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
266
- doc = Nokogiri::HTML(html_body)
267
- #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
268
- #puts "doc: #{doc}" if @verbose
269
- return doc
270
- rescue => ee
271
- puts "Exception on method #{__method__}: #{ee}" if @verbose
272
- return nil
273
- end
274
- end
275
- =end
276
-
277
265
  # Search 'current_url' and return found URLs under the same domain
278
266
  def find_urls_on_page(doc, current_url)
279
267
  puts "Search and return URLs within the doc: #{doc}" if @verbose
@@ -8,46 +8,43 @@
8
8
 
9
9
 
10
10
  module Wmap
11
- module Utils
11
+ module Utils
12
12
  # Module to log debugging and other messages
13
- module Logger
13
+ module Logger
14
14
  extend self
15
15
  # Append information into the log file for the trouble-shooting purpose
16
16
  def wlog (obj, agent, file)
17
17
  puts "Writing #{obj} into log file: #{file}" if @verbose
18
- begin
19
- return false if obj.nil?
20
- # 01/27/2015, implementing singleton pattern for the logger
21
- @@f=File.open(file,'a')
22
- timestamp=Time.now
23
- case obj
24
- when Array
25
- if obj.size >= 0
26
- @@f.write "#{timestamp}: #{agent}: \n"
27
- obj.map { |x| @@f.write " #{x}\n" }
28
- puts "The list is successfully saved into the log file: #{file} " if @verbose
29
- end
30
- when Hash
31
- if obj.length >= 0
32
- @@f.write "#{timestamp}: #{agent}: \n"
33
- obj.each_value { |value| @@f.write " #{value}\n" }
34
- puts "The hash is successfully saved into the log file: #{file} " if @verbose
35
- end
36
- when String
37
- @@f.write "#{timestamp}: #{agent}: #{obj}\n"
38
- puts "The string is successfully saved into the log file: #{file} " if @verbose
39
- else
40
- #do nothing
41
- puts "Un-handled exception on: #{obj}" if @verbose
18
+ return false if obj.nil?
19
+ @@f=File.open(file,'a')
20
+ timestamp=Time.now
21
+ case obj
22
+ when Array
23
+ if obj.size >= 0
24
+ @@f.write "#{timestamp}: #{agent}: \n"
25
+ obj.map { |x| @@f.write " #{x}\n" }
26
+ puts "The list is successfully saved into the log file: #{file} " if @verbose
42
27
  end
43
- @@f.close
44
- return true
45
- rescue => ee
46
- puts "Exception on method #{__method__}: #{ee}" if @verbose
47
- return false
48
- end
28
+ when Hash
29
+ if obj.length >= 0
30
+ @@f.write "#{timestamp}: #{agent}: \n"
31
+ obj.each_value { |value| @@f.write " #{value}\n" }
32
+ puts "The hash is successfully saved into the log file: #{file} " if @verbose
33
+ end
34
+ when String
35
+ @@f.write "#{timestamp}: #{agent}: #{obj}\n"
36
+ puts "The string is successfully saved into the log file: #{file} " if @verbose
37
+ else
38
+ #do nothing
39
+ puts "Un-handled exception on: #{obj}" if @verbose
40
+ end
41
+ @@f.close
42
+ return true
43
+ rescue => ee
44
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
45
+ return false
49
46
  end
50
-
51
- end
47
+
48
+ end
52
49
  end
53
50
  end
@@ -15,6 +15,7 @@ module Wmap
15
15
 
16
16
  # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
17
17
  Max_http_timeout=15000
18
+ User_agent = "OWASP WMAP Spider"
18
19
 
19
20
  # Simple sanity check on a 'claimed' URL string.
20
21
  def is_url?(url)
@@ -377,7 +378,8 @@ module Wmap
377
378
 
378
379
  # Given an URL, open the page, then return the DOM text from a normal user perspective
379
380
  def open_page(url)
380
- args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, read_timeout: Max_http_timeout/1000}
381
+ args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, \
382
+ read_timeout: Max_http_timeout/1000, "User-Agent"=>User_agent}
381
383
  doc = Nokogiri::HTML(open(url, args))
382
384
  if doc.text.include?("Please enable JavaScript to view the page content")
383
385
  puts "Invoke headless chrome through webdriver ..." if @verbose
@@ -385,7 +387,7 @@ module Wmap
385
387
  #driver = Selenium::WebDriver.for :chrome
386
388
  # http://watir.com/guides/chrome/
387
389
  args = ['--ignore-certificate-errors', '--disable-popup-blocking', '--disable-translate', '--disk-cache-size 8192']
388
- browser = Watir::Browser.new :chrome, headless: true, options: {args: args}
390
+ browser = Watir::Browser.new :chrome, headless: true, switches: %w[--user-agent=OWASP\ WMAP\ Spider]
389
391
  browser.goto(url)
390
392
  sleep(2) # wait for the loading
391
393
  doc = Nokogiri::HTML(browser.html)
@@ -3,8 +3,8 @@
3
3
  ###############################################################################
4
4
  package = wmap
5
5
  # wmap version 2.0 == web_discovery version 1.5.3
6
- version = 2.7.9
7
- date = 2020-03-30
6
+ version = 2.8.1
7
+ date = 2020-05-08
8
8
 
9
9
  author = Sam (Yang) Li
10
10
  email = yang.li@owasp.org
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.9
4
+ version: 2.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam (Yang) Li
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-30 00:00:00.000000000 Z
11
+ date: 2020-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dnsruby