wmap 2.4.4

Files changed (141)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +141 -0
  3. data/LICENSE.txt +15 -0
  4. data/README.rdoc +98 -0
  5. data/TODO +13 -0
  6. data/bin/deprime +21 -0
  7. data/bin/distrust +38 -0
  8. data/bin/googleBot +23 -0
  9. data/bin/prime +21 -0
  10. data/bin/refresh +26 -0
  11. data/bin/run_tests +16 -0
  12. data/bin/spiderBot +26 -0
  13. data/bin/trust +38 -0
  14. data/bin/updateAll +57 -0
  15. data/bin/wadd +25 -0
  16. data/bin/wadds +26 -0
  17. data/bin/wcheck +28 -0
  18. data/bin/wdel +25 -0
  19. data/bin/wdump +21 -0
  20. data/bin/wmap +151 -0
  21. data/bin/wscan +32 -0
  22. data/data/cidrs +2 -0
  23. data/data/deactivated_sites +1 -0
  24. data/data/domains +2 -0
  25. data/data/hosts +1 -0
  26. data/data/prime_hosts +1 -0
  27. data/data/sites +2 -0
  28. data/data/sub_domains +2 -0
  29. data/demos/bruter.rb +27 -0
  30. data/demos/dns_brutes.rb +28 -0
  31. data/demos/filter_cidr.rb +18 -0
  32. data/demos/filter_crawls.rb +5 -0
  33. data/demos/filter_domain.rb +25 -0
  34. data/demos/filter_geoip.rb +26 -0
  35. data/demos/filter_known_services.rb +59 -0
  36. data/demos/filter_netinfo.rb +23 -0
  37. data/demos/filter_prime.rb +25 -0
  38. data/demos/filter_profiler.rb +3 -0
  39. data/demos/filter_redirection.rb +19 -0
  40. data/demos/filter_site.rb +40 -0
  41. data/demos/filter_siteip.rb +31 -0
  42. data/demos/filter_status.rb +17 -0
  43. data/demos/filter_timestamp.rb +23 -0
  44. data/demos/filter_url.rb +19 -0
  45. data/demos/new_fnd.rb +66 -0
  46. data/demos/nmap_parser.pl +138 -0
  47. data/demos/site_format.rb +18 -0
  48. data/demos/whois_domain.rb +78 -0
  49. data/dicts/GeoIP.dat +0 -0
  50. data/dicts/GeoIPASNum.dat +0 -0
  51. data/dicts/GeoLiteCity.dat +0 -0
  52. data/dicts/ccsld.txt +2646 -0
  53. data/dicts/cctld.txt +243 -0
  54. data/dicts/gtld.txt +25 -0
  55. data/dicts/hostnames-dict.big +1402 -0
  56. data/dicts/hostnames-dict.txt +101 -0
  57. data/lib/wmap/cidr_tracker.rb +327 -0
  58. data/lib/wmap/dns_bruter.rb +308 -0
  59. data/lib/wmap/domain_tracker/sub_domain.rb +142 -0
  60. data/lib/wmap/domain_tracker.rb +342 -0
  61. data/lib/wmap/geoip_tracker.rb +72 -0
  62. data/lib/wmap/google_search_scraper.rb +177 -0
  63. data/lib/wmap/host_tracker/primary_host.rb +130 -0
  64. data/lib/wmap/host_tracker.rb +550 -0
  65. data/lib/wmap/network_profiler.rb +144 -0
  66. data/lib/wmap/port_scanner.rb +208 -0
  67. data/lib/wmap/site_tracker/deactivated_site.rb +85 -0
  68. data/lib/wmap/site_tracker.rb +937 -0
  69. data/lib/wmap/url_checker.rb +314 -0
  70. data/lib/wmap/url_crawler.rb +381 -0
  71. data/lib/wmap/utils/domain_root.rb +184 -0
  72. data/lib/wmap/utils/logger.rb +53 -0
  73. data/lib/wmap/utils/url_magic.rb +343 -0
  74. data/lib/wmap/utils/utils.rb +333 -0
  75. data/lib/wmap/whois.rb +76 -0
  76. data/lib/wmap.rb +227 -0
  77. data/logs/wmap.log +17 -0
  78. data/ruby_whois_patches/base_cocca2.rb +149 -0
  79. data/ruby_whois_patches/kero.yachay.pe.rb +120 -0
  80. data/ruby_whois_patches/whois.PublicDomainRegistry.com.rb +124 -0
  81. data/ruby_whois_patches/whois.above.com.rb +61 -0
  82. data/ruby_whois_patches/whois.adamsnames.tc.rb +107 -0
  83. data/ruby_whois_patches/whois.aeda.net.ae.rb +105 -0
  84. data/ruby_whois_patches/whois.ai.rb +112 -0
  85. data/ruby_whois_patches/whois.arnes.si.rb +121 -0
  86. data/ruby_whois_patches/whois.ascio.com.rb +91 -0
  87. data/ruby_whois_patches/whois.cnnic.cn.rb +123 -0
  88. data/ruby_whois_patches/whois.corporatedomains.com.rb +67 -0
  89. data/ruby_whois_patches/whois.crsnic.net.rb +108 -0
  90. data/ruby_whois_patches/whois.denic.de.rb +174 -0
  91. data/ruby_whois_patches/whois.dk-hostmaster.dk.rb +120 -0
  92. data/ruby_whois_patches/whois.dns.be.rb +134 -0
  93. data/ruby_whois_patches/whois.dns.lu.rb +129 -0
  94. data/ruby_whois_patches/whois.dns.pl.rb +150 -0
  95. data/ruby_whois_patches/whois.dns.pt.rb +119 -0
  96. data/ruby_whois_patches/whois.domain.kg.rb +126 -0
  97. data/ruby_whois_patches/whois.domainregistry.my.rb +123 -0
  98. data/ruby_whois_patches/whois.domreg.lt.rb +110 -0
  99. data/ruby_whois_patches/whois.dot.tk.rb +140 -0
  100. data/ruby_whois_patches/whois.hkirc.hk.rb +121 -0
  101. data/ruby_whois_patches/whois.isnic.is.rb +130 -0
  102. data/ruby_whois_patches/whois.je.rb +119 -0
  103. data/ruby_whois_patches/whois.jprs.jp.rb +137 -0
  104. data/ruby_whois_patches/whois.kenic.or.ke.rb +140 -0
  105. data/ruby_whois_patches/whois.markmonitor.com.rb +118 -0
  106. data/ruby_whois_patches/whois.melbourneit.com.rb +58 -0
  107. data/ruby_whois_patches/whois.nic.as.rb +96 -0
  108. data/ruby_whois_patches/whois.nic.at.rb +109 -0
  109. data/ruby_whois_patches/whois.nic.ch.rb +141 -0
  110. data/ruby_whois_patches/whois.nic.cl.rb +117 -0
  111. data/ruby_whois_patches/whois.nic.ec.rb +157 -0
  112. data/ruby_whois_patches/whois.nic.im.rb +120 -0
  113. data/ruby_whois_patches/whois.nic.it.rb +170 -0
  114. data/ruby_whois_patches/whois.nic.lv.rb +116 -0
  115. data/ruby_whois_patches/whois.nic.ly.rb +127 -0
  116. data/ruby_whois_patches/whois.nic.mu.rb +27 -0
  117. data/ruby_whois_patches/whois.nic.mx.rb +123 -0
  118. data/ruby_whois_patches/whois.nic.net.sa.rb +111 -0
  119. data/ruby_whois_patches/whois.nic.or.kr.rb +101 -0
  120. data/ruby_whois_patches/whois.nic.tel.rb +129 -0
  121. data/ruby_whois_patches/whois.nic.tr.rb +133 -0
  122. data/ruby_whois_patches/whois.nic.us.rb +129 -0
  123. data/ruby_whois_patches/whois.nic.ve.rb +135 -0
  124. data/ruby_whois_patches/whois.norid.no.rb +127 -0
  125. data/ruby_whois_patches/whois.pandi.or.id.rb +118 -0
  126. data/ruby_whois_patches/whois.psi-usa.info.rb +63 -0
  127. data/ruby_whois_patches/whois.registro.br.rb +109 -0
  128. data/ruby_whois_patches/whois.registrygate.com.rb +55 -0
  129. data/ruby_whois_patches/whois.rrpproxy.net.rb +61 -0
  130. data/ruby_whois_patches/whois.sgnic.sg.rb +130 -0
  131. data/ruby_whois_patches/whois.srs.net.nz.rb +166 -0
  132. data/ruby_whois_patches/whois.tucows.com.rb +70 -0
  133. data/ruby_whois_patches/whois.twnic.net.tw.rb +133 -0
  134. data/settings/discovery_ports +24 -0
  135. data/settings/google_keywords.txt +9 -0
  136. data/settings/google_locator.txt +23 -0
  137. data/test/domain_tracker_test.rb +31 -0
  138. data/test/utils_test.rb +168 -0
  139. data/version.txt +13 -0
  140. data/wmap.gemspec +49 -0
  141. metadata +202 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 5694ee5d1b8c7c612253fe814a4c829ea30f0107
+   data.tar.gz: f20a472f8b02efb83c918d2a8359d09adeed6bd9
+ SHA512:
+   metadata.gz: 1598c1da69120421bd68ba0dc92730e07e7a099206255ada23a55ac4db6e79410af0b9a4184fb56779cef9ab5f5cfe887ca22d7e0505117d08450c36293a59b7
+   data.tar.gz: 23532584bc9deec9fb6fc76a1537c474b7ee05e0e74d27d9c79d2d4ef880d10a5a77eb13c8d6e7f04b58b32339e20bb3d989002a4319fee36ad29a4b938cf74b
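
For reference, the digests above can be re-checked against a downloaded package. The sketch below is only an illustration and assumes the standard RubyGems layout (a .gem file is a tar archive containing metadata.gz, data.tar.gz and checksums.yaml.gz); it is not part of the gem itself.

  # Hypothetical check: recompute the SHA512 digests of an unpacked wmap-2.4.4.gem
  # (e.g. after 'tar -xf wmap-2.4.4.gem') and compare them against checksums.yaml.
  require 'digest'
  require 'yaml'
  require 'zlib'

  checksums = nil
  Zlib::GzipReader.open('checksums.yaml.gz') { |gz| checksums = YAML.load(gz.read) }
  %w[metadata.gz data.tar.gz].each do |file|
    actual   = Digest::SHA512.file(file).hexdigest
    expected = checksums['SHA512'][file]
    puts "#{file}: #{actual == expected ? 'OK' : 'MISMATCH'}"
  end
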
data/CHANGELOG.md ADDED
@@ -0,0 +1,141 @@
+ ##
+ # Wmap
+ #
+ - A pure Ruby library for Internet web application discovery and tracking.
+ -
+ - Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+
+
+ # Change-log
+
+
+ ## Mile-stones
+
+ - July 2015: Move the project under OWASP
+ - November 2014: Re-name from web_discovery to wmap, re-factor the code base to better scale up
+ across the board
+ - May 2013: First release of web_discovery gem package 1.1.0
+ - December 2012: Alpha Release 0.1.0, with everything in one file for fast prototype purpose
+
+
+ ## Backward Incompatibilities
+
+ - List of features that are backward incompatible:
+
+
+ ## Beta Release 1.x
+
+ - 07/25/2015 Clean up for OWASP release.
+ - 04/14/2015 Fix bug in 'trust'/'distrust' utilities, so that they could properly handle CIDR as input format.
+ - 04/02/2015 Removing tracking of the 'un-reponsive' http site under 'Wmap::SiteTracker' class.
+ - 03/30/2015 Fix a bug in 'bulk_refresh' method under 'Wmap::SiteTracker' class.
+ - 03/24/2015 Implement 'wcheck' executable support to improve user experience.
+ - 03/23/2015 Implement 'wadds' executable support to improve user experience.
+ - 02/19/2015 Bug fix on url_2_site method to handle rare cases containing "-" or "?" special chars.
+ - 02/18/2015 Implement the tcp port-scanner executable 'wscan'.
+ - 02/17/2015 A bug fix on the Wmap::Utils.normalize_url method, where the rare case of trailing dot after hostname could
+ - properly removed as well.
+ - 02/11/2015 A bug fix on the Wmap::SiteTracker.get_prim_uniq_sites method, where host resolved to multiple IPs could
+ - cause exception in the rare case.
+ - 02/10/2015 Implement the 'singleton' module in the Wmap::HostTracker::PrimaryHost class
+ - 02/05/2015 Implement singleton pattern on the Wmap::DomainTracker::SubDomain, Wmap::SiteTracker::DeactivatedSite class.
+ - 02/03/2015 Implement singleton pattern on the logger module, re-organize the log file structure and location.
+ - 01/30/2015 Implement the singleton pattern on the Wmap::SiteTracker, Wmap::DomainTracker class.
+ - 01/27/2015 Separate the logger sub-module and implement the singleton pattern on the logger.
+ - 01/24/2015 Implement the 'singleton' module in the Wmap::HostTracker class, in order to avoid race condition
+ - under the parallel engine
+ - 01/08/2015 Implement the Ruby MiniTest Unit-test frame-work, with the first unit test file 'utils_test.rb'
+ - 12/15/2014 Replace the instance variables @known_hosts, @known_sites with class variables
+ - across all modules,i.e. enforcing the singleton pattern to avoid race condition under parallel execution.
+ - 11/17/2014 Re-factor the code Wmap::SiteTracker, optimize the 'dump' and 'dump_xml' methods.
+ - 11/14/2014 Change the package name from "WebDiscovery" version 1.5.3 to "Wmap" to simple reason, start with version 1.0
+ - 11/13/2014 Add XML support as the program output format, i.e. 'save_uniq_sites_xml' method for WebDiscovery::SiteTracker class
+ - 11/01/2014 Add 'add' and 'delete' methods for WebDiscovery::CidrTracker class, in order to make it self-contained and user friendly.
+ - 10/31/2014 Add executables (trust, distrust etc..) under bin directory, so that the application is more user friendly
+ - 10/28/2014 Add parallel support in the HostTracker, SubDomain classes.
+ - 10/27/2014 Add 'prime', 'wdump' bin executables, to manually set the prime host, and to dump out unique sites
+ - in the site tracking data repository respectively.
+ - 10/21/2014 Re-implement bulk_refresh method under WebDiscovery::SiteTracker class,
+ - with parallel engine support.
+ - 09/29/2014 Add 'save' method to WebDiscovery::UrlCrawler class.
+ - 09/08/2014 Support parallel and add 'checks' method to WebDiscovery::UrlChecker class, in order to
+ - scale up.
+ - 08/04/2014 Add brute_all method for WebDiscovery::DnsBruter class.
+ - 07/21/2014 Add consistency checks for WebDiscovery::SiteTracker::Dump method.
+ - 07/08/2014 Add stop_hostname method in the class WebDiscovery::HostTracker; enhance the DnsBruter methods
+ - accordingly.
+ - 05/19/2014 Add site_ip_known? method in the class WebDiscovery::SiteTracker
+ - 03/31/2014 Add deduplicate procedures in the class WebDiscovery::HostTracker::PrimaryHost
+ - 02/10/2014 Add additional class WebDiscovery::DomainTracker::SubDomain to better manage sub-domains
+ - make changes in DomainTracker, HostTracker, and executable 'wd' accordingly
+ - 02/07/2014 Add additional class WebDiscovery::SiteTracker::DeactivatedSite and logic to record
+ - the decommissioned or no longer accessible site
+ - 01/21/2014 Introduce 'hostname_mutation' method into WebDiscovery::DnsBruter class
+ - 01/15/2014 Modify WebDiscovery::Utils::UrlMagic class and add URL 'normalize' method
+ - 01/14/2014 Modify WebDiscovery::UrlCrawler class and set hard stop limit of crawler time-out to
+ - 10 minutes per instance.
+ - 01/06/2014 Revamp WebDiscovery::UrlCrawler class, to make it more readable and scalable.
+ - 12/19/2013 Add new method 'SiteTracker.site_check', to pull out a record from the site store.
+ - 12/04/2013 Extend the usage of the 'parallel' to the 'SiteTracker.refresh_all' method, in order to
+ - scale up the completion time.
+ - 11/06/2013 Include GeoIPLite into the gem pack and modify the links under GeoIPTracker class.
+ - 10/29/2013 Fix a small bug in the 'HostTracker.bulk_add' method
+ - 10/21/2013 Change the data structure of domain_tracker class, to include open zone transfer
+ - information.
+ - 10/09/2013 Add additional logic to handle Dnsruby::ServFail type error intelligently in order to
+ - optimize the DnsBruter speed.
+ - 10/08/2013 Add additional logic to avoid repeating crawling the same link by different child processes.
+ - 10/04/2013 Add additional logic to profile the web server to improving the crawler speed
+ - 09/30/2013 Add additional logic to eliminate crawling duplicate sites for the multi-threaded crawlers.
+ - 09/23/2013 Optimize the DNS bruter code, as we have a large number of internet domains/sub-domains
+ - that cause overflow when using the array concatenation method.
+ - 09/09/2013 Fix of error handling of "Connection reset by peer" in 'url_checker' class,
+ - along with other minors
+ - 07/16/2013 Add an intelligent network profiler to maximize the port scanner performance.
+ - 06/24/2013 Add support to map CN in the ssl sites into the primary host table.
+ - 06/11/2013 Fix some minor bugs within the dns_bruter class
+ - 05/29/2013 Implement Google search scraper for Google search engine discovery.
+ - 05/20/2013 Support sub-domain identification and brute-forcing by: a) add 'get_subdomains' method in
+ - the 'DomainRoot' module; b) add 'dump_sub_domains' method in the 'HostTracker' class;
+ - c) implement 'dns_brute_subdomains' method in the 'DnsBruter' class
+ - 05/16/2013 Add 'server' type into 'SiteTracker' data structure, implement a simple 'search' method.
+ - 05/15/2013 Implement the 'get_server_header' method for the UrlChecker class
+ - 05/08/2013 Implement the 'resolve_ip_sites' method for the SiteTracker class
+ - 05/07/2013 Implement GeoIP tracker class WebDiscovery::GeoIPTracker, which wrap around the
+ - Ruby GeoIP 1.1.0 gem - http://geoip.rubyforge.org/
+ - 05/06/2013 Add methods in the WebDiscovery::Whois class to extract netname and description
+ - when performing whois lookup for an IP address
+ - 05/02/2013 Change data structure for the 'SiteTracker' class, add service port field
+ - for better tracking purpose; add an ASCII art banner
+ - 04/25/2013 Add method to retrieve common name from a server cert in the 'UrlChecker' class
+ - 04/23/2013 Fully test out the port scanner, DNS bruter and the main executable;
+ - change README.txt to README.rdoc; create Rakefile
+ - 04/18/2013 patch main executable 'webDiscovery' to make it flexible for the users
+ - 04/15/2013 Finish major re-haul of the code base. Now it's broke into OO code-let
+ - 03/11/2013 Add support for country code second level domain lookup
+ - 02/25/2013 Implement new methods to handle the internet domain seed file
+ - 12/12/2012 Re-factor the crawler code; add new feature of the hosts cache table for
+ - local domain name (reverse) lookup
+ - 11/30/2012 Phase out the dependency on the 'dig' command; replace it with the native
+ - Ruby 'resolv', and 'dnsruby' modules instead
+ - 11/27/2012 Re-factor 'get_domain_root' method, fix a bug there
+ - 11/21/2012 Implement the 'whois' wrapper for the domain research
+ - 11/16/2012 Implement @know_cidr_blks class hash for discovered app labelling
+ - 11/15/2012 Optimize the algorithm used by the method 'ip_known?'
+ - 11/14/2012 Fix small bug in method 'host_2_ips'
+ - 11/13/2012 Fix small bugs handling url case-insensitivity
+ - 11/08/2012 Add methods for host discovery via open zone transfer
+ - 11/07/2012 Add the dns_brute_force method
+ - 11/06/2012 Re-factor the crawler code
+ - 11/05/2012 Bug fix on the class attributes access
+ - 10/28/2012 Bug fix on the reverse-dns lookup method
+ - 10/26/2012 Implement a web crawler for website crawling and link extraction
+ - 10/24/2012 Re-factor the port discovery method for better performance
+ - 10/23/2012 Implement the port discovery method
+ - 10/22/2012 Implement a simple HTTP service detection method
+ - 10/21/2012 Implement a simple TCP port scanner, and SSL socket detection methods
+ - 10/20/2012 Implement the IP address validation process
+ - 10/19/2012 Reimplement the URL status check method
+ - 10/18/2012 Implement proprietary URI manipulation methods
+ - 10/17/2012 Refine and reimplement the DNS query methods and process
+ - 10/16/2012 Start from the drawing board, prototype BESTwebDiscovery class.
data/LICENSE.txt ADDED
@@ -0,0 +1,15 @@
+ Copyright (c) 2012-2015 Yang Li <yang.li8@owasp.org>
+
+ Apache License 2.0
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/README.rdoc ADDED
@@ -0,0 +1,98 @@
+ = OWASP WebMapper Ruby API README
+
+
+ == What's this program for?
+ This program is designed for the web application asset discovery and tracking. It was originally developed to
+ to cover the gaps of a similar commercial product. Over the time it grows to be a more capable and complete replacement (IMHO).
+
+
+ == Program Version
+ The latest release is Beta version 1.5.x as of fall 2014. Please refer to the CHANGELOG.md for the program's history information.
+
+
+ == Author Contact
+ This program is designed and developed by Yang Li. You can reach him by Email: <yang.li@owasp.org>
+
+
+ == Installation
+ To take full power of this program, you would need an *nix flavor machine with direct Internet access. I have installed it successfully on both Mac and Linux machines. You'll also need the Ruby environment being setup properly. The easiest way to install OWASP Web Mapper is by using Ruby Gems. Download the latest gem 'wmap-x.x.x.gem' into the local file system. Then install it from command line there:
+
+ gem install wmap-x.x.x.gem --no-rdoc
+
+
+ == Dependency
+ You need the Ruby 1.9.2 or above in order to use this program. In my test environment, I was able to set it up with RVM. Please refer to this page for more installation information: http://www.ruby-lang.org/en/downloads/
+
+ In addition, the following Ruby GEM dependency are needed by different components of this software:
+ require "digest/md5"
+ require "dnsruby"
+ require "geoip"
+ require "minitest/autorun"
+ require "net/http"
+ require "net/ping"
+ require "netaddr"
+ require "nokogiri"
+ require "open_uri_redirections"
+ require "openssl"
+ require "open-uri"
+ require "parallel"
+ require "resolv"
+ require "singleton"
+ require "uri"
+ require "whois"
+ require 'httpclient'
+ require 'nokogiri'
+ require 'open-uri'
+ To install "uri" gem for example, use the command below:
+ $ gem install uri
+
+ == Ruby-whois Gem Patches
+ This software depends on a patched version of Ruby gem ruby-whois-2.7.0 (http://www.ruby-whois.org/) for the domain whois lookup feature. For better result, you could manually add the patches into your local whois gem installation directory as shown below:
+ $ cp whois_patches/* [Your_ruby_whois_gem_path]/whois/lib/whois/record/parser/
+ Or you can directly download the branched whois gem from this repository - https://github.com/yangsec888/whois
+
+
+ == Before Using This Program
+ You need to define a scope for the program to run successful. The scope includes both your legitimate Internet domain, and your public
+ network block in the CIDR format.
+
+ To add your Internet domain into the scope, use the build-in shell command below:
+ $ trust XYZ.COM
+
+ To add your public network block into the scope:
+ $ trust x.x.x.x/x
+
+
+ == Automatic Discovery and Tracking
+
+ $ wmap <seed file | target host | target url | target IP or network cidr>
+
+ The above utility is intelligent enough to take argument as either a seed file, or a string such as a host, an IP, a network block, or a URL. The new discoveries will be automatically tracked in the data file 'lib/wmap/data/target_sites'.
+ Note: seed file - mix of url, cidr and domain seed, one entry per line.
+ url seed - known URL(s) for further discovery via the web crawler.
+ cidr seed - list of known network blocks, for discovering web service via port scanning; it is also used to validate if the web service has a known IP (internal hosted).
+ domain seed - validated internet domain to be used for DNS record brute-forcing; it is also used to validate the ownership of found web service.
+
+
+ == Dump Out Discovery Database
+ You can dump out the program output by using the build-in utility 'wdump' as shown below:
+ $ wdump [output file name from you]
+
+ The above utility will dump out the discovery database into a single file as program output. Currently, the supported file format is Comma-separated Value (.csv) and Extensible Markup Language (.xml)
+
+
+ == More Usage Cases:
+ There are more examples under the 'demos' folder of this package. The examples show how to use the 'wmap' API to get your job done easily. Please check out the code - they should be easy and straightforward to be understood.
+
+
+ == More Document(s):
+ The software comes with the Ruby doc during your installation as shown above. For your convenience, the Ruby doc is also distributed with this software. You can navigate to the 'doc' folder of your local installation, and click the 'index.html' to open the start page in your favorite browser. You can also download the wmap-x.x.x.rdoc.zip documentation package alone from GitHub, unzip and open the doc/index.html in your browser.
+
+ If you need additional documentation / information other than this README file and the Ruby document package, please be patient - as I'm still working on it :)
+
+ == How do I report the bugs, or maybe require some new features?
+ Contact the author Yang Li directly at email 'yang.li@owasp.org'.
+
+
+ == Legal Disclaimer:
+ This software is provided strictly 'as-if' without any implied warranty. You're free to copy or modify the codes anyway you want - a reference back to this software will be appreciated. Please refer to the 'LICENSE.txt' file for more information.
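
The scoping step described in the README's "Before Using This Program" section can also be driven from Ruby instead of the 'trust' shell command. The sketch below is a minimal illustration that mirrors the bin/trust executable shown later in this change set; the domain and CIDR values are placeholders.

  # Minimal sketch of defining the trust scope via the wmap API (mirrors bin/trust).
  # "example.com" and "192.0.2.0/24" are placeholder values.
  require "wmap"

  dt = Wmap::DomainTracker.instance
  if dt.is_domain?("example.com")
    dt.add("example.com")          # add a trusted domain to the local repository
    dt.save!
  end

  ct = Wmap::CidrTracker.new
  if ct.is_cidr?("192.0.2.0/24")
    ct.add("192.0.2.0/24")         # add a trusted network block
    ct.save!
  end
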
data/TODO ADDED
@@ -0,0 +1,13 @@
+ # The following improvements would be nice to have:
+
+
+ 1. Add unit test cases for all major modules / classes (Note: Jan. 2015, Wmap::Utils module unit tests added).
+ 2. Add Google search capability (Note: Oct. 2013, partial done, need Google API licensing /fee in order for the full implementation test. )
+ 3. Better documentations / man pages for executables / operation manuals, in addition to the 'README' and 'rdoc'
+ 4. Port the local data repository to a relational DB (for support of multi user sessions in the future)
+ 5. Implement a RoR web interface for better user experience; with standard SOAP API as a web service.
+ 6. Implement a report engine for better metric reporting.
+ 7. Improve the crawler, such as supporting link extraction in AJAX, FLASH and other rich HTML5 format
+ 8. Improve the build-in port scanner 'wscan'. So it could eventually replace 'nmap' in the larget scale network blocks scanning process.
+ 9. Add vulnerability scanner engine.
+ 10.Add vulnerability life cycle management.
data/bin/deprime ADDED
@@ -0,0 +1,21 @@
+ #!/usr/bin/env ruby
+ # Executable to remove a fully qualified domain name into the prime host tracking data repository
+ # This is useful as a self-correction mechanism to flag out unique website in a constant way
+ require "wmap"
+
+ def print_usage
+   puts "Program to remove a primary host entry in the local data repository. Usage: deprime [fully qualified domain name]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: deprime #{ARGV[0]}","deprime",Log_dir+"wmap.log")
+
+ # Update primary host store
+ ph=Wmap::HostTracker::PrimaryHost.instance
+ abort "Incorrect program argument! Proper Usage: deprime [fully qualified domain name]" unless ARGV.length==1 && ph.is_fqdn?(ARGV[0])
+
+ ph.delete(ARGV[0])
+ ph.save!
+ ph=nil
data/bin/distrust ADDED
@@ -0,0 +1,38 @@
+ #!/usr/bin/env ruby
+ # Executable to remove entry from the ring of the trust. I.E. the trusted domain or CIDR
+ require "wmap"
+
+ def print_usage
+   puts "Program to remove trust authority entry. Usage: distrust [domain | netblock]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: distrust #{ARGV[0]}","distrust",Log_dir+"wmap.log")
+
+ dt=Wmap::DomainTracker.instance
+ ct=Wmap::CidrTracker.new(:verbose=>false)
+ abort "Incorrect program argument! Proper Usage: distrust [domain | netblock]" unless ARGV.length==1 && (dt.is_fqdn?(ARGV[0]) || ct.is_cidr?(ARGV[0]))
+
+ puts "Start the demonizing ..."
+
+
+ if dt.is_domain?(ARGV[0])
+   result=dt.delete(ARGV[0])
+   unless result.nil?
+     dt.save!
+     abort "Domain #{ARGV[0]} is successfully demonized!"
+   end
+ end
+ dt=nil
+
+ if ct.is_cidr?(ARGV[0].to_s)
+   result=ct.delete(ARGV[0].to_s)
+   unless result.nil?
+     ct.save!
+     abort "Net block #{ARGV[0]} is successfully demonized!"
+   end
+ end
+ ct=nil
+
data/bin/googleBot ADDED
@@ -0,0 +1,23 @@
+ #!/usr/bin/env ruby
+ # Using the Google scraper module to query Google engines on different geographic locations, then work through the google results to update site store
+ require 'wmap'
+
+ def print_usage
+   puts "Program to discovery website assets from Google search engines. Usage: googleBot"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: googleBot","googleBot",Log_dir+"wmap.log")
+
+ abort "Incorrect program argument - no argument needed! Proper Usage: googleBot " unless ARGV.length==0
+
+ puts "Start the Google search scraper. This may take a while, please be patient ..."
+ dis=Wmap::GoogleSearchScraper.new
+ dis.workers
+ sites=dis.discovered_sites_from_scraper.keys
+ puts "Update Google search results into the data repository:"
+ k=Wmap::SiteTracker.instance
+ k.adds(sites)
+ k.save!
data/bin/prime ADDED
@@ -0,0 +1,21 @@
+ #!/usr/bin/env ruby
+ # Executable to add a fully qualified domain name into the prime host tracking data repository
+ # This is useful as a self-correction mechanism to flag out unique website in a constant way
+ require "wmap"
+
+ def print_usage
+   puts "Program to add a primary host entry in the local data repository. Usage: prime [hostname]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: prime #{ARGV[0]}","prime",Log_dir+"wmap.log")
+
+ # Update primary host store
+ ph=Wmap::HostTracker::PrimaryHost.instance
+ abort "Incorrect program argument! Proper Usage: prime [fully qualified domain name]" unless ARGV.length==1 && ph.is_fqdn?(ARGV[0])
+
+ ph.add(ARGV[0])
+ ph.save!
+ ph=nil
data/bin/refresh ADDED
@@ -0,0 +1,26 @@
+ #!/usr/bin/env ruby
+ # Executable to refresh a site within the tracking data repository
+ # This is useful as site information may change over the time
+ ## Usage: refresh [a site|all]
+ require "wmap"
+
+ def print_usage
+   puts "Program to refresh the website entry in the local data repository. Usage: refresh [a site|all]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: refresh #{ARGV[0]}","refresh",Log_dir+"wmap.log")
+
+ abort "Incorrect program argument! Proper Usage: refresh [site]" unless ARGV.length==1
+
+ #puts "Captured command argument: #{ARGV[0]}"
+ if ARGV[0]=="all"
+   # magic token 'all' to refresh all sites in the store
+   Wmap.refresh_all
+ else
+   abort "Error: incorrect site syntax! It must be in http(s):://xxx.xxx/ format." unless Wmap::SiteTracker.instance.is_site?(ARGV[0])
+   st=nil
+   Wmap.refresh(ARGV[0])
+ end
data/bin/run_tests ADDED
@@ -0,0 +1,16 @@
+ #!/usr/bin/env ruby
+ # Ruby MiniTest Unit-test Auto Runner
+ require "wmap"
+
+ def print_usage
+   puts "Program to run all the build-in unit tests under /test. Usage: run_tests"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: run_tests","run_tests",Log_dir+"wmap.log")
+
+ spec = Gem::Specification.find_by_name("wmap")
+ Dir[spec.gem_dir+'/test/*.rb'].map { |test| puts "Executing test file: #{test}";system('ruby', test) }
+
data/bin/spiderBot ADDED
@@ -0,0 +1,26 @@
+ #!/usr/bin/env ruby
+ # script to automate the new site discovery through by crawling all unique sites in the site store
+ require "wmap"
+
+ def print_usage
+   puts "Program to crawl all unique sites within the site store, then update the store accordingly. \nUsage: spiderBot"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: spiderBot","spiderBot",Log_dir+"wmap.log")
+
+ abort "Incorrect program argument - no argument need! Proper Usage: spiderBot" unless ARGV.length==0
+ dis=Wmap::SiteTracker.instance
+ sites=dis.get_uniq_sites
+
+ crawler=Wmap::UrlCrawler.new(:max_parallel=>40)
+ crawler.crawls(sites)
+ v_sites=crawler.discovered_urls_by_crawler.keys.map {|x| crawler.url_2_site(x) }
+ v_sites.uniq!
+
+ dis.adds(v_sites)
+ dis.save!
+
+
data/bin/trust ADDED
@@ -0,0 +1,38 @@
+ #!/usr/bin/env ruby
+ require "wmap"
+ # Executable to add seed entry into ring of the trust. I.E. the trusted domain or CIDR
+
+ def print_usage
+   puts "Program to add trust authority entry. Usage: trust [domain|CIDR]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: trust #{ARGV[0]}","trust",Log_dir+"wmap.log")
+
+ dt=Wmap::DomainTracker.instance
+ ct=Wmap::CidrTracker.new
+ abort "Incorrect program argument! Proper usage: trust [domain | netblock]" unless ARGV.length==1 && (dt.is_fqdn?(ARGV[0]) || ct.is_cidr?(ARGV[0]))
+
+ puts "Start the baptizing process ..."
+
+ # Add entry into the local repository
+
+ if dt.is_domain?(ARGV[0])
+   result=dt.add(ARGV[0])
+   unless result.nil?
+     dt.save!
+     abort "Domain #{ARGV[0]} is successfully baptized!"
+   end
+ end
+ dt=nil
+
+ if ct.is_cidr?(ARGV[0])
+   result=ct.add(ARGV[0])
+   unless result.nil?
+     ct.save!
+     abort "Net block #{ARGV[0]} is successfully baptized!"
+   end
+ end
+ ct=nil
data/bin/updateAll ADDED
@@ -0,0 +1,57 @@
+ #!/usr/bin/env ruby
+ # the up to date program to refresh all local cache tables in one shot. Note it requires an uninterrupted Internet connection to perform the job. It also takes a long time so patient is needed. Lastly, don't forget to backup the existing 'data' folder before execute this command, cause any unexpected interruption may wreak havoc on the existing data file!!!
+
+ require "wmap"
+
+ puts Wmap.banner
+ puts "Program to refresh the local data repository."
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: updateAll","updateAll",Log_dir+"wmap.log")
+
+ abort "Incorrect program argument - no argument needed! Proper Usage: updateAll" unless ARGV.length==0
+
+ puts "You're about to update Wmap data repository. It'll take a long time. And the Internet connection must be un-interrupted during the process. You're also expected to backup the data folder before proceeding. Are you ready? (Yes/No)"
+ STDOUT.flush
+ answer=gets.chomp
+ if answer =~ /yes/i
+   puts "Start up to date process. Please be patient ..."
+ else
+   abort "You got it. Mission is successfully aborted. "
+ end
+ # Update sub-domain table
+ sd=Wmap::DomainTracker::SubDomain.instance
+ sd.update_from_host_store!
+ subs=sd.known_internet_sub_domains.keys
+ sd=nil
+
+ # Brute force sub-domains to detect sub-domain hosts
+ bruter=Wmap::DnsBruter.new
+ sub_hosts=bruter.brutes(subs).values.flatten
+
+ # Update primary host store
+ ph=Wmap::HostTracker::PrimaryHost.instance
+ ph.update_from_site_store!
+ ph.refresh_all
+ ph.save!
+ ph=nil
+
+ # Update host store
+ h=Wmap::HostTracker.instance
+ h.refresh_all
+ h.adds(sub_hosts)
+ h.save!
+ h=nil
+
+ # Update site store
+ st=Wmap::SiteTracker.instance
+ st.refresh_all
+ dt=Wmap::SiteTracker::DeactivatedSite.instance
+ ds=dt.known_sites.keys
+ st.adds(ds) #double-check the de-activated sites in case the site is back on-line again
+ st.save!
+ st=nil
+ dt=nil
+
+
+
+
data/bin/wadd ADDED
@@ -0,0 +1,25 @@
+ #!/usr/bin/env ruby
+ # Executable to add a new site into the tracking data repository
+ # This is useful as a self-correction mechanism to flag out unique website in a constant way
+ require "wmap"
+
+ def print_usage
+   puts "Program to add an entry into the local data repository. Usage: wadd [site]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: wadd #{ARGV[0]}","wadd",Log_dir+"wmap.log")
+
+ st=Wmap::SiteTracker.instance
+ abort "Incorrect program argument!" unless ARGV.length==1
+
+ # Evaluate the argument and update the data store accordingly
+ if st.is_site?(ARGV[0])
+   st.add(ARGV[0])
+   st.save!
+   st=nil
+ else
+   abort "Unknown argument format: #{ARGV[0]}"
+ end
data/bin/wadds ADDED
@@ -0,0 +1,26 @@
+ #!/usr/bin/env ruby
+ # Executable to bulk add sites into the tracking data repository
+ require "wmap"
+
+ def print_usage
+   puts "Program to add sites from a file into local data repository. Usage: wadds [file_sites]"
+ end
+
+ puts Wmap.banner
+ print_usage
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: wadds #{ARGV[0]}","wadds",Log_dir+"wmap.log")
+
+ st=Wmap::SiteTracker.instance
+ abort "Incorrect program argument!" unless ARGV.length==1 && File.exist?(ARGV[0])
+
+ # Evaluate the argument and update the data store accordingly
+ sites=st.file_2_list(ARGV[0]).map { |x| st.url_2_site(x) }
+ if sites.length > 0
+   news=st.adds(sites)
+   puts news
+   st.save! if news.length>0
+   st=nil
+ else
+   abort "No site entry found in file: #{ARGV[0]}. Please check your file format to ensure one site per line."
+ end
data/bin/wcheck ADDED
@@ -0,0 +1,28 @@
+ #!/usr/bin/env ruby
+ # Executable to check the current status of a new site
+ # This is useful as a self-correction mechanism to flag out unique website in a constant way
+ require "wmap"
+
+ def print_usage
+   puts "Program to quick check the status of a remote URL. Usage: wcheck [URL]"
+ end
+
+ puts Wmap.banner
+
+ Log_dir=File.dirname(__FILE__)+'/../logs/'
+ Wmap.wlog("Execute the command: wcheck #{ARGV[0]}","wcheck",Log_dir+"wmap.log")
+
+ checker=Wmap::UrlChecker.new(:verbose=>false)
+ unless ARGV.length==1
+   print_usage
+   abort "Incorrect program argument!"
+ end
+
+ # Evaluate the argument and update the data store accordingly
+ if checker.is_url?(ARGV[0])
+   status=checker.url_worker(ARGV[0])
+   puts status
+ else
+   print_usage
+   abort "Invalid URL format: #{ARGV[0]}"
+ end
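
As a usage note, the check-then-track flow of the executables above can also be scripted directly against the API. The sketch below is illustrative only and reuses calls shown in wcheck, wadd and wadds; the target URL is a placeholder.

  # Illustrative sketch: probe a URL the way wcheck does, then record its site
  # the way wadd does. The URL below is a placeholder.
  require "wmap"

  url = "https://www.example.com/login"
  checker = Wmap::UrlChecker.new(:verbose => false)
  if checker.is_url?(url)
    puts checker.url_worker(url)       # current status of the remote URL
    tracker = Wmap::SiteTracker.instance
    site = tracker.url_2_site(url)     # normalize the URL down to its site
    if tracker.is_site?(site)
      tracker.add(site)
      tracker.save!                    # persist the updated site store
    end
  end
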