wmap 2.7.7 → 2.8.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/wmap +19 -10
- data/dicts/tlds.txt +9 -30
- data/lib/wmap/cidr_tracker.rb +4 -3
- data/lib/wmap/domain_tracker.rb +2 -2
- data/lib/wmap/domain_tracker/sub_domain.rb +1 -1
- data/lib/wmap/host_tracker.rb +34 -21
- data/lib/wmap/site_tracker.rb +27 -7
- data/lib/wmap/site_tracker/wp_tracker.rb +1 -1
- data/lib/wmap/url_checker.rb +3 -3
- data/lib/wmap/url_crawler.rb +10 -22
- data/lib/wmap/url_crawler/adware_tag.rb +1 -1
- data/lib/wmap/utils/domain_root.rb +1 -1
- data/lib/wmap/utils/logger.rb +31 -34
- data/lib/wmap/utils/url_magic.rb +4 -2
- data/lib/wmap/utils/utils.rb +10 -12
- data/lib/wmap/utils/wp_detect.rb +5 -1
- data/test/cidr_tracker_test.rb +36 -0
- data/test/utils_test.rb +51 -46
- data/version.txt +2 -2
- data/wmap.gemspec +1 -1
- metadata +6 -9
- data/bin/RHPG +0 -107
- data/bin/wmaps +0 -23
- data/settings/tag_signatures +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6632168c88c35189b601d225ebbf99fddd034a561814e5fe34b2d57bb79c75c
|
4
|
+
data.tar.gz: 4beab7d92e6a5e4258d37dfa52a64f4edaf06d8c4213331cc9833d0be5cc70aa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 46482f94126bc1ad4af322cb23894e77ee5a0f85dc31741e25b75752be4a47b1a620c3d1a6f28786a2f53e37eecde42b571f011e770e4bacdaf00012d737ea9e
|
7
|
+
data.tar.gz: b5ed0e1af39d8b7910b0d77b5bde99461293a7e985d5e587405bbf929cae8bffa5a32a1cf31eb142f823a11057377537adecf9ee6fb7f475fb2252e9d3edc2b1
|
data/bin/wmap
CHANGED
@@ -13,7 +13,7 @@ parser = OptionParser.new do|opts|
|
|
13
13
|
opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
|
14
14
|
options[:data_dir] = data_dir;
|
15
15
|
end
|
16
|
-
opts.on('-t', '--target target', 'Web Mapper target') do |target|
|
16
|
+
opts.on('-t', '--target target', 'Web Mapper target / seed for discovery') do |target|
|
17
17
|
options[:target] = target;
|
18
18
|
end
|
19
19
|
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
|
@@ -29,6 +29,10 @@ parser.parse!
|
|
29
29
|
# print program banner
|
30
30
|
puts Wmap.banner
|
31
31
|
# print_usage unless options[:target]
|
32
|
+
unless options[:target]
|
33
|
+
puts "Usage: $ wmap -h"
|
34
|
+
exit 1
|
35
|
+
end
|
32
36
|
|
33
37
|
# Preparing - check out the working logs directory
|
34
38
|
if options[:data_dir]
|
@@ -129,6 +133,10 @@ Wmap.wlog(dis_urls.keys, "wmap", Log_dir+"discovered_urls.log") unless dis_urls.
|
|
129
133
|
Wmap.wlog(dis_sites.keys, "wmap", Log_dir+"discovered_sites.log") unless dis_sites.empty?
|
130
134
|
#crawler.wlog(c_start.keys,Log_dir+"crawler.log")
|
131
135
|
#crawler.wlog(c_done.keys,Log_dir+"crawler.log")
|
136
|
+
|
137
|
+
|
138
|
+
# Save the current discovery URLs only to a specific file, patched 07/23/2021
|
139
|
+
crawler.save_discovered_urls(Log_dir+"cur_urls.log")
|
132
140
|
crawler=nil
|
133
141
|
|
134
142
|
|
@@ -138,22 +146,23 @@ when nil,[]
|
|
138
146
|
puts "No new site found. There is no change to the site tracking data repository. "
|
139
147
|
else
|
140
148
|
puts "Automatically save the discovery results into the site tracking data repository: "
|
149
|
+
inventory = Wmap::SiteTracker.instance
|
141
150
|
if options[:target] && options[:data_dir]
|
142
151
|
puts "Start the SiteTracker with the optional directory setter. "
|
143
|
-
inventory=Wmap::SiteTracker.instance
|
144
152
|
inventory.data_dir = options[:data_dir]
|
153
|
+
#inventory.verbose = true
|
145
154
|
inventory.sites_file = inventory.data_dir + "/" + "sites"
|
146
155
|
inventory.load_site_stores_from_file(inventory.sites_file)
|
147
156
|
elsif options[:target]
|
148
157
|
puts "Start the SiteTracker. "
|
149
|
-
inventory=Wmap::SiteTracker.instance
|
150
158
|
else
|
151
159
|
abort "Error firing up SiteTracker instance!"
|
152
160
|
end
|
153
161
|
new_sites=inventory.adds(dis_sites.keys-["",nil])
|
154
|
-
|
162
|
+
puts "Newly discovery sties: #{new_sites}"
|
163
|
+
if options[:data_dir]
|
155
164
|
inventory.save!(inventory.sites_file)
|
156
|
-
|
165
|
+
else
|
157
166
|
inventory.save!
|
158
167
|
end
|
159
168
|
inventory=nil
|
@@ -162,17 +171,17 @@ end
|
|
162
171
|
|
163
172
|
|
164
173
|
# seventh step - update the hosts repository
|
174
|
+
puts "Invoke the HostTracker with optional directory setter."
|
175
|
+
host_tracker = Wmap::HostTracker.instance
|
165
176
|
if options[:target] && options[:data_dir]
|
166
|
-
puts "Invoke the HostTracker with
|
167
|
-
host_tracker = Wmap::HostTracker.instance
|
177
|
+
puts puts "Invoke the HostTracker with options: #{options[:data_dir]}, #{options[:target]}"
|
168
178
|
host_tracker.verbose=options[:verbose]
|
169
179
|
host_tracker.data_dir = options[:data_dir]
|
170
180
|
host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
|
171
181
|
host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
|
172
182
|
elsif options[:target]
|
173
|
-
puts puts "Invoke the HostTracker."
|
174
|
-
host_tracker
|
175
|
-
host_tracker.verbose=options[:verbose]
|
183
|
+
puts puts "Invoke the HostTracker with option: #{options[:target]}."
|
184
|
+
#host_tracker.verbose=options[:verbose]
|
176
185
|
else
|
177
186
|
abort "Error firing up HostTracker instance!"
|
178
187
|
end
|
data/dicts/tlds.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
2
|
-
# Version
|
2
|
+
# Version 2020033000, Last Updated Mon Mar 30 07:07:01 2020 UTC
|
3
3
|
AAA
|
4
4
|
AARP
|
5
5
|
ABARTH
|
@@ -16,7 +16,6 @@ ACCENTURE
|
|
16
16
|
ACCOUNTANT
|
17
17
|
ACCOUNTANTS
|
18
18
|
ACO
|
19
|
-
ACTIVE
|
20
19
|
ACTOR
|
21
20
|
AD
|
22
21
|
ADAC
|
@@ -146,7 +145,6 @@ BIZ
|
|
146
145
|
BJ
|
147
146
|
BLACK
|
148
147
|
BLACKFRIDAY
|
149
|
-
BLANCO
|
150
148
|
BLOCKBUSTER
|
151
149
|
BLOG
|
152
150
|
BLOOMBERG
|
@@ -155,7 +153,6 @@ BM
|
|
155
153
|
BMS
|
156
154
|
BMW
|
157
155
|
BN
|
158
|
-
BNL
|
159
156
|
BNPPARIBAS
|
160
157
|
BO
|
161
158
|
BOATS
|
@@ -214,7 +211,6 @@ CARE
|
|
214
211
|
CAREER
|
215
212
|
CAREERS
|
216
213
|
CARS
|
217
|
-
CARTIER
|
218
214
|
CASA
|
219
215
|
CASE
|
220
216
|
CASEIH
|
@@ -247,7 +243,6 @@ CHEAP
|
|
247
243
|
CHINTAI
|
248
244
|
CHRISTMAS
|
249
245
|
CHROME
|
250
|
-
CHRYSLER
|
251
246
|
CHURCH
|
252
247
|
CI
|
253
248
|
CIPRIANI
|
@@ -299,6 +294,7 @@ COUNTRY
|
|
299
294
|
COUPON
|
300
295
|
COUPONS
|
301
296
|
COURSES
|
297
|
+
CPA
|
302
298
|
CR
|
303
299
|
CREDIT
|
304
300
|
CREDITCARD
|
@@ -360,9 +356,7 @@ DNP
|
|
360
356
|
DO
|
361
357
|
DOCS
|
362
358
|
DOCTOR
|
363
|
-
DODGE
|
364
359
|
DOG
|
365
|
-
DOHA
|
366
360
|
DOMAINS
|
367
361
|
DOT
|
368
362
|
DOWNLOAD
|
@@ -371,7 +365,6 @@ DTV
|
|
371
365
|
DUBAI
|
372
366
|
DUCK
|
373
367
|
DUNLOP
|
374
|
-
DUNS
|
375
368
|
DUPONT
|
376
369
|
DURBAN
|
377
370
|
DVAG
|
@@ -392,7 +385,6 @@ ENERGY
|
|
392
385
|
ENGINEER
|
393
386
|
ENGINEERING
|
394
387
|
ENTERPRISES
|
395
|
-
EPOST
|
396
388
|
EPSON
|
397
389
|
EQUIPMENT
|
398
390
|
ER
|
@@ -408,7 +400,6 @@ EU
|
|
408
400
|
EUROVISION
|
409
401
|
EUS
|
410
402
|
EVENTS
|
411
|
-
EVERBANK
|
412
403
|
EXCHANGE
|
413
404
|
EXPERT
|
414
405
|
EXPOSED
|
@@ -488,6 +479,7 @@ GAME
|
|
488
479
|
GAMES
|
489
480
|
GAP
|
490
481
|
GARDEN
|
482
|
+
GAY
|
491
483
|
GB
|
492
484
|
GBIZ
|
493
485
|
GD
|
@@ -580,7 +572,6 @@ HOMEGOODS
|
|
580
572
|
HOMES
|
581
573
|
HOMESENSE
|
582
574
|
HONDA
|
583
|
-
HONEYWELL
|
584
575
|
HORSE
|
585
576
|
HOSPITAL
|
586
577
|
HOST
|
@@ -634,7 +625,6 @@ IQ
|
|
634
625
|
IR
|
635
626
|
IRISH
|
636
627
|
IS
|
637
|
-
ISELECT
|
638
628
|
ISMAILI
|
639
629
|
IST
|
640
630
|
ISTANBUL
|
@@ -699,12 +689,10 @@ KYOTO
|
|
699
689
|
KZ
|
700
690
|
LA
|
701
691
|
LACAIXA
|
702
|
-
LADBROKES
|
703
692
|
LAMBORGHINI
|
704
693
|
LAMER
|
705
694
|
LANCASTER
|
706
695
|
LANCIA
|
707
|
-
LANCOME
|
708
696
|
LAND
|
709
697
|
LANDROVER
|
710
698
|
LANXESS
|
@@ -725,7 +713,6 @@ LEGO
|
|
725
713
|
LEXUS
|
726
714
|
LGBT
|
727
715
|
LI
|
728
|
-
LIAISON
|
729
716
|
LIDL
|
730
717
|
LIFE
|
731
718
|
LIFEINSURANCE
|
@@ -744,6 +731,7 @@ LIVING
|
|
744
731
|
LIXIL
|
745
732
|
LK
|
746
733
|
LLC
|
734
|
+
LLP
|
747
735
|
LOAN
|
748
736
|
LOANS
|
749
737
|
LOCKER
|
@@ -819,7 +807,6 @@ MN
|
|
819
807
|
MO
|
820
808
|
MOBI
|
821
809
|
MOBILE
|
822
|
-
MOBILY
|
823
810
|
MODA
|
824
811
|
MOE
|
825
812
|
MOI
|
@@ -827,7 +814,6 @@ MOM
|
|
827
814
|
MONASH
|
828
815
|
MONEY
|
829
816
|
MONSTER
|
830
|
-
MOPAR
|
831
817
|
MORMON
|
832
818
|
MORTGAGE
|
833
819
|
MOSCOW
|
@@ -835,7 +821,6 @@ MOTO
|
|
835
821
|
MOTORCYCLES
|
836
822
|
MOV
|
837
823
|
MOVIE
|
838
|
-
MOVISTAR
|
839
824
|
MP
|
840
825
|
MQ
|
841
826
|
MR
|
@@ -854,7 +839,6 @@ MY
|
|
854
839
|
MZ
|
855
840
|
NA
|
856
841
|
NAB
|
857
|
-
NADEX
|
858
842
|
NAGOYA
|
859
843
|
NAME
|
860
844
|
NATIONWIDE
|
@@ -955,7 +939,6 @@ PHOTO
|
|
955
939
|
PHOTOGRAPHY
|
956
940
|
PHOTOS
|
957
941
|
PHYSIO
|
958
|
-
PIAGET
|
959
942
|
PICS
|
960
943
|
PICTET
|
961
944
|
PICTURES
|
@@ -1152,18 +1135,16 @@ SONG
|
|
1152
1135
|
SONY
|
1153
1136
|
SOY
|
1154
1137
|
SPACE
|
1155
|
-
SPIEGEL
|
1156
1138
|
SPORT
|
1157
1139
|
SPOT
|
1158
1140
|
SPREADBETTING
|
1159
1141
|
SR
|
1160
1142
|
SRL
|
1161
|
-
|
1143
|
+
SS
|
1162
1144
|
ST
|
1163
1145
|
STADA
|
1164
1146
|
STAPLES
|
1165
1147
|
STAR
|
1166
|
-
STARHUB
|
1167
1148
|
STATEBANK
|
1168
1149
|
STATEFARM
|
1169
1150
|
STC
|
@@ -1211,7 +1192,6 @@ TEAM
|
|
1211
1192
|
TECH
|
1212
1193
|
TECHNOLOGY
|
1213
1194
|
TEL
|
1214
|
-
TELEFONICA
|
1215
1195
|
TEMASEK
|
1216
1196
|
TENNIS
|
1217
1197
|
TEVA
|
@@ -1271,7 +1251,6 @@ TZ
|
|
1271
1251
|
UA
|
1272
1252
|
UBANK
|
1273
1253
|
UBS
|
1274
|
-
UCONNECT
|
1275
1254
|
UG
|
1276
1255
|
UK
|
1277
1256
|
UNICOM
|
@@ -1305,7 +1284,6 @@ VIP
|
|
1305
1284
|
VIRGIN
|
1306
1285
|
VISA
|
1307
1286
|
VISION
|
1308
|
-
VISTAPRINT
|
1309
1287
|
VIVA
|
1310
1288
|
VIVO
|
1311
1289
|
VLAANDEREN
|
@@ -1324,7 +1302,6 @@ WALMART
|
|
1324
1302
|
WALTER
|
1325
1303
|
WANG
|
1326
1304
|
WANGGOU
|
1327
|
-
WARMAN
|
1328
1305
|
WATCH
|
1329
1306
|
WATCHES
|
1330
1307
|
WEATHER
|
@@ -1452,13 +1429,14 @@ XN--MGBA7C0BBN0A
|
|
1452
1429
|
XN--MGBAAKC7DVF
|
1453
1430
|
XN--MGBAAM7A8H
|
1454
1431
|
XN--MGBAB2BD
|
1432
|
+
XN--MGBAH1A3HJKRD
|
1455
1433
|
XN--MGBAI9AZGQP6J
|
1456
1434
|
XN--MGBAYH7GPA
|
1457
|
-
XN--MGBB9FBPOB
|
1458
1435
|
XN--MGBBH1A
|
1459
1436
|
XN--MGBBH1A71E
|
1460
1437
|
XN--MGBC0A9AZCG
|
1461
1438
|
XN--MGBCA7DZDO
|
1439
|
+
XN--MGBCPQ6GPA1A
|
1462
1440
|
XN--MGBERP4A5D4AR
|
1463
1441
|
XN--MGBGU82A
|
1464
1442
|
XN--MGBI4ECEXP
|
@@ -1484,8 +1462,10 @@ XN--P1AI
|
|
1484
1462
|
XN--PBT977C
|
1485
1463
|
XN--PGBS0DH
|
1486
1464
|
XN--PSSY2U
|
1465
|
+
XN--Q7CE6A
|
1487
1466
|
XN--Q9JYB4C
|
1488
1467
|
XN--QCKA1PMC
|
1468
|
+
XN--QXA6A
|
1489
1469
|
XN--QXAM
|
1490
1470
|
XN--RHQV96G
|
1491
1471
|
XN--ROVU88B
|
@@ -1530,7 +1510,6 @@ ZAPPOS
|
|
1530
1510
|
ZARA
|
1531
1511
|
ZERO
|
1532
1512
|
ZIP
|
1533
|
-
ZIPPO
|
1534
1513
|
ZM
|
1535
1514
|
ZONE
|
1536
1515
|
ZUERICH
|
data/lib/wmap/cidr_tracker.rb
CHANGED
@@ -18,7 +18,7 @@ class Wmap::CidrTracker
|
|
18
18
|
@verbose=params.fetch(:verbose, false)
|
19
19
|
@data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
|
20
20
|
Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
|
21
|
-
@cidr_seeds=params.fetch(:cidr_seeds, @data_dir + 'cidrs')
|
21
|
+
@cidr_seeds=params.fetch(:cidr_seeds, @data_dir + '/' + 'cidrs')
|
22
22
|
File.write(@cidr_seeds, "") unless File.exist?(@cidr_seeds)
|
23
23
|
load_cidr_blks_from_file(@cidr_seeds)
|
24
24
|
end
|
@@ -99,8 +99,8 @@ class Wmap::CidrTracker
|
|
99
99
|
#@known_cidr_blks_asce_index=NetAddr.sort(@known_cidr_blks.keys, :Desc=>false)
|
100
100
|
@known_cidr_blks_asce_index=@known_cidr_blks.keys.sort
|
101
101
|
@known_cidr_blks_desc_index=@known_cidr_blks_asce_index.reverse
|
102
|
-
|
103
|
-
|
102
|
+
rescue => ee
|
103
|
+
puts "Exception on method #{__method__}: #{ee}" # if @verbose
|
104
104
|
end
|
105
105
|
|
106
106
|
# 'setter' to remove an entry from the CIDR store @known_cidr_blks
|
@@ -167,6 +167,7 @@ class Wmap::CidrTracker
|
|
167
167
|
known = cidr4.contains?(ip+'/32')
|
168
168
|
break if known
|
169
169
|
end
|
170
|
+
return known
|
170
171
|
rescue => ee
|
171
172
|
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
172
173
|
return false
|
data/lib/wmap/domain_tracker.rb
CHANGED
@@ -169,7 +169,7 @@ class Wmap::DomainTracker
|
|
169
169
|
end
|
170
170
|
end
|
171
171
|
@known_internet_domains.merge!(results)
|
172
|
-
puts "Done loading entries."
|
172
|
+
puts "Done loading domain entries."
|
173
173
|
return results
|
174
174
|
else
|
175
175
|
puts "Error: no entry is loaded. Please check your list and try again."
|
@@ -262,7 +262,7 @@ class Wmap::DomainTracker
|
|
262
262
|
when "Wmap::DomainTracker::SubDomain"
|
263
263
|
return @known_internet_sub_domains.key?(domain)
|
264
264
|
else
|
265
|
-
return
|
265
|
+
return false
|
266
266
|
end
|
267
267
|
rescue => ee
|
268
268
|
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
@@ -70,7 +70,7 @@ class SubDomain < Wmap::DomainTracker
|
|
70
70
|
end
|
71
71
|
end
|
72
72
|
@known_internet_sub_domains.merge!(results)
|
73
|
-
puts "Done loading entries."
|
73
|
+
puts "Done loading sub_domain entries."
|
74
74
|
return results
|
75
75
|
else
|
76
76
|
puts "Error: no entry is loaded. Please check your list and try again."
|
data/lib/wmap/host_tracker.rb
CHANGED
@@ -27,13 +27,13 @@ class Wmap::HostTracker
|
|
27
27
|
@max_parallel=params.fetch(:max_parallel, 40)
|
28
28
|
# Initialize the instance variables
|
29
29
|
File.write(@hosts_file, "") unless File.exist?(@hosts_file)
|
30
|
-
load_known_hosts_from_file(@hosts_file)
|
30
|
+
@known_hosts=load_known_hosts_from_file(@hosts_file)
|
31
31
|
end
|
32
32
|
|
33
33
|
# Setter to load the known hosts from the local hosts file into a class instance
|
34
34
|
def load_known_hosts_from_file (f_hosts=@hosts_file)
|
35
35
|
puts "Loading local hosts from file: #{f_hosts} ..." if @verbose
|
36
|
-
|
36
|
+
known_hosts=Hash.new
|
37
37
|
@alias = Hash.new
|
38
38
|
File.write(f_hosts, "") unless File.exist?(f_hosts)
|
39
39
|
f=File.open(f_hosts, 'r')
|
@@ -43,11 +43,11 @@ class Wmap::HostTracker
|
|
43
43
|
key=entry[0].downcase
|
44
44
|
value=entry[1]
|
45
45
|
puts "Loading key value pair: #{key} - #{value}" if @verbose
|
46
|
-
|
47
|
-
|
46
|
+
known_hosts[key] = Hash.new unless known_hosts.key?(key)
|
47
|
+
known_hosts[key]= value
|
48
48
|
# For reverse host lookup
|
49
|
-
|
50
|
-
|
49
|
+
known_hosts[value] = Hash.new unless known_hosts.key?(value)
|
50
|
+
known_hosts[value] = key
|
51
51
|
# Count the number of alias for the recorded IP
|
52
52
|
if @alias.key?(value)
|
53
53
|
@alias[value]+=1
|
@@ -56,11 +56,12 @@ class Wmap::HostTracker
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
f.close
|
59
|
-
return @known_hosts
|
60
|
-
rescue => ee
|
61
|
-
puts "Exception on method #{__method__}: #{ee}"
|
62
59
|
return known_hosts
|
60
|
+
#rescue => ee
|
61
|
+
# puts "Exception on method #{__method__}: #{ee}"
|
62
|
+
# return known_hosts
|
63
63
|
end
|
64
|
+
alias_method :load, :load_known_hosts_from_file
|
64
65
|
|
65
66
|
# Save the current local hosts hash table into a (random) data repository file
|
66
67
|
def save_known_hosts_to_file!(f_hosts=@hosts_file)
|
@@ -96,30 +97,42 @@ class Wmap::HostTracker
|
|
96
97
|
puts "Exception on method #{__method__}: #{ee}"
|
97
98
|
end
|
98
99
|
|
100
|
+
# determine if host is part of trusted (known) root domains
|
101
|
+
def is_trusted?(host)
|
102
|
+
puts "Determin if host #{host} is part of trusted root domains" if @verbose
|
103
|
+
root=get_domain_root(host)
|
104
|
+
puts "Domain root: #{root}" if @verbose
|
105
|
+
domain_tracker=Wmap::DomainTracker.instance
|
106
|
+
domain_tracker.data_dir=@data_dir
|
107
|
+
domain_tracker.domains_file = domain_tracker.data_dir + "/" + "domains"
|
108
|
+
domain_tracker.load_domains_from_file
|
109
|
+
if domain_tracker.domain_known?(root)
|
110
|
+
domain_tracker=nil
|
111
|
+
return true
|
112
|
+
else
|
113
|
+
domain_tracker=nil
|
114
|
+
return false
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
99
118
|
# Setter to add a host entry to the cache one at a time
|
100
119
|
def add(host)
|
101
120
|
puts "Add entry to the local host repository: #{host}"
|
102
121
|
host=host.strip.downcase unless host.nil?
|
122
|
+
root=get_domain_root(host)
|
103
123
|
unless @known_hosts.key?(host)
|
104
124
|
ip=host_2_ip(host)
|
105
125
|
record=Hash.new
|
106
126
|
if is_ip?(ip)
|
107
127
|
# filter host to known domains only
|
108
|
-
|
109
|
-
puts "Domain root: #{root}" if @verbose
|
110
|
-
domain_tracker=Wmap::DomainTracker.instance
|
111
|
-
domain_tracker.data_dir=@data_dir
|
112
|
-
domain_tracker.domains_file = domain_tracker.data_dir + "domains"
|
113
|
-
domain_tracker.load_domains_from_file
|
114
|
-
if domain_tracker.domain_known?(root)
|
115
|
-
domain_tracker=nil
|
128
|
+
if is_trusted?(host)
|
116
129
|
record[host]=ip
|
117
130
|
record[ip]=host
|
118
131
|
puts "Host data repository entry loaded: #{host} <=> #{ip}"
|
119
132
|
# Replace instance with the class variable to avoid potential race condition under parallel engine
|
120
133
|
# add additional logic to update the sub-domain table as well, 02/10/2014
|
121
134
|
sub=get_sub_domain(host)
|
122
|
-
if sub!=
|
135
|
+
if sub!=nil
|
123
136
|
tracker=Wmap::DomainTracker::SubDomain.instance
|
124
137
|
tracker.data_dir=@data_dir
|
125
138
|
tracker.sub_domains_file = tracker.data_dir + "sub_domains"
|
@@ -142,8 +155,8 @@ class Wmap::HostTracker
|
|
142
155
|
else
|
143
156
|
puts "Host is already exist. Skip: #{host}"
|
144
157
|
end
|
145
|
-
rescue => ee
|
146
|
-
|
158
|
+
#rescue => ee
|
159
|
+
# puts "Exception on method #{__method__}: #{ee}" if @verbose
|
147
160
|
end
|
148
161
|
|
149
162
|
# Setter to add host entry to the local hosts in batch (from an array)
|
@@ -164,7 +177,7 @@ class Wmap::HostTracker
|
|
164
177
|
end
|
165
178
|
end
|
166
179
|
@known_hosts.merge!(results)
|
167
|
-
puts "Done loading entries."
|
180
|
+
puts "Done loading host entries."
|
168
181
|
return results
|
169
182
|
else
|
170
183
|
puts "Error: empty list - no entry is loaded. Please check your input list and try again."
|
data/lib/wmap/site_tracker.rb
CHANGED
@@ -77,7 +77,9 @@ class Wmap::SiteTracker
|
|
77
77
|
f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
|
78
78
|
f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
|
79
79
|
@known_sites.keys.sort.map do |key|
|
80
|
-
|
80
|
+
if is_trusted?(key)
|
81
|
+
f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
|
82
|
+
end
|
81
83
|
end
|
82
84
|
f.close
|
83
85
|
puts "site store table is successfully saved: #{file_sites}"
|
@@ -94,6 +96,24 @@ class Wmap::SiteTracker
|
|
94
96
|
puts "Exception on method #{__method__}: #{ee}"
|
95
97
|
end
|
96
98
|
|
99
|
+
# determine site is trusted based on the known domains
|
100
|
+
def is_trusted?(site)
|
101
|
+
trusted=false
|
102
|
+
host=url_2_host(site)
|
103
|
+
root=get_domain_root(host)
|
104
|
+
domain_tracker=Wmap::DomainTracker.instance
|
105
|
+
domain_tracker.data_dir=@data_dir
|
106
|
+
domain_tracker.domains_file=@data_dir + "/" + "domains"
|
107
|
+
File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
|
108
|
+
domain_tracker.load_domains_from_file(domain_tracker.domains_file)
|
109
|
+
trusted=domain_tracker.domain_known?(root)
|
110
|
+
domain_tracker=nil
|
111
|
+
return trusted
|
112
|
+
rescue => ee
|
113
|
+
puts "Exception on method #{__method__}: #{ee}"
|
114
|
+
return trusted
|
115
|
+
end
|
116
|
+
|
97
117
|
# Setter to add site entry to the cache one at a time
|
98
118
|
def add(site)
|
99
119
|
puts "Add entry to the site store: #{site}"
|
@@ -132,6 +152,10 @@ class Wmap::SiteTracker
|
|
132
152
|
end
|
133
153
|
end
|
134
154
|
# add record only if trusted
|
155
|
+
host_tracker = Wmap::HostTracker.instance
|
156
|
+
host_tracker.data_dir= @data_dir
|
157
|
+
host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
|
158
|
+
host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
|
135
159
|
if trusted
|
136
160
|
# Add logic to check site status before adding it
|
137
161
|
checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
|
@@ -144,10 +168,6 @@ class Wmap::SiteTracker
|
|
144
168
|
raise "Site is currently down. Skip #{site}" if checker['code']==10000
|
145
169
|
end
|
146
170
|
raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
|
147
|
-
host_tracker = Wmap::HostTracker.instance
|
148
|
-
host_tracker.data_dir= @data_dir
|
149
|
-
host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
|
150
|
-
host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
|
151
171
|
# Update the local host table when necessary
|
152
172
|
if is_ip?(host)
|
153
173
|
# Case #1: Trusted site contains IP
|
@@ -341,8 +361,8 @@ class Wmap::SiteTracker
|
|
341
361
|
else
|
342
362
|
puts "Error: no entry is loaded. Please check your list and try again."
|
343
363
|
end
|
344
|
-
|
345
|
-
|
364
|
+
rescue => ee
|
365
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
346
366
|
end
|
347
367
|
alias_method :dels, :bulk_delete
|
348
368
|
|
@@ -174,7 +174,7 @@ class WpTracker < Wmap::SiteTracker
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
@known_wp_sites.merge!(results)
|
177
|
-
puts "Done loading entries."
|
177
|
+
puts "Done loading wp entries."
|
178
178
|
return results
|
179
179
|
else
|
180
180
|
puts "Error: no entry is loaded. Please check your list and try again."
|
data/lib/wmap/url_checker.rb
CHANGED
@@ -88,9 +88,9 @@ class Wmap::UrlChecker
|
|
88
88
|
checker['redirection']=nil
|
89
89
|
checker['timestamp']=timestamp
|
90
90
|
return checker
|
91
|
-
rescue Exception => ee
|
92
|
-
|
93
|
-
|
91
|
+
#rescue Exception => ee
|
92
|
+
# puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
|
93
|
+
# return nil
|
94
94
|
end
|
95
95
|
alias_method :check, :url_worker
|
96
96
|
|
data/lib/wmap/url_crawler.rb
CHANGED
@@ -17,7 +17,8 @@ require "parallel"
|
|
17
17
|
class Wmap::UrlCrawler
|
18
18
|
include Wmap::Utils
|
19
19
|
|
20
|
-
attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel,
|
20
|
+
attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, \
|
21
|
+
:verbose, :data_dir, :user_agent
|
21
22
|
attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
|
22
23
|
# Global variable used to store the combined result of all the forked child processes. Note that class variable
|
23
24
|
# would not be able to pass the result due the limitation of IO Pipe communication mechanism used by 'parallel' fork manager
|
@@ -35,13 +36,16 @@ class Wmap::UrlCrawler
|
|
35
36
|
@crawl_depth=params.fetch(:crawl_depth, 4)
|
36
37
|
@crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
|
37
38
|
@max_parallel=params.fetch(:max_parallel, 40)
|
39
|
+
@user_agent=params.fetch(:user_agent, "OWASP WMAP Spider")
|
38
40
|
# Discovered data store
|
39
41
|
@discovered_urls_by_crawler=Hash.new
|
40
42
|
@visited_urls_by_crawler=Hash.new
|
41
43
|
@crawl_start=Hash.new
|
42
44
|
@crawl_done=Hash.new
|
43
45
|
Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
|
44
|
-
@
|
46
|
+
@log_dir=@data_dir + "/logs/"
|
47
|
+
Dir.mkdir(@log_dir) unless Dir.exist?(@log_dir)
|
48
|
+
@log_file=@log_dir + "crawler.log"
|
45
49
|
end
|
46
50
|
|
47
51
|
# Pre-crawl profiler, to be used for network profiling to maximize the crawler performance.
|
@@ -86,7 +90,7 @@ class Wmap::UrlCrawler
|
|
86
90
|
|
87
91
|
# The worker instance of crawler who perform the labour work
|
88
92
|
def crawl_worker(url0)
|
89
|
-
puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and
|
93
|
+
puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and discovery contents."
|
90
94
|
# Input URL sanity check first
|
91
95
|
if is_url?(url0)
|
92
96
|
host=url_2_host(url0)
|
@@ -216,14 +220,14 @@ class Wmap::UrlCrawler
|
|
216
220
|
alias_method :crawl_file, :crawl_workers_on_file
|
217
221
|
|
218
222
|
# Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
|
219
|
-
def open_url(url)
|
223
|
+
def open_url(url,user_agent=@user_agent)
|
220
224
|
puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
|
221
225
|
if url =~ /http\:/i
|
222
226
|
# patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
|
223
|
-
url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
|
227
|
+
url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
|
224
228
|
#url_object = open(url)
|
225
229
|
elsif url =~ /https\:/i
|
226
|
-
url_object = open(url
|
230
|
+
url_object = open(url, :ssl_verify_mode=>0, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
|
227
231
|
#url_object = open(url,:ssl_verify_mode => 0)
|
228
232
|
else
|
229
233
|
raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
|
@@ -258,22 +262,6 @@ class Wmap::UrlCrawler
|
|
258
262
|
return nil
|
259
263
|
end
|
260
264
|
|
261
|
-
=begin
|
262
|
-
# Wrapper for the Nokogiri DOM parser
|
263
|
-
def parse_html(html_body)
|
264
|
-
begin
|
265
|
-
#puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
|
266
|
-
doc = Nokogiri::HTML(html_body)
|
267
|
-
#puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
|
268
|
-
#puts "doc: #{doc}" if @verbose
|
269
|
-
return doc
|
270
|
-
rescue => ee
|
271
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
272
|
-
return nil
|
273
|
-
end
|
274
|
-
end
|
275
|
-
=end
|
276
|
-
|
277
265
|
# Search 'current_url' and return found URLs under the same domain
|
278
266
|
def find_urls_on_page(doc, current_url)
|
279
267
|
puts "Search and return URLs within the doc: #{doc}" if @verbose
|
data/lib/wmap/utils/logger.rb
CHANGED
@@ -8,46 +8,43 @@
|
|
8
8
|
|
9
9
|
|
10
10
|
module Wmap
|
11
|
-
module Utils
|
11
|
+
module Utils
|
12
12
|
# Module to log debugging and other messages
|
13
|
-
module Logger
|
13
|
+
module Logger
|
14
14
|
extend self
|
15
15
|
# Append information into the log file for the trouble-shooting purpose
|
16
16
|
def wlog (obj, agent, file)
|
17
17
|
puts "Writing #{obj} into log file: #{file}" if @verbose
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
obj.map { |x| @@f.write " #{x}\n" }
|
28
|
-
puts "The list is successfully saved into the log file: #{file} " if @verbose
|
29
|
-
end
|
30
|
-
when Hash
|
31
|
-
if obj.length >= 0
|
32
|
-
@@f.write "#{timestamp}: #{agent}: \n"
|
33
|
-
obj.each_value { |value| @@f.write " #{value}\n" }
|
34
|
-
puts "The hash is successfully saved into the log file: #{file} " if @verbose
|
35
|
-
end
|
36
|
-
when String
|
37
|
-
@@f.write "#{timestamp}: #{agent}: #{obj}\n"
|
38
|
-
puts "The string is successfully saved into the log file: #{file} " if @verbose
|
39
|
-
else
|
40
|
-
#do nothing
|
41
|
-
puts "Un-handled exception on: #{obj}" if @verbose
|
18
|
+
return false if obj.nil?
|
19
|
+
@@f=File.open(file,'a')
|
20
|
+
timestamp=Time.now
|
21
|
+
case obj
|
22
|
+
when Array
|
23
|
+
if obj.size >= 0
|
24
|
+
@@f.write "#{timestamp}: #{agent}: \n"
|
25
|
+
obj.map { |x| @@f.write " #{x}\n" }
|
26
|
+
puts "The list is successfully saved into the log file: #{file} " if @verbose
|
42
27
|
end
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
28
|
+
when Hash
|
29
|
+
if obj.length >= 0
|
30
|
+
@@f.write "#{timestamp}: #{agent}: \n"
|
31
|
+
obj.each_value { |value| @@f.write " #{value}\n" }
|
32
|
+
puts "The hash is successfully saved into the log file: #{file} " if @verbose
|
33
|
+
end
|
34
|
+
when String
|
35
|
+
@@f.write "#{timestamp}: #{agent}: #{obj}\n"
|
36
|
+
puts "The string is successfully saved into the log file: #{file} " if @verbose
|
37
|
+
else
|
38
|
+
#do nothing
|
39
|
+
puts "Un-handled exception on: #{obj}" if @verbose
|
40
|
+
end
|
41
|
+
@@f.close
|
42
|
+
return true
|
43
|
+
rescue => ee
|
44
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
45
|
+
return false
|
49
46
|
end
|
50
|
-
|
51
|
-
end
|
47
|
+
|
48
|
+
end
|
52
49
|
end
|
53
50
|
end
|
data/lib/wmap/utils/url_magic.rb
CHANGED
@@ -15,6 +15,7 @@ module Wmap
|
|
15
15
|
|
16
16
|
# set hard stop limit of http time-out to 15 seconds (15000 ms), in order to avoid severe performance penalty for certain 'weird' site(s)
|
17
17
|
Max_http_timeout=15000
|
18
|
+
User_agent = "OWASP WMAP Spider"
|
18
19
|
|
19
20
|
# Simple sanity check on a 'claimed' URL string.
|
20
21
|
def is_url?(url)
|
@@ -377,7 +378,8 @@ module Wmap
|
|
377
378
|
|
378
379
|
# Given an URL, open the page, then return the DOM text from a normal user perspective
|
379
380
|
def open_page(url)
|
380
|
-
args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe,
|
381
|
+
args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, \
|
382
|
+
read_timeout: Max_http_timeout/1000, "User-Agent"=>User_agent}
|
381
383
|
doc = Nokogiri::HTML(open(url, args))
|
382
384
|
if doc.text.include?("Please enable JavaScript to view the page content")
|
383
385
|
puts "Invoke headless chrome through webdriver ..." if @verbose
|
@@ -385,7 +387,7 @@ module Wmap
|
|
385
387
|
#driver = Selenium::WebDriver.for :chrome
|
386
388
|
# http://watir.com/guides/chrome/
|
387
389
|
args = ['--ignore-certificate-errors', '--disable-popup-blocking', '--disable-translate', '--disk-cache-size 8192']
|
388
|
-
browser = Watir::Browser.new :chrome, headless: true,
|
390
|
+
browser = Watir::Browser.new :chrome, headless: true, switches: %w[--user-agent=OWASP\ WMAP\ Spider]
|
389
391
|
browser.goto(url)
|
390
392
|
sleep(2) # wait for the loading
|
391
393
|
doc = Nokogiri::HTML(browser.html)
|
data/lib/wmap/utils/utils.rb
CHANGED
@@ -159,20 +159,18 @@ module Wmap
|
|
159
159
|
# Simple test a host string format. Return true if it contains a valid internet domain sub-string. Note: Don't be confused with another method 'valid_dns_record?', which is a stricter and time-consuming test on the DNS server for a resolvable internet host.
|
160
160
|
def is_fqdn? (host)
|
161
161
|
puts "Validate the host-name format is valid: #{host}" if @verbose
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
else
|
170
|
-
return false
|
171
|
-
end
|
172
|
-
rescue => ee
|
173
|
-
puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
|
162
|
+
return false if is_ip?(host) or is_url?(host)
|
163
|
+
domain=get_domain_root(host)
|
164
|
+
if domain.nil?
|
165
|
+
return false
|
166
|
+
elsif is_domain_root?(domain)
|
167
|
+
return true
|
168
|
+
else
|
174
169
|
return false
|
175
170
|
end
|
171
|
+
# rescue => ee
|
172
|
+
# puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
|
173
|
+
# return false
|
176
174
|
end
|
177
175
|
alias_method :is_host?, :is_fqdn?
|
178
176
|
|
data/lib/wmap/utils/wp_detect.rb
CHANGED
@@ -239,7 +239,11 @@ module Wmap
|
|
239
239
|
if tag.to_s.include?(pattern)
|
240
240
|
puts tag.to_s if @verbose
|
241
241
|
k=nil
|
242
|
-
|
242
|
+
if tag.to_s.scan(/[\d+\.]+\d+/).first =~ /\d+\./
|
243
|
+
return tag.to_s.scan(/[\d+\.]+\d+/).first
|
244
|
+
else
|
245
|
+
return nil
|
246
|
+
end
|
243
247
|
end
|
244
248
|
end
|
245
249
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#--
|
2
|
+
# Wmap
|
3
|
+
#
|
4
|
+
# A pure Ruby library for the Internet web application discovery and tracking.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
|
7
|
+
#++
|
8
|
+
# Unit Test File for Wmap::DomainTracker.instance class
|
9
|
+
|
10
|
+
require "minitest/autorun"
|
11
|
+
require "Wmap"
|
12
|
+
|
13
|
+
class CidrTrackerTest < MiniTest::Unit::TestCase
|
14
|
+
include Wmap::Utils
|
15
|
+
|
16
|
+
def test_cidr_add
|
17
|
+
w = Wmap::CidrTracker.new
|
18
|
+
w.add("192.168.1.0/24")
|
19
|
+
assert_equal true, w.known_cidr_blks.key?("192.168.1.0/24")
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_cidr_delete
|
23
|
+
w = Wmap::CidrTracker.new
|
24
|
+
w.add("10.0.0.0/8")
|
25
|
+
w.delete("10.0.0.0/8")
|
26
|
+
assert_equal false, w.known_cidr_blks.key?("10.0.0.0/8")
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_ip_trusted?
|
30
|
+
w = Wmap::CidrTracker.new
|
31
|
+
w.add("192.168.1.0/24")
|
32
|
+
assert_equal true, w.ip_trusted?("192.168.1.1")
|
33
|
+
assert_equal true, w.ip_trusted?("192.168.1.255")
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
data/test/utils_test.rb
CHANGED
@@ -12,7 +12,7 @@ require "Wmap"
|
|
12
12
|
|
13
13
|
class UtilsTest < MiniTest::Unit::TestCase
|
14
14
|
include Wmap::Utils
|
15
|
-
|
15
|
+
|
16
16
|
def test_sld_domain_conversion
|
17
17
|
assert_equal "yahoo.com", get_domain_root("yahoo.com")
|
18
18
|
end
|
@@ -28,75 +28,75 @@ class UtilsTest < MiniTest::Unit::TestCase
|
|
28
28
|
def test_is_domain_root_case_1?
|
29
29
|
assert_equal false, is_domain_root?("www.yahoo.co.uk")
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
def test_is_domain_root_case_2?
|
33
33
|
assert_equal true, is_domain_root?("yahoo.co.uk")
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
def test_get_sub_domain
|
37
37
|
assert_equal "mail.yahoo.co.uk", get_sub_domain("www.mail.yahoo.co.uk")
|
38
38
|
end
|
39
39
|
|
40
40
|
def test_is_url_case_1?
|
41
41
|
assert_equal true, is_url?("http://www.mail.yahoo.co.uk/")
|
42
|
-
end
|
42
|
+
end
|
43
43
|
|
44
44
|
def test_is_url_case_2?
|
45
45
|
assert_equal true, is_url?("https://www.mail.yahoo.co.uk/")
|
46
|
-
end
|
46
|
+
end
|
47
47
|
|
48
48
|
def test_is_url_case_3?
|
49
49
|
assert_equal false, is_url?("http://www.mail.yahoo.uii/")
|
50
|
-
end
|
50
|
+
end
|
51
51
|
|
52
52
|
def test_is_url_case_4?
|
53
53
|
assert_equal false, is_url?("http:\\www.mail.yahoo.co.uk")
|
54
|
-
end
|
55
|
-
|
54
|
+
end
|
55
|
+
|
56
56
|
def test_is_ssl?
|
57
57
|
assert_equal false, is_ssl?("http://www.mail.yahoo.co.uk/")
|
58
|
-
end
|
59
|
-
|
58
|
+
end
|
59
|
+
|
60
60
|
def test_is_site?
|
61
61
|
assert_equal false, is_site?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
62
|
-
end
|
63
|
-
|
62
|
+
end
|
63
|
+
|
64
64
|
def test_url_2_host
|
65
65
|
assert_equal "login.yahoo.com", url_2_host("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
66
|
-
end
|
67
|
-
|
66
|
+
end
|
67
|
+
|
68
68
|
def test_url_2_site_case_1
|
69
69
|
assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
70
|
-
end
|
70
|
+
end
|
71
71
|
|
72
72
|
def test_url_2_site_case_2
|
73
73
|
assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
74
|
-
end
|
74
|
+
end
|
75
75
|
|
76
76
|
def test_url_2_site_case_3
|
77
77
|
assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com#.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
78
|
-
end
|
79
|
-
|
78
|
+
end
|
79
|
+
|
80
80
|
def test_url_2_path
|
81
81
|
assert_equal "/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", url_2_path("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
82
|
-
end
|
82
|
+
end
|
83
83
|
|
84
84
|
def test_urls_on_same_domain?
|
85
85
|
assert_equal true, urls_on_same_domain?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", "https://us-mg4.mail.yahoo.com/neo/launch?.rand=8hjd08hc6t1lq")
|
86
|
-
end
|
86
|
+
end
|
87
87
|
|
88
88
|
def test_host_2_url_case_1
|
89
89
|
assert_equal "https://mail.yahoo.com/", host_2_url("mail.yahoo.com",443)
|
90
|
-
end
|
90
|
+
end
|
91
91
|
|
92
92
|
def test_host_2_url_case_2
|
93
93
|
assert_equal "http://mail.yahoo.com/", host_2_url("mail.yahoo.com")
|
94
|
-
end
|
95
|
-
|
94
|
+
end
|
95
|
+
|
96
96
|
def test_make_absolute
|
97
97
|
assert_equal "http://games.yahoo.com/game/the-magic-snowman-flash.html", make_absolute("http://games.yahoo.com/","game/the-magic-snowman-flash.html")
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
def test_create_absolute_url_from_base
|
101
101
|
assert_equal "http://images.search.yahoo.com/search/images?p=raiders", create_absolute_url_from_base("http://images.search.yahoo.com/images","/search/images?p=raiders")
|
102
102
|
end
|
@@ -108,7 +108,7 @@ class UtilsTest < MiniTest::Unit::TestCase
|
|
108
108
|
def test_normalize_url_case_1
|
109
109
|
assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/./images/search/images?p=raiders")
|
110
110
|
end
|
111
|
-
|
111
|
+
|
112
112
|
def test_normalize_url_case_2
|
113
113
|
assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/../images/../search/images?p=raiders")
|
114
114
|
end
|
@@ -116,53 +116,58 @@ class UtilsTest < MiniTest::Unit::TestCase
|
|
116
116
|
def test_normalize_url_case_3
|
117
117
|
assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com./../images/../search/images?p=raiders")
|
118
118
|
end
|
119
|
-
|
119
|
+
|
120
120
|
def test_is_ip_case_1?
|
121
121
|
assert_equal false, is_ip?("256.2.3.1")
|
122
|
-
end
|
122
|
+
end
|
123
123
|
|
124
124
|
def test_is_ip_case_2?
|
125
125
|
assert_equal false, is_ip?("25.2.3.1.22")
|
126
|
-
end
|
126
|
+
end
|
127
127
|
|
128
128
|
def test_is_ip_case_3?
|
129
129
|
assert_equal true, is_ip?("196.168.230.1")
|
130
|
-
end
|
130
|
+
end
|
131
131
|
|
132
132
|
def test_is_fqdn_case_1?
|
133
133
|
assert_equal true, is_fqdn?("images.search.yahoo.com")
|
134
|
-
end
|
134
|
+
end
|
135
135
|
|
136
136
|
def test_is_fqdn_case_2?
|
137
137
|
assert_equal true, is_fqdn?("yahoo.com")
|
138
|
-
end
|
139
|
-
|
138
|
+
end
|
139
|
+
|
140
140
|
def test_is_fqdn_case_3?
|
141
|
-
|
142
|
-
|
143
|
-
|
141
|
+
# according to latest tlds list - http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
142
|
+
assert_equal true, is_fqdn?("images.search.yahoo")
|
143
|
+
end
|
144
|
+
|
144
145
|
def test_is_fqdn_case_4?
|
145
146
|
assert_equal false, is_fqdn?("images")
|
146
|
-
end
|
147
|
-
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_is_fqdn_case_5?
|
150
|
+
assert_equal false, is_fqdn?("images.search.gargle")
|
151
|
+
end
|
152
|
+
|
148
153
|
def test_is_cidr_case_1?
|
149
154
|
assert_equal false, is_cidr?("196.168.230.1")
|
150
|
-
end
|
155
|
+
end
|
151
156
|
|
152
157
|
def test_is_cidr_case_2?
|
153
158
|
assert_equal false, is_cidr?("196.168.2.257/12")
|
154
|
-
end
|
155
|
-
|
159
|
+
end
|
160
|
+
|
156
161
|
def test_is_cidr_case_3?
|
157
162
|
assert_equal true, is_cidr?("196.168.2.25/12")
|
158
|
-
end
|
159
|
-
|
163
|
+
end
|
164
|
+
|
160
165
|
def test_cidr_2_ips
|
161
166
|
assert_equal ["192.168.1.1"], cidr_2_ips("192.168.1.1/32")
|
162
|
-
end
|
163
|
-
|
167
|
+
end
|
168
|
+
|
164
169
|
def test_sort_ips
|
165
170
|
assert_equal ["192.168.1.1", "192.168.1.2", "192.168.2.1"], sort_ips(["192.168.1.2", "192.168.2.1","192.168.1.1"])
|
166
|
-
end
|
167
|
-
|
171
|
+
end
|
172
|
+
|
168
173
|
end
|
data/version.txt
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
###############################################################################
|
4
4
|
package = wmap
|
5
5
|
# wmap version 2.0 == web_discovery version 1.5.3
|
6
|
-
version = 2.
|
7
|
-
date =
|
6
|
+
version = 2.8.3
|
7
|
+
date = 2021-07-26
|
8
8
|
|
9
9
|
author = Sam (Yang) Li
|
10
10
|
email = yang.li@owasp.org
|
data/wmap.gemspec
CHANGED
@@ -36,7 +36,7 @@ Gem::Specification.new do |s|
|
|
36
36
|
s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
|
37
37
|
s.email = info["email"]
|
38
38
|
s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","trusts","distrust","run_tests"]
|
39
|
-
s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","
|
39
|
+
s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports", "LICENSE.txt",
|
40
40
|
"version.txt","README.md", "wmap.gemspec"]
|
41
41
|
s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['dicts/*']
|
42
42
|
#s.homepage = "none"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.8.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam (Yang) Li
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dnsruby
|
@@ -234,7 +234,6 @@ files:
|
|
234
234
|
- LICENSE.txt
|
235
235
|
- README.md
|
236
236
|
- TODO
|
237
|
-
- bin/RHPG
|
238
237
|
- bin/deprime
|
239
238
|
- bin/distrust
|
240
239
|
- bin/googleBot
|
@@ -251,7 +250,6 @@ files:
|
|
251
250
|
- bin/wdel
|
252
251
|
- bin/wdump
|
253
252
|
- bin/wmap
|
254
|
-
- bin/wmaps
|
255
253
|
- bin/wscan
|
256
254
|
- demos/bruter.rb
|
257
255
|
- demos/dns_brutes.rb
|
@@ -308,7 +306,7 @@ files:
|
|
308
306
|
- settings/discovery_ports
|
309
307
|
- settings/google_keywords.txt
|
310
308
|
- settings/google_locator.txt
|
311
|
-
-
|
309
|
+
- test/cidr_tracker_test.rb
|
312
310
|
- test/domain_tracker_test.rb
|
313
311
|
- test/utils_test.rb
|
314
312
|
- version.txt
|
@@ -335,9 +333,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
335
333
|
- !ruby/object:Gem::Version
|
336
334
|
version: '0'
|
337
335
|
requirements: []
|
338
|
-
|
339
|
-
|
340
|
-
signing_key:
|
336
|
+
rubygems_version: 3.0.9
|
337
|
+
signing_key:
|
341
338
|
specification_version: 4
|
342
339
|
summary: A pure Ruby web application and service discovery API.
|
343
340
|
test_files: []
|
data/bin/RHPG
DELETED
@@ -1,107 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# Executable to lookup then merge site tech details into the RHPG asset spreadsheet in CSV format only
|
3
|
-
#
|
4
|
-
## Usage: RHPG [RHPG.csv]
|
5
|
-
require "wmap"
|
6
|
-
require "csv"
|
7
|
-
include Wmap::Utils
|
8
|
-
|
9
|
-
def print_usage
|
10
|
-
puts "Program to lookup then merge the site details into RHPG asset spreadsheet. \nUsage: RHPG [RHPG.csv]"
|
11
|
-
end
|
12
|
-
|
13
|
-
# Lookup the site store for a domain; then return the fingger print info of the site
|
14
|
-
def site_tracker_lookup(domain)
|
15
|
-
tracker=Wmap::SiteTracker.instance
|
16
|
-
tracker.verbose=false
|
17
|
-
#first order search
|
18
|
-
tracker.known_sites.each do |key,val|
|
19
|
-
if key.include?(domain.strip.downcase) && key.include?("https")
|
20
|
-
tracker=nil
|
21
|
-
return [key] + val.values
|
22
|
-
end
|
23
|
-
end
|
24
|
-
#second order search
|
25
|
-
tracker.known_sites.each do |key,val|
|
26
|
-
if key.include?(domain.strip.downcase)
|
27
|
-
tracker=nil
|
28
|
-
return [key] + val.values
|
29
|
-
end
|
30
|
-
end
|
31
|
-
tracker=nil
|
32
|
-
return [nil]*9
|
33
|
-
end
|
34
|
-
|
35
|
-
# look up the wp site data store for a domain; then return the wp finger print info: [is_wp?,wp_ver]
|
36
|
-
def wp_tracker_lookup(domain)
|
37
|
-
tracker=Wmap::WpTracker.new(:verbose=>false)
|
38
|
-
# first order
|
39
|
-
tracker.known_wp_sites.each do |key,val|
|
40
|
-
if key.include?(domain.strip.downcase) && val
|
41
|
-
ver=tracker.wp_ver(key)
|
42
|
-
tracker=nil
|
43
|
-
return [val,ver]
|
44
|
-
end
|
45
|
-
end
|
46
|
-
# second order
|
47
|
-
tracker.known_wp_sites.each do |key,val|
|
48
|
-
if key.include?(domain.strip.downcase) && key.include?("https") && val
|
49
|
-
tracker=nil
|
50
|
-
return [val,nil]
|
51
|
-
end
|
52
|
-
end
|
53
|
-
# third order
|
54
|
-
tracker.known_wp_sites.each do |key,val|
|
55
|
-
if key.include?(domain.strip.downcase)
|
56
|
-
tracker=nil
|
57
|
-
return [val,nil]
|
58
|
-
end
|
59
|
-
end
|
60
|
-
tracker=nil
|
61
|
-
return [nil,nil]
|
62
|
-
end
|
63
|
-
|
64
|
-
# perform the wpscan on a site
|
65
|
-
def wpscan(domain)
|
66
|
-
url=site_tracker_lookup(domain)[0]
|
67
|
-
return nil if url.nil?
|
68
|
-
if url.include?("https")
|
69
|
-
command="wpscan --disable-tls-checks --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
|
70
|
-
else
|
71
|
-
command="wpscan --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
|
72
|
-
end
|
73
|
-
system(command)
|
74
|
-
end
|
75
|
-
|
76
|
-
puts Wmap.banner
|
77
|
-
print_usage
|
78
|
-
|
79
|
-
# open output file to write
|
80
|
-
CSV.open("output.csv", "wb") do |csv|
|
81
|
-
cnt=1
|
82
|
-
# open RHPG input file to read
|
83
|
-
CSV.foreach(ARGV[0]) do |row|
|
84
|
-
puts "Processing row #{cnt}"
|
85
|
-
#puts row.inspect
|
86
|
-
my_row=Array.new
|
87
|
-
if cnt > 1
|
88
|
-
if is_domain?(row[0])
|
89
|
-
=begin
|
90
|
-
if row[3] =~ /Keep/i && row[3] != /Redirect/i
|
91
|
-
unless File.exist?(row[0]+".wpscan")
|
92
|
-
wpscan(row[0])
|
93
|
-
end
|
94
|
-
end
|
95
|
-
=end
|
96
|
-
my_row = row + site_tracker_lookup(row[0]) + wp_tracker_lookup(row[0])
|
97
|
-
else
|
98
|
-
my_row = row + [nil]*10
|
99
|
-
end
|
100
|
-
else
|
101
|
-
my_row = row + ["Website","Primary IP","Port","Hosting Status","Server","Response Code","MD5 Finger-print","Redirection","Timestamp", "WordPress", "WordPress Version"]
|
102
|
-
end
|
103
|
-
cnt+=1
|
104
|
-
csv << my_row
|
105
|
-
end
|
106
|
-
puts "All done. "
|
107
|
-
end
|
data/bin/wmaps
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# script to automate the new site discovery through by crawling all unique sites in the site store
|
3
|
-
require "wmap"
|
4
|
-
require "parallel"
|
5
|
-
|
6
|
-
def wmap_worker(domain)
|
7
|
-
cmd = "wmap " + domain
|
8
|
-
puts "wmap discovery on domain: ", domain
|
9
|
-
system(cmd)
|
10
|
-
end
|
11
|
-
|
12
|
-
|
13
|
-
tracker=Wmap::DomainTracker.instance
|
14
|
-
Parallel.map(tracker.known_internet_domains.keys, :in_processes => 10) { |target|
|
15
|
-
puts "Working on #{target} ..." if @verbose
|
16
|
-
wmap_worker(target)
|
17
|
-
}
|
18
|
-
=begin
|
19
|
-
tracker.known_internet_domains.keys.map do |domain|
|
20
|
-
wmap_worker(domain)
|
21
|
-
end
|
22
|
-
=end
|
23
|
-
tracker=nil
|