wmap 2.7.7 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/wmap +19 -10
- data/dicts/tlds.txt +9 -30
- data/lib/wmap/cidr_tracker.rb +4 -3
- data/lib/wmap/domain_tracker.rb +2 -2
- data/lib/wmap/domain_tracker/sub_domain.rb +1 -1
- data/lib/wmap/host_tracker.rb +34 -21
- data/lib/wmap/site_tracker.rb +27 -7
- data/lib/wmap/site_tracker/wp_tracker.rb +1 -1
- data/lib/wmap/url_checker.rb +3 -3
- data/lib/wmap/url_crawler.rb +10 -22
- data/lib/wmap/url_crawler/adware_tag.rb +1 -1
- data/lib/wmap/utils/domain_root.rb +1 -1
- data/lib/wmap/utils/logger.rb +31 -34
- data/lib/wmap/utils/url_magic.rb +4 -2
- data/lib/wmap/utils/utils.rb +10 -12
- data/lib/wmap/utils/wp_detect.rb +5 -1
- data/test/cidr_tracker_test.rb +36 -0
- data/test/utils_test.rb +51 -46
- data/version.txt +2 -2
- data/wmap.gemspec +1 -1
- metadata +6 -9
- data/bin/RHPG +0 -107
- data/bin/wmaps +0 -23
- data/settings/tag_signatures +0 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a6632168c88c35189b601d225ebbf99fddd034a561814e5fe34b2d57bb79c75c
|
|
4
|
+
data.tar.gz: 4beab7d92e6a5e4258d37dfa52a64f4edaf06d8c4213331cc9833d0be5cc70aa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 46482f94126bc1ad4af322cb23894e77ee5a0f85dc31741e25b75752be4a47b1a620c3d1a6f28786a2f53e37eecde42b571f011e770e4bacdaf00012d737ea9e
|
|
7
|
+
data.tar.gz: b5ed0e1af39d8b7910b0d77b5bde99461293a7e985d5e587405bbf929cae8bffa5a32a1cf31eb142f823a11057377537adecf9ee6fb7f475fb2252e9d3edc2b1
|
data/bin/wmap
CHANGED
|
@@ -13,7 +13,7 @@ parser = OptionParser.new do|opts|
|
|
|
13
13
|
opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
|
|
14
14
|
options[:data_dir] = data_dir;
|
|
15
15
|
end
|
|
16
|
-
opts.on('-t', '--target target', 'Web Mapper target') do |target|
|
|
16
|
+
opts.on('-t', '--target target', 'Web Mapper target / seed for discovery') do |target|
|
|
17
17
|
options[:target] = target;
|
|
18
18
|
end
|
|
19
19
|
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
|
|
@@ -29,6 +29,10 @@ parser.parse!
|
|
|
29
29
|
# print program banner
|
|
30
30
|
puts Wmap.banner
|
|
31
31
|
# print_usage unless options[:target]
|
|
32
|
+
unless options[:target]
|
|
33
|
+
puts "Usage: $ wmap -h"
|
|
34
|
+
exit 1
|
|
35
|
+
end
|
|
32
36
|
|
|
33
37
|
# Preparing - check out the working logs directory
|
|
34
38
|
if options[:data_dir]
|
|
@@ -129,6 +133,10 @@ Wmap.wlog(dis_urls.keys, "wmap", Log_dir+"discovered_urls.log") unless dis_urls.
|
|
|
129
133
|
Wmap.wlog(dis_sites.keys, "wmap", Log_dir+"discovered_sites.log") unless dis_sites.empty?
|
|
130
134
|
#crawler.wlog(c_start.keys,Log_dir+"crawler.log")
|
|
131
135
|
#crawler.wlog(c_done.keys,Log_dir+"crawler.log")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Save the current discovery urls only to a specific file, patched 07/23/2021
|
|
139
|
+
crawler.save_discovered_urls(Log_dir+"cur_urls.log")
|
|
132
140
|
crawler=nil
|
|
133
141
|
|
|
134
142
|
|
|
@@ -138,22 +146,23 @@ when nil,[]
|
|
|
138
146
|
puts "No new site found. There is no change to the site tracking data repository. "
|
|
139
147
|
else
|
|
140
148
|
puts "Automatically save the discovery results into the site tracking data repository: "
|
|
149
|
+
inventory = Wmap::SiteTracker.instance
|
|
141
150
|
if options[:target] && options[:data_dir]
|
|
142
151
|
puts "Start the SiteTracker with the optional directory setter. "
|
|
143
|
-
inventory=Wmap::SiteTracker.instance
|
|
144
152
|
inventory.data_dir = options[:data_dir]
|
|
153
|
+
#inventory.verbose = true
|
|
145
154
|
inventory.sites_file = inventory.data_dir + "/" + "sites"
|
|
146
155
|
inventory.load_site_stores_from_file(inventory.sites_file)
|
|
147
156
|
elsif options[:target]
|
|
148
157
|
puts "Start the SiteTracker. "
|
|
149
|
-
inventory=Wmap::SiteTracker.instance
|
|
150
158
|
else
|
|
151
159
|
abort "Error firing up SiteTracker instance!"
|
|
152
160
|
end
|
|
153
161
|
new_sites=inventory.adds(dis_sites.keys-["",nil])
|
|
154
|
-
|
|
162
|
+
puts "Newly discovery sties: #{new_sites}"
|
|
163
|
+
if options[:data_dir]
|
|
155
164
|
inventory.save!(inventory.sites_file)
|
|
156
|
-
|
|
165
|
+
else
|
|
157
166
|
inventory.save!
|
|
158
167
|
end
|
|
159
168
|
inventory=nil
|
|
@@ -162,17 +171,17 @@ end
|
|
|
162
171
|
|
|
163
172
|
|
|
164
173
|
# seventh step - update the hosts repository
|
|
174
|
+
puts "Invoke the HostTracker with optional directory setter."
|
|
175
|
+
host_tracker = Wmap::HostTracker.instance
|
|
165
176
|
if options[:target] && options[:data_dir]
|
|
166
|
-
puts "Invoke the HostTracker with
|
|
167
|
-
host_tracker = Wmap::HostTracker.instance
|
|
177
|
+
puts puts "Invoke the HostTracker with options: #{options[:data_dir]}, #{options[:target]}"
|
|
168
178
|
host_tracker.verbose=options[:verbose]
|
|
169
179
|
host_tracker.data_dir = options[:data_dir]
|
|
170
180
|
host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
|
|
171
181
|
host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
|
|
172
182
|
elsif options[:target]
|
|
173
|
-
puts puts "Invoke the HostTracker."
|
|
174
|
-
host_tracker
|
|
175
|
-
host_tracker.verbose=options[:verbose]
|
|
183
|
+
puts puts "Invoke the HostTracker with option: #{options[:target]}."
|
|
184
|
+
#host_tracker.verbose=options[:verbose]
|
|
176
185
|
else
|
|
177
186
|
abort "Error firing up HostTracker instance!"
|
|
178
187
|
end
|
data/dicts/tlds.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
|
2
|
-
# Version
|
|
2
|
+
# Version 2020033000, Last Updated Mon Mar 30 07:07:01 2020 UTC
|
|
3
3
|
AAA
|
|
4
4
|
AARP
|
|
5
5
|
ABARTH
|
|
@@ -16,7 +16,6 @@ ACCENTURE
|
|
|
16
16
|
ACCOUNTANT
|
|
17
17
|
ACCOUNTANTS
|
|
18
18
|
ACO
|
|
19
|
-
ACTIVE
|
|
20
19
|
ACTOR
|
|
21
20
|
AD
|
|
22
21
|
ADAC
|
|
@@ -146,7 +145,6 @@ BIZ
|
|
|
146
145
|
BJ
|
|
147
146
|
BLACK
|
|
148
147
|
BLACKFRIDAY
|
|
149
|
-
BLANCO
|
|
150
148
|
BLOCKBUSTER
|
|
151
149
|
BLOG
|
|
152
150
|
BLOOMBERG
|
|
@@ -155,7 +153,6 @@ BM
|
|
|
155
153
|
BMS
|
|
156
154
|
BMW
|
|
157
155
|
BN
|
|
158
|
-
BNL
|
|
159
156
|
BNPPARIBAS
|
|
160
157
|
BO
|
|
161
158
|
BOATS
|
|
@@ -214,7 +211,6 @@ CARE
|
|
|
214
211
|
CAREER
|
|
215
212
|
CAREERS
|
|
216
213
|
CARS
|
|
217
|
-
CARTIER
|
|
218
214
|
CASA
|
|
219
215
|
CASE
|
|
220
216
|
CASEIH
|
|
@@ -247,7 +243,6 @@ CHEAP
|
|
|
247
243
|
CHINTAI
|
|
248
244
|
CHRISTMAS
|
|
249
245
|
CHROME
|
|
250
|
-
CHRYSLER
|
|
251
246
|
CHURCH
|
|
252
247
|
CI
|
|
253
248
|
CIPRIANI
|
|
@@ -299,6 +294,7 @@ COUNTRY
|
|
|
299
294
|
COUPON
|
|
300
295
|
COUPONS
|
|
301
296
|
COURSES
|
|
297
|
+
CPA
|
|
302
298
|
CR
|
|
303
299
|
CREDIT
|
|
304
300
|
CREDITCARD
|
|
@@ -360,9 +356,7 @@ DNP
|
|
|
360
356
|
DO
|
|
361
357
|
DOCS
|
|
362
358
|
DOCTOR
|
|
363
|
-
DODGE
|
|
364
359
|
DOG
|
|
365
|
-
DOHA
|
|
366
360
|
DOMAINS
|
|
367
361
|
DOT
|
|
368
362
|
DOWNLOAD
|
|
@@ -371,7 +365,6 @@ DTV
|
|
|
371
365
|
DUBAI
|
|
372
366
|
DUCK
|
|
373
367
|
DUNLOP
|
|
374
|
-
DUNS
|
|
375
368
|
DUPONT
|
|
376
369
|
DURBAN
|
|
377
370
|
DVAG
|
|
@@ -392,7 +385,6 @@ ENERGY
|
|
|
392
385
|
ENGINEER
|
|
393
386
|
ENGINEERING
|
|
394
387
|
ENTERPRISES
|
|
395
|
-
EPOST
|
|
396
388
|
EPSON
|
|
397
389
|
EQUIPMENT
|
|
398
390
|
ER
|
|
@@ -408,7 +400,6 @@ EU
|
|
|
408
400
|
EUROVISION
|
|
409
401
|
EUS
|
|
410
402
|
EVENTS
|
|
411
|
-
EVERBANK
|
|
412
403
|
EXCHANGE
|
|
413
404
|
EXPERT
|
|
414
405
|
EXPOSED
|
|
@@ -488,6 +479,7 @@ GAME
|
|
|
488
479
|
GAMES
|
|
489
480
|
GAP
|
|
490
481
|
GARDEN
|
|
482
|
+
GAY
|
|
491
483
|
GB
|
|
492
484
|
GBIZ
|
|
493
485
|
GD
|
|
@@ -580,7 +572,6 @@ HOMEGOODS
|
|
|
580
572
|
HOMES
|
|
581
573
|
HOMESENSE
|
|
582
574
|
HONDA
|
|
583
|
-
HONEYWELL
|
|
584
575
|
HORSE
|
|
585
576
|
HOSPITAL
|
|
586
577
|
HOST
|
|
@@ -634,7 +625,6 @@ IQ
|
|
|
634
625
|
IR
|
|
635
626
|
IRISH
|
|
636
627
|
IS
|
|
637
|
-
ISELECT
|
|
638
628
|
ISMAILI
|
|
639
629
|
IST
|
|
640
630
|
ISTANBUL
|
|
@@ -699,12 +689,10 @@ KYOTO
|
|
|
699
689
|
KZ
|
|
700
690
|
LA
|
|
701
691
|
LACAIXA
|
|
702
|
-
LADBROKES
|
|
703
692
|
LAMBORGHINI
|
|
704
693
|
LAMER
|
|
705
694
|
LANCASTER
|
|
706
695
|
LANCIA
|
|
707
|
-
LANCOME
|
|
708
696
|
LAND
|
|
709
697
|
LANDROVER
|
|
710
698
|
LANXESS
|
|
@@ -725,7 +713,6 @@ LEGO
|
|
|
725
713
|
LEXUS
|
|
726
714
|
LGBT
|
|
727
715
|
LI
|
|
728
|
-
LIAISON
|
|
729
716
|
LIDL
|
|
730
717
|
LIFE
|
|
731
718
|
LIFEINSURANCE
|
|
@@ -744,6 +731,7 @@ LIVING
|
|
|
744
731
|
LIXIL
|
|
745
732
|
LK
|
|
746
733
|
LLC
|
|
734
|
+
LLP
|
|
747
735
|
LOAN
|
|
748
736
|
LOANS
|
|
749
737
|
LOCKER
|
|
@@ -819,7 +807,6 @@ MN
|
|
|
819
807
|
MO
|
|
820
808
|
MOBI
|
|
821
809
|
MOBILE
|
|
822
|
-
MOBILY
|
|
823
810
|
MODA
|
|
824
811
|
MOE
|
|
825
812
|
MOI
|
|
@@ -827,7 +814,6 @@ MOM
|
|
|
827
814
|
MONASH
|
|
828
815
|
MONEY
|
|
829
816
|
MONSTER
|
|
830
|
-
MOPAR
|
|
831
817
|
MORMON
|
|
832
818
|
MORTGAGE
|
|
833
819
|
MOSCOW
|
|
@@ -835,7 +821,6 @@ MOTO
|
|
|
835
821
|
MOTORCYCLES
|
|
836
822
|
MOV
|
|
837
823
|
MOVIE
|
|
838
|
-
MOVISTAR
|
|
839
824
|
MP
|
|
840
825
|
MQ
|
|
841
826
|
MR
|
|
@@ -854,7 +839,6 @@ MY
|
|
|
854
839
|
MZ
|
|
855
840
|
NA
|
|
856
841
|
NAB
|
|
857
|
-
NADEX
|
|
858
842
|
NAGOYA
|
|
859
843
|
NAME
|
|
860
844
|
NATIONWIDE
|
|
@@ -955,7 +939,6 @@ PHOTO
|
|
|
955
939
|
PHOTOGRAPHY
|
|
956
940
|
PHOTOS
|
|
957
941
|
PHYSIO
|
|
958
|
-
PIAGET
|
|
959
942
|
PICS
|
|
960
943
|
PICTET
|
|
961
944
|
PICTURES
|
|
@@ -1152,18 +1135,16 @@ SONG
|
|
|
1152
1135
|
SONY
|
|
1153
1136
|
SOY
|
|
1154
1137
|
SPACE
|
|
1155
|
-
SPIEGEL
|
|
1156
1138
|
SPORT
|
|
1157
1139
|
SPOT
|
|
1158
1140
|
SPREADBETTING
|
|
1159
1141
|
SR
|
|
1160
1142
|
SRL
|
|
1161
|
-
|
|
1143
|
+
SS
|
|
1162
1144
|
ST
|
|
1163
1145
|
STADA
|
|
1164
1146
|
STAPLES
|
|
1165
1147
|
STAR
|
|
1166
|
-
STARHUB
|
|
1167
1148
|
STATEBANK
|
|
1168
1149
|
STATEFARM
|
|
1169
1150
|
STC
|
|
@@ -1211,7 +1192,6 @@ TEAM
|
|
|
1211
1192
|
TECH
|
|
1212
1193
|
TECHNOLOGY
|
|
1213
1194
|
TEL
|
|
1214
|
-
TELEFONICA
|
|
1215
1195
|
TEMASEK
|
|
1216
1196
|
TENNIS
|
|
1217
1197
|
TEVA
|
|
@@ -1271,7 +1251,6 @@ TZ
|
|
|
1271
1251
|
UA
|
|
1272
1252
|
UBANK
|
|
1273
1253
|
UBS
|
|
1274
|
-
UCONNECT
|
|
1275
1254
|
UG
|
|
1276
1255
|
UK
|
|
1277
1256
|
UNICOM
|
|
@@ -1305,7 +1284,6 @@ VIP
|
|
|
1305
1284
|
VIRGIN
|
|
1306
1285
|
VISA
|
|
1307
1286
|
VISION
|
|
1308
|
-
VISTAPRINT
|
|
1309
1287
|
VIVA
|
|
1310
1288
|
VIVO
|
|
1311
1289
|
VLAANDEREN
|
|
@@ -1324,7 +1302,6 @@ WALMART
|
|
|
1324
1302
|
WALTER
|
|
1325
1303
|
WANG
|
|
1326
1304
|
WANGGOU
|
|
1327
|
-
WARMAN
|
|
1328
1305
|
WATCH
|
|
1329
1306
|
WATCHES
|
|
1330
1307
|
WEATHER
|
|
@@ -1452,13 +1429,14 @@ XN--MGBA7C0BBN0A
|
|
|
1452
1429
|
XN--MGBAAKC7DVF
|
|
1453
1430
|
XN--MGBAAM7A8H
|
|
1454
1431
|
XN--MGBAB2BD
|
|
1432
|
+
XN--MGBAH1A3HJKRD
|
|
1455
1433
|
XN--MGBAI9AZGQP6J
|
|
1456
1434
|
XN--MGBAYH7GPA
|
|
1457
|
-
XN--MGBB9FBPOB
|
|
1458
1435
|
XN--MGBBH1A
|
|
1459
1436
|
XN--MGBBH1A71E
|
|
1460
1437
|
XN--MGBC0A9AZCG
|
|
1461
1438
|
XN--MGBCA7DZDO
|
|
1439
|
+
XN--MGBCPQ6GPA1A
|
|
1462
1440
|
XN--MGBERP4A5D4AR
|
|
1463
1441
|
XN--MGBGU82A
|
|
1464
1442
|
XN--MGBI4ECEXP
|
|
@@ -1484,8 +1462,10 @@ XN--P1AI
|
|
|
1484
1462
|
XN--PBT977C
|
|
1485
1463
|
XN--PGBS0DH
|
|
1486
1464
|
XN--PSSY2U
|
|
1465
|
+
XN--Q7CE6A
|
|
1487
1466
|
XN--Q9JYB4C
|
|
1488
1467
|
XN--QCKA1PMC
|
|
1468
|
+
XN--QXA6A
|
|
1489
1469
|
XN--QXAM
|
|
1490
1470
|
XN--RHQV96G
|
|
1491
1471
|
XN--ROVU88B
|
|
@@ -1530,7 +1510,6 @@ ZAPPOS
|
|
|
1530
1510
|
ZARA
|
|
1531
1511
|
ZERO
|
|
1532
1512
|
ZIP
|
|
1533
|
-
ZIPPO
|
|
1534
1513
|
ZM
|
|
1535
1514
|
ZONE
|
|
1536
1515
|
ZUERICH
|
data/lib/wmap/cidr_tracker.rb
CHANGED
|
@@ -18,7 +18,7 @@ class Wmap::CidrTracker
|
|
|
18
18
|
@verbose=params.fetch(:verbose, false)
|
|
19
19
|
@data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
|
|
20
20
|
Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
|
|
21
|
-
@cidr_seeds=params.fetch(:cidr_seeds, @data_dir + 'cidrs')
|
|
21
|
+
@cidr_seeds=params.fetch(:cidr_seeds, @data_dir + '/' + 'cidrs')
|
|
22
22
|
File.write(@cidr_seeds, "") unless File.exist?(@cidr_seeds)
|
|
23
23
|
load_cidr_blks_from_file(@cidr_seeds)
|
|
24
24
|
end
|
|
@@ -99,8 +99,8 @@ class Wmap::CidrTracker
|
|
|
99
99
|
#@known_cidr_blks_asce_index=NetAddr.sort(@known_cidr_blks.keys, :Desc=>false)
|
|
100
100
|
@known_cidr_blks_asce_index=@known_cidr_blks.keys.sort
|
|
101
101
|
@known_cidr_blks_desc_index=@known_cidr_blks_asce_index.reverse
|
|
102
|
-
|
|
103
|
-
|
|
102
|
+
rescue => ee
|
|
103
|
+
puts "Exception on method #{__method__}: #{ee}" # if @verbose
|
|
104
104
|
end
|
|
105
105
|
|
|
106
106
|
# 'setter' to remove an entry from the CIDR store @known_cidr_blks
|
|
@@ -167,6 +167,7 @@ class Wmap::CidrTracker
|
|
|
167
167
|
known = cidr4.contains?(ip+'/32')
|
|
168
168
|
break if known
|
|
169
169
|
end
|
|
170
|
+
return known
|
|
170
171
|
rescue => ee
|
|
171
172
|
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
|
172
173
|
return false
|
data/lib/wmap/domain_tracker.rb
CHANGED
|
@@ -169,7 +169,7 @@ class Wmap::DomainTracker
|
|
|
169
169
|
end
|
|
170
170
|
end
|
|
171
171
|
@known_internet_domains.merge!(results)
|
|
172
|
-
puts "Done loading entries."
|
|
172
|
+
puts "Done loading domain entries."
|
|
173
173
|
return results
|
|
174
174
|
else
|
|
175
175
|
puts "Error: no entry is loaded. Please check your list and try again."
|
|
@@ -262,7 +262,7 @@ class Wmap::DomainTracker
|
|
|
262
262
|
when "Wmap::DomainTracker::SubDomain"
|
|
263
263
|
return @known_internet_sub_domains.key?(domain)
|
|
264
264
|
else
|
|
265
|
-
return
|
|
265
|
+
return false
|
|
266
266
|
end
|
|
267
267
|
rescue => ee
|
|
268
268
|
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
|
@@ -70,7 +70,7 @@ class SubDomain < Wmap::DomainTracker
|
|
|
70
70
|
end
|
|
71
71
|
end
|
|
72
72
|
@known_internet_sub_domains.merge!(results)
|
|
73
|
-
puts "Done loading entries."
|
|
73
|
+
puts "Done loading sub_domain entries."
|
|
74
74
|
return results
|
|
75
75
|
else
|
|
76
76
|
puts "Error: no entry is loaded. Please check your list and try again."
|
data/lib/wmap/host_tracker.rb
CHANGED
|
@@ -27,13 +27,13 @@ class Wmap::HostTracker
|
|
|
27
27
|
@max_parallel=params.fetch(:max_parallel, 40)
|
|
28
28
|
# Initialize the instance variables
|
|
29
29
|
File.write(@hosts_file, "") unless File.exist?(@hosts_file)
|
|
30
|
-
load_known_hosts_from_file(@hosts_file)
|
|
30
|
+
@known_hosts=load_known_hosts_from_file(@hosts_file)
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
# Setter to load the known hosts from the local hosts file into a class instance
|
|
34
34
|
def load_known_hosts_from_file (f_hosts=@hosts_file)
|
|
35
35
|
puts "Loading local hosts from file: #{f_hosts} ..." if @verbose
|
|
36
|
-
|
|
36
|
+
known_hosts=Hash.new
|
|
37
37
|
@alias = Hash.new
|
|
38
38
|
File.write(f_hosts, "") unless File.exist?(f_hosts)
|
|
39
39
|
f=File.open(f_hosts, 'r')
|
|
@@ -43,11 +43,11 @@ class Wmap::HostTracker
|
|
|
43
43
|
key=entry[0].downcase
|
|
44
44
|
value=entry[1]
|
|
45
45
|
puts "Loading key value pair: #{key} - #{value}" if @verbose
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
known_hosts[key] = Hash.new unless known_hosts.key?(key)
|
|
47
|
+
known_hosts[key]= value
|
|
48
48
|
# For reverse host lookup
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
known_hosts[value] = Hash.new unless known_hosts.key?(value)
|
|
50
|
+
known_hosts[value] = key
|
|
51
51
|
# Count the number of alias for the recorded IP
|
|
52
52
|
if @alias.key?(value)
|
|
53
53
|
@alias[value]+=1
|
|
@@ -56,11 +56,12 @@ class Wmap::HostTracker
|
|
|
56
56
|
end
|
|
57
57
|
end
|
|
58
58
|
f.close
|
|
59
|
-
return @known_hosts
|
|
60
|
-
rescue => ee
|
|
61
|
-
puts "Exception on method #{__method__}: #{ee}"
|
|
62
59
|
return known_hosts
|
|
60
|
+
#rescue => ee
|
|
61
|
+
# puts "Exception on method #{__method__}: #{ee}"
|
|
62
|
+
# return known_hosts
|
|
63
63
|
end
|
|
64
|
+
alias_method :load, :load_known_hosts_from_file
|
|
64
65
|
|
|
65
66
|
# Save the current local hosts hash table into a (random) data repository file
|
|
66
67
|
def save_known_hosts_to_file!(f_hosts=@hosts_file)
|
|
@@ -96,30 +97,42 @@ class Wmap::HostTracker
|
|
|
96
97
|
puts "Exception on method #{__method__}: #{ee}"
|
|
97
98
|
end
|
|
98
99
|
|
|
100
|
+
# determine if host is part of trusted (known) root domains
|
|
101
|
+
def is_trusted?(host)
|
|
102
|
+
puts "Determin if host #{host} is part of trusted root domains" if @verbose
|
|
103
|
+
root=get_domain_root(host)
|
|
104
|
+
puts "Domain root: #{root}" if @verbose
|
|
105
|
+
domain_tracker=Wmap::DomainTracker.instance
|
|
106
|
+
domain_tracker.data_dir=@data_dir
|
|
107
|
+
domain_tracker.domains_file = domain_tracker.data_dir + "/" + "domains"
|
|
108
|
+
domain_tracker.load_domains_from_file
|
|
109
|
+
if domain_tracker.domain_known?(root)
|
|
110
|
+
domain_tracker=nil
|
|
111
|
+
return true
|
|
112
|
+
else
|
|
113
|
+
domain_tracker=nil
|
|
114
|
+
return false
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
99
118
|
# Setter to add host entry to the cache one at a time
|
|
100
119
|
def add(host)
|
|
101
120
|
puts "Add entry to the local host repository: #{host}"
|
|
102
121
|
host=host.strip.downcase unless host.nil?
|
|
122
|
+
root=get_domain_root(host)
|
|
103
123
|
unless @known_hosts.key?(host)
|
|
104
124
|
ip=host_2_ip(host)
|
|
105
125
|
record=Hash.new
|
|
106
126
|
if is_ip?(ip)
|
|
107
127
|
# filter host to known domains only
|
|
108
|
-
|
|
109
|
-
puts "Domain root: #{root}" if @verbose
|
|
110
|
-
domain_tracker=Wmap::DomainTracker.instance
|
|
111
|
-
domain_tracker.data_dir=@data_dir
|
|
112
|
-
domain_tracker.domains_file = domain_tracker.data_dir + "domains"
|
|
113
|
-
domain_tracker.load_domains_from_file
|
|
114
|
-
if domain_tracker.domain_known?(root)
|
|
115
|
-
domain_tracker=nil
|
|
128
|
+
if is_trusted?(host)
|
|
116
129
|
record[host]=ip
|
|
117
130
|
record[ip]=host
|
|
118
131
|
puts "Host data repository entry loaded: #{host} <=> #{ip}"
|
|
119
132
|
# Replace instance with the class variable to avoid potential race condition under parallel engine
|
|
120
133
|
# add additional logic to update the sub-domain table as well, 02/10/2014
|
|
121
134
|
sub=get_sub_domain(host)
|
|
122
|
-
if sub!=
|
|
135
|
+
if sub!=nil
|
|
123
136
|
tracker=Wmap::DomainTracker::SubDomain.instance
|
|
124
137
|
tracker.data_dir=@data_dir
|
|
125
138
|
tracker.sub_domains_file = tracker.data_dir + "sub_domains"
|
|
@@ -142,8 +155,8 @@ class Wmap::HostTracker
|
|
|
142
155
|
else
|
|
143
156
|
puts "Host is already exist. Skip: #{host}"
|
|
144
157
|
end
|
|
145
|
-
rescue => ee
|
|
146
|
-
|
|
158
|
+
#rescue => ee
|
|
159
|
+
# puts "Exception on method #{__method__}: #{ee}" if @verbose
|
|
147
160
|
end
|
|
148
161
|
|
|
149
162
|
# Setter to add host entry to the local hosts in batch (from an array)
|
|
@@ -164,7 +177,7 @@ class Wmap::HostTracker
|
|
|
164
177
|
end
|
|
165
178
|
end
|
|
166
179
|
@known_hosts.merge!(results)
|
|
167
|
-
puts "Done loading entries."
|
|
180
|
+
puts "Done loading host entries."
|
|
168
181
|
return results
|
|
169
182
|
else
|
|
170
183
|
puts "Error: empty list - no entry is loaded. Please check your input list and try again."
|
data/lib/wmap/site_tracker.rb
CHANGED
|
@@ -77,7 +77,9 @@ class Wmap::SiteTracker
|
|
|
77
77
|
f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
|
|
78
78
|
f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
|
|
79
79
|
@known_sites.keys.sort.map do |key|
|
|
80
|
-
|
|
80
|
+
if is_trusted?(key)
|
|
81
|
+
f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
|
|
82
|
+
end
|
|
81
83
|
end
|
|
82
84
|
f.close
|
|
83
85
|
puts "site store table is successfully saved: #{file_sites}"
|
|
@@ -94,6 +96,24 @@ class Wmap::SiteTracker
|
|
|
94
96
|
puts "Exception on method #{__method__}: #{ee}"
|
|
95
97
|
end
|
|
96
98
|
|
|
99
|
+
# determine site is trusted based on the known domains
|
|
100
|
+
def is_trusted?(site)
|
|
101
|
+
trusted=false
|
|
102
|
+
host=url_2_host(site)
|
|
103
|
+
root=get_domain_root(host)
|
|
104
|
+
domain_tracker=Wmap::DomainTracker.instance
|
|
105
|
+
domain_tracker.data_dir=@data_dir
|
|
106
|
+
domain_tracker.domains_file=@data_dir + "/" + "domains"
|
|
107
|
+
File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
|
|
108
|
+
domain_tracker.load_domains_from_file(domain_tracker.domains_file)
|
|
109
|
+
trusted=domain_tracker.domain_known?(root)
|
|
110
|
+
domain_tracker=nil
|
|
111
|
+
return trusted
|
|
112
|
+
rescue => ee
|
|
113
|
+
puts "Exception on method #{__method__}: #{ee}"
|
|
114
|
+
return trusted
|
|
115
|
+
end
|
|
116
|
+
|
|
97
117
|
# Setter to add site entry to the cache one at a time
|
|
98
118
|
def add(site)
|
|
99
119
|
puts "Add entry to the site store: #{site}"
|
|
@@ -132,6 +152,10 @@ class Wmap::SiteTracker
|
|
|
132
152
|
end
|
|
133
153
|
end
|
|
134
154
|
# add record only if trusted
|
|
155
|
+
host_tracker = Wmap::HostTracker.instance
|
|
156
|
+
host_tracker.data_dir= @data_dir
|
|
157
|
+
host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
|
|
158
|
+
host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
|
|
135
159
|
if trusted
|
|
136
160
|
# Add logic to check site status before adding it
|
|
137
161
|
checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
|
|
@@ -144,10 +168,6 @@ class Wmap::SiteTracker
|
|
|
144
168
|
raise "Site is currently down. Skip #{site}" if checker['code']==10000
|
|
145
169
|
end
|
|
146
170
|
raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
|
|
147
|
-
host_tracker = Wmap::HostTracker.instance
|
|
148
|
-
host_tracker.data_dir= @data_dir
|
|
149
|
-
host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
|
|
150
|
-
host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
|
|
151
171
|
# Update the local host table when necessary
|
|
152
172
|
if is_ip?(host)
|
|
153
173
|
# Case #1: Trusted site contains IP
|
|
@@ -341,8 +361,8 @@ class Wmap::SiteTracker
|
|
|
341
361
|
else
|
|
342
362
|
puts "Error: no entry is loaded. Please check your list and try again."
|
|
343
363
|
end
|
|
344
|
-
|
|
345
|
-
|
|
364
|
+
rescue => ee
|
|
365
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
|
346
366
|
end
|
|
347
367
|
alias_method :dels, :bulk_delete
|
|
348
368
|
|
|
@@ -174,7 +174,7 @@ class WpTracker < Wmap::SiteTracker
|
|
|
174
174
|
end
|
|
175
175
|
end
|
|
176
176
|
@known_wp_sites.merge!(results)
|
|
177
|
-
puts "Done loading entries."
|
|
177
|
+
puts "Done loading wp entries."
|
|
178
178
|
return results
|
|
179
179
|
else
|
|
180
180
|
puts "Error: no entry is loaded. Please check your list and try again."
|
data/lib/wmap/url_checker.rb
CHANGED
|
@@ -88,9 +88,9 @@ class Wmap::UrlChecker
|
|
|
88
88
|
checker['redirection']=nil
|
|
89
89
|
checker['timestamp']=timestamp
|
|
90
90
|
return checker
|
|
91
|
-
rescue Exception => ee
|
|
92
|
-
|
|
93
|
-
|
|
91
|
+
#rescue Exception => ee
|
|
92
|
+
# puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
|
|
93
|
+
# return nil
|
|
94
94
|
end
|
|
95
95
|
alias_method :check, :url_worker
|
|
96
96
|
|
data/lib/wmap/url_crawler.rb
CHANGED
|
@@ -17,7 +17,8 @@ require "parallel"
|
|
|
17
17
|
class Wmap::UrlCrawler
|
|
18
18
|
include Wmap::Utils
|
|
19
19
|
|
|
20
|
-
attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel,
|
|
20
|
+
attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, \
|
|
21
|
+
:verbose, :data_dir, :user_agent
|
|
21
22
|
attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
|
|
22
23
|
# Global variable used to store the combined result of all the forked child processes. Note that class variable
|
|
23
24
|
# would not be able to pass the result due to the limitation of the IO Pipe communication mechanism used by the 'parallel' fork manager
|
|
@@ -35,13 +36,16 @@ class Wmap::UrlCrawler
|
|
|
35
36
|
@crawl_depth=params.fetch(:crawl_depth, 4)
|
|
36
37
|
@crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
|
|
37
38
|
@max_parallel=params.fetch(:max_parallel, 40)
|
|
39
|
+
@user_agent=params.fetch(:user_agent, "OWASP WMAP Spider")
|
|
38
40
|
# Discovered data store
|
|
39
41
|
@discovered_urls_by_crawler=Hash.new
|
|
40
42
|
@visited_urls_by_crawler=Hash.new
|
|
41
43
|
@crawl_start=Hash.new
|
|
42
44
|
@crawl_done=Hash.new
|
|
43
45
|
Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
|
|
44
|
-
@
|
|
46
|
+
@log_dir=@data_dir + "/logs/"
|
|
47
|
+
Dir.mkdir(@log_dir) unless Dir.exist?(@log_dir)
|
|
48
|
+
@log_file=@log_dir + "crawler.log"
|
|
45
49
|
end
|
|
46
50
|
|
|
47
51
|
# Pre-crawl profiler, to be used for network profiling to maximize the crawler performance.
|
|
@@ -86,7 +90,7 @@ class Wmap::UrlCrawler
|
|
|
86
90
|
|
|
87
91
|
# The worker instance of the crawler, which performs the labour work
|
|
88
92
|
def crawl_worker(url0)
|
|
89
|
-
puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and
|
|
93
|
+
puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and discovery contents."
|
|
90
94
|
# Input URL sanity check first
|
|
91
95
|
if is_url?(url0)
|
|
92
96
|
host=url_2_host(url0)
|
|
@@ -216,14 +220,14 @@ class Wmap::UrlCrawler
|
|
|
216
220
|
alias_method :crawl_file, :crawl_workers_on_file
|
|
217
221
|
|
|
218
222
|
# Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
|
|
219
|
-
def open_url(url)
|
|
223
|
+
def open_url(url,user_agent=@user_agent)
|
|
220
224
|
puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
|
|
221
225
|
if url =~ /http\:/i
|
|
222
226
|
# patch to allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
|
|
223
|
-
url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
|
|
227
|
+
url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
|
|
224
228
|
#url_object = open(url)
|
|
225
229
|
elsif url =~ /https\:/i
|
|
226
|
-
url_object = open(url
|
|
230
|
+
url_object = open(url, :ssl_verify_mode=>0, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
|
|
227
231
|
#url_object = open(url,:ssl_verify_mode => 0)
|
|
228
232
|
else
|
|
229
233
|
raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
|
|
@@ -258,22 +262,6 @@ class Wmap::UrlCrawler
|
|
|
258
262
|
return nil
|
|
259
263
|
end
|
|
260
264
|
|
|
261
|
-
=begin
|
|
262
|
-
# Wrapper for the Nokogiri DOM parser
|
|
263
|
-
def parse_html(html_body)
|
|
264
|
-
begin
|
|
265
|
-
#puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
|
|
266
|
-
doc = Nokogiri::HTML(html_body)
|
|
267
|
-
#puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
|
|
268
|
-
#puts "doc: #{doc}" if @verbose
|
|
269
|
-
return doc
|
|
270
|
-
rescue => ee
|
|
271
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
|
272
|
-
return nil
|
|
273
|
-
end
|
|
274
|
-
end
|
|
275
|
-
=end
|
|
276
|
-
|
|
277
265
|
# Search 'current_url' and return found URLs under the same domain
|
|
278
266
|
def find_urls_on_page(doc, current_url)
|
|
279
267
|
puts "Search and return URLs within the doc: #{doc}" if @verbose
|
data/lib/wmap/utils/logger.rb
CHANGED
|
@@ -8,46 +8,43 @@
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
module Wmap
|
|
11
|
-
module Utils
|
|
11
|
+
module Utils
|
|
12
12
|
# Module to log debugging and other messages
|
|
13
|
-
module Logger
|
|
13
|
+
module Logger
|
|
14
14
|
extend self
|
|
15
15
|
# Append information into the log file for the trouble-shooting purpose
|
|
16
16
|
def wlog (obj, agent, file)
|
|
17
17
|
puts "Writing #{obj} into log file: #{file}" if @verbose
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
obj.map { |x| @@f.write " #{x}\n" }
|
|
28
|
-
puts "The list is successfully saved into the log file: #{file} " if @verbose
|
|
29
|
-
end
|
|
30
|
-
when Hash
|
|
31
|
-
if obj.length >= 0
|
|
32
|
-
@@f.write "#{timestamp}: #{agent}: \n"
|
|
33
|
-
obj.each_value { |value| @@f.write " #{value}\n" }
|
|
34
|
-
puts "The hash is successfully saved into the log file: #{file} " if @verbose
|
|
35
|
-
end
|
|
36
|
-
when String
|
|
37
|
-
@@f.write "#{timestamp}: #{agent}: #{obj}\n"
|
|
38
|
-
puts "The string is successfully saved into the log file: #{file} " if @verbose
|
|
39
|
-
else
|
|
40
|
-
#do nothing
|
|
41
|
-
puts "Un-handled exception on: #{obj}" if @verbose
|
|
18
|
+
return false if obj.nil?
|
|
19
|
+
@@f=File.open(file,'a')
|
|
20
|
+
timestamp=Time.now
|
|
21
|
+
case obj
|
|
22
|
+
when Array
|
|
23
|
+
if obj.size >= 0
|
|
24
|
+
@@f.write "#{timestamp}: #{agent}: \n"
|
|
25
|
+
obj.map { |x| @@f.write " #{x}\n" }
|
|
26
|
+
puts "The list is successfully saved into the log file: #{file} " if @verbose
|
|
42
27
|
end
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
28
|
+
when Hash
|
|
29
|
+
if obj.length >= 0
|
|
30
|
+
@@f.write "#{timestamp}: #{agent}: \n"
|
|
31
|
+
obj.each_value { |value| @@f.write " #{value}\n" }
|
|
32
|
+
puts "The hash is successfully saved into the log file: #{file} " if @verbose
|
|
33
|
+
end
|
|
34
|
+
when String
|
|
35
|
+
@@f.write "#{timestamp}: #{agent}: #{obj}\n"
|
|
36
|
+
puts "The string is successfully saved into the log file: #{file} " if @verbose
|
|
37
|
+
else
|
|
38
|
+
#do nothing
|
|
39
|
+
puts "Un-handled exception on: #{obj}" if @verbose
|
|
40
|
+
end
|
|
41
|
+
@@f.close
|
|
42
|
+
return true
|
|
43
|
+
rescue => ee
|
|
44
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
|
45
|
+
return false
|
|
49
46
|
end
|
|
50
|
-
|
|
51
|
-
end
|
|
47
|
+
|
|
48
|
+
end
|
|
52
49
|
end
|
|
53
50
|
end
|
data/lib/wmap/utils/url_magic.rb
CHANGED
|
@@ -15,6 +15,7 @@ module Wmap
|
|
|
15
15
|
|
|
16
16
|
# set hard stop limit of http time-out to 15 seconds (15000 ms), in order to avoid severe performance penalty for certain 'weird' site(s)
|
|
17
17
|
Max_http_timeout=15000
|
|
18
|
+
User_agent = "OWASP WMAP Spider"
|
|
18
19
|
|
|
19
20
|
# Simple sanity check on a 'claimed' URL string.
|
|
20
21
|
def is_url?(url)
|
|
@@ -377,7 +378,8 @@ module Wmap
|
|
|
377
378
|
|
|
378
379
|
# Given an URL, open the page, then return the DOM text from a normal user perspective
|
|
379
380
|
def open_page(url)
|
|
380
|
-
args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe,
|
|
381
|
+
args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, \
|
|
382
|
+
read_timeout: Max_http_timeout/1000, "User-Agent"=>User_agent}
|
|
381
383
|
doc = Nokogiri::HTML(open(url, args))
|
|
382
384
|
if doc.text.include?("Please enable JavaScript to view the page content")
|
|
383
385
|
puts "Invoke headless chrome through webdriver ..." if @verbose
|
|
@@ -385,7 +387,7 @@ module Wmap
|
|
|
385
387
|
#driver = Selenium::WebDriver.for :chrome
|
|
386
388
|
# http://watir.com/guides/chrome/
|
|
387
389
|
args = ['--ignore-certificate-errors', '--disable-popup-blocking', '--disable-translate', '--disk-cache-size 8192']
|
|
388
|
-
browser = Watir::Browser.new :chrome, headless: true,
|
|
390
|
+
browser = Watir::Browser.new :chrome, headless: true, switches: %w[--user-agent=OWASP\ WMAP\ Spider]
|
|
389
391
|
browser.goto(url)
|
|
390
392
|
sleep(2) # wait for the loading
|
|
391
393
|
doc = Nokogiri::HTML(browser.html)
|
data/lib/wmap/utils/utils.rb
CHANGED
|
@@ -159,20 +159,18 @@ module Wmap
|
|
|
159
159
|
# Simple test of a host string format. Return true if it contains a valid internet domain sub-string. Note: Not to be confused with another method 'valid_dns_record?', which is a stricter and time-consuming test on the DNS server for a resolvable internet host.
|
|
160
160
|
def is_fqdn? (host)
|
|
161
161
|
puts "Validate the host-name format is valid: #{host}" if @verbose
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
else
|
|
170
|
-
return false
|
|
171
|
-
end
|
|
172
|
-
rescue => ee
|
|
173
|
-
puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
|
|
162
|
+
return false if is_ip?(host) or is_url?(host)
|
|
163
|
+
domain=get_domain_root(host)
|
|
164
|
+
if domain.nil?
|
|
165
|
+
return false
|
|
166
|
+
elsif is_domain_root?(domain)
|
|
167
|
+
return true
|
|
168
|
+
else
|
|
174
169
|
return false
|
|
175
170
|
end
|
|
171
|
+
# rescue => ee
|
|
172
|
+
# puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
|
|
173
|
+
# return false
|
|
176
174
|
end
|
|
177
175
|
alias_method :is_host?, :is_fqdn?
|
|
178
176
|
|
data/lib/wmap/utils/wp_detect.rb
CHANGED
|
@@ -239,7 +239,11 @@ module Wmap
|
|
|
239
239
|
if tag.to_s.include?(pattern)
|
|
240
240
|
puts tag.to_s if @verbose
|
|
241
241
|
k=nil
|
|
242
|
-
|
|
242
|
+
if tag.to_s.scan(/[\d+\.]+\d+/).first =~ /\d+\./
|
|
243
|
+
return tag.to_s.scan(/[\d+\.]+\d+/).first
|
|
244
|
+
else
|
|
245
|
+
return nil
|
|
246
|
+
end
|
|
243
247
|
end
|
|
244
248
|
end
|
|
245
249
|
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Wmap
|
|
3
|
+
#
|
|
4
|
+
# A pure Ruby library for the Internet web application discovery and tracking.
|
|
5
|
+
#
|
|
6
|
+
# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
|
|
7
|
+
#++
|
|
8
|
+
# Unit Test File for Wmap::CidrTracker class
|
|
9
|
+
|
|
10
|
+
require "minitest/autorun"
|
|
11
|
+
require "Wmap"
|
|
12
|
+
|
|
13
|
+
class CidrTrackerTest < MiniTest::Unit::TestCase
|
|
14
|
+
include Wmap::Utils
|
|
15
|
+
|
|
16
|
+
def test_cidr_add
|
|
17
|
+
w = Wmap::CidrTracker.new
|
|
18
|
+
w.add("192.168.1.0/24")
|
|
19
|
+
assert_equal true, w.known_cidr_blks.key?("192.168.1.0/24")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def test_cidr_delete
|
|
23
|
+
w = Wmap::CidrTracker.new
|
|
24
|
+
w.add("10.0.0.0/8")
|
|
25
|
+
w.delete("10.0.0.0/8")
|
|
26
|
+
assert_equal false, w.known_cidr_blks.key?("10.0.0.0/8")
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_ip_trusted?
|
|
30
|
+
w = Wmap::CidrTracker.new
|
|
31
|
+
w.add("192.168.1.0/24")
|
|
32
|
+
assert_equal true, w.ip_trusted?("192.168.1.1")
|
|
33
|
+
assert_equal true, w.ip_trusted?("192.168.1.255")
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
end
|
data/test/utils_test.rb
CHANGED
|
@@ -12,7 +12,7 @@ require "Wmap"
|
|
|
12
12
|
|
|
13
13
|
class UtilsTest < MiniTest::Unit::TestCase
|
|
14
14
|
include Wmap::Utils
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
def test_sld_domain_conversion
|
|
17
17
|
assert_equal "yahoo.com", get_domain_root("yahoo.com")
|
|
18
18
|
end
|
|
@@ -28,75 +28,75 @@ class UtilsTest < MiniTest::Unit::TestCase
|
|
|
28
28
|
def test_is_domain_root_case_1?
|
|
29
29
|
assert_equal false, is_domain_root?("www.yahoo.co.uk")
|
|
30
30
|
end
|
|
31
|
-
|
|
31
|
+
|
|
32
32
|
def test_is_domain_root_case_2?
|
|
33
33
|
assert_equal true, is_domain_root?("yahoo.co.uk")
|
|
34
34
|
end
|
|
35
|
-
|
|
35
|
+
|
|
36
36
|
def test_get_sub_domain
|
|
37
37
|
assert_equal "mail.yahoo.co.uk", get_sub_domain("www.mail.yahoo.co.uk")
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
def test_is_url_case_1?
|
|
41
41
|
assert_equal true, is_url?("http://www.mail.yahoo.co.uk/")
|
|
42
|
-
end
|
|
42
|
+
end
|
|
43
43
|
|
|
44
44
|
def test_is_url_case_2?
|
|
45
45
|
assert_equal true, is_url?("https://www.mail.yahoo.co.uk/")
|
|
46
|
-
end
|
|
46
|
+
end
|
|
47
47
|
|
|
48
48
|
def test_is_url_case_3?
|
|
49
49
|
assert_equal false, is_url?("http://www.mail.yahoo.uii/")
|
|
50
|
-
end
|
|
50
|
+
end
|
|
51
51
|
|
|
52
52
|
def test_is_url_case_4?
|
|
53
53
|
assert_equal false, is_url?("http:\\www.mail.yahoo.co.uk")
|
|
54
|
-
end
|
|
55
|
-
|
|
54
|
+
end
|
|
55
|
+
|
|
56
56
|
def test_is_ssl?
|
|
57
57
|
assert_equal false, is_ssl?("http://www.mail.yahoo.co.uk/")
|
|
58
|
-
end
|
|
59
|
-
|
|
58
|
+
end
|
|
59
|
+
|
|
60
60
|
def test_is_site?
|
|
61
61
|
assert_equal false, is_site?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
|
62
|
-
end
|
|
63
|
-
|
|
62
|
+
end
|
|
63
|
+
|
|
64
64
|
def test_url_2_host
|
|
65
65
|
assert_equal "login.yahoo.com", url_2_host("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
|
66
|
-
end
|
|
67
|
-
|
|
66
|
+
end
|
|
67
|
+
|
|
68
68
|
def test_url_2_site_case_1
|
|
69
69
|
assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
|
70
|
-
end
|
|
70
|
+
end
|
|
71
71
|
|
|
72
72
|
def test_url_2_site_case_2
|
|
73
73
|
assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
|
74
|
-
end
|
|
74
|
+
end
|
|
75
75
|
|
|
76
76
|
def test_url_2_site_case_3
|
|
77
77
|
assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com#.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
|
78
|
-
end
|
|
79
|
-
|
|
78
|
+
end
|
|
79
|
+
|
|
80
80
|
def test_url_2_path
|
|
81
81
|
assert_equal "/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", url_2_path("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
|
|
82
|
-
end
|
|
82
|
+
end
|
|
83
83
|
|
|
84
84
|
def test_urls_on_same_domain?
|
|
85
85
|
assert_equal true, urls_on_same_domain?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", "https://us-mg4.mail.yahoo.com/neo/launch?.rand=8hjd08hc6t1lq")
|
|
86
|
-
end
|
|
86
|
+
end
|
|
87
87
|
|
|
88
88
|
def test_host_2_url_case_1
|
|
89
89
|
assert_equal "https://mail.yahoo.com/", host_2_url("mail.yahoo.com",443)
|
|
90
|
-
end
|
|
90
|
+
end
|
|
91
91
|
|
|
92
92
|
def test_host_2_url_case_2
|
|
93
93
|
assert_equal "http://mail.yahoo.com/", host_2_url("mail.yahoo.com")
|
|
94
|
-
end
|
|
95
|
-
|
|
94
|
+
end
|
|
95
|
+
|
|
96
96
|
def test_make_absolute
|
|
97
97
|
assert_equal "http://games.yahoo.com/game/the-magic-snowman-flash.html", make_absolute("http://games.yahoo.com/","game/the-magic-snowman-flash.html")
|
|
98
98
|
end
|
|
99
|
-
|
|
99
|
+
|
|
100
100
|
def test_create_absolute_url_from_base
|
|
101
101
|
assert_equal "http://images.search.yahoo.com/search/images?p=raiders", create_absolute_url_from_base("http://images.search.yahoo.com/images","/search/images?p=raiders")
|
|
102
102
|
end
|
|
@@ -108,7 +108,7 @@ class UtilsTest < MiniTest::Unit::TestCase
|
|
|
108
108
|
def test_normalize_url_case_1
|
|
109
109
|
assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/./images/search/images?p=raiders")
|
|
110
110
|
end
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
def test_normalize_url_case_2
|
|
113
113
|
assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/../images/../search/images?p=raiders")
|
|
114
114
|
end
|
|
@@ -116,53 +116,58 @@ class UtilsTest < MiniTest::Unit::TestCase
|
|
|
116
116
|
def test_normalize_url_case_3
|
|
117
117
|
assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com./../images/../search/images?p=raiders")
|
|
118
118
|
end
|
|
119
|
-
|
|
119
|
+
|
|
120
120
|
def test_is_ip_case_1?
|
|
121
121
|
assert_equal false, is_ip?("256.2.3.1")
|
|
122
|
-
end
|
|
122
|
+
end
|
|
123
123
|
|
|
124
124
|
def test_is_ip_case_2?
|
|
125
125
|
assert_equal false, is_ip?("25.2.3.1.22")
|
|
126
|
-
end
|
|
126
|
+
end
|
|
127
127
|
|
|
128
128
|
def test_is_ip_case_3?
|
|
129
129
|
assert_equal true, is_ip?("196.168.230.1")
|
|
130
|
-
end
|
|
130
|
+
end
|
|
131
131
|
|
|
132
132
|
def test_is_fqdn_case_1?
|
|
133
133
|
assert_equal true, is_fqdn?("images.search.yahoo.com")
|
|
134
|
-
end
|
|
134
|
+
end
|
|
135
135
|
|
|
136
136
|
def test_is_fqdn_case_2?
|
|
137
137
|
assert_equal true, is_fqdn?("yahoo.com")
|
|
138
|
-
end
|
|
139
|
-
|
|
138
|
+
end
|
|
139
|
+
|
|
140
140
|
def test_is_fqdn_case_3?
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
141
|
+
# according to latest tlds list - http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
|
142
|
+
assert_equal true, is_fqdn?("images.search.yahoo")
|
|
143
|
+
end
|
|
144
|
+
|
|
144
145
|
def test_is_fqdn_case_4?
|
|
145
146
|
assert_equal false, is_fqdn?("images")
|
|
146
|
-
end
|
|
147
|
-
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def test_is_fqdn_case_5?
|
|
150
|
+
assert_equal false, is_fqdn?("images.search.gargle")
|
|
151
|
+
end
|
|
152
|
+
|
|
148
153
|
def test_is_cidr_case_1?
|
|
149
154
|
assert_equal false, is_cidr?("196.168.230.1")
|
|
150
|
-
end
|
|
155
|
+
end
|
|
151
156
|
|
|
152
157
|
def test_is_cidr_case_2?
|
|
153
158
|
assert_equal false, is_cidr?("196.168.2.257/12")
|
|
154
|
-
end
|
|
155
|
-
|
|
159
|
+
end
|
|
160
|
+
|
|
156
161
|
def test_is_cidr_case_3?
|
|
157
162
|
assert_equal true, is_cidr?("196.168.2.25/12")
|
|
158
|
-
end
|
|
159
|
-
|
|
163
|
+
end
|
|
164
|
+
|
|
160
165
|
def test_cidr_2_ips
|
|
161
166
|
assert_equal ["192.168.1.1"], cidr_2_ips("192.168.1.1/32")
|
|
162
|
-
end
|
|
163
|
-
|
|
167
|
+
end
|
|
168
|
+
|
|
164
169
|
def test_sort_ips
|
|
165
170
|
assert_equal ["192.168.1.1", "192.168.1.2", "192.168.2.1"], sort_ips(["192.168.1.2", "192.168.2.1","192.168.1.1"])
|
|
166
|
-
end
|
|
167
|
-
|
|
171
|
+
end
|
|
172
|
+
|
|
168
173
|
end
|
data/version.txt
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
###############################################################################
|
|
4
4
|
package = wmap
|
|
5
5
|
# wmap version 2.0 == web_discovery version 1.5.3
|
|
6
|
-
version = 2.
|
|
7
|
-
date =
|
|
6
|
+
version = 2.8.3
|
|
7
|
+
date = 2021-07-26
|
|
8
8
|
|
|
9
9
|
author = Sam (Yang) Li
|
|
10
10
|
email = yang.li@owasp.org
|
data/wmap.gemspec
CHANGED
|
@@ -36,7 +36,7 @@ Gem::Specification.new do |s|
|
|
|
36
36
|
s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
|
|
37
37
|
s.email = info["email"]
|
|
38
38
|
s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","trusts","distrust","run_tests"]
|
|
39
|
-
s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","
|
|
39
|
+
s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports", "LICENSE.txt",
|
|
40
40
|
"version.txt","README.md", "wmap.gemspec"]
|
|
41
41
|
s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['dicts/*']
|
|
42
42
|
#s.homepage = "none"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wmap
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.
|
|
4
|
+
version: 2.8.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sam (Yang) Li
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2021-07-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: dnsruby
|
|
@@ -234,7 +234,6 @@ files:
|
|
|
234
234
|
- LICENSE.txt
|
|
235
235
|
- README.md
|
|
236
236
|
- TODO
|
|
237
|
-
- bin/RHPG
|
|
238
237
|
- bin/deprime
|
|
239
238
|
- bin/distrust
|
|
240
239
|
- bin/googleBot
|
|
@@ -251,7 +250,6 @@ files:
|
|
|
251
250
|
- bin/wdel
|
|
252
251
|
- bin/wdump
|
|
253
252
|
- bin/wmap
|
|
254
|
-
- bin/wmaps
|
|
255
253
|
- bin/wscan
|
|
256
254
|
- demos/bruter.rb
|
|
257
255
|
- demos/dns_brutes.rb
|
|
@@ -308,7 +306,7 @@ files:
|
|
|
308
306
|
- settings/discovery_ports
|
|
309
307
|
- settings/google_keywords.txt
|
|
310
308
|
- settings/google_locator.txt
|
|
311
|
-
-
|
|
309
|
+
- test/cidr_tracker_test.rb
|
|
312
310
|
- test/domain_tracker_test.rb
|
|
313
311
|
- test/utils_test.rb
|
|
314
312
|
- version.txt
|
|
@@ -335,9 +333,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
335
333
|
- !ruby/object:Gem::Version
|
|
336
334
|
version: '0'
|
|
337
335
|
requirements: []
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
signing_key:
|
|
336
|
+
rubygems_version: 3.0.9
|
|
337
|
+
signing_key:
|
|
341
338
|
specification_version: 4
|
|
342
339
|
summary: A pure Ruby web application and service discovery API.
|
|
343
340
|
test_files: []
|
data/bin/RHPG
DELETED
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# Executable to lookup then merge site tech details into the RHPG asset spreadsheet in CSV format only
|
|
3
|
-
#
|
|
4
|
-
## Usage: RHPG [RHPG.csv]
|
|
5
|
-
require "wmap"
|
|
6
|
-
require "csv"
|
|
7
|
-
include Wmap::Utils
|
|
8
|
-
|
|
9
|
-
def print_usage
|
|
10
|
-
puts "Program to lookup then merge the site details into RHPG asset spreadsheet. \nUsage: RHPG [RHPG.csv]"
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
# Lookup the site store for a domain; then return the finger print info of the site
|
|
14
|
-
def site_tracker_lookup(domain)
|
|
15
|
-
tracker=Wmap::SiteTracker.instance
|
|
16
|
-
tracker.verbose=false
|
|
17
|
-
#first order search
|
|
18
|
-
tracker.known_sites.each do |key,val|
|
|
19
|
-
if key.include?(domain.strip.downcase) && key.include?("https")
|
|
20
|
-
tracker=nil
|
|
21
|
-
return [key] + val.values
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
#second order search
|
|
25
|
-
tracker.known_sites.each do |key,val|
|
|
26
|
-
if key.include?(domain.strip.downcase)
|
|
27
|
-
tracker=nil
|
|
28
|
-
return [key] + val.values
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
tracker=nil
|
|
32
|
-
return [nil]*9
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# look up the wp site data store for a domain; then return the wp finger print info: [is_wp?,wp_ver]
|
|
36
|
-
def wp_tracker_lookup(domain)
|
|
37
|
-
tracker=Wmap::WpTracker.new(:verbose=>false)
|
|
38
|
-
# first order
|
|
39
|
-
tracker.known_wp_sites.each do |key,val|
|
|
40
|
-
if key.include?(domain.strip.downcase) && val
|
|
41
|
-
ver=tracker.wp_ver(key)
|
|
42
|
-
tracker=nil
|
|
43
|
-
return [val,ver]
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
# second order
|
|
47
|
-
tracker.known_wp_sites.each do |key,val|
|
|
48
|
-
if key.include?(domain.strip.downcase) && key.include?("https") && val
|
|
49
|
-
tracker=nil
|
|
50
|
-
return [val,nil]
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
# third order
|
|
54
|
-
tracker.known_wp_sites.each do |key,val|
|
|
55
|
-
if key.include?(domain.strip.downcase)
|
|
56
|
-
tracker=nil
|
|
57
|
-
return [val,nil]
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
tracker=nil
|
|
61
|
-
return [nil,nil]
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# perform the wpscan on a site
|
|
65
|
-
def wpscan(domain)
|
|
66
|
-
url=site_tracker_lookup(domain)[0]
|
|
67
|
-
return nil if url.nil?
|
|
68
|
-
if url.include?("https")
|
|
69
|
-
command="wpscan --disable-tls-checks --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
|
|
70
|
-
else
|
|
71
|
-
command="wpscan --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
|
|
72
|
-
end
|
|
73
|
-
system(command)
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
puts Wmap.banner
|
|
77
|
-
print_usage
|
|
78
|
-
|
|
79
|
-
# open output file to write
|
|
80
|
-
CSV.open("output.csv", "wb") do |csv|
|
|
81
|
-
cnt=1
|
|
82
|
-
# open RHPG input file to read
|
|
83
|
-
CSV.foreach(ARGV[0]) do |row|
|
|
84
|
-
puts "Processing row #{cnt}"
|
|
85
|
-
#puts row.inspect
|
|
86
|
-
my_row=Array.new
|
|
87
|
-
if cnt > 1
|
|
88
|
-
if is_domain?(row[0])
|
|
89
|
-
=begin
|
|
90
|
-
if row[3] =~ /Keep/i && row[3] != /Redirect/i
|
|
91
|
-
unless File.exist?(row[0]+".wpscan")
|
|
92
|
-
wpscan(row[0])
|
|
93
|
-
end
|
|
94
|
-
end
|
|
95
|
-
=end
|
|
96
|
-
my_row = row + site_tracker_lookup(row[0]) + wp_tracker_lookup(row[0])
|
|
97
|
-
else
|
|
98
|
-
my_row = row + [nil]*10
|
|
99
|
-
end
|
|
100
|
-
else
|
|
101
|
-
my_row = row + ["Website","Primary IP","Port","Hosting Status","Server","Response Code","MD5 Finger-print","Redirection","Timestamp", "WordPress", "WordPress Version"]
|
|
102
|
-
end
|
|
103
|
-
cnt+=1
|
|
104
|
-
csv << my_row
|
|
105
|
-
end
|
|
106
|
-
puts "All done. "
|
|
107
|
-
end
|
data/bin/wmaps
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# script to automate the new site discovery by crawling all unique sites in the site store
|
|
3
|
-
require "wmap"
|
|
4
|
-
require "parallel"
|
|
5
|
-
|
|
6
|
-
def wmap_worker(domain)
|
|
7
|
-
cmd = "wmap " + domain
|
|
8
|
-
puts "wmap discovery on domain: ", domain
|
|
9
|
-
system(cmd)
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
tracker=Wmap::DomainTracker.instance
|
|
14
|
-
Parallel.map(tracker.known_internet_domains.keys, :in_processes => 10) { |target|
|
|
15
|
-
puts "Working on #{target} ..." if @verbose
|
|
16
|
-
wmap_worker(target)
|
|
17
|
-
}
|
|
18
|
-
=begin
|
|
19
|
-
tracker.known_internet_domains.keys.map do |domain|
|
|
20
|
-
wmap_worker(domain)
|
|
21
|
-
end
|
|
22
|
-
=end
|
|
23
|
-
tracker=nil
|