wmap 2.7.7 → 2.8.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 559cac84dd38902d968cc9e7327e77115ae3f946020caa21b7743ceb5777a96f
4
- data.tar.gz: a4dcc0eafc10d7497c47c1f9955774b880ae5cdafa5ed0c6904ef5362087bd98
3
+ metadata.gz: a6632168c88c35189b601d225ebbf99fddd034a561814e5fe34b2d57bb79c75c
4
+ data.tar.gz: 4beab7d92e6a5e4258d37dfa52a64f4edaf06d8c4213331cc9833d0be5cc70aa
5
5
  SHA512:
6
- metadata.gz: 0b430ed1da47cefd8cb8a7bedddd75ed2e7b1dafbfe94cdb2185ad2eb7e26d69a1429e79a9ece6b3cc68d6d964161f092e1143a00c0c64e241e930304d9e5a65
7
- data.tar.gz: fe4d50b292849e51c202f47083c0837228d11934eb0d71c5a6262da24467480ce26994f5db91bdae5cf3b52dc0e6ef4fef698396575e820779c7420c731d0d87
6
+ metadata.gz: 46482f94126bc1ad4af322cb23894e77ee5a0f85dc31741e25b75752be4a47b1a620c3d1a6f28786a2f53e37eecde42b571f011e770e4bacdaf00012d737ea9e
7
+ data.tar.gz: b5ed0e1af39d8b7910b0d77b5bde99461293a7e985d5e587405bbf929cae8bffa5a32a1cf31eb142f823a11057377537adecf9ee6fb7f475fb2252e9d3edc2b1
data/bin/wmap CHANGED
@@ -13,7 +13,7 @@ parser = OptionParser.new do|opts|
13
13
  opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
14
14
  options[:data_dir] = data_dir;
15
15
  end
16
- opts.on('-t', '--target target', 'Web Mapper target') do |target|
16
+ opts.on('-t', '--target target', 'Web Mapper target / seed for discovery') do |target|
17
17
  options[:target] = target;
18
18
  end
19
19
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
@@ -29,6 +29,10 @@ parser.parse!
29
29
  # print program banner
30
30
  puts Wmap.banner
31
31
  # print_usage unless options[:target]
32
+ unless options[:target]
33
+ puts "Usage: $ wmap -h"
34
+ exit 1
35
+ end
32
36
 
33
37
  # Preparing - check out the working logs directory
34
38
  if options[:data_dir]
@@ -129,6 +133,10 @@ Wmap.wlog(dis_urls.keys, "wmap", Log_dir+"discovered_urls.log") unless dis_urls.
129
133
  Wmap.wlog(dis_sites.keys, "wmap", Log_dir+"discovered_sites.log") unless dis_sites.empty?
130
134
  #crawler.wlog(c_start.keys,Log_dir+"crawler.log")
131
135
  #crawler.wlog(c_done.keys,Log_dir+"crawler.log")
136
+
137
+
138
+ # Save the current discovery urls only to a specific file, patched 07/23/2021
139
+ crawler.save_discovered_urls(Log_dir+"cur_urls.log")
132
140
  crawler=nil
133
141
 
134
142
 
@@ -138,22 +146,23 @@ when nil,[]
138
146
  puts "No new site found. There is no change to the site tracking data repository. "
139
147
  else
140
148
  puts "Automatically save the discovery results into the site tracking data repository: "
149
+ inventory = Wmap::SiteTracker.instance
141
150
  if options[:target] && options[:data_dir]
142
151
  puts "Start the SiteTracker with the optional directory setter. "
143
- inventory=Wmap::SiteTracker.instance
144
152
  inventory.data_dir = options[:data_dir]
153
+ #inventory.verbose = true
145
154
  inventory.sites_file = inventory.data_dir + "/" + "sites"
146
155
  inventory.load_site_stores_from_file(inventory.sites_file)
147
156
  elsif options[:target]
148
157
  puts "Start the SiteTracker. "
149
- inventory=Wmap::SiteTracker.instance
150
158
  else
151
159
  abort "Error firing up SiteTracker instance!"
152
160
  end
153
161
  new_sites=inventory.adds(dis_sites.keys-["",nil])
154
- if new_sites.size>0 && options[:data_dir]
162
+ puts "Newly discovery sties: #{new_sites}"
163
+ if options[:data_dir]
155
164
  inventory.save!(inventory.sites_file)
156
- elsif new_sites.size>0
165
+ else
157
166
  inventory.save!
158
167
  end
159
168
  inventory=nil
@@ -162,17 +171,17 @@ end
162
171
 
163
172
 
164
173
  # seventh step - update the hosts repository
174
+ puts "Invoke the HostTracker with optional directory setter."
175
+ host_tracker = Wmap::HostTracker.instance
165
176
  if options[:target] && options[:data_dir]
166
- puts "Invoke the HostTracker with optional directory setter."
167
- host_tracker = Wmap::HostTracker.instance
177
+ puts puts "Invoke the HostTracker with options: #{options[:data_dir]}, #{options[:target]}"
168
178
  host_tracker.verbose=options[:verbose]
169
179
  host_tracker.data_dir = options[:data_dir]
170
180
  host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
171
181
  host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
172
182
  elsif options[:target]
173
- puts puts "Invoke the HostTracker."
174
- host_tracker = Wmap::HostTracker.instance
175
- host_tracker.verbose=options[:verbose]
183
+ puts puts "Invoke the HostTracker with option: #{options[:target]}."
184
+ #host_tracker.verbose=options[:verbose]
176
185
  else
177
186
  abort "Error firing up HostTracker instance!"
178
187
  end
data/dicts/tlds.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # http://data.iana.org/TLD/tlds-alpha-by-domain.txt
2
- # Version 2018110500, Last Updated Mon Nov 5 07:07:02 2018 UTC
2
+ # Version 2020033000, Last Updated Mon Mar 30 07:07:01 2020 UTC
3
3
  AAA
4
4
  AARP
5
5
  ABARTH
@@ -16,7 +16,6 @@ ACCENTURE
16
16
  ACCOUNTANT
17
17
  ACCOUNTANTS
18
18
  ACO
19
- ACTIVE
20
19
  ACTOR
21
20
  AD
22
21
  ADAC
@@ -146,7 +145,6 @@ BIZ
146
145
  BJ
147
146
  BLACK
148
147
  BLACKFRIDAY
149
- BLANCO
150
148
  BLOCKBUSTER
151
149
  BLOG
152
150
  BLOOMBERG
@@ -155,7 +153,6 @@ BM
155
153
  BMS
156
154
  BMW
157
155
  BN
158
- BNL
159
156
  BNPPARIBAS
160
157
  BO
161
158
  BOATS
@@ -214,7 +211,6 @@ CARE
214
211
  CAREER
215
212
  CAREERS
216
213
  CARS
217
- CARTIER
218
214
  CASA
219
215
  CASE
220
216
  CASEIH
@@ -247,7 +243,6 @@ CHEAP
247
243
  CHINTAI
248
244
  CHRISTMAS
249
245
  CHROME
250
- CHRYSLER
251
246
  CHURCH
252
247
  CI
253
248
  CIPRIANI
@@ -299,6 +294,7 @@ COUNTRY
299
294
  COUPON
300
295
  COUPONS
301
296
  COURSES
297
+ CPA
302
298
  CR
303
299
  CREDIT
304
300
  CREDITCARD
@@ -360,9 +356,7 @@ DNP
360
356
  DO
361
357
  DOCS
362
358
  DOCTOR
363
- DODGE
364
359
  DOG
365
- DOHA
366
360
  DOMAINS
367
361
  DOT
368
362
  DOWNLOAD
@@ -371,7 +365,6 @@ DTV
371
365
  DUBAI
372
366
  DUCK
373
367
  DUNLOP
374
- DUNS
375
368
  DUPONT
376
369
  DURBAN
377
370
  DVAG
@@ -392,7 +385,6 @@ ENERGY
392
385
  ENGINEER
393
386
  ENGINEERING
394
387
  ENTERPRISES
395
- EPOST
396
388
  EPSON
397
389
  EQUIPMENT
398
390
  ER
@@ -408,7 +400,6 @@ EU
408
400
  EUROVISION
409
401
  EUS
410
402
  EVENTS
411
- EVERBANK
412
403
  EXCHANGE
413
404
  EXPERT
414
405
  EXPOSED
@@ -488,6 +479,7 @@ GAME
488
479
  GAMES
489
480
  GAP
490
481
  GARDEN
482
+ GAY
491
483
  GB
492
484
  GBIZ
493
485
  GD
@@ -580,7 +572,6 @@ HOMEGOODS
580
572
  HOMES
581
573
  HOMESENSE
582
574
  HONDA
583
- HONEYWELL
584
575
  HORSE
585
576
  HOSPITAL
586
577
  HOST
@@ -634,7 +625,6 @@ IQ
634
625
  IR
635
626
  IRISH
636
627
  IS
637
- ISELECT
638
628
  ISMAILI
639
629
  IST
640
630
  ISTANBUL
@@ -699,12 +689,10 @@ KYOTO
699
689
  KZ
700
690
  LA
701
691
  LACAIXA
702
- LADBROKES
703
692
  LAMBORGHINI
704
693
  LAMER
705
694
  LANCASTER
706
695
  LANCIA
707
- LANCOME
708
696
  LAND
709
697
  LANDROVER
710
698
  LANXESS
@@ -725,7 +713,6 @@ LEGO
725
713
  LEXUS
726
714
  LGBT
727
715
  LI
728
- LIAISON
729
716
  LIDL
730
717
  LIFE
731
718
  LIFEINSURANCE
@@ -744,6 +731,7 @@ LIVING
744
731
  LIXIL
745
732
  LK
746
733
  LLC
734
+ LLP
747
735
  LOAN
748
736
  LOANS
749
737
  LOCKER
@@ -819,7 +807,6 @@ MN
819
807
  MO
820
808
  MOBI
821
809
  MOBILE
822
- MOBILY
823
810
  MODA
824
811
  MOE
825
812
  MOI
@@ -827,7 +814,6 @@ MOM
827
814
  MONASH
828
815
  MONEY
829
816
  MONSTER
830
- MOPAR
831
817
  MORMON
832
818
  MORTGAGE
833
819
  MOSCOW
@@ -835,7 +821,6 @@ MOTO
835
821
  MOTORCYCLES
836
822
  MOV
837
823
  MOVIE
838
- MOVISTAR
839
824
  MP
840
825
  MQ
841
826
  MR
@@ -854,7 +839,6 @@ MY
854
839
  MZ
855
840
  NA
856
841
  NAB
857
- NADEX
858
842
  NAGOYA
859
843
  NAME
860
844
  NATIONWIDE
@@ -955,7 +939,6 @@ PHOTO
955
939
  PHOTOGRAPHY
956
940
  PHOTOS
957
941
  PHYSIO
958
- PIAGET
959
942
  PICS
960
943
  PICTET
961
944
  PICTURES
@@ -1152,18 +1135,16 @@ SONG
1152
1135
  SONY
1153
1136
  SOY
1154
1137
  SPACE
1155
- SPIEGEL
1156
1138
  SPORT
1157
1139
  SPOT
1158
1140
  SPREADBETTING
1159
1141
  SR
1160
1142
  SRL
1161
- SRT
1143
+ SS
1162
1144
  ST
1163
1145
  STADA
1164
1146
  STAPLES
1165
1147
  STAR
1166
- STARHUB
1167
1148
  STATEBANK
1168
1149
  STATEFARM
1169
1150
  STC
@@ -1211,7 +1192,6 @@ TEAM
1211
1192
  TECH
1212
1193
  TECHNOLOGY
1213
1194
  TEL
1214
- TELEFONICA
1215
1195
  TEMASEK
1216
1196
  TENNIS
1217
1197
  TEVA
@@ -1271,7 +1251,6 @@ TZ
1271
1251
  UA
1272
1252
  UBANK
1273
1253
  UBS
1274
- UCONNECT
1275
1254
  UG
1276
1255
  UK
1277
1256
  UNICOM
@@ -1305,7 +1284,6 @@ VIP
1305
1284
  VIRGIN
1306
1285
  VISA
1307
1286
  VISION
1308
- VISTAPRINT
1309
1287
  VIVA
1310
1288
  VIVO
1311
1289
  VLAANDEREN
@@ -1324,7 +1302,6 @@ WALMART
1324
1302
  WALTER
1325
1303
  WANG
1326
1304
  WANGGOU
1327
- WARMAN
1328
1305
  WATCH
1329
1306
  WATCHES
1330
1307
  WEATHER
@@ -1452,13 +1429,14 @@ XN--MGBA7C0BBN0A
1452
1429
  XN--MGBAAKC7DVF
1453
1430
  XN--MGBAAM7A8H
1454
1431
  XN--MGBAB2BD
1432
+ XN--MGBAH1A3HJKRD
1455
1433
  XN--MGBAI9AZGQP6J
1456
1434
  XN--MGBAYH7GPA
1457
- XN--MGBB9FBPOB
1458
1435
  XN--MGBBH1A
1459
1436
  XN--MGBBH1A71E
1460
1437
  XN--MGBC0A9AZCG
1461
1438
  XN--MGBCA7DZDO
1439
+ XN--MGBCPQ6GPA1A
1462
1440
  XN--MGBERP4A5D4AR
1463
1441
  XN--MGBGU82A
1464
1442
  XN--MGBI4ECEXP
@@ -1484,8 +1462,10 @@ XN--P1AI
1484
1462
  XN--PBT977C
1485
1463
  XN--PGBS0DH
1486
1464
  XN--PSSY2U
1465
+ XN--Q7CE6A
1487
1466
  XN--Q9JYB4C
1488
1467
  XN--QCKA1PMC
1468
+ XN--QXA6A
1489
1469
  XN--QXAM
1490
1470
  XN--RHQV96G
1491
1471
  XN--ROVU88B
@@ -1530,7 +1510,6 @@ ZAPPOS
1530
1510
  ZARA
1531
1511
  ZERO
1532
1512
  ZIP
1533
- ZIPPO
1534
1513
  ZM
1535
1514
  ZONE
1536
1515
  ZUERICH
@@ -18,7 +18,7 @@ class Wmap::CidrTracker
18
18
  @verbose=params.fetch(:verbose, false)
19
19
  @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
20
20
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
21
- @cidr_seeds=params.fetch(:cidr_seeds, @data_dir + 'cidrs')
21
+ @cidr_seeds=params.fetch(:cidr_seeds, @data_dir + '/' + 'cidrs')
22
22
  File.write(@cidr_seeds, "") unless File.exist?(@cidr_seeds)
23
23
  load_cidr_blks_from_file(@cidr_seeds)
24
24
  end
@@ -99,8 +99,8 @@ class Wmap::CidrTracker
99
99
  #@known_cidr_blks_asce_index=NetAddr.sort(@known_cidr_blks.keys, :Desc=>false)
100
100
  @known_cidr_blks_asce_index=@known_cidr_blks.keys.sort
101
101
  @known_cidr_blks_desc_index=@known_cidr_blks_asce_index.reverse
102
- #rescue => ee
103
- # puts "Exception on method #{__method__}: #{ee}" # if @verbose
102
+ rescue => ee
103
+ puts "Exception on method #{__method__}: #{ee}" # if @verbose
104
104
  end
105
105
 
106
106
  # 'setter' to remove an entry from the CIDR store @known_cidr_blks
@@ -167,6 +167,7 @@ class Wmap::CidrTracker
167
167
  known = cidr4.contains?(ip+'/32')
168
168
  break if known
169
169
  end
170
+ return known
170
171
  rescue => ee
171
172
  puts "Exception on method #{__method__}: #{ee}" if @verbose
172
173
  return false
@@ -169,7 +169,7 @@ class Wmap::DomainTracker
169
169
  end
170
170
  end
171
171
  @known_internet_domains.merge!(results)
172
- puts "Done loading entries."
172
+ puts "Done loading domain entries."
173
173
  return results
174
174
  else
175
175
  puts "Error: no entry is loaded. Please check your list and try again."
@@ -262,7 +262,7 @@ class Wmap::DomainTracker
262
262
  when "Wmap::DomainTracker::SubDomain"
263
263
  return @known_internet_sub_domains.key?(domain)
264
264
  else
265
- return nil
265
+ return false
266
266
  end
267
267
  rescue => ee
268
268
  puts "Exception on method #{__method__}: #{ee}" if @verbose
@@ -70,7 +70,7 @@ class SubDomain < Wmap::DomainTracker
70
70
  end
71
71
  end
72
72
  @known_internet_sub_domains.merge!(results)
73
- puts "Done loading entries."
73
+ puts "Done loading sub_domain entries."
74
74
  return results
75
75
  else
76
76
  puts "Error: no entry is loaded. Please check your list and try again."
@@ -27,13 +27,13 @@ class Wmap::HostTracker
27
27
  @max_parallel=params.fetch(:max_parallel, 40)
28
28
  # Initialize the instance variables
29
29
  File.write(@hosts_file, "") unless File.exist?(@hosts_file)
30
- load_known_hosts_from_file(@hosts_file)
30
+ @known_hosts=load_known_hosts_from_file(@hosts_file)
31
31
  end
32
32
 
33
33
  # Setter to load the known hosts from the local hosts file into a class instance
34
34
  def load_known_hosts_from_file (f_hosts=@hosts_file)
35
35
  puts "Loading local hosts from file: #{f_hosts} ..." if @verbose
36
- @known_hosts=Hash.new
36
+ known_hosts=Hash.new
37
37
  @alias = Hash.new
38
38
  File.write(f_hosts, "") unless File.exist?(f_hosts)
39
39
  f=File.open(f_hosts, 'r')
@@ -43,11 +43,11 @@ class Wmap::HostTracker
43
43
  key=entry[0].downcase
44
44
  value=entry[1]
45
45
  puts "Loading key value pair: #{key} - #{value}" if @verbose
46
- @known_hosts[key] = Hash.new unless @known_hosts.key?(key)
47
- @known_hosts[key]= value
46
+ known_hosts[key] = Hash.new unless known_hosts.key?(key)
47
+ known_hosts[key]= value
48
48
  # For reverse host lookup
49
- @known_hosts[value] = Hash.new unless @known_hosts.key?(value)
50
- @known_hosts[value] = key
49
+ known_hosts[value] = Hash.new unless known_hosts.key?(value)
50
+ known_hosts[value] = key
51
51
  # Count the number of alias for the recorded IP
52
52
  if @alias.key?(value)
53
53
  @alias[value]+=1
@@ -56,11 +56,12 @@ class Wmap::HostTracker
56
56
  end
57
57
  end
58
58
  f.close
59
- return @known_hosts
60
- rescue => ee
61
- puts "Exception on method #{__method__}: #{ee}"
62
59
  return known_hosts
60
+ #rescue => ee
61
+ # puts "Exception on method #{__method__}: #{ee}"
62
+ # return known_hosts
63
63
  end
64
+ alias_method :load, :load_known_hosts_from_file
64
65
 
65
66
  # Save the current local hosts hash table into a (random) data repository file
66
67
  def save_known_hosts_to_file!(f_hosts=@hosts_file)
@@ -96,30 +97,42 @@ class Wmap::HostTracker
96
97
  puts "Exception on method #{__method__}: #{ee}"
97
98
  end
98
99
 
100
+ # determine if host is part of trusted (known) root domains
101
+ def is_trusted?(host)
102
+ puts "Determin if host #{host} is part of trusted root domains" if @verbose
103
+ root=get_domain_root(host)
104
+ puts "Domain root: #{root}" if @verbose
105
+ domain_tracker=Wmap::DomainTracker.instance
106
+ domain_tracker.data_dir=@data_dir
107
+ domain_tracker.domains_file = domain_tracker.data_dir + "/" + "domains"
108
+ domain_tracker.load_domains_from_file
109
+ if domain_tracker.domain_known?(root)
110
+ domain_tracker=nil
111
+ return true
112
+ else
113
+ domain_tracker=nil
114
+ return false
115
+ end
116
+ end
117
+
99
118
  # Setter to add host entry to the cache one at a time
100
119
  def add(host)
101
120
  puts "Add entry to the local host repository: #{host}"
102
121
  host=host.strip.downcase unless host.nil?
122
+ root=get_domain_root(host)
103
123
  unless @known_hosts.key?(host)
104
124
  ip=host_2_ip(host)
105
125
  record=Hash.new
106
126
  if is_ip?(ip)
107
127
  # filter host to known domains only
108
- root=get_domain_root(host)
109
- puts "Domain root: #{root}" if @verbose
110
- domain_tracker=Wmap::DomainTracker.instance
111
- domain_tracker.data_dir=@data_dir
112
- domain_tracker.domains_file = domain_tracker.data_dir + "domains"
113
- domain_tracker.load_domains_from_file
114
- if domain_tracker.domain_known?(root)
115
- domain_tracker=nil
128
+ if is_trusted?(host)
116
129
  record[host]=ip
117
130
  record[ip]=host
118
131
  puts "Host data repository entry loaded: #{host} <=> #{ip}"
119
132
  # Replace instance with the class variable to avoid potential race condition under parallel engine
120
133
  # add additional logic to update the sub-domain table as well, 02/10/2014
121
134
  sub=get_sub_domain(host)
122
- if sub!=root
135
+ if sub!=nil
123
136
  tracker=Wmap::DomainTracker::SubDomain.instance
124
137
  tracker.data_dir=@data_dir
125
138
  tracker.sub_domains_file = tracker.data_dir + "sub_domains"
@@ -142,8 +155,8 @@ class Wmap::HostTracker
142
155
  else
143
156
  puts "Host is already exist. Skip: #{host}"
144
157
  end
145
- rescue => ee
146
- puts "Exception on method #{__method__}: #{ee}" if @verbose
158
+ #rescue => ee
159
+ # puts "Exception on method #{__method__}: #{ee}" if @verbose
147
160
  end
148
161
 
149
162
  # Setter to add host entry to the local hosts in batch (from an array)
@@ -164,7 +177,7 @@ class Wmap::HostTracker
164
177
  end
165
178
  end
166
179
  @known_hosts.merge!(results)
167
- puts "Done loading entries."
180
+ puts "Done loading host entries."
168
181
  return results
169
182
  else
170
183
  puts "Error: empty list - no entry is loaded. Please check your input list and try again."
@@ -77,7 +77,9 @@ class Wmap::SiteTracker
77
77
  f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
78
78
  f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
79
79
  @known_sites.keys.sort.map do |key|
80
- f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
80
+ if is_trusted?(key)
81
+ f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
82
+ end
81
83
  end
82
84
  f.close
83
85
  puts "site store table is successfully saved: #{file_sites}"
@@ -94,6 +96,24 @@ class Wmap::SiteTracker
94
96
  puts "Exception on method #{__method__}: #{ee}"
95
97
  end
96
98
 
99
+ # determine whether a site is trusted based on the known domains
100
+ def is_trusted?(site)
101
+ trusted=false
102
+ host=url_2_host(site)
103
+ root=get_domain_root(host)
104
+ domain_tracker=Wmap::DomainTracker.instance
105
+ domain_tracker.data_dir=@data_dir
106
+ domain_tracker.domains_file=@data_dir + "/" + "domains"
107
+ File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
108
+ domain_tracker.load_domains_from_file(domain_tracker.domains_file)
109
+ trusted=domain_tracker.domain_known?(root)
110
+ domain_tracker=nil
111
+ return trusted
112
+ rescue => ee
113
+ puts "Exception on method #{__method__}: #{ee}"
114
+ return trusted
115
+ end
116
+
97
117
  # Setter to add site entry to the cache one at a time
98
118
  def add(site)
99
119
  puts "Add entry to the site store: #{site}"
@@ -132,6 +152,10 @@ class Wmap::SiteTracker
132
152
  end
133
153
  end
134
154
  # add record only if trusted
155
+ host_tracker = Wmap::HostTracker.instance
156
+ host_tracker.data_dir= @data_dir
157
+ host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
158
+ host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
135
159
  if trusted
136
160
  # Add logic to check site status before adding it
137
161
  checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
@@ -144,10 +168,6 @@ class Wmap::SiteTracker
144
168
  raise "Site is currently down. Skip #{site}" if checker['code']==10000
145
169
  end
146
170
  raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
147
- host_tracker = Wmap::HostTracker.instance
148
- host_tracker.data_dir= @data_dir
149
- host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
150
- host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
151
171
  # Update the local host table when necessary
152
172
  if is_ip?(host)
153
173
  # Case #1: Trusted site contains IP
@@ -341,8 +361,8 @@ class Wmap::SiteTracker
341
361
  else
342
362
  puts "Error: no entry is loaded. Please check your list and try again."
343
363
  end
344
- #rescue => ee
345
- # puts "Exception on method #{__method__}: #{ee}" if @verbose
364
+ rescue => ee
365
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
346
366
  end
347
367
  alias_method :dels, :bulk_delete
348
368
 
@@ -174,7 +174,7 @@ class WpTracker < Wmap::SiteTracker
174
174
  end
175
175
  end
176
176
  @known_wp_sites.merge!(results)
177
- puts "Done loading entries."
177
+ puts "Done loading wp entries."
178
178
  return results
179
179
  else
180
180
  puts "Error: no entry is loaded. Please check your list and try again."
@@ -88,9 +88,9 @@ class Wmap::UrlChecker
88
88
  checker['redirection']=nil
89
89
  checker['timestamp']=timestamp
90
90
  return checker
91
- rescue Exception => ee
92
- puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
93
- return nil
91
+ #rescue Exception => ee
92
+ # puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
93
+ # return nil
94
94
  end
95
95
  alias_method :check, :url_worker
96
96
 
@@ -17,7 +17,8 @@ require "parallel"
17
17
  class Wmap::UrlCrawler
18
18
  include Wmap::Utils
19
19
 
20
- attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, :verbose, :data_dir
20
+ attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, \
21
+ :verbose, :data_dir, :user_agent
21
22
  attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
22
23
  # Global variable used to store the combined result of all the forked child processes. Note that class variable
23
24
  # would not be able to pass the result due to the limitation of the IO Pipe communication mechanism used by the 'parallel' fork manager
@@ -35,13 +36,16 @@ class Wmap::UrlCrawler
35
36
  @crawl_depth=params.fetch(:crawl_depth, 4)
36
37
  @crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
37
38
  @max_parallel=params.fetch(:max_parallel, 40)
39
+ @user_agent=params.fetch(:user_agent, "OWASP WMAP Spider")
38
40
  # Discovered data store
39
41
  @discovered_urls_by_crawler=Hash.new
40
42
  @visited_urls_by_crawler=Hash.new
41
43
  @crawl_start=Hash.new
42
44
  @crawl_done=Hash.new
43
45
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
44
- @log_file=@data_dir + "/../logs/crawler.log"
46
+ @log_dir=@data_dir + "/logs/"
47
+ Dir.mkdir(@log_dir) unless Dir.exist?(@log_dir)
48
+ @log_file=@log_dir + "crawler.log"
45
49
  end
46
50
 
47
51
  # Pre-crawl profiler, to be used for network profiling to maximize the crawler performance.
@@ -86,7 +90,7 @@ class Wmap::UrlCrawler
86
90
 
87
91
  # The worker instance of the crawler, which performs the actual crawling work
88
92
  def crawl_worker(url0)
89
- puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
93
+ puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and discovery contents."
90
94
  # Input URL sanity check first
91
95
  if is_url?(url0)
92
96
  host=url_2_host(url0)
@@ -216,14 +220,14 @@ class Wmap::UrlCrawler
216
220
  alias_method :crawl_file, :crawl_workers_on_file
217
221
 
218
222
  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
219
- def open_url(url)
223
+ def open_url(url,user_agent=@user_agent)
220
224
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
221
225
  if url =~ /http\:/i
222
226
  # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
223
- url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
227
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
224
228
  #url_object = open(url)
225
229
  elsif url =~ /https\:/i
226
- url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
230
+ url_object = open(url, :ssl_verify_mode=>0, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
227
231
  #url_object = open(url,:ssl_verify_mode => 0)
228
232
  else
229
233
  raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
@@ -258,22 +262,6 @@ class Wmap::UrlCrawler
258
262
  return nil
259
263
  end
260
264
 
261
- =begin
262
- # Wrapper for the Nokogiri DOM parser
263
- def parse_html(html_body)
264
- begin
265
- #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
266
- doc = Nokogiri::HTML(html_body)
267
- #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
268
- #puts "doc: #{doc}" if @verbose
269
- return doc
270
- rescue => ee
271
- puts "Exception on method #{__method__}: #{ee}" if @verbose
272
- return nil
273
- end
274
- end
275
- =end
276
-
277
265
  # Search 'current_url' and return found URLs under the same domain
278
266
  def find_urls_on_page(doc, current_url)
279
267
  puts "Search and return URLs within the doc: #{doc}" if @verbose
@@ -119,7 +119,7 @@ module Wmap
119
119
  end
120
120
  end
121
121
  @tag_store.merge!(results)
122
- puts "Done loading entries."
122
+ puts "Done loading adware entries."
123
123
  tags = nil
124
124
  return results
125
125
  else
@@ -192,7 +192,7 @@ module Wmap
192
192
  # Function to print instance variable - General top level domain list
193
193
  def print_gtld
194
194
  puts @gtld
195
- return @gtld
195
+ return @gtld
196
196
  end
197
197
 
198
198
  # Function to print instance variable - Country code top-level domain list
@@ -8,46 +8,43 @@
8
8
 
9
9
 
10
10
  module Wmap
11
- module Utils
11
+ module Utils
12
12
  # Module to log debugging and other messages
13
- module Logger
13
+ module Logger
14
14
  extend self
15
15
  # Append information into the log file for the trouble-shooting purpose
16
16
  def wlog (obj, agent, file)
17
17
  puts "Writing #{obj} into log file: #{file}" if @verbose
18
- begin
19
- return false if obj.nil?
20
- # 01/27/2015, implementing singleton pattern for the logger
21
- @@f=File.open(file,'a')
22
- timestamp=Time.now
23
- case obj
24
- when Array
25
- if obj.size >= 0
26
- @@f.write "#{timestamp}: #{agent}: \n"
27
- obj.map { |x| @@f.write " #{x}\n" }
28
- puts "The list is successfully saved into the log file: #{file} " if @verbose
29
- end
30
- when Hash
31
- if obj.length >= 0
32
- @@f.write "#{timestamp}: #{agent}: \n"
33
- obj.each_value { |value| @@f.write " #{value}\n" }
34
- puts "The hash is successfully saved into the log file: #{file} " if @verbose
35
- end
36
- when String
37
- @@f.write "#{timestamp}: #{agent}: #{obj}\n"
38
- puts "The string is successfully saved into the log file: #{file} " if @verbose
39
- else
40
- #do nothing
41
- puts "Un-handled exception on: #{obj}" if @verbose
18
+ return false if obj.nil?
19
+ @@f=File.open(file,'a')
20
+ timestamp=Time.now
21
+ case obj
22
+ when Array
23
+ if obj.size >= 0
24
+ @@f.write "#{timestamp}: #{agent}: \n"
25
+ obj.map { |x| @@f.write " #{x}\n" }
26
+ puts "The list is successfully saved into the log file: #{file} " if @verbose
42
27
  end
43
- @@f.close
44
- return true
45
- rescue => ee
46
- puts "Exception on method #{__method__}: #{ee}" if @verbose
47
- return false
48
- end
28
+ when Hash
29
+ if obj.length >= 0
30
+ @@f.write "#{timestamp}: #{agent}: \n"
31
+ obj.each_value { |value| @@f.write " #{value}\n" }
32
+ puts "The hash is successfully saved into the log file: #{file} " if @verbose
33
+ end
34
+ when String
35
+ @@f.write "#{timestamp}: #{agent}: #{obj}\n"
36
+ puts "The string is successfully saved into the log file: #{file} " if @verbose
37
+ else
38
+ #do nothing
39
+ puts "Un-handled exception on: #{obj}" if @verbose
40
+ end
41
+ @@f.close
42
+ return true
43
+ rescue => ee
44
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
45
+ return false
49
46
  end
50
-
51
- end
47
+
48
+ end
52
49
  end
53
50
  end
@@ -15,6 +15,7 @@ module Wmap
15
15
 
16
16
  # set hard stop limit of http time-out to 15 seconds (15000 ms), in order to avoid severe performance penalty for certain 'weird' site(s)
17
17
  Max_http_timeout=15000
18
+ User_agent = "OWASP WMAP Spider"
18
19
 
19
20
  # Simple sanity check on a 'claimed' URL string.
20
21
  def is_url?(url)
@@ -377,7 +378,8 @@ module Wmap
377
378
 
378
379
  # Given a URL, open the page, then return the DOM text from a normal user perspective
379
380
  def open_page(url)
380
- args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, read_timeout: Max_http_timeout/1000}
381
+ args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, \
382
+ read_timeout: Max_http_timeout/1000, "User-Agent"=>User_agent}
381
383
  doc = Nokogiri::HTML(open(url, args))
382
384
  if doc.text.include?("Please enable JavaScript to view the page content")
383
385
  puts "Invoke headless chrome through webdriver ..." if @verbose
@@ -385,7 +387,7 @@ module Wmap
385
387
  #driver = Selenium::WebDriver.for :chrome
386
388
  # http://watir.com/guides/chrome/
387
389
  args = ['--ignore-certificate-errors', '--disable-popup-blocking', '--disable-translate', '--disk-cache-size 8192']
388
- browser = Watir::Browser.new :chrome, headless: true, options: {args: args}
390
+ browser = Watir::Browser.new :chrome, headless: true, switches: %w[--user-agent=OWASP\ WMAP\ Spider]
389
391
  browser.goto(url)
390
392
  sleep(2) # wait for the loading
391
393
  doc = Nokogiri::HTML(browser.html)
@@ -159,20 +159,18 @@ module Wmap
159
159
  # Simple test a host string format. Return true if it contains a valid internet domain sub-string. Note: Don't be confused with another method 'valid_dns_record?', which is a stricter and time-consuming test on the DNS server for a resolvable internet host.
160
160
  def is_fqdn? (host)
161
161
  puts "Validate the host-name format is valid: #{host}" if @verbose
162
- begin
163
- return false if is_ip?(host) or is_url?(host)
164
- domain=get_domain_root(host)
165
- if domain.nil?
166
- return false
167
- elsif is_domain_root?(domain)
168
- return true
169
- else
170
- return false
171
- end
172
- rescue => ee
173
- puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
162
+ return false if is_ip?(host) or is_url?(host)
163
+ domain=get_domain_root(host)
164
+ if domain.nil?
165
+ return false
166
+ elsif is_domain_root?(domain)
167
+ return true
168
+ else
174
169
  return false
175
170
  end
171
+ # rescue => ee
172
+ # puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
173
+ # return false
176
174
  end
177
175
  alias_method :is_host?, :is_fqdn?
178
176
 
@@ -239,7 +239,11 @@ module Wmap
239
239
  if tag.to_s.include?(pattern)
240
240
  puts tag.to_s if @verbose
241
241
  k=nil
242
- return tag.to_s.scan(/[\d+\.]+\d+/).first
242
+ if tag.to_s.scan(/[\d+\.]+\d+/).first =~ /\d+\./
243
+ return tag.to_s.scan(/[\d+\.]+\d+/).first
244
+ else
245
+ return nil
246
+ end
243
247
  end
244
248
  end
245
249
  end
@@ -0,0 +1,36 @@
1
+ #--
2
+ # Wmap
3
+ #
4
+ # A pure Ruby library for the Internet web application discovery and tracking.
5
+ #
6
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
7
+ #++
8
+ # Unit Test File for Wmap::CidrTracker class
9
+
10
+ require "minitest/autorun"
11
+ require "Wmap"
12
+
13
+ class CidrTrackerTest < MiniTest::Unit::TestCase
14
+ include Wmap::Utils
15
+
16
+ def test_cidr_add
17
+ w = Wmap::CidrTracker.new
18
+ w.add("192.168.1.0/24")
19
+ assert_equal true, w.known_cidr_blks.key?("192.168.1.0/24")
20
+ end
21
+
22
+ def test_cidr_delete
23
+ w = Wmap::CidrTracker.new
24
+ w.add("10.0.0.0/8")
25
+ w.delete("10.0.0.0/8")
26
+ assert_equal false, w.known_cidr_blks.key?("10.0.0.0/8")
27
+ end
28
+
29
+ def test_ip_trusted?
30
+ w = Wmap::CidrTracker.new
31
+ w.add("192.168.1.0/24")
32
+ assert_equal true, w.ip_trusted?("192.168.1.1")
33
+ assert_equal true, w.ip_trusted?("192.168.1.255")
34
+ end
35
+
36
+ end
data/test/utils_test.rb CHANGED
@@ -12,7 +12,7 @@ require "Wmap"
12
12
 
13
13
  class UtilsTest < MiniTest::Unit::TestCase
14
14
  include Wmap::Utils
15
-
15
+
16
16
  def test_sld_domain_conversion
17
17
  assert_equal "yahoo.com", get_domain_root("yahoo.com")
18
18
  end
@@ -28,75 +28,75 @@ class UtilsTest < MiniTest::Unit::TestCase
28
28
  def test_is_domain_root_case_1?
29
29
  assert_equal false, is_domain_root?("www.yahoo.co.uk")
30
30
  end
31
-
31
+
32
32
  def test_is_domain_root_case_2?
33
33
  assert_equal true, is_domain_root?("yahoo.co.uk")
34
34
  end
35
-
35
+
36
36
  def test_get_sub_domain
37
37
  assert_equal "mail.yahoo.co.uk", get_sub_domain("www.mail.yahoo.co.uk")
38
38
  end
39
39
 
40
40
  def test_is_url_case_1?
41
41
  assert_equal true, is_url?("http://www.mail.yahoo.co.uk/")
42
- end
42
+ end
43
43
 
44
44
  def test_is_url_case_2?
45
45
  assert_equal true, is_url?("https://www.mail.yahoo.co.uk/")
46
- end
46
+ end
47
47
 
48
48
  def test_is_url_case_3?
49
49
  assert_equal false, is_url?("http://www.mail.yahoo.uii/")
50
- end
50
+ end
51
51
 
52
52
  def test_is_url_case_4?
53
53
  assert_equal false, is_url?("http:\\www.mail.yahoo.co.uk")
54
- end
55
-
54
+ end
55
+
56
56
  def test_is_ssl?
57
57
  assert_equal false, is_ssl?("http://www.mail.yahoo.co.uk/")
58
- end
59
-
58
+ end
59
+
60
60
  def test_is_site?
61
61
  assert_equal false, is_site?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
62
- end
63
-
62
+ end
63
+
64
64
  def test_url_2_host
65
65
  assert_equal "login.yahoo.com", url_2_host("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
66
- end
67
-
66
+ end
67
+
68
68
  def test_url_2_site_case_1
69
69
  assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
70
- end
70
+ end
71
71
 
72
72
  def test_url_2_site_case_2
73
73
  assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
74
- end
74
+ end
75
75
 
76
76
  def test_url_2_site_case_3
77
77
  assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com#.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
78
- end
79
-
78
+ end
79
+
80
80
  def test_url_2_path
81
81
  assert_equal "/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", url_2_path("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
82
- end
82
+ end
83
83
 
84
84
  def test_urls_on_same_domain?
85
85
  assert_equal true, urls_on_same_domain?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", "https://us-mg4.mail.yahoo.com/neo/launch?.rand=8hjd08hc6t1lq")
86
- end
86
+ end
87
87
 
88
88
  def test_host_2_url_case_1
89
89
  assert_equal "https://mail.yahoo.com/", host_2_url("mail.yahoo.com",443)
90
- end
90
+ end
91
91
 
92
92
  def test_host_2_url_case_2
93
93
  assert_equal "http://mail.yahoo.com/", host_2_url("mail.yahoo.com")
94
- end
95
-
94
+ end
95
+
96
96
  def test_make_absolute
97
97
  assert_equal "http://games.yahoo.com/game/the-magic-snowman-flash.html", make_absolute("http://games.yahoo.com/","game/the-magic-snowman-flash.html")
98
98
  end
99
-
99
+
100
100
  def test_create_absolute_url_from_base
101
101
  assert_equal "http://images.search.yahoo.com/search/images?p=raiders", create_absolute_url_from_base("http://images.search.yahoo.com/images","/search/images?p=raiders")
102
102
  end
@@ -108,7 +108,7 @@ class UtilsTest < MiniTest::Unit::TestCase
108
108
  def test_normalize_url_case_1
109
109
  assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/./images/search/images?p=raiders")
110
110
  end
111
-
111
+
112
112
  def test_normalize_url_case_2
113
113
  assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/../images/../search/images?p=raiders")
114
114
  end
@@ -116,53 +116,58 @@ class UtilsTest < MiniTest::Unit::TestCase
116
116
  def test_normalize_url_case_3
117
117
  assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com./../images/../search/images?p=raiders")
118
118
  end
119
-
119
+
120
120
  def test_is_ip_case_1?
121
121
  assert_equal false, is_ip?("256.2.3.1")
122
- end
122
+ end
123
123
 
124
124
  def test_is_ip_case_2?
125
125
  assert_equal false, is_ip?("25.2.3.1.22")
126
- end
126
+ end
127
127
 
128
128
  def test_is_ip_case_3?
129
129
  assert_equal true, is_ip?("196.168.230.1")
130
- end
130
+ end
131
131
 
132
132
  def test_is_fqdn_case_1?
133
133
  assert_equal true, is_fqdn?("images.search.yahoo.com")
134
- end
134
+ end
135
135
 
136
136
  def test_is_fqdn_case_2?
137
137
  assert_equal true, is_fqdn?("yahoo.com")
138
- end
139
-
138
+ end
139
+
140
140
  def test_is_fqdn_case_3?
141
- assert_equal false, is_fqdn?("images.search.yahoo")
142
- end
143
-
141
+ # according to latest tlds list - http://data.iana.org/TLD/tlds-alpha-by-domain.txt
142
+ assert_equal true, is_fqdn?("images.search.yahoo")
143
+ end
144
+
144
145
  def test_is_fqdn_case_4?
145
146
  assert_equal false, is_fqdn?("images")
146
- end
147
-
147
+ end
148
+
149
+ def test_is_fqdn_case_5?
150
+ assert_equal false, is_fqdn?("images.search.gargle")
151
+ end
152
+
148
153
  def test_is_cidr_case_1?
149
154
  assert_equal false, is_cidr?("196.168.230.1")
150
- end
155
+ end
151
156
 
152
157
  def test_is_cidr_case_2?
153
158
  assert_equal false, is_cidr?("196.168.2.257/12")
154
- end
155
-
159
+ end
160
+
156
161
  def test_is_cidr_case_3?
157
162
  assert_equal true, is_cidr?("196.168.2.25/12")
158
- end
159
-
163
+ end
164
+
160
165
  def test_cidr_2_ips
161
166
  assert_equal ["192.168.1.1"], cidr_2_ips("192.168.1.1/32")
162
- end
163
-
167
+ end
168
+
164
169
  def test_sort_ips
165
170
  assert_equal ["192.168.1.1", "192.168.1.2", "192.168.2.1"], sort_ips(["192.168.1.2", "192.168.2.1","192.168.1.1"])
166
- end
167
-
171
+ end
172
+
168
173
  end
data/version.txt CHANGED
@@ -3,8 +3,8 @@
3
3
  ###############################################################################
4
4
  package = wmap
5
5
  # wmap version 2.0 == web_discovery version 1.5.3
6
- version = 2.7.7
7
- date = 2020-03-24
6
+ version = 2.8.3
7
+ date = 2021-07-26
8
8
 
9
9
  author = Sam (Yang) Li
10
10
  email = yang.li@owasp.org
data/wmap.gemspec CHANGED
@@ -36,7 +36,7 @@ Gem::Specification.new do |s|
36
36
  s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
37
37
  s.email = info["email"]
38
38
  s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","trusts","distrust","run_tests"]
39
- s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","data/","LICENSE.txt",
39
+ s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports", "LICENSE.txt",
40
40
  "version.txt","README.md", "wmap.gemspec"]
41
41
  s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['dicts/*']
42
42
  #s.homepage = "none"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.7
4
+ version: 2.8.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam (Yang) Li
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-24 00:00:00.000000000 Z
11
+ date: 2021-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dnsruby
@@ -234,7 +234,6 @@ files:
234
234
  - LICENSE.txt
235
235
  - README.md
236
236
  - TODO
237
- - bin/RHPG
238
237
  - bin/deprime
239
238
  - bin/distrust
240
239
  - bin/googleBot
@@ -251,7 +250,6 @@ files:
251
250
  - bin/wdel
252
251
  - bin/wdump
253
252
  - bin/wmap
254
- - bin/wmaps
255
253
  - bin/wscan
256
254
  - demos/bruter.rb
257
255
  - demos/dns_brutes.rb
@@ -308,7 +306,7 @@ files:
308
306
  - settings/discovery_ports
309
307
  - settings/google_keywords.txt
310
308
  - settings/google_locator.txt
311
- - settings/tag_signatures
309
+ - test/cidr_tracker_test.rb
312
310
  - test/domain_tracker_test.rb
313
311
  - test/utils_test.rb
314
312
  - version.txt
@@ -335,9 +333,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
335
333
  - !ruby/object:Gem::Version
336
334
  version: '0'
337
335
  requirements: []
338
- rubyforge_project: wmap
339
- rubygems_version: 2.7.10
340
- signing_key:
336
+ rubygems_version: 3.0.9
337
+ signing_key:
341
338
  specification_version: 4
342
339
  summary: A pure Ruby web application and service discovery API.
343
340
  test_files: []
data/bin/RHPG DELETED
@@ -1,107 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # Executable to lookup then merge site tech details into the RHPG asset spreadsheet in CSV format only
3
- #
4
- ## Usage: RHPG [RHPG.csv]
5
- require "wmap"
6
- require "csv"
7
- include Wmap::Utils
8
-
9
- def print_usage
10
- puts "Program to lookup then merge the site details into RHPG asset spreadsheet. \nUsage: RHPG [RHPG.csv]"
11
- end
12
-
13
- # Lookup the site store for a domain; then return the fingger print info of the site
14
- def site_tracker_lookup(domain)
15
- tracker=Wmap::SiteTracker.instance
16
- tracker.verbose=false
17
- #first order search
18
- tracker.known_sites.each do |key,val|
19
- if key.include?(domain.strip.downcase) && key.include?("https")
20
- tracker=nil
21
- return [key] + val.values
22
- end
23
- end
24
- #second order search
25
- tracker.known_sites.each do |key,val|
26
- if key.include?(domain.strip.downcase)
27
- tracker=nil
28
- return [key] + val.values
29
- end
30
- end
31
- tracker=nil
32
- return [nil]*9
33
- end
34
-
35
- # look up the wp site data store for a domain; then return the wp finger print info: [is_wp?,wp_ver]
36
- def wp_tracker_lookup(domain)
37
- tracker=Wmap::WpTracker.new(:verbose=>false)
38
- # first order
39
- tracker.known_wp_sites.each do |key,val|
40
- if key.include?(domain.strip.downcase) && val
41
- ver=tracker.wp_ver(key)
42
- tracker=nil
43
- return [val,ver]
44
- end
45
- end
46
- # second order
47
- tracker.known_wp_sites.each do |key,val|
48
- if key.include?(domain.strip.downcase) && key.include?("https") && val
49
- tracker=nil
50
- return [val,nil]
51
- end
52
- end
53
- # third order
54
- tracker.known_wp_sites.each do |key,val|
55
- if key.include?(domain.strip.downcase)
56
- tracker=nil
57
- return [val,nil]
58
- end
59
- end
60
- tracker=nil
61
- return [nil,nil]
62
- end
63
-
64
- # perform the wpscan on a site
65
- def wpscan(domain)
66
- url=site_tracker_lookup(domain)[0]
67
- return nil if url.nil?
68
- if url.include?("https")
69
- command="wpscan --disable-tls-checks --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
70
- else
71
- command="wpscan --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
72
- end
73
- system(command)
74
- end
75
-
76
- puts Wmap.banner
77
- print_usage
78
-
79
- # open output file to write
80
- CSV.open("output.csv", "wb") do |csv|
81
- cnt=1
82
- # open RHPG input file to read
83
- CSV.foreach(ARGV[0]) do |row|
84
- puts "Processing row #{cnt}"
85
- #puts row.inspect
86
- my_row=Array.new
87
- if cnt > 1
88
- if is_domain?(row[0])
89
- =begin
90
- if row[3] =~ /Keep/i && row[3] != /Redirect/i
91
- unless File.exist?(row[0]+".wpscan")
92
- wpscan(row[0])
93
- end
94
- end
95
- =end
96
- my_row = row + site_tracker_lookup(row[0]) + wp_tracker_lookup(row[0])
97
- else
98
- my_row = row + [nil]*10
99
- end
100
- else
101
- my_row = row + ["Website","Primary IP","Port","Hosting Status","Server","Response Code","MD5 Finger-print","Redirection","Timestamp", "WordPress", "WordPress Version"]
102
- end
103
- cnt+=1
104
- csv << my_row
105
- end
106
- puts "All done. "
107
- end
data/bin/wmaps DELETED
@@ -1,23 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # script to automate the new site discovery through by crawling all unique sites in the site store
3
- require "wmap"
4
- require "parallel"
5
-
6
- def wmap_worker(domain)
7
- cmd = "wmap " + domain
8
- puts "wmap discovery on domain: ", domain
9
- system(cmd)
10
- end
11
-
12
-
13
- tracker=Wmap::DomainTracker.instance
14
- Parallel.map(tracker.known_internet_domains.keys, :in_processes => 10) { |target|
15
- puts "Working on #{target} ..." if @verbose
16
- wmap_worker(target)
17
- }
18
- =begin
19
- tracker.known_internet_domains.keys.map do |domain|
20
- wmap_worker(domain)
21
- end
22
- =end
23
- tracker=nil
@@ -1,6 +0,0 @@
1
- # Adware signature file: signture string, description
2
- gtag.js, Google / DoubleClick Floodlight Tag
3
- analytics.js, Google Universal Analytics Tag
4
- ga.js, Google Analytics Tag
5
- utag.js, Tealium Tag
6
- all.js, Facebook Tag