wmap 2.7.7 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 559cac84dd38902d968cc9e7327e77115ae3f946020caa21b7743ceb5777a96f
4
- data.tar.gz: a4dcc0eafc10d7497c47c1f9955774b880ae5cdafa5ed0c6904ef5362087bd98
3
+ metadata.gz: a6632168c88c35189b601d225ebbf99fddd034a561814e5fe34b2d57bb79c75c
4
+ data.tar.gz: 4beab7d92e6a5e4258d37dfa52a64f4edaf06d8c4213331cc9833d0be5cc70aa
5
5
  SHA512:
6
- metadata.gz: 0b430ed1da47cefd8cb8a7bedddd75ed2e7b1dafbfe94cdb2185ad2eb7e26d69a1429e79a9ece6b3cc68d6d964161f092e1143a00c0c64e241e930304d9e5a65
7
- data.tar.gz: fe4d50b292849e51c202f47083c0837228d11934eb0d71c5a6262da24467480ce26994f5db91bdae5cf3b52dc0e6ef4fef698396575e820779c7420c731d0d87
6
+ metadata.gz: 46482f94126bc1ad4af322cb23894e77ee5a0f85dc31741e25b75752be4a47b1a620c3d1a6f28786a2f53e37eecde42b571f011e770e4bacdaf00012d737ea9e
7
+ data.tar.gz: b5ed0e1af39d8b7910b0d77b5bde99461293a7e985d5e587405bbf929cae8bffa5a32a1cf31eb142f823a11057377537adecf9ee6fb7f475fb2252e9d3edc2b1
data/bin/wmap CHANGED
@@ -13,7 +13,7 @@ parser = OptionParser.new do|opts|
13
13
  opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
14
14
  options[:data_dir] = data_dir;
15
15
  end
16
- opts.on('-t', '--target target', 'Web Mapper target') do |target|
16
+ opts.on('-t', '--target target', 'Web Mapper target / seed for discovery') do |target|
17
17
  options[:target] = target;
18
18
  end
19
19
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
@@ -29,6 +29,10 @@ parser.parse!
29
29
  # print program banner
30
30
  puts Wmap.banner
31
31
  # print_usage unless options[:target]
32
+ unless options[:target]
33
+ puts "Usage: $ wmap -h"
34
+ exit 1
35
+ end
32
36
 
33
37
  # Preparing - check out the working logs directory
34
38
  if options[:data_dir]
@@ -129,6 +133,10 @@ Wmap.wlog(dis_urls.keys, "wmap", Log_dir+"discovered_urls.log") unless dis_urls.
129
133
  Wmap.wlog(dis_sites.keys, "wmap", Log_dir+"discovered_sites.log") unless dis_sites.empty?
130
134
  #crawler.wlog(c_start.keys,Log_dir+"crawler.log")
131
135
  #crawler.wlog(c_done.keys,Log_dir+"crawler.log")
136
+
137
+
138
+ # Save only the current discovery URLs to a specific file, patched 07/23/2021
139
+ crawler.save_discovered_urls(Log_dir+"cur_urls.log")
132
140
  crawler=nil
133
141
 
134
142
 
@@ -138,22 +146,23 @@ when nil,[]
138
146
  puts "No new site found. There is no change to the site tracking data repository. "
139
147
  else
140
148
  puts "Automatically save the discovery results into the site tracking data repository: "
149
+ inventory = Wmap::SiteTracker.instance
141
150
  if options[:target] && options[:data_dir]
142
151
  puts "Start the SiteTracker with the optional directory setter. "
143
- inventory=Wmap::SiteTracker.instance
144
152
  inventory.data_dir = options[:data_dir]
153
+ #inventory.verbose = true
145
154
  inventory.sites_file = inventory.data_dir + "/" + "sites"
146
155
  inventory.load_site_stores_from_file(inventory.sites_file)
147
156
  elsif options[:target]
148
157
  puts "Start the SiteTracker. "
149
- inventory=Wmap::SiteTracker.instance
150
158
  else
151
159
  abort "Error firing up SiteTracker instance!"
152
160
  end
153
161
  new_sites=inventory.adds(dis_sites.keys-["",nil])
154
- if new_sites.size>0 && options[:data_dir]
162
+ puts "Newly discovery sties: #{new_sites}"
163
+ if options[:data_dir]
155
164
  inventory.save!(inventory.sites_file)
156
- elsif new_sites.size>0
165
+ else
157
166
  inventory.save!
158
167
  end
159
168
  inventory=nil
@@ -162,17 +171,17 @@ end
162
171
 
163
172
 
164
173
  # seventh step - update the hosts repository
174
+ puts "Invoke the HostTracker with optional directory setter."
175
+ host_tracker = Wmap::HostTracker.instance
165
176
  if options[:target] && options[:data_dir]
166
- puts "Invoke the HostTracker with optional directory setter."
167
- host_tracker = Wmap::HostTracker.instance
177
+ puts puts "Invoke the HostTracker with options: #{options[:data_dir]}, #{options[:target]}"
168
178
  host_tracker.verbose=options[:verbose]
169
179
  host_tracker.data_dir = options[:data_dir]
170
180
  host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
171
181
  host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
172
182
  elsif options[:target]
173
- puts puts "Invoke the HostTracker."
174
- host_tracker = Wmap::HostTracker.instance
175
- host_tracker.verbose=options[:verbose]
183
+ puts puts "Invoke the HostTracker with option: #{options[:target]}."
184
+ #host_tracker.verbose=options[:verbose]
176
185
  else
177
186
  abort "Error firing up HostTracker instance!"
178
187
  end
data/dicts/tlds.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # http://data.iana.org/TLD/tlds-alpha-by-domain.txt
2
- # Version 2018110500, Last Updated Mon Nov 5 07:07:02 2018 UTC
2
+ # Version 2020033000, Last Updated Mon Mar 30 07:07:01 2020 UTC
3
3
  AAA
4
4
  AARP
5
5
  ABARTH
@@ -16,7 +16,6 @@ ACCENTURE
16
16
  ACCOUNTANT
17
17
  ACCOUNTANTS
18
18
  ACO
19
- ACTIVE
20
19
  ACTOR
21
20
  AD
22
21
  ADAC
@@ -146,7 +145,6 @@ BIZ
146
145
  BJ
147
146
  BLACK
148
147
  BLACKFRIDAY
149
- BLANCO
150
148
  BLOCKBUSTER
151
149
  BLOG
152
150
  BLOOMBERG
@@ -155,7 +153,6 @@ BM
155
153
  BMS
156
154
  BMW
157
155
  BN
158
- BNL
159
156
  BNPPARIBAS
160
157
  BO
161
158
  BOATS
@@ -214,7 +211,6 @@ CARE
214
211
  CAREER
215
212
  CAREERS
216
213
  CARS
217
- CARTIER
218
214
  CASA
219
215
  CASE
220
216
  CASEIH
@@ -247,7 +243,6 @@ CHEAP
247
243
  CHINTAI
248
244
  CHRISTMAS
249
245
  CHROME
250
- CHRYSLER
251
246
  CHURCH
252
247
  CI
253
248
  CIPRIANI
@@ -299,6 +294,7 @@ COUNTRY
299
294
  COUPON
300
295
  COUPONS
301
296
  COURSES
297
+ CPA
302
298
  CR
303
299
  CREDIT
304
300
  CREDITCARD
@@ -360,9 +356,7 @@ DNP
360
356
  DO
361
357
  DOCS
362
358
  DOCTOR
363
- DODGE
364
359
  DOG
365
- DOHA
366
360
  DOMAINS
367
361
  DOT
368
362
  DOWNLOAD
@@ -371,7 +365,6 @@ DTV
371
365
  DUBAI
372
366
  DUCK
373
367
  DUNLOP
374
- DUNS
375
368
  DUPONT
376
369
  DURBAN
377
370
  DVAG
@@ -392,7 +385,6 @@ ENERGY
392
385
  ENGINEER
393
386
  ENGINEERING
394
387
  ENTERPRISES
395
- EPOST
396
388
  EPSON
397
389
  EQUIPMENT
398
390
  ER
@@ -408,7 +400,6 @@ EU
408
400
  EUROVISION
409
401
  EUS
410
402
  EVENTS
411
- EVERBANK
412
403
  EXCHANGE
413
404
  EXPERT
414
405
  EXPOSED
@@ -488,6 +479,7 @@ GAME
488
479
  GAMES
489
480
  GAP
490
481
  GARDEN
482
+ GAY
491
483
  GB
492
484
  GBIZ
493
485
  GD
@@ -580,7 +572,6 @@ HOMEGOODS
580
572
  HOMES
581
573
  HOMESENSE
582
574
  HONDA
583
- HONEYWELL
584
575
  HORSE
585
576
  HOSPITAL
586
577
  HOST
@@ -634,7 +625,6 @@ IQ
634
625
  IR
635
626
  IRISH
636
627
  IS
637
- ISELECT
638
628
  ISMAILI
639
629
  IST
640
630
  ISTANBUL
@@ -699,12 +689,10 @@ KYOTO
699
689
  KZ
700
690
  LA
701
691
  LACAIXA
702
- LADBROKES
703
692
  LAMBORGHINI
704
693
  LAMER
705
694
  LANCASTER
706
695
  LANCIA
707
- LANCOME
708
696
  LAND
709
697
  LANDROVER
710
698
  LANXESS
@@ -725,7 +713,6 @@ LEGO
725
713
  LEXUS
726
714
  LGBT
727
715
  LI
728
- LIAISON
729
716
  LIDL
730
717
  LIFE
731
718
  LIFEINSURANCE
@@ -744,6 +731,7 @@ LIVING
744
731
  LIXIL
745
732
  LK
746
733
  LLC
734
+ LLP
747
735
  LOAN
748
736
  LOANS
749
737
  LOCKER
@@ -819,7 +807,6 @@ MN
819
807
  MO
820
808
  MOBI
821
809
  MOBILE
822
- MOBILY
823
810
  MODA
824
811
  MOE
825
812
  MOI
@@ -827,7 +814,6 @@ MOM
827
814
  MONASH
828
815
  MONEY
829
816
  MONSTER
830
- MOPAR
831
817
  MORMON
832
818
  MORTGAGE
833
819
  MOSCOW
@@ -835,7 +821,6 @@ MOTO
835
821
  MOTORCYCLES
836
822
  MOV
837
823
  MOVIE
838
- MOVISTAR
839
824
  MP
840
825
  MQ
841
826
  MR
@@ -854,7 +839,6 @@ MY
854
839
  MZ
855
840
  NA
856
841
  NAB
857
- NADEX
858
842
  NAGOYA
859
843
  NAME
860
844
  NATIONWIDE
@@ -955,7 +939,6 @@ PHOTO
955
939
  PHOTOGRAPHY
956
940
  PHOTOS
957
941
  PHYSIO
958
- PIAGET
959
942
  PICS
960
943
  PICTET
961
944
  PICTURES
@@ -1152,18 +1135,16 @@ SONG
1152
1135
  SONY
1153
1136
  SOY
1154
1137
  SPACE
1155
- SPIEGEL
1156
1138
  SPORT
1157
1139
  SPOT
1158
1140
  SPREADBETTING
1159
1141
  SR
1160
1142
  SRL
1161
- SRT
1143
+ SS
1162
1144
  ST
1163
1145
  STADA
1164
1146
  STAPLES
1165
1147
  STAR
1166
- STARHUB
1167
1148
  STATEBANK
1168
1149
  STATEFARM
1169
1150
  STC
@@ -1211,7 +1192,6 @@ TEAM
1211
1192
  TECH
1212
1193
  TECHNOLOGY
1213
1194
  TEL
1214
- TELEFONICA
1215
1195
  TEMASEK
1216
1196
  TENNIS
1217
1197
  TEVA
@@ -1271,7 +1251,6 @@ TZ
1271
1251
  UA
1272
1252
  UBANK
1273
1253
  UBS
1274
- UCONNECT
1275
1254
  UG
1276
1255
  UK
1277
1256
  UNICOM
@@ -1305,7 +1284,6 @@ VIP
1305
1284
  VIRGIN
1306
1285
  VISA
1307
1286
  VISION
1308
- VISTAPRINT
1309
1287
  VIVA
1310
1288
  VIVO
1311
1289
  VLAANDEREN
@@ -1324,7 +1302,6 @@ WALMART
1324
1302
  WALTER
1325
1303
  WANG
1326
1304
  WANGGOU
1327
- WARMAN
1328
1305
  WATCH
1329
1306
  WATCHES
1330
1307
  WEATHER
@@ -1452,13 +1429,14 @@ XN--MGBA7C0BBN0A
1452
1429
  XN--MGBAAKC7DVF
1453
1430
  XN--MGBAAM7A8H
1454
1431
  XN--MGBAB2BD
1432
+ XN--MGBAH1A3HJKRD
1455
1433
  XN--MGBAI9AZGQP6J
1456
1434
  XN--MGBAYH7GPA
1457
- XN--MGBB9FBPOB
1458
1435
  XN--MGBBH1A
1459
1436
  XN--MGBBH1A71E
1460
1437
  XN--MGBC0A9AZCG
1461
1438
  XN--MGBCA7DZDO
1439
+ XN--MGBCPQ6GPA1A
1462
1440
  XN--MGBERP4A5D4AR
1463
1441
  XN--MGBGU82A
1464
1442
  XN--MGBI4ECEXP
@@ -1484,8 +1462,10 @@ XN--P1AI
1484
1462
  XN--PBT977C
1485
1463
  XN--PGBS0DH
1486
1464
  XN--PSSY2U
1465
+ XN--Q7CE6A
1487
1466
  XN--Q9JYB4C
1488
1467
  XN--QCKA1PMC
1468
+ XN--QXA6A
1489
1469
  XN--QXAM
1490
1470
  XN--RHQV96G
1491
1471
  XN--ROVU88B
@@ -1530,7 +1510,6 @@ ZAPPOS
1530
1510
  ZARA
1531
1511
  ZERO
1532
1512
  ZIP
1533
- ZIPPO
1534
1513
  ZM
1535
1514
  ZONE
1536
1515
  ZUERICH
@@ -18,7 +18,7 @@ class Wmap::CidrTracker
18
18
  @verbose=params.fetch(:verbose, false)
19
19
  @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
20
20
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
21
- @cidr_seeds=params.fetch(:cidr_seeds, @data_dir + 'cidrs')
21
+ @cidr_seeds=params.fetch(:cidr_seeds, @data_dir + '/' + 'cidrs')
22
22
  File.write(@cidr_seeds, "") unless File.exist?(@cidr_seeds)
23
23
  load_cidr_blks_from_file(@cidr_seeds)
24
24
  end
@@ -99,8 +99,8 @@ class Wmap::CidrTracker
99
99
  #@known_cidr_blks_asce_index=NetAddr.sort(@known_cidr_blks.keys, :Desc=>false)
100
100
  @known_cidr_blks_asce_index=@known_cidr_blks.keys.sort
101
101
  @known_cidr_blks_desc_index=@known_cidr_blks_asce_index.reverse
102
- #rescue => ee
103
- # puts "Exception on method #{__method__}: #{ee}" # if @verbose
102
+ rescue => ee
103
+ puts "Exception on method #{__method__}: #{ee}" # if @verbose
104
104
  end
105
105
 
106
106
  # 'setter' to remove an entry from the CIDR store @known_cidr_blks
@@ -167,6 +167,7 @@ class Wmap::CidrTracker
167
167
  known = cidr4.contains?(ip+'/32')
168
168
  break if known
169
169
  end
170
+ return known
170
171
  rescue => ee
171
172
  puts "Exception on method #{__method__}: #{ee}" if @verbose
172
173
  return false
@@ -169,7 +169,7 @@ class Wmap::DomainTracker
169
169
  end
170
170
  end
171
171
  @known_internet_domains.merge!(results)
172
- puts "Done loading entries."
172
+ puts "Done loading domain entries."
173
173
  return results
174
174
  else
175
175
  puts "Error: no entry is loaded. Please check your list and try again."
@@ -262,7 +262,7 @@ class Wmap::DomainTracker
262
262
  when "Wmap::DomainTracker::SubDomain"
263
263
  return @known_internet_sub_domains.key?(domain)
264
264
  else
265
- return nil
265
+ return false
266
266
  end
267
267
  rescue => ee
268
268
  puts "Exception on method #{__method__}: #{ee}" if @verbose
@@ -70,7 +70,7 @@ class SubDomain < Wmap::DomainTracker
70
70
  end
71
71
  end
72
72
  @known_internet_sub_domains.merge!(results)
73
- puts "Done loading entries."
73
+ puts "Done loading sub_domain entries."
74
74
  return results
75
75
  else
76
76
  puts "Error: no entry is loaded. Please check your list and try again."
@@ -27,13 +27,13 @@ class Wmap::HostTracker
27
27
  @max_parallel=params.fetch(:max_parallel, 40)
28
28
  # Initialize the instance variables
29
29
  File.write(@hosts_file, "") unless File.exist?(@hosts_file)
30
- load_known_hosts_from_file(@hosts_file)
30
+ @known_hosts=load_known_hosts_from_file(@hosts_file)
31
31
  end
32
32
 
33
33
  # Setter to load the known hosts from the local hosts file into a class instance
34
34
  def load_known_hosts_from_file (f_hosts=@hosts_file)
35
35
  puts "Loading local hosts from file: #{f_hosts} ..." if @verbose
36
- @known_hosts=Hash.new
36
+ known_hosts=Hash.new
37
37
  @alias = Hash.new
38
38
  File.write(f_hosts, "") unless File.exist?(f_hosts)
39
39
  f=File.open(f_hosts, 'r')
@@ -43,11 +43,11 @@ class Wmap::HostTracker
43
43
  key=entry[0].downcase
44
44
  value=entry[1]
45
45
  puts "Loading key value pair: #{key} - #{value}" if @verbose
46
- @known_hosts[key] = Hash.new unless @known_hosts.key?(key)
47
- @known_hosts[key]= value
46
+ known_hosts[key] = Hash.new unless known_hosts.key?(key)
47
+ known_hosts[key]= value
48
48
  # For reverse host lookup
49
- @known_hosts[value] = Hash.new unless @known_hosts.key?(value)
50
- @known_hosts[value] = key
49
+ known_hosts[value] = Hash.new unless known_hosts.key?(value)
50
+ known_hosts[value] = key
51
51
  # Count the number of alias for the recorded IP
52
52
  if @alias.key?(value)
53
53
  @alias[value]+=1
@@ -56,11 +56,12 @@ class Wmap::HostTracker
56
56
  end
57
57
  end
58
58
  f.close
59
- return @known_hosts
60
- rescue => ee
61
- puts "Exception on method #{__method__}: #{ee}"
62
59
  return known_hosts
60
+ #rescue => ee
61
+ # puts "Exception on method #{__method__}: #{ee}"
62
+ # return known_hosts
63
63
  end
64
+ alias_method :load, :load_known_hosts_from_file
64
65
 
65
66
  # Save the current local hosts hash table into a (random) data repository file
66
67
  def save_known_hosts_to_file!(f_hosts=@hosts_file)
@@ -96,30 +97,42 @@ class Wmap::HostTracker
96
97
  puts "Exception on method #{__method__}: #{ee}"
97
98
  end
98
99
 
100
+ # determine if host is part of trusted (known) root domains
101
+ def is_trusted?(host)
102
+ puts "Determin if host #{host} is part of trusted root domains" if @verbose
103
+ root=get_domain_root(host)
104
+ puts "Domain root: #{root}" if @verbose
105
+ domain_tracker=Wmap::DomainTracker.instance
106
+ domain_tracker.data_dir=@data_dir
107
+ domain_tracker.domains_file = domain_tracker.data_dir + "/" + "domains"
108
+ domain_tracker.load_domains_from_file
109
+ if domain_tracker.domain_known?(root)
110
+ domain_tracker=nil
111
+ return true
112
+ else
113
+ domain_tracker=nil
114
+ return false
115
+ end
116
+ end
117
+
99
118
  # Setter to add a host entry to the cache one at a time
100
119
  def add(host)
101
120
  puts "Add entry to the local host repository: #{host}"
102
121
  host=host.strip.downcase unless host.nil?
122
+ root=get_domain_root(host)
103
123
  unless @known_hosts.key?(host)
104
124
  ip=host_2_ip(host)
105
125
  record=Hash.new
106
126
  if is_ip?(ip)
107
127
  # filter host to known domains only
108
- root=get_domain_root(host)
109
- puts "Domain root: #{root}" if @verbose
110
- domain_tracker=Wmap::DomainTracker.instance
111
- domain_tracker.data_dir=@data_dir
112
- domain_tracker.domains_file = domain_tracker.data_dir + "domains"
113
- domain_tracker.load_domains_from_file
114
- if domain_tracker.domain_known?(root)
115
- domain_tracker=nil
128
+ if is_trusted?(host)
116
129
  record[host]=ip
117
130
  record[ip]=host
118
131
  puts "Host data repository entry loaded: #{host} <=> #{ip}"
119
132
  # Replace instance with the class variable to avoid potential race condition under parallel engine
120
133
  # add additional logic to update the sub-domain table as well, 02/10/2014
121
134
  sub=get_sub_domain(host)
122
- if sub!=root
135
+ if sub!=nil
123
136
  tracker=Wmap::DomainTracker::SubDomain.instance
124
137
  tracker.data_dir=@data_dir
125
138
  tracker.sub_domains_file = tracker.data_dir + "sub_domains"
@@ -142,8 +155,8 @@ class Wmap::HostTracker
142
155
  else
143
156
  puts "Host is already exist. Skip: #{host}"
144
157
  end
145
- rescue => ee
146
- puts "Exception on method #{__method__}: #{ee}" if @verbose
158
+ #rescue => ee
159
+ # puts "Exception on method #{__method__}: #{ee}" if @verbose
147
160
  end
148
161
 
149
162
  # Setter to add host entry to the local hosts in batch (from an array)
@@ -164,7 +177,7 @@ class Wmap::HostTracker
164
177
  end
165
178
  end
166
179
  @known_hosts.merge!(results)
167
- puts "Done loading entries."
180
+ puts "Done loading host entries."
168
181
  return results
169
182
  else
170
183
  puts "Error: empty list - no entry is loaded. Please check your input list and try again."
@@ -77,7 +77,9 @@ class Wmap::SiteTracker
77
77
  f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
78
78
  f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
79
79
  @known_sites.keys.sort.map do |key|
80
- f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
80
+ if is_trusted?(key)
81
+ f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
82
+ end
81
83
  end
82
84
  f.close
83
85
  puts "site store table is successfully saved: #{file_sites}"
@@ -94,6 +96,24 @@ class Wmap::SiteTracker
94
96
  puts "Exception on method #{__method__}: #{ee}"
95
97
  end
96
98
 
99
+ # Determine whether the site is trusted, based on the known domains
100
+ def is_trusted?(site)
101
+ trusted=false
102
+ host=url_2_host(site)
103
+ root=get_domain_root(host)
104
+ domain_tracker=Wmap::DomainTracker.instance
105
+ domain_tracker.data_dir=@data_dir
106
+ domain_tracker.domains_file=@data_dir + "/" + "domains"
107
+ File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
108
+ domain_tracker.load_domains_from_file(domain_tracker.domains_file)
109
+ trusted=domain_tracker.domain_known?(root)
110
+ domain_tracker=nil
111
+ return trusted
112
+ rescue => ee
113
+ puts "Exception on method #{__method__}: #{ee}"
114
+ return trusted
115
+ end
116
+
97
117
  # Setter to add site entry to the cache one at a time
98
118
  def add(site)
99
119
  puts "Add entry to the site store: #{site}"
@@ -132,6 +152,10 @@ class Wmap::SiteTracker
132
152
  end
133
153
  end
134
154
  # add record only if trusted
155
+ host_tracker = Wmap::HostTracker.instance
156
+ host_tracker.data_dir= @data_dir
157
+ host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
158
+ host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
135
159
  if trusted
136
160
  # Add logic to check site status before adding it
137
161
  checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
@@ -144,10 +168,6 @@ class Wmap::SiteTracker
144
168
  raise "Site is currently down. Skip #{site}" if checker['code']==10000
145
169
  end
146
170
  raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
147
- host_tracker = Wmap::HostTracker.instance
148
- host_tracker.data_dir= @data_dir
149
- host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
150
- host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
151
171
  # Update the local host table when necessary
152
172
  if is_ip?(host)
153
173
  # Case #1: Trusted site contains IP
@@ -341,8 +361,8 @@ class Wmap::SiteTracker
341
361
  else
342
362
  puts "Error: no entry is loaded. Please check your list and try again."
343
363
  end
344
- #rescue => ee
345
- # puts "Exception on method #{__method__}: #{ee}" if @verbose
364
+ rescue => ee
365
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
346
366
  end
347
367
  alias_method :dels, :bulk_delete
348
368
 
@@ -174,7 +174,7 @@ class WpTracker < Wmap::SiteTracker
174
174
  end
175
175
  end
176
176
  @known_wp_sites.merge!(results)
177
- puts "Done loading entries."
177
+ puts "Done loading wp entries."
178
178
  return results
179
179
  else
180
180
  puts "Error: no entry is loaded. Please check your list and try again."
@@ -88,9 +88,9 @@ class Wmap::UrlChecker
88
88
  checker['redirection']=nil
89
89
  checker['timestamp']=timestamp
90
90
  return checker
91
- rescue Exception => ee
92
- puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
93
- return nil
91
+ #rescue Exception => ee
92
+ # puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
93
+ # return nil
94
94
  end
95
95
  alias_method :check, :url_worker
96
96
 
@@ -17,7 +17,8 @@ require "parallel"
17
17
  class Wmap::UrlCrawler
18
18
  include Wmap::Utils
19
19
 
20
- attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, :verbose, :data_dir
20
+ attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, \
21
+ :verbose, :data_dir, :user_agent
21
22
  attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
22
23
  # Global variable used to store the combined result of all the forked child processes. Note that class variable
23
24
  # would not be able to pass the result due to the limitation of the IO pipe communication mechanism used by the 'parallel' fork manager
@@ -35,13 +36,16 @@ class Wmap::UrlCrawler
35
36
  @crawl_depth=params.fetch(:crawl_depth, 4)
36
37
  @crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
37
38
  @max_parallel=params.fetch(:max_parallel, 40)
39
+ @user_agent=params.fetch(:user_agent, "OWASP WMAP Spider")
38
40
  # Discovered data store
39
41
  @discovered_urls_by_crawler=Hash.new
40
42
  @visited_urls_by_crawler=Hash.new
41
43
  @crawl_start=Hash.new
42
44
  @crawl_done=Hash.new
43
45
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
44
- @log_file=@data_dir + "/../logs/crawler.log"
46
+ @log_dir=@data_dir + "/logs/"
47
+ Dir.mkdir(@log_dir) unless Dir.exist?(@log_dir)
48
+ @log_file=@log_dir + "crawler.log"
45
49
  end
46
50
 
47
51
  # Pre-crawl profiler, to be used for network profiling to maximize the crawler performance.
@@ -86,7 +90,7 @@ class Wmap::UrlCrawler
86
90
 
87
91
  # The crawler worker instance that performs the actual crawling work
88
92
  def crawl_worker(url0)
89
- puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
93
+ puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and discovery contents."
90
94
  # Input URL sanity check first
91
95
  if is_url?(url0)
92
96
  host=url_2_host(url0)
@@ -216,14 +220,14 @@ class Wmap::UrlCrawler
216
220
  alias_method :crawl_file, :crawl_workers_on_file
217
221
 
218
222
  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
219
- def open_url(url)
223
+ def open_url(url,user_agent=@user_agent)
220
224
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
221
225
  if url =~ /http\:/i
222
226
  # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
223
- url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
227
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
224
228
  #url_object = open(url)
225
229
  elsif url =~ /https\:/i
226
- url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
230
+ url_object = open(url, :ssl_verify_mode=>0, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000, "User-Agent"=>user_agent)
227
231
  #url_object = open(url,:ssl_verify_mode => 0)
228
232
  else
229
233
  raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
@@ -258,22 +262,6 @@ class Wmap::UrlCrawler
258
262
  return nil
259
263
  end
260
264
 
261
- =begin
262
- # Wrapper for the Nokogiri DOM parser
263
- def parse_html(html_body)
264
- begin
265
- #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
266
- doc = Nokogiri::HTML(html_body)
267
- #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
268
- #puts "doc: #{doc}" if @verbose
269
- return doc
270
- rescue => ee
271
- puts "Exception on method #{__method__}: #{ee}" if @verbose
272
- return nil
273
- end
274
- end
275
- =end
276
-
277
265
  # Search 'current_url' and return found URLs under the same domain
278
266
  def find_urls_on_page(doc, current_url)
279
267
  puts "Search and return URLs within the doc: #{doc}" if @verbose
@@ -119,7 +119,7 @@ module Wmap
119
119
  end
120
120
  end
121
121
  @tag_store.merge!(results)
122
- puts "Done loading entries."
122
+ puts "Done loading adware entries."
123
123
  tags = nil
124
124
  return results
125
125
  else
@@ -192,7 +192,7 @@ module Wmap
192
192
  # Function to print instance variable - General top level domain list
193
193
  def print_gtld
194
194
  puts @gtld
195
- return @gtld
195
+ return @gtld
196
196
  end
197
197
 
198
198
  # Function to print instance variable - Country code top-level domain list
@@ -8,46 +8,43 @@
8
8
 
9
9
 
10
10
  module Wmap
11
- module Utils
11
+ module Utils
12
12
  # Module to log debugging and other messages
13
- module Logger
13
+ module Logger
14
14
  extend self
15
15
  # Append information into the log file for the trouble-shooting purpose
16
16
  def wlog (obj, agent, file)
17
17
  puts "Writing #{obj} into log file: #{file}" if @verbose
18
- begin
19
- return false if obj.nil?
20
- # 01/27/2015, implementing singleton pattern for the logger
21
- @@f=File.open(file,'a')
22
- timestamp=Time.now
23
- case obj
24
- when Array
25
- if obj.size >= 0
26
- @@f.write "#{timestamp}: #{agent}: \n"
27
- obj.map { |x| @@f.write " #{x}\n" }
28
- puts "The list is successfully saved into the log file: #{file} " if @verbose
29
- end
30
- when Hash
31
- if obj.length >= 0
32
- @@f.write "#{timestamp}: #{agent}: \n"
33
- obj.each_value { |value| @@f.write " #{value}\n" }
34
- puts "The hash is successfully saved into the log file: #{file} " if @verbose
35
- end
36
- when String
37
- @@f.write "#{timestamp}: #{agent}: #{obj}\n"
38
- puts "The string is successfully saved into the log file: #{file} " if @verbose
39
- else
40
- #do nothing
41
- puts "Un-handled exception on: #{obj}" if @verbose
18
+ return false if obj.nil?
19
+ @@f=File.open(file,'a')
20
+ timestamp=Time.now
21
+ case obj
22
+ when Array
23
+ if obj.size >= 0
24
+ @@f.write "#{timestamp}: #{agent}: \n"
25
+ obj.map { |x| @@f.write " #{x}\n" }
26
+ puts "The list is successfully saved into the log file: #{file} " if @verbose
42
27
  end
43
- @@f.close
44
- return true
45
- rescue => ee
46
- puts "Exception on method #{__method__}: #{ee}" if @verbose
47
- return false
48
- end
28
+ when Hash
29
+ if obj.length >= 0
30
+ @@f.write "#{timestamp}: #{agent}: \n"
31
+ obj.each_value { |value| @@f.write " #{value}\n" }
32
+ puts "The hash is successfully saved into the log file: #{file} " if @verbose
33
+ end
34
+ when String
35
+ @@f.write "#{timestamp}: #{agent}: #{obj}\n"
36
+ puts "The string is successfully saved into the log file: #{file} " if @verbose
37
+ else
38
+ #do nothing
39
+ puts "Un-handled exception on: #{obj}" if @verbose
40
+ end
41
+ @@f.close
42
+ return true
43
+ rescue => ee
44
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
45
+ return false
49
46
  end
50
-
51
- end
47
+
48
+ end
52
49
  end
53
50
  end
@@ -15,6 +15,7 @@ module Wmap
15
15
 
16
16
  # set hard stop limit of http time-out to 15 seconds (15000 ms), in order to avoid severe performance penalty for certain 'weird' site(s)
17
17
  Max_http_timeout=15000
18
+ User_agent = "OWASP WMAP Spider"
18
19
 
19
20
  # Simple sanity check on a 'claimed' URL string.
20
21
  def is_url?(url)
@@ -377,7 +378,8 @@ module Wmap
377
378
 
378
379
  # Given an URL, open the page, then return the DOM text from a normal user perspective
379
380
  def open_page(url)
380
- args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, read_timeout: Max_http_timeout/1000}
381
+ args = {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, allow_redirections: :safe, \
382
+ read_timeout: Max_http_timeout/1000, "User-Agent"=>User_agent}
381
383
  doc = Nokogiri::HTML(open(url, args))
382
384
  if doc.text.include?("Please enable JavaScript to view the page content")
383
385
  puts "Invoke headless chrome through webdriver ..." if @verbose
@@ -385,7 +387,7 @@ module Wmap
385
387
  #driver = Selenium::WebDriver.for :chrome
386
388
  # http://watir.com/guides/chrome/
387
389
  args = ['--ignore-certificate-errors', '--disable-popup-blocking', '--disable-translate', '--disk-cache-size 8192']
388
- browser = Watir::Browser.new :chrome, headless: true, options: {args: args}
390
+ browser = Watir::Browser.new :chrome, headless: true, switches: %w[--user-agent=OWASP\ WMAP\ Spider]
389
391
  browser.goto(url)
390
392
  sleep(2) # wait for the loading
391
393
  doc = Nokogiri::HTML(browser.html)
@@ -159,20 +159,18 @@ module Wmap
159
159
  # Simple test of a host string's format. Returns true if it contains a valid internet domain sub-string. Note: not to be confused with the method 'valid_dns_record?', which is a stricter and more time-consuming test against the DNS server for a resolvable internet host.
160
160
  def is_fqdn? (host)
161
161
  puts "Validate the host-name format is valid: #{host}" if @verbose
162
- begin
163
- return false if is_ip?(host) or is_url?(host)
164
- domain=get_domain_root(host)
165
- if domain.nil?
166
- return false
167
- elsif is_domain_root?(domain)
168
- return true
169
- else
170
- return false
171
- end
172
- rescue => ee
173
- puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
162
+ return false if is_ip?(host) or is_url?(host)
163
+ domain=get_domain_root(host)
164
+ if domain.nil?
165
+ return false
166
+ elsif is_domain_root?(domain)
167
+ return true
168
+ else
174
169
  return false
175
170
  end
171
+ # rescue => ee
172
+ # puts "Exception on method is_fqdn? for #{host}: #{ee}" if @verbose
173
+ # return false
176
174
  end
177
175
  alias_method :is_host?, :is_fqdn?
178
176
 
@@ -239,7 +239,11 @@ module Wmap
239
239
  if tag.to_s.include?(pattern)
240
240
  puts tag.to_s if @verbose
241
241
  k=nil
242
- return tag.to_s.scan(/[\d+\.]+\d+/).first
242
+ if tag.to_s.scan(/[\d+\.]+\d+/).first =~ /\d+\./
243
+ return tag.to_s.scan(/[\d+\.]+\d+/).first
244
+ else
245
+ return nil
246
+ end
243
247
  end
244
248
  end
245
249
  end
@@ -0,0 +1,36 @@
1
+ #--
2
+ # Wmap
3
+ #
4
+ # A pure Ruby library for the Internet web application discovery and tracking.
5
+ #
6
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
7
+ #++
8
+ # Unit Test File for Wmap::CidrTracker class
9
+
10
+ require "minitest/autorun"
11
+ require "Wmap"
12
+
13
+ class CidrTrackerTest < MiniTest::Unit::TestCase
14
+ include Wmap::Utils
15
+
16
+ def test_cidr_add
17
+ w = Wmap::CidrTracker.new
18
+ w.add("192.168.1.0/24")
19
+ assert_equal true, w.known_cidr_blks.key?("192.168.1.0/24")
20
+ end
21
+
22
+ def test_cidr_delete
23
+ w = Wmap::CidrTracker.new
24
+ w.add("10.0.0.0/8")
25
+ w.delete("10.0.0.0/8")
26
+ assert_equal false, w.known_cidr_blks.key?("10.0.0.0/8")
27
+ end
28
+
29
+ def test_ip_trusted?
30
+ w = Wmap::CidrTracker.new
31
+ w.add("192.168.1.0/24")
32
+ assert_equal true, w.ip_trusted?("192.168.1.1")
33
+ assert_equal true, w.ip_trusted?("192.168.1.255")
34
+ end
35
+
36
+ end
data/test/utils_test.rb CHANGED
@@ -12,7 +12,7 @@ require "Wmap"
12
12
 
13
13
  class UtilsTest < MiniTest::Unit::TestCase
14
14
  include Wmap::Utils
15
-
15
+
16
16
  def test_sld_domain_conversion
17
17
  assert_equal "yahoo.com", get_domain_root("yahoo.com")
18
18
  end
@@ -28,75 +28,75 @@ class UtilsTest < MiniTest::Unit::TestCase
28
28
  def test_is_domain_root_case_1?
29
29
  assert_equal false, is_domain_root?("www.yahoo.co.uk")
30
30
  end
31
-
31
+
32
32
  def test_is_domain_root_case_2?
33
33
  assert_equal true, is_domain_root?("yahoo.co.uk")
34
34
  end
35
-
35
+
36
36
  def test_get_sub_domain
37
37
  assert_equal "mail.yahoo.co.uk", get_sub_domain("www.mail.yahoo.co.uk")
38
38
  end
39
39
 
40
40
  def test_is_url_case_1?
41
41
  assert_equal true, is_url?("http://www.mail.yahoo.co.uk/")
42
- end
42
+ end
43
43
 
44
44
  def test_is_url_case_2?
45
45
  assert_equal true, is_url?("https://www.mail.yahoo.co.uk/")
46
- end
46
+ end
47
47
 
48
48
  def test_is_url_case_3?
49
49
  assert_equal false, is_url?("http://www.mail.yahoo.uii/")
50
- end
50
+ end
51
51
 
52
52
  def test_is_url_case_4?
53
53
  assert_equal false, is_url?("http:\\www.mail.yahoo.co.uk")
54
- end
55
-
54
+ end
55
+
56
56
  def test_is_ssl?
57
57
  assert_equal false, is_ssl?("http://www.mail.yahoo.co.uk/")
58
- end
59
-
58
+ end
59
+
60
60
  def test_is_site?
61
61
  assert_equal false, is_site?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
62
- end
63
-
62
+ end
63
+
64
64
  def test_url_2_host
65
65
  assert_equal "login.yahoo.com", url_2_host("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
66
- end
67
-
66
+ end
67
+
68
68
  def test_url_2_site_case_1
69
69
  assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
70
- end
70
+ end
71
71
 
72
72
  def test_url_2_site_case_2
73
73
  assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
74
- end
74
+ end
75
75
 
76
76
  def test_url_2_site_case_3
77
77
  assert_equal "https://login.yahoo.com/", url_2_site("https://login.yahoo.com#.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
78
- end
79
-
78
+ end
79
+
80
80
  def test_url_2_path
81
81
  assert_equal "/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", url_2_path("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com")
82
- end
82
+ end
83
83
 
84
84
  def test_urls_on_same_domain?
85
85
  assert_equal true, urls_on_same_domain?("https://login.yahoo.com/?.src=ym&.intl=us&.lang=en-US&.done=https%3a//mail.yahoo.com", "https://us-mg4.mail.yahoo.com/neo/launch?.rand=8hjd08hc6t1lq")
86
- end
86
+ end
87
87
 
88
88
  def test_host_2_url_case_1
89
89
  assert_equal "https://mail.yahoo.com/", host_2_url("mail.yahoo.com",443)
90
- end
90
+ end
91
91
 
92
92
  def test_host_2_url_case_2
93
93
  assert_equal "http://mail.yahoo.com/", host_2_url("mail.yahoo.com")
94
- end
95
-
94
+ end
95
+
96
96
  def test_make_absolute
97
97
  assert_equal "http://games.yahoo.com/game/the-magic-snowman-flash.html", make_absolute("http://games.yahoo.com/","game/the-magic-snowman-flash.html")
98
98
  end
99
-
99
+
100
100
  def test_create_absolute_url_from_base
101
101
  assert_equal "http://images.search.yahoo.com/search/images?p=raiders", create_absolute_url_from_base("http://images.search.yahoo.com/images","/search/images?p=raiders")
102
102
  end
@@ -108,7 +108,7 @@ class UtilsTest < MiniTest::Unit::TestCase
108
108
  def test_normalize_url_case_1
109
109
  assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/./images/search/images?p=raiders")
110
110
  end
111
-
111
+
112
112
  def test_normalize_url_case_2
113
113
  assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com/../images/../search/images?p=raiders")
114
114
  end
@@ -116,53 +116,58 @@ class UtilsTest < MiniTest::Unit::TestCase
116
116
  def test_normalize_url_case_3
117
117
  assert_equal "http://images.search.yahoo.com/images/search/images?p=raiders", normalize_url("http://images.search.yahoo.com./../images/../search/images?p=raiders")
118
118
  end
119
-
119
+
120
120
  def test_is_ip_case_1?
121
121
  assert_equal false, is_ip?("256.2.3.1")
122
- end
122
+ end
123
123
 
124
124
  def test_is_ip_case_2?
125
125
  assert_equal false, is_ip?("25.2.3.1.22")
126
- end
126
+ end
127
127
 
128
128
  def test_is_ip_case_3?
129
129
  assert_equal true, is_ip?("196.168.230.1")
130
- end
130
+ end
131
131
 
132
132
  def test_is_fqdn_case_1?
133
133
  assert_equal true, is_fqdn?("images.search.yahoo.com")
134
- end
134
+ end
135
135
 
136
136
  def test_is_fqdn_case_2?
137
137
  assert_equal true, is_fqdn?("yahoo.com")
138
- end
139
-
138
+ end
139
+
140
140
  def test_is_fqdn_case_3?
141
- assert_equal false, is_fqdn?("images.search.yahoo")
142
- end
143
-
141
+ # according to latest tlds list - http://data.iana.org/TLD/tlds-alpha-by-domain.txt
142
+ assert_equal true, is_fqdn?("images.search.yahoo")
143
+ end
144
+
144
145
  def test_is_fqdn_case_4?
145
146
  assert_equal false, is_fqdn?("images")
146
- end
147
-
147
+ end
148
+
149
+ def test_is_fqdn_case_5?
150
+ assert_equal false, is_fqdn?("images.search.gargle")
151
+ end
152
+
148
153
  def test_is_cidr_case_1?
149
154
  assert_equal false, is_cidr?("196.168.230.1")
150
- end
155
+ end
151
156
 
152
157
  def test_is_cidr_case_2?
153
158
  assert_equal false, is_cidr?("196.168.2.257/12")
154
- end
155
-
159
+ end
160
+
156
161
  def test_is_cidr_case_3?
157
162
  assert_equal true, is_cidr?("196.168.2.25/12")
158
- end
159
-
163
+ end
164
+
160
165
  def test_cidr_2_ips
161
166
  assert_equal ["192.168.1.1"], cidr_2_ips("192.168.1.1/32")
162
- end
163
-
167
+ end
168
+
164
169
  def test_sort_ips
165
170
  assert_equal ["192.168.1.1", "192.168.1.2", "192.168.2.1"], sort_ips(["192.168.1.2", "192.168.2.1","192.168.1.1"])
166
- end
167
-
171
+ end
172
+
168
173
  end
data/version.txt CHANGED
@@ -3,8 +3,8 @@
3
3
  ###############################################################################
4
4
  package = wmap
5
5
  # wmap version 2.0 == web_discovery version 1.5.3
6
- version = 2.7.7
7
- date = 2020-03-24
6
+ version = 2.8.3
7
+ date = 2021-07-26
8
8
 
9
9
  author = Sam (Yang) Li
10
10
  email = yang.li@owasp.org
data/wmap.gemspec CHANGED
@@ -36,7 +36,7 @@ Gem::Specification.new do |s|
36
36
  s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
37
37
  s.email = info["email"]
38
38
  s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","trusts","distrust","run_tests"]
39
- s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","data/","LICENSE.txt",
39
+ s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports", "LICENSE.txt",
40
40
  "version.txt","README.md", "wmap.gemspec"]
41
41
  s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['dicts/*']
42
42
  #s.homepage = "none"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.7
4
+ version: 2.8.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam (Yang) Li
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-24 00:00:00.000000000 Z
11
+ date: 2021-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dnsruby
@@ -234,7 +234,6 @@ files:
234
234
  - LICENSE.txt
235
235
  - README.md
236
236
  - TODO
237
- - bin/RHPG
238
237
  - bin/deprime
239
238
  - bin/distrust
240
239
  - bin/googleBot
@@ -251,7 +250,6 @@ files:
251
250
  - bin/wdel
252
251
  - bin/wdump
253
252
  - bin/wmap
254
- - bin/wmaps
255
253
  - bin/wscan
256
254
  - demos/bruter.rb
257
255
  - demos/dns_brutes.rb
@@ -308,7 +306,7 @@ files:
308
306
  - settings/discovery_ports
309
307
  - settings/google_keywords.txt
310
308
  - settings/google_locator.txt
311
- - settings/tag_signatures
309
+ - test/cidr_tracker_test.rb
312
310
  - test/domain_tracker_test.rb
313
311
  - test/utils_test.rb
314
312
  - version.txt
@@ -335,9 +333,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
335
333
  - !ruby/object:Gem::Version
336
334
  version: '0'
337
335
  requirements: []
338
- rubyforge_project: wmap
339
- rubygems_version: 2.7.10
340
- signing_key:
336
+ rubygems_version: 3.0.9
337
+ signing_key:
341
338
  specification_version: 4
342
339
  summary: A pure Ruby web application and service discovery API.
343
340
  test_files: []
data/bin/RHPG DELETED
@@ -1,107 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # Executable to lookup then merge site tech details into the RHPG asset spreadsheet in CSV format only
3
- #
4
- ## Usage: RHPG [RHPG.csv]
5
- require "wmap"
6
- require "csv"
7
- include Wmap::Utils
8
-
9
- def print_usage
10
- puts "Program to lookup then merge the site details into RHPG asset spreadsheet. \nUsage: RHPG [RHPG.csv]"
11
- end
12
-
13
- # Lookup the site store for a domain; then return the fingger print info of the site
14
- def site_tracker_lookup(domain)
15
- tracker=Wmap::SiteTracker.instance
16
- tracker.verbose=false
17
- #first order search
18
- tracker.known_sites.each do |key,val|
19
- if key.include?(domain.strip.downcase) && key.include?("https")
20
- tracker=nil
21
- return [key] + val.values
22
- end
23
- end
24
- #second order search
25
- tracker.known_sites.each do |key,val|
26
- if key.include?(domain.strip.downcase)
27
- tracker=nil
28
- return [key] + val.values
29
- end
30
- end
31
- tracker=nil
32
- return [nil]*9
33
- end
34
-
35
- # look up the wp site data store for a domain; then return the wp finger print info: [is_wp?,wp_ver]
36
- def wp_tracker_lookup(domain)
37
- tracker=Wmap::WpTracker.new(:verbose=>false)
38
- # first order
39
- tracker.known_wp_sites.each do |key,val|
40
- if key.include?(domain.strip.downcase) && val
41
- ver=tracker.wp_ver(key)
42
- tracker=nil
43
- return [val,ver]
44
- end
45
- end
46
- # second order
47
- tracker.known_wp_sites.each do |key,val|
48
- if key.include?(domain.strip.downcase) && key.include?("https") && val
49
- tracker=nil
50
- return [val,nil]
51
- end
52
- end
53
- # third order
54
- tracker.known_wp_sites.each do |key,val|
55
- if key.include?(domain.strip.downcase)
56
- tracker=nil
57
- return [val,nil]
58
- end
59
- end
60
- tracker=nil
61
- return [nil,nil]
62
- end
63
-
64
- # perform the wpscan on a site
65
- def wpscan(domain)
66
- url=site_tracker_lookup(domain)[0]
67
- return nil if url.nil?
68
- if url.include?("https")
69
- command="wpscan --disable-tls-checks --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
70
- else
71
- command="wpscan --ignore-main-redirect --url=" + url + " -o " + domain + ".wpscan"
72
- end
73
- system(command)
74
- end
75
-
76
- puts Wmap.banner
77
- print_usage
78
-
79
- # open output file to write
80
- CSV.open("output.csv", "wb") do |csv|
81
- cnt=1
82
- # open RHPG input file to read
83
- CSV.foreach(ARGV[0]) do |row|
84
- puts "Processing row #{cnt}"
85
- #puts row.inspect
86
- my_row=Array.new
87
- if cnt > 1
88
- if is_domain?(row[0])
89
- =begin
90
- if row[3] =~ /Keep/i && row[3] != /Redirect/i
91
- unless File.exist?(row[0]+".wpscan")
92
- wpscan(row[0])
93
- end
94
- end
95
- =end
96
- my_row = row + site_tracker_lookup(row[0]) + wp_tracker_lookup(row[0])
97
- else
98
- my_row = row + [nil]*10
99
- end
100
- else
101
- my_row = row + ["Website","Primary IP","Port","Hosting Status","Server","Response Code","MD5 Finger-print","Redirection","Timestamp", "WordPress", "WordPress Version"]
102
- end
103
- cnt+=1
104
- csv << my_row
105
- end
106
- puts "All done. "
107
- end
data/bin/wmaps DELETED
@@ -1,23 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # script to automate the new site discovery through by crawling all unique sites in the site store
3
- require "wmap"
4
- require "parallel"
5
-
6
- def wmap_worker(domain)
7
- cmd = "wmap " + domain
8
- puts "wmap discovery on domain: ", domain
9
- system(cmd)
10
- end
11
-
12
-
13
- tracker=Wmap::DomainTracker.instance
14
- Parallel.map(tracker.known_internet_domains.keys, :in_processes => 10) { |target|
15
- puts "Working on #{target} ..." if @verbose
16
- wmap_worker(target)
17
- }
18
- =begin
19
- tracker.known_internet_domains.keys.map do |domain|
20
- wmap_worker(domain)
21
- end
22
- =end
23
- tracker=nil
@@ -1,6 +0,0 @@
1
- # Adware signature file: signture string, description
2
- gtag.js, Google / DoubleClick Floodlight Tag
3
- analytics.js, Google Universal Analytics Tag
4
- ga.js, Google Analytics Tag
5
- utag.js, Tealium Tag
6
- all.js, Facebook Tag