wmap 2.7.6 → 2.8.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1f2c900ee2ac330d411f3f6496868ea519dd322b236cd00be0b5f3d1d8d43db
4
- data.tar.gz: 054a534426e0b9b99730ac6252a9b0e2424866df914d02f6f7f1bdb925588bc1
3
+ metadata.gz: 2dee2577809daef8231ddc3d97a7e0abab5312700f36ab2233366ff6729ae388
4
+ data.tar.gz: 5dee805b28f88e2ae320e6afd9b4ef7d6f25bff99a34cf46b42c32249bf7eaad
5
5
  SHA512:
6
- metadata.gz: c591b085614ec050ff8637750dfff57c3238ccf42c2fce9cab985ccd0f3e4761ed085632a9db7f4af87be39e2921b33d330c890c1af87213a134bdd37f43553d
7
- data.tar.gz: 414720066fee25c30b41beb9c7529f7bfffda513e01e1bf4445cfefcd9ecbf2c421aa64db931a4d489845d664705ededb2474c3240750ad12cd2423b21078987
6
+ metadata.gz: c78742dad1d356b88d2b45a0773527fd8f4e54cbff13c0ccb5f9fd9a228fac56cc2c26a705e8c5ce068b4d2bdf50d1fd190060501a1fc17e0a1040e583080db6
7
+ data.tar.gz: 7eea2eaa6ee45e9ac7857eaa603177b00424a6842cdd89a200062bdcd737c1a66fa2310645e80272fce71d0193fb9ca8044f232e0dabdf20c2da453e436c84cc
data/bin/wmap CHANGED
@@ -13,7 +13,7 @@ parser = OptionParser.new do|opts|
13
13
  opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
14
14
  options[:data_dir] = data_dir;
15
15
  end
16
- opts.on('-t', '--target target', 'Web Mapper target') do |target|
16
+ opts.on('-t', '--target target', 'Web Mapper target / seed for discovery') do |target|
17
17
  options[:target] = target;
18
18
  end
19
19
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
@@ -29,6 +29,10 @@ parser.parse!
29
29
  # print program banner
30
30
  puts Wmap.banner
31
31
  # print_usage unless options[:target]
32
+ unless options[:target]
33
+ puts "Usage: $ wmap -h"
34
+ exit 1
35
+ end
32
36
 
33
37
  # Preparing - check out the working logs directory
34
38
  if options[:data_dir]
@@ -129,6 +133,10 @@ Wmap.wlog(dis_urls.keys, "wmap", Log_dir+"discovered_urls.log") unless dis_urls.
129
133
  Wmap.wlog(dis_sites.keys, "wmap", Log_dir+"discovered_sites.log") unless dis_sites.empty?
130
134
  #crawler.wlog(c_start.keys,Log_dir+"crawler.log")
131
135
  #crawler.wlog(c_done.keys,Log_dir+"crawler.log")
136
+
137
+
138
+ # Save only the currently discovered URLs to a dedicated file, patched 07/23/2021
139
+ crawler.save_discovered_urls(Log_dir+"cur_urls.log")
132
140
  crawler=nil
133
141
 
134
142
 
data/dicts/tlds.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # http://data.iana.org/TLD/tlds-alpha-by-domain.txt
2
- # Version 2018110500, Last Updated Mon Nov 5 07:07:02 2018 UTC
2
+ # Version 2020033000, Last Updated Mon Mar 30 07:07:01 2020 UTC
3
3
  AAA
4
4
  AARP
5
5
  ABARTH
@@ -16,7 +16,6 @@ ACCENTURE
16
16
  ACCOUNTANT
17
17
  ACCOUNTANTS
18
18
  ACO
19
- ACTIVE
20
19
  ACTOR
21
20
  AD
22
21
  ADAC
@@ -146,7 +145,6 @@ BIZ
146
145
  BJ
147
146
  BLACK
148
147
  BLACKFRIDAY
149
- BLANCO
150
148
  BLOCKBUSTER
151
149
  BLOG
152
150
  BLOOMBERG
@@ -155,7 +153,6 @@ BM
155
153
  BMS
156
154
  BMW
157
155
  BN
158
- BNL
159
156
  BNPPARIBAS
160
157
  BO
161
158
  BOATS
@@ -214,7 +211,6 @@ CARE
214
211
  CAREER
215
212
  CAREERS
216
213
  CARS
217
- CARTIER
218
214
  CASA
219
215
  CASE
220
216
  CASEIH
@@ -247,7 +243,6 @@ CHEAP
247
243
  CHINTAI
248
244
  CHRISTMAS
249
245
  CHROME
250
- CHRYSLER
251
246
  CHURCH
252
247
  CI
253
248
  CIPRIANI
@@ -299,6 +294,7 @@ COUNTRY
299
294
  COUPON
300
295
  COUPONS
301
296
  COURSES
297
+ CPA
302
298
  CR
303
299
  CREDIT
304
300
  CREDITCARD
@@ -360,9 +356,7 @@ DNP
360
356
  DO
361
357
  DOCS
362
358
  DOCTOR
363
- DODGE
364
359
  DOG
365
- DOHA
366
360
  DOMAINS
367
361
  DOT
368
362
  DOWNLOAD
@@ -371,7 +365,6 @@ DTV
371
365
  DUBAI
372
366
  DUCK
373
367
  DUNLOP
374
- DUNS
375
368
  DUPONT
376
369
  DURBAN
377
370
  DVAG
@@ -392,7 +385,6 @@ ENERGY
392
385
  ENGINEER
393
386
  ENGINEERING
394
387
  ENTERPRISES
395
- EPOST
396
388
  EPSON
397
389
  EQUIPMENT
398
390
  ER
@@ -408,7 +400,6 @@ EU
408
400
  EUROVISION
409
401
  EUS
410
402
  EVENTS
411
- EVERBANK
412
403
  EXCHANGE
413
404
  EXPERT
414
405
  EXPOSED
@@ -488,6 +479,7 @@ GAME
488
479
  GAMES
489
480
  GAP
490
481
  GARDEN
482
+ GAY
491
483
  GB
492
484
  GBIZ
493
485
  GD
@@ -580,7 +572,6 @@ HOMEGOODS
580
572
  HOMES
581
573
  HOMESENSE
582
574
  HONDA
583
- HONEYWELL
584
575
  HORSE
585
576
  HOSPITAL
586
577
  HOST
@@ -634,7 +625,6 @@ IQ
634
625
  IR
635
626
  IRISH
636
627
  IS
637
- ISELECT
638
628
  ISMAILI
639
629
  IST
640
630
  ISTANBUL
@@ -699,12 +689,10 @@ KYOTO
699
689
  KZ
700
690
  LA
701
691
  LACAIXA
702
- LADBROKES
703
692
  LAMBORGHINI
704
693
  LAMER
705
694
  LANCASTER
706
695
  LANCIA
707
- LANCOME
708
696
  LAND
709
697
  LANDROVER
710
698
  LANXESS
@@ -725,7 +713,6 @@ LEGO
725
713
  LEXUS
726
714
  LGBT
727
715
  LI
728
- LIAISON
729
716
  LIDL
730
717
  LIFE
731
718
  LIFEINSURANCE
@@ -744,6 +731,7 @@ LIVING
744
731
  LIXIL
745
732
  LK
746
733
  LLC
734
+ LLP
747
735
  LOAN
748
736
  LOANS
749
737
  LOCKER
@@ -819,7 +807,6 @@ MN
819
807
  MO
820
808
  MOBI
821
809
  MOBILE
822
- MOBILY
823
810
  MODA
824
811
  MOE
825
812
  MOI
@@ -827,7 +814,6 @@ MOM
827
814
  MONASH
828
815
  MONEY
829
816
  MONSTER
830
- MOPAR
831
817
  MORMON
832
818
  MORTGAGE
833
819
  MOSCOW
@@ -835,7 +821,6 @@ MOTO
835
821
  MOTORCYCLES
836
822
  MOV
837
823
  MOVIE
838
- MOVISTAR
839
824
  MP
840
825
  MQ
841
826
  MR
@@ -854,7 +839,6 @@ MY
854
839
  MZ
855
840
  NA
856
841
  NAB
857
- NADEX
858
842
  NAGOYA
859
843
  NAME
860
844
  NATIONWIDE
@@ -955,7 +939,6 @@ PHOTO
955
939
  PHOTOGRAPHY
956
940
  PHOTOS
957
941
  PHYSIO
958
- PIAGET
959
942
  PICS
960
943
  PICTET
961
944
  PICTURES
@@ -1152,18 +1135,16 @@ SONG
1152
1135
  SONY
1153
1136
  SOY
1154
1137
  SPACE
1155
- SPIEGEL
1156
1138
  SPORT
1157
1139
  SPOT
1158
1140
  SPREADBETTING
1159
1141
  SR
1160
1142
  SRL
1161
- SRT
1143
+ SS
1162
1144
  ST
1163
1145
  STADA
1164
1146
  STAPLES
1165
1147
  STAR
1166
- STARHUB
1167
1148
  STATEBANK
1168
1149
  STATEFARM
1169
1150
  STC
@@ -1211,7 +1192,6 @@ TEAM
1211
1192
  TECH
1212
1193
  TECHNOLOGY
1213
1194
  TEL
1214
- TELEFONICA
1215
1195
  TEMASEK
1216
1196
  TENNIS
1217
1197
  TEVA
@@ -1271,7 +1251,6 @@ TZ
1271
1251
  UA
1272
1252
  UBANK
1273
1253
  UBS
1274
- UCONNECT
1275
1254
  UG
1276
1255
  UK
1277
1256
  UNICOM
@@ -1305,7 +1284,6 @@ VIP
1305
1284
  VIRGIN
1306
1285
  VISA
1307
1286
  VISION
1308
- VISTAPRINT
1309
1287
  VIVA
1310
1288
  VIVO
1311
1289
  VLAANDEREN
@@ -1324,7 +1302,6 @@ WALMART
1324
1302
  WALTER
1325
1303
  WANG
1326
1304
  WANGGOU
1327
- WARMAN
1328
1305
  WATCH
1329
1306
  WATCHES
1330
1307
  WEATHER
@@ -1452,13 +1429,14 @@ XN--MGBA7C0BBN0A
1452
1429
  XN--MGBAAKC7DVF
1453
1430
  XN--MGBAAM7A8H
1454
1431
  XN--MGBAB2BD
1432
+ XN--MGBAH1A3HJKRD
1455
1433
  XN--MGBAI9AZGQP6J
1456
1434
  XN--MGBAYH7GPA
1457
- XN--MGBB9FBPOB
1458
1435
  XN--MGBBH1A
1459
1436
  XN--MGBBH1A71E
1460
1437
  XN--MGBC0A9AZCG
1461
1438
  XN--MGBCA7DZDO
1439
+ XN--MGBCPQ6GPA1A
1462
1440
  XN--MGBERP4A5D4AR
1463
1441
  XN--MGBGU82A
1464
1442
  XN--MGBI4ECEXP
@@ -1484,8 +1462,10 @@ XN--P1AI
1484
1462
  XN--PBT977C
1485
1463
  XN--PGBS0DH
1486
1464
  XN--PSSY2U
1465
+ XN--Q7CE6A
1487
1466
  XN--Q9JYB4C
1488
1467
  XN--QCKA1PMC
1468
+ XN--QXA6A
1489
1469
  XN--QXAM
1490
1470
  XN--RHQV96G
1491
1471
  XN--ROVU88B
@@ -1530,7 +1510,6 @@ ZAPPOS
1530
1510
  ZARA
1531
1511
  ZERO
1532
1512
  ZIP
1533
- ZIPPO
1534
1513
  ZM
1535
1514
  ZONE
1536
1515
  ZUERICH
data/lib/wmap.rb CHANGED
@@ -8,6 +8,7 @@
8
8
  require 'wmap/utils/domain_root'
9
9
  require 'wmap/utils/url_magic'
10
10
  require 'wmap/utils/logger'
11
+ require 'wmap/utils/wp_detect'
11
12
  require 'wmap/utils/utils'
12
13
  require 'wmap/cidr_tracker'
13
14
  require 'wmap/domain_tracker'
@@ -99,8 +99,8 @@ class Wmap::CidrTracker
99
99
  #@known_cidr_blks_asce_index=NetAddr.sort(@known_cidr_blks.keys, :Desc=>false)
100
100
  @known_cidr_blks_asce_index=@known_cidr_blks.keys.sort
101
101
  @known_cidr_blks_desc_index=@known_cidr_blks_asce_index.reverse
102
- #rescue => ee
103
- # puts "Exception on method #{__method__}: #{ee}" # if @verbose
102
+ rescue => ee
103
+ puts "Exception on method #{__method__}: #{ee}" # if @verbose
104
104
  end
105
105
 
106
106
  # 'setter' to remove an entry from the CIDR store @known_cidr_blks
@@ -167,6 +167,7 @@ class Wmap::CidrTracker
167
167
  known = cidr4.contains?(ip+'/32')
168
168
  break if known
169
169
  end
170
+ return known
170
171
  rescue => ee
171
172
  puts "Exception on method #{__method__}: #{ee}" if @verbose
172
173
  return false
@@ -7,16 +7,14 @@
7
7
  #++
8
8
  require "parallel"
9
9
  #require "singleton"
10
- require "open-uri"
11
- require "open_uri_redirections"
12
- require "nokogiri"
13
- require "css_parser"
10
+
14
11
 
15
12
  module Wmap
16
13
  class SiteTracker
17
14
 
18
15
  class WpTracker < Wmap::SiteTracker
19
16
  include Wmap::Utils
17
+ include Wmap::Utils::WpDetect
20
18
  #include Singleton
21
19
 
22
20
  attr_accessor :http_timeout, :max_parallel, :verbose, :sites_wp, :data_dir
@@ -152,27 +150,6 @@ class WpTracker < Wmap::SiteTracker
152
150
  end
153
151
  alias_method :adds, :bulk_add
154
152
 
155
- # logic to determin if it's a wordpress site
156
- def is_wp?(url)
157
- site=url_2_site(url)
158
- if wp_readme?(site)
159
- found=true
160
- elsif wp_css?(site)
161
- found=true
162
- elsif wp_meta?(site)
163
- found=true
164
- elsif wp_login?(site)
165
- found=true
166
- elsif wp_rpc?(site)
167
- found=true
168
- else
169
- found=false
170
- end
171
- return found
172
- rescue => ee
173
- puts "Exception on method #{__method__}: #{ee}: #{url}" if @verbose
174
- end
175
-
176
153
  # Refresh one site entry then update the instance variable (cache)
177
154
  def refresh (target,use_cache=false)
178
155
  return add(target,use_cache)
@@ -209,207 +186,6 @@ class WpTracker < Wmap::SiteTracker
209
186
  # return Hash.new
210
187
  end
211
188
 
212
- # Wordpress detection checkpoint - readme.html
213
- def wp_readme?(url)
214
- site = url_2_site(url)
215
- readme_url=site + "readme.html"
216
- k=Wmap::UrlChecker.new
217
- if k.response_code(readme_url) == 200
218
- k=nil
219
- doc=open_page(readme_url)
220
- title=doc.css('title')
221
- if title.to_s =~ /wordpress/i
222
- return true
223
- else
224
- return false
225
- end
226
- else
227
- k=nil
228
- return false
229
- end
230
- rescue => ee
231
- puts "Exception on method #{__method__} for site #{url}: #{ee}" if @verbose
232
- return false
233
- end
234
-
235
- # Wordpress detection checkpoint - install.css
236
- def wp_css?(url)
237
- site = url_2_site(url)
238
- css_url = site + "wp-admin/css/install.css"
239
- k=Wmap::UrlChecker.new
240
- if k.response_code(css_url) == 200
241
- k=nil
242
- parser = CssParser::Parser.new
243
- parser.load_uri!(css_url)
244
- rule = parser.find_by_selector('#logo a')
245
- if rule.length >0
246
- if rule[0] =~ /wordpress/i
247
- return true
248
- end
249
- end
250
- else
251
- k=nil
252
- return false
253
- end
254
- return false
255
- rescue => ee
256
- puts "Exception on method #{__method__} for site #{url}: #{ee}" if @verbose
257
- return false
258
- end
259
-
260
- # Wordpress detection checkpoint - meta generator
261
- def wp_meta?(url)
262
- site=url_2_site(url)
263
- k=Wmap::UrlChecker.new
264
- if k.response_code(site) == 200
265
- k=nil
266
- doc=open_page(site)
267
- meta=doc.css('meta')
268
- if meta.to_s =~ /wordpress/i
269
- return true
270
- else
271
- return false
272
- end
273
- end
274
- return false
275
- rescue => ee
276
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
277
- return false
278
- end
279
-
280
- # Wordpress detection checkpoint - wp-login
281
- def wp_login?(url)
282
- site=url_2_site(url)
283
- login_url=site + "wp-login.php"
284
- k=Wmap::UrlChecker.new
285
- if k.response_code(login_url) == 200
286
- k=nil
287
- doc=open_page(login_url)
288
- links=doc.css('link')
289
- if links.to_s =~ /login.min.css/i
290
- return true
291
- else
292
- return false
293
- end
294
- end
295
- return false
296
- rescue => ee
297
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
298
- return false
299
- end
300
-
301
- # Wordpress detection checkpoint - xml-rpc
302
- def wp_rpc?(url)
303
- site=url_2_site(url)
304
- rpc_url=site + "xmlrpc.php"
305
- k=Wmap::UrlChecker.new
306
- #puts "res code", k.response_code(rpc_url)
307
- if k.response_code(rpc_url) == 405 # method not allowed
308
- k=nil
309
- return true
310
- end
311
- return false
312
- rescue => ee
313
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
314
- return false
315
- end
316
-
317
- # Extract the WordPress version
318
- def wp_ver(url)
319
- if !wp_ver_readme(url).nil?
320
- puts "WordPress version found by wp_ver_readme method. " if @verbose
321
- return wp_ver_readme(url)
322
- elsif !wp_ver_login(url,"login.min.css").nil?
323
- puts "WordPress version found by login.min.css file. " if @verbose
324
- return wp_ver_login(url,"login.min.css")
325
- elsif !wp_ver_login(url,"buttons.min.css").nil?
326
- puts "WordPress version found by buttons.min.css file. " if @verbose
327
- return wp_ver_login(url,"buttons.min.css")
328
- elsif !wp_ver_login(url,"wp-admin.min.css").nil?
329
- puts "WordPress version found by wp-admin.min.css file. " if @verbose
330
- return wp_ver_login(url,"wp-admin.min.css")
331
- elsif !wp_ver_meta(url).nil?
332
- puts "WordPress version found by wp_ver_meta method. " if @verbose
333
- return wp_ver_meta(url)
334
- else
335
- return nil
336
- end
337
- rescue => ee
338
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
339
- return nil
340
- end
341
-
342
- # Identify wordpress version through the login page
343
- def wp_ver_login(url,pattern)
344
- puts "Check for #{pattern}" if @verbose
345
- site=url_2_site(url)
346
- login_url=site + "wp-login.php"
347
- k=Wmap::UrlChecker.new
348
- #puts "Res code: #{k.response_code(login_url)}" if @verbose
349
- if k.response_code(login_url) == 200
350
- doc=open_page(login_url)
351
- #puts doc.inspect
352
- links=doc.css('link')
353
- #puts links.inspect if @verbose
354
- links.each do |tag|
355
- if tag.to_s.include?(pattern)
356
- puts tag.to_s if @verbose
357
- k=nil
358
- return tag.to_s.scan(/[\d+\.]+\d+/).first
359
- end
360
- end
361
- end
362
- k=nil
363
- return nil
364
- rescue => ee
365
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
366
- return nil
367
- end
368
-
369
- # Identify wordpress version through the meta link
370
- def wp_ver_meta(url)
371
- site=url_2_site(url)
372
- k=Wmap::UrlChecker.new
373
- if k.response_code(site) == 200
374
- doc=open_page(site)
375
- #puts doc.inspect
376
- meta=doc.css('meta')
377
- #puts meta.inspect
378
- meta.each do |tag|
379
- if tag['content'].to_s =~ /wordpress/i
380
- #puts tag.to_s
381
- k=nil
382
- return tag['content'].to_s.scan(/[\d+\.]+\d+/).first
383
- end
384
- end
385
- end
386
- k=nil
387
- return nil
388
- rescue => ee
389
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
390
- return nil
391
- end
392
-
393
- # Wordpress version detection via - readme.html
394
- def wp_ver_readme(url)
395
- site=url_2_site(url)
396
- readme_url=site + "readme.html"
397
- k=Wmap::UrlChecker.new
398
- puts "Res code: #{k.response_code(readme_url)}" if @verbose
399
- if k.response_code(readme_url) == 200
400
- k=nil
401
- doc=open_page(readme_url)
402
- puts doc if @verbose
403
- logo=doc.css('h1#logo')[0]
404
- puts logo.inspect if @verbose
405
- return logo.to_s.scan(/[\d+\.]+\d+/).first
406
- end
407
- k=nil
408
- return nil
409
- rescue => ee
410
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
411
- return nil
412
- end
413
189
 
414
190
  end
415
191
  end