wmap 2.7.6 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1f2c900ee2ac330d411f3f6496868ea519dd322b236cd00be0b5f3d1d8d43db
4
- data.tar.gz: 054a534426e0b9b99730ac6252a9b0e2424866df914d02f6f7f1bdb925588bc1
3
+ metadata.gz: 2dee2577809daef8231ddc3d97a7e0abab5312700f36ab2233366ff6729ae388
4
+ data.tar.gz: 5dee805b28f88e2ae320e6afd9b4ef7d6f25bff99a34cf46b42c32249bf7eaad
5
5
  SHA512:
6
- metadata.gz: c591b085614ec050ff8637750dfff57c3238ccf42c2fce9cab985ccd0f3e4761ed085632a9db7f4af87be39e2921b33d330c890c1af87213a134bdd37f43553d
7
- data.tar.gz: 414720066fee25c30b41beb9c7529f7bfffda513e01e1bf4445cfefcd9ecbf2c421aa64db931a4d489845d664705ededb2474c3240750ad12cd2423b21078987
6
+ metadata.gz: c78742dad1d356b88d2b45a0773527fd8f4e54cbff13c0ccb5f9fd9a228fac56cc2c26a705e8c5ce068b4d2bdf50d1fd190060501a1fc17e0a1040e583080db6
7
+ data.tar.gz: 7eea2eaa6ee45e9ac7857eaa603177b00424a6842cdd89a200062bdcd737c1a66fa2310645e80272fce71d0193fb9ca8044f232e0dabdf20c2da453e436c84cc
data/bin/wmap CHANGED
@@ -13,7 +13,7 @@ parser = OptionParser.new do|opts|
13
13
  opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
14
14
  options[:data_dir] = data_dir;
15
15
  end
16
- opts.on('-t', '--target target', 'Web Mapper target') do |target|
16
+ opts.on('-t', '--target target', 'Web Mapper target / seed for discovery') do |target|
17
17
  options[:target] = target;
18
18
  end
19
19
  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
@@ -29,6 +29,10 @@ parser.parse!
29
29
  # print program banner
30
30
  puts Wmap.banner
31
31
  # print_usage unless options[:target]
32
+ unless options[:target]
33
+ puts "Usage: $ wmap -h"
34
+ exit 1
35
+ end
32
36
 
33
37
  # Preparing - check out the working logs directory
34
38
  if options[:data_dir]
@@ -129,6 +133,10 @@ Wmap.wlog(dis_urls.keys, "wmap", Log_dir+"discovered_urls.log") unless dis_urls.
129
133
  Wmap.wlog(dis_sites.keys, "wmap", Log_dir+"discovered_sites.log") unless dis_sites.empty?
130
134
  #crawler.wlog(c_start.keys,Log_dir+"crawler.log")
131
135
  #crawler.wlog(c_done.keys,Log_dir+"crawler.log")
136
+
137
+
138
+ # Save the current disovery urls only to a specific file, patched 07/23/2021
139
+ crawler.save_discovered_urls(Log_dir+"cur_urls.log")
132
140
  crawler=nil
133
141
 
134
142
 
data/dicts/tlds.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # http://data.iana.org/TLD/tlds-alpha-by-domain.txt
2
- # Version 2018110500, Last Updated Mon Nov 5 07:07:02 2018 UTC
2
+ # Version 2020033000, Last Updated Mon Mar 30 07:07:01 2020 UTC
3
3
  AAA
4
4
  AARP
5
5
  ABARTH
@@ -16,7 +16,6 @@ ACCENTURE
16
16
  ACCOUNTANT
17
17
  ACCOUNTANTS
18
18
  ACO
19
- ACTIVE
20
19
  ACTOR
21
20
  AD
22
21
  ADAC
@@ -146,7 +145,6 @@ BIZ
146
145
  BJ
147
146
  BLACK
148
147
  BLACKFRIDAY
149
- BLANCO
150
148
  BLOCKBUSTER
151
149
  BLOG
152
150
  BLOOMBERG
@@ -155,7 +153,6 @@ BM
155
153
  BMS
156
154
  BMW
157
155
  BN
158
- BNL
159
156
  BNPPARIBAS
160
157
  BO
161
158
  BOATS
@@ -214,7 +211,6 @@ CARE
214
211
  CAREER
215
212
  CAREERS
216
213
  CARS
217
- CARTIER
218
214
  CASA
219
215
  CASE
220
216
  CASEIH
@@ -247,7 +243,6 @@ CHEAP
247
243
  CHINTAI
248
244
  CHRISTMAS
249
245
  CHROME
250
- CHRYSLER
251
246
  CHURCH
252
247
  CI
253
248
  CIPRIANI
@@ -299,6 +294,7 @@ COUNTRY
299
294
  COUPON
300
295
  COUPONS
301
296
  COURSES
297
+ CPA
302
298
  CR
303
299
  CREDIT
304
300
  CREDITCARD
@@ -360,9 +356,7 @@ DNP
360
356
  DO
361
357
  DOCS
362
358
  DOCTOR
363
- DODGE
364
359
  DOG
365
- DOHA
366
360
  DOMAINS
367
361
  DOT
368
362
  DOWNLOAD
@@ -371,7 +365,6 @@ DTV
371
365
  DUBAI
372
366
  DUCK
373
367
  DUNLOP
374
- DUNS
375
368
  DUPONT
376
369
  DURBAN
377
370
  DVAG
@@ -392,7 +385,6 @@ ENERGY
392
385
  ENGINEER
393
386
  ENGINEERING
394
387
  ENTERPRISES
395
- EPOST
396
388
  EPSON
397
389
  EQUIPMENT
398
390
  ER
@@ -408,7 +400,6 @@ EU
408
400
  EUROVISION
409
401
  EUS
410
402
  EVENTS
411
- EVERBANK
412
403
  EXCHANGE
413
404
  EXPERT
414
405
  EXPOSED
@@ -488,6 +479,7 @@ GAME
488
479
  GAMES
489
480
  GAP
490
481
  GARDEN
482
+ GAY
491
483
  GB
492
484
  GBIZ
493
485
  GD
@@ -580,7 +572,6 @@ HOMEGOODS
580
572
  HOMES
581
573
  HOMESENSE
582
574
  HONDA
583
- HONEYWELL
584
575
  HORSE
585
576
  HOSPITAL
586
577
  HOST
@@ -634,7 +625,6 @@ IQ
634
625
  IR
635
626
  IRISH
636
627
  IS
637
- ISELECT
638
628
  ISMAILI
639
629
  IST
640
630
  ISTANBUL
@@ -699,12 +689,10 @@ KYOTO
699
689
  KZ
700
690
  LA
701
691
  LACAIXA
702
- LADBROKES
703
692
  LAMBORGHINI
704
693
  LAMER
705
694
  LANCASTER
706
695
  LANCIA
707
- LANCOME
708
696
  LAND
709
697
  LANDROVER
710
698
  LANXESS
@@ -725,7 +713,6 @@ LEGO
725
713
  LEXUS
726
714
  LGBT
727
715
  LI
728
- LIAISON
729
716
  LIDL
730
717
  LIFE
731
718
  LIFEINSURANCE
@@ -744,6 +731,7 @@ LIVING
744
731
  LIXIL
745
732
  LK
746
733
  LLC
734
+ LLP
747
735
  LOAN
748
736
  LOANS
749
737
  LOCKER
@@ -819,7 +807,6 @@ MN
819
807
  MO
820
808
  MOBI
821
809
  MOBILE
822
- MOBILY
823
810
  MODA
824
811
  MOE
825
812
  MOI
@@ -827,7 +814,6 @@ MOM
827
814
  MONASH
828
815
  MONEY
829
816
  MONSTER
830
- MOPAR
831
817
  MORMON
832
818
  MORTGAGE
833
819
  MOSCOW
@@ -835,7 +821,6 @@ MOTO
835
821
  MOTORCYCLES
836
822
  MOV
837
823
  MOVIE
838
- MOVISTAR
839
824
  MP
840
825
  MQ
841
826
  MR
@@ -854,7 +839,6 @@ MY
854
839
  MZ
855
840
  NA
856
841
  NAB
857
- NADEX
858
842
  NAGOYA
859
843
  NAME
860
844
  NATIONWIDE
@@ -955,7 +939,6 @@ PHOTO
955
939
  PHOTOGRAPHY
956
940
  PHOTOS
957
941
  PHYSIO
958
- PIAGET
959
942
  PICS
960
943
  PICTET
961
944
  PICTURES
@@ -1152,18 +1135,16 @@ SONG
1152
1135
  SONY
1153
1136
  SOY
1154
1137
  SPACE
1155
- SPIEGEL
1156
1138
  SPORT
1157
1139
  SPOT
1158
1140
  SPREADBETTING
1159
1141
  SR
1160
1142
  SRL
1161
- SRT
1143
+ SS
1162
1144
  ST
1163
1145
  STADA
1164
1146
  STAPLES
1165
1147
  STAR
1166
- STARHUB
1167
1148
  STATEBANK
1168
1149
  STATEFARM
1169
1150
  STC
@@ -1211,7 +1192,6 @@ TEAM
1211
1192
  TECH
1212
1193
  TECHNOLOGY
1213
1194
  TEL
1214
- TELEFONICA
1215
1195
  TEMASEK
1216
1196
  TENNIS
1217
1197
  TEVA
@@ -1271,7 +1251,6 @@ TZ
1271
1251
  UA
1272
1252
  UBANK
1273
1253
  UBS
1274
- UCONNECT
1275
1254
  UG
1276
1255
  UK
1277
1256
  UNICOM
@@ -1305,7 +1284,6 @@ VIP
1305
1284
  VIRGIN
1306
1285
  VISA
1307
1286
  VISION
1308
- VISTAPRINT
1309
1287
  VIVA
1310
1288
  VIVO
1311
1289
  VLAANDEREN
@@ -1324,7 +1302,6 @@ WALMART
1324
1302
  WALTER
1325
1303
  WANG
1326
1304
  WANGGOU
1327
- WARMAN
1328
1305
  WATCH
1329
1306
  WATCHES
1330
1307
  WEATHER
@@ -1452,13 +1429,14 @@ XN--MGBA7C0BBN0A
1452
1429
  XN--MGBAAKC7DVF
1453
1430
  XN--MGBAAM7A8H
1454
1431
  XN--MGBAB2BD
1432
+ XN--MGBAH1A3HJKRD
1455
1433
  XN--MGBAI9AZGQP6J
1456
1434
  XN--MGBAYH7GPA
1457
- XN--MGBB9FBPOB
1458
1435
  XN--MGBBH1A
1459
1436
  XN--MGBBH1A71E
1460
1437
  XN--MGBC0A9AZCG
1461
1438
  XN--MGBCA7DZDO
1439
+ XN--MGBCPQ6GPA1A
1462
1440
  XN--MGBERP4A5D4AR
1463
1441
  XN--MGBGU82A
1464
1442
  XN--MGBI4ECEXP
@@ -1484,8 +1462,10 @@ XN--P1AI
1484
1462
  XN--PBT977C
1485
1463
  XN--PGBS0DH
1486
1464
  XN--PSSY2U
1465
+ XN--Q7CE6A
1487
1466
  XN--Q9JYB4C
1488
1467
  XN--QCKA1PMC
1468
+ XN--QXA6A
1489
1469
  XN--QXAM
1490
1470
  XN--RHQV96G
1491
1471
  XN--ROVU88B
@@ -1530,7 +1510,6 @@ ZAPPOS
1530
1510
  ZARA
1531
1511
  ZERO
1532
1512
  ZIP
1533
- ZIPPO
1534
1513
  ZM
1535
1514
  ZONE
1536
1515
  ZUERICH
data/lib/wmap.rb CHANGED
@@ -8,6 +8,7 @@
8
8
  require 'wmap/utils/domain_root'
9
9
  require 'wmap/utils/url_magic'
10
10
  require 'wmap/utils/logger'
11
+ require 'wmap/utils/wp_detect'
11
12
  require 'wmap/utils/utils'
12
13
  require 'wmap/cidr_tracker'
13
14
  require 'wmap/domain_tracker'
@@ -99,8 +99,8 @@ class Wmap::CidrTracker
99
99
  #@known_cidr_blks_asce_index=NetAddr.sort(@known_cidr_blks.keys, :Desc=>false)
100
100
  @known_cidr_blks_asce_index=@known_cidr_blks.keys.sort
101
101
  @known_cidr_blks_desc_index=@known_cidr_blks_asce_index.reverse
102
- #rescue => ee
103
- # puts "Exception on method #{__method__}: #{ee}" # if @verbose
102
+ rescue => ee
103
+ puts "Exception on method #{__method__}: #{ee}" # if @verbose
104
104
  end
105
105
 
106
106
  # 'setter' to remove an entry to CIDR store @known_cidr_blks
@@ -167,6 +167,7 @@ class Wmap::CidrTracker
167
167
  known = cidr4.contains?(ip+'/32')
168
168
  break if known
169
169
  end
170
+ return known
170
171
  rescue => ee
171
172
  puts "Exception on method #{__method__}: #{ee}" if @verbose
172
173
  return false
@@ -7,16 +7,14 @@
7
7
  #++
8
8
  require "parallel"
9
9
  #require "singleton"
10
- require "open-uri"
11
- require "open_uri_redirections"
12
- require "nokogiri"
13
- require "css_parser"
10
+
14
11
 
15
12
  module Wmap
16
13
  class SiteTracker
17
14
 
18
15
  class WpTracker < Wmap::SiteTracker
19
16
  include Wmap::Utils
17
+ include Wmap::Utils::WpDetect
20
18
  #include Singleton
21
19
 
22
20
  attr_accessor :http_timeout, :max_parallel, :verbose, :sites_wp, :data_dir
@@ -152,27 +150,6 @@ class WpTracker < Wmap::SiteTracker
152
150
  end
153
151
  alias_method :adds, :bulk_add
154
152
 
155
- # logic to determin if it's a wordpress site
156
- def is_wp?(url)
157
- site=url_2_site(url)
158
- if wp_readme?(site)
159
- found=true
160
- elsif wp_css?(site)
161
- found=true
162
- elsif wp_meta?(site)
163
- found=true
164
- elsif wp_login?(site)
165
- found=true
166
- elsif wp_rpc?(site)
167
- found=true
168
- else
169
- found=false
170
- end
171
- return found
172
- rescue => ee
173
- puts "Exception on method #{__method__}: #{ee}: #{url}" if @verbose
174
- end
175
-
176
153
  # Refresh one site entry then update the instance variable (cache)
177
154
  def refresh (target,use_cache=false)
178
155
  return add(target,use_cache)
@@ -209,207 +186,6 @@ class WpTracker < Wmap::SiteTracker
209
186
  # return Hash.new
210
187
  end
211
188
 
212
- # Wordpress detection checkpoint - readme.html
213
- def wp_readme?(url)
214
- site = url_2_site(url)
215
- readme_url=site + "readme.html"
216
- k=Wmap::UrlChecker.new
217
- if k.response_code(readme_url) == 200
218
- k=nil
219
- doc=open_page(readme_url)
220
- title=doc.css('title')
221
- if title.to_s =~ /wordpress/i
222
- return true
223
- else
224
- return false
225
- end
226
- else
227
- k=nil
228
- return false
229
- end
230
- rescue => ee
231
- puts "Exception on method #{__method__} for site #{url}: #{ee}" if @verbose
232
- return false
233
- end
234
-
235
- # Wordpress detection checkpoint - install.css
236
- def wp_css?(url)
237
- site = url_2_site(url)
238
- css_url = site + "wp-admin/css/install.css"
239
- k=Wmap::UrlChecker.new
240
- if k.response_code(css_url) == 200
241
- k=nil
242
- parser = CssParser::Parser.new
243
- parser.load_uri!(css_url)
244
- rule = parser.find_by_selector('#logo a')
245
- if rule.length >0
246
- if rule[0] =~ /wordpress/i
247
- return true
248
- end
249
- end
250
- else
251
- k=nil
252
- return false
253
- end
254
- return false
255
- rescue => ee
256
- puts "Exception on method #{__method__} for site #{url}: #{ee}" if @verbose
257
- return false
258
- end
259
-
260
- # Wordpress detection checkpoint - meta generator
261
- def wp_meta?(url)
262
- site=url_2_site(url)
263
- k=Wmap::UrlChecker.new
264
- if k.response_code(site) == 200
265
- k=nil
266
- doc=open_page(site)
267
- meta=doc.css('meta')
268
- if meta.to_s =~ /wordpress/i
269
- return true
270
- else
271
- return false
272
- end
273
- end
274
- return false
275
- rescue => ee
276
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
277
- return false
278
- end
279
-
280
- # Wordpress detection checkpoint - wp-login
281
- def wp_login?(url)
282
- site=url_2_site(url)
283
- login_url=site + "wp-login.php"
284
- k=Wmap::UrlChecker.new
285
- if k.response_code(login_url) == 200
286
- k=nil
287
- doc=open_page(login_url)
288
- links=doc.css('link')
289
- if links.to_s =~ /login.min.css/i
290
- return true
291
- else
292
- return false
293
- end
294
- end
295
- return false
296
- rescue => ee
297
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
298
- return false
299
- end
300
-
301
- # Wordpress detection checkpoint - xml-rpc
302
- def wp_rpc?(url)
303
- site=url_2_site(url)
304
- rpc_url=site + "xmlrpc.php"
305
- k=Wmap::UrlChecker.new
306
- #puts "res code", k.response_code(rpc_url)
307
- if k.response_code(rpc_url) == 405 # method not allowed
308
- k=nil
309
- return true
310
- end
311
- return false
312
- rescue => ee
313
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
314
- return false
315
- end
316
-
317
- # Extract the WordPress version
318
- def wp_ver(url)
319
- if !wp_ver_readme(url).nil?
320
- puts "WordPress version found by wp_ver_readme method. " if @verbose
321
- return wp_ver_readme(url)
322
- elsif !wp_ver_login(url,"login.min.css").nil?
323
- puts "WordPress version found by login.min.css file. " if @verbose
324
- return wp_ver_login(url,"login.min.css")
325
- elsif !wp_ver_login(url,"buttons.min.css").nil?
326
- puts "WordPress version found by buttons.min.css file. " if @verbose
327
- return wp_ver_login(url,"buttons.min.css")
328
- elsif !wp_ver_login(url,"wp-admin.min.css").nil?
329
- puts "WordPress version found by wp-admin.min.css file. " if @verbose
330
- return wp_ver_login(url,"wp-admin.min.css")
331
- elsif !wp_ver_meta(url).nil?
332
- puts "WordPress version found by wp_ver_meta method. " if @verbose
333
- return wp_ver_meta(url)
334
- else
335
- return nil
336
- end
337
- rescue => ee
338
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
339
- return nil
340
- end
341
-
342
- # Identify wordpress version through the login page
343
- def wp_ver_login(url,pattern)
344
- puts "Check for #{pattern}" if @verbose
345
- site=url_2_site(url)
346
- login_url=site + "wp-login.php"
347
- k=Wmap::UrlChecker.new
348
- #puts "Res code: #{k.response_code(login_url)}" if @verbose
349
- if k.response_code(login_url) == 200
350
- doc=open_page(login_url)
351
- #puts doc.inspect
352
- links=doc.css('link')
353
- #puts links.inspect if @verbose
354
- links.each do |tag|
355
- if tag.to_s.include?(pattern)
356
- puts tag.to_s if @verbose
357
- k=nil
358
- return tag.to_s.scan(/[\d+\.]+\d+/).first
359
- end
360
- end
361
- end
362
- k=nil
363
- return nil
364
- rescue => ee
365
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
366
- return nil
367
- end
368
-
369
- # Identify wordpress version through the meta link
370
- def wp_ver_meta(url)
371
- site=url_2_site(url)
372
- k=Wmap::UrlChecker.new
373
- if k.response_code(site) == 200
374
- doc=open_page(site)
375
- #puts doc.inspect
376
- meta=doc.css('meta')
377
- #puts meta.inspect
378
- meta.each do |tag|
379
- if tag['content'].to_s =~ /wordpress/i
380
- #puts tag.to_s
381
- k=nil
382
- return tag['content'].to_s.scan(/[\d+\.]+\d+/).first
383
- end
384
- end
385
- end
386
- k=nil
387
- return nil
388
- rescue => ee
389
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
390
- return nil
391
- end
392
-
393
- # Wordpress version detection via - readme.html
394
- def wp_ver_readme(url)
395
- site=url_2_site(url)
396
- readme_url=site + "readme.html"
397
- k=Wmap::UrlChecker.new
398
- puts "Res code: #{k.response_code(readme_url)}" if @verbose
399
- if k.response_code(readme_url) == 200
400
- k=nil
401
- doc=open_page(readme_url)
402
- puts doc if @verbose
403
- logo=doc.css('h1#logo')[0]
404
- puts logo.inspect if @verbose
405
- return logo.to_s.scan(/[\d+\.]+\d+/).first
406
- end
407
- k=nil
408
- return nil
409
- rescue => ee
410
- puts "Exception on method #{__method__} for url #{url}: #{ee}" if @verbose
411
- return nil
412
- end
413
189
 
414
190
  end
415
191
  end