wmap 2.5.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wmap.rb +1 -0
- data/lib/wmap/domain_tracker.rb +1 -1
- data/lib/wmap/site_tracker.rb +2 -2
- data/lib/wmap/url_checker.rb +24 -4
- data/lib/wmap/url_crawler.rb +49 -49
- data/lib/wmap/url_crawler/adware_tag.rb +281 -0
- data/lib/wmap/wp_tracker.rb +5 -5
- data/logs/wmap.log +1 -0
- data/settings/tag_signatures +6 -0
- data/version.txt +2 -2
- data/wmap.gemspec +2 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6f2042c146724dcfa9852bcb1920a2d9baded35fb8ba589d3af277277d678d36
+  data.tar.gz: d0ae5c5f90a4707eddbb91119b71ae2e9dc1c9ace6631e3f66e03d892ef3d8f1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2a543f31d23e42604a0c86445eb3c2b469485527c0897a7495a8d74f593be19a1e915cf38e8ae03246012d5bb21734f8d2f93b0180863383a3a5260030fb1336
+  data.tar.gz: 830a9645c9633f0cd396cf9dfc40654a902ad3275ec37870f2fa1e06f64365bf9e0229f5acb0972af8569497c253e13d88b87c5d86710d324cb1510e3cd6679c

data/lib/wmap.rb
CHANGED
@@ -20,6 +20,7 @@ require 'wmap/wp_tracker'
 require 'wmap/network_profiler'
 require 'wmap/port_scanner'
 require 'wmap/url_crawler'
+require 'wmap/url_crawler/adware_tag'
 require 'wmap/dns_bruter'
 require 'wmap/site_tracker'
 require 'wmap/site_tracker/deactivated_site'

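The one-line change above wires the new adware-tag crawler into the gem's top-level entry point, so a plain require of wmap also loads the new class. A quick sanity check, as a sketch (assuming the 2.5.1 gem is installed):

require 'wmap'

# Prints "constant" once the library has loaded the new class
puts defined?(Wmap::UrlCrawler::AdwareTag)
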
data/lib/wmap/domain_tracker.rb
CHANGED
@@ -143,7 +143,7 @@ class Wmap::DomainTracker
         @known_internet_domains.merge!(record)
         return record
       else
-        puts "Problem add domain #{host} - please use legal root domain or sub domain only."
+        puts "Problem add domain #{host}: unknown domain format - please use legal root domain or sub domain only."
       end
     end
   rescue => ee

data/lib/wmap/site_tracker.rb
CHANGED
@@ -108,7 +108,7 @@ class Wmap::SiteTracker
     # Preliminary sanity check
     site=site.strip.downcase unless site.nil?
     if site_known?(site)
-      puts "Site
+      puts "Site already exists. Skip it: #{site}"
       return nil
     end
     site=normalize_url(site) if is_url?(site)
@@ -242,7 +242,7 @@ class Wmap::SiteTracker
       puts "Exception on method #{__method__}: #{ee}"
       checker=nil
       deact=nil
-      host_tracker=nil
+      host_tracker=nil
       return nil
     end
   end

data/lib/wmap/url_checker.rb
CHANGED
@@ -32,8 +32,8 @@ class Wmap::UrlChecker

   # Main worker method to perform various checks on the URL / site
   def url_worker (url)
-    puts "Checking out an unknown URL: #{url}" if @verbose
     begin
+      puts "Checking out an unknown URL: #{url}" if @verbose
       url=url.strip.downcase
       raise "Invalid URL format: #{url}" unless is_url?(url)
       timestamp=Time.now
@@ -46,10 +46,10 @@ class Wmap::UrlChecker
       else
         code=response_code(url)
       end
-      if
-        loc
+      if code>=300 && code < 400
+        loc=landing_location(4,url)
       else
-        loc=
+        loc=nil
       end
       if @url_finger_print.key?(url)
         fp=@url_finger_print[url]
@@ -212,6 +212,26 @@ class Wmap::UrlChecker
   end
   alias_method :location, :redirect_location

+  # Test the URL / Site and return the landing url location (recursive with the depth = 4 )
+  def landing_location (depth=4, url)
+    begin
+      depth -= 1
+      return url if depth < 1
+      timeo = @http_timeout/1000.0
+      uri = URI.parse(url)
+      code = response_code (url)
+      if code >= 300 && code < 400
+        url = redirect_location (url)
+        url = landing_location(depth,url)
+      else
+        return url
+      end
+      return url
+    rescue Exception => ee
+      puts "Exception on method #{__method__} on URL #{url}: #{ee}" if @verbose
+    end
+  end
+
   # Test the URL / site and return the web server type from the HTTP header "server" field
   def get_server_header (url)
     begin

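The new landing_location method added above follows a chain of HTTP 3xx redirects until it reaches a non-redirect response or exhausts its depth budget, and url_worker now records that landing URL whenever a site answers with a 3xx code. A minimal usage sketch (the target URL is a placeholder; assumes the gem is installed and the host is reachable):

require 'wmap'

checker = Wmap::UrlChecker.new
# Follow up to 4 redirects and return the final landing URL
puts checker.landing_location(4, "http://www.example.com")
# The depth argument is optional and defaults to 4
puts checker.landing_location("http://www.example.com")

Note that Ruby permits the optional depth parameter before the required url parameter, which is why the single-argument call works.
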
data/lib/wmap/url_crawler.rb
CHANGED
@@ -21,7 +21,6 @@ class Wmap::UrlCrawler
   attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
   # Global variable used to store the combined result of all the forked child processes. Note that class variable
   # would not be able to pass the result due the limitation of IO Pipe communication mechanism used by 'parallel' fork manager
-  # $discovered_urls=Hash.new

   # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
   Max_http_timeout=8000
@@ -47,8 +46,8 @@ class Wmap::UrlCrawler

   # Pre-crawl profiler, to be used for network profiling to maximum the crawler performance.
   def pre_crawl(url)
-    puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
     begin
+      puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
       host=url_2_host(url)
       # Use the following formula to 'guess' the right http time-out threshold for the scanner
       nwk_to=Wmap::NetworkProfiler.new.profile(host).to_i
@@ -67,8 +66,8 @@ class Wmap::UrlCrawler
   # A web crawler to crawl a known website and search for html links within the same root domain. For example,
   # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
   def crawl(url)
-
-
+    begin
+      puts "Start web crawling on #{url}"
       result=Array.new
       url=url.chomp.strip
       result.push(url_2_site(url))
@@ -80,17 +79,17 @@ class Wmap::UrlCrawler
       }
       puts "Web crawling time-out on #{url}: #{status}" if @verbose
       return result
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+      return result
+    end
   end
   alias_method :query, :crawl

   # The worker instance of crawler who perform the labour work
   def crawl_worker(url0)
-
-
+    begin
+      puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
       # Input URL sanity check first
       if is_url?(url0)
         host=url_2_host(url0)
@@ -121,7 +120,7 @@ class Wmap::UrlCrawler
         url_stores[url]=true unless url_stores.key?(url)
         @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
         # $discovered_urls[url]=true unless $discovered_urls.key?(url)
-        doc =
+        doc = Nokogiri::HTML(url_body)
         next if doc == nil
         if url_stores.size >= @crawl_page_limit
           #@visited_urls_by_crawler.merge!(url_stores)
@@ -145,12 +144,12 @@ class Wmap::UrlCrawler
       wlog(log_info, "UrlCrawler", @log_file)
       @crawl_done[url0]=true unless @crawl_done.key?(url0)
       return url_stores
-
-
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+      log_info[3]="Exception on #{url0}"
+      wlog(log_info,"UrlCrawler",@log_file)
+      return url_stores
+    end
   end

   # Fast crawling by utilizing fork manager parallel to spawn numbers of child processes at the same time
@@ -211,14 +210,14 @@ class Wmap::UrlCrawler

   # Fast crawling method - build the target pool from the input file
   def crawl_workers_on_file (file)
-    puts "Web crawl the list of targets from file: #{file}"
     begin
+      puts "Web crawl the list of targets from file: #{file}"
       targets=file_2_list(file)
      sites=crawl_workers(targets,num=@max_parallel)
       return sites
     rescue => ee
-
-
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
     end
   end
   alias_method :query_file, :crawl_workers_on_file
@@ -226,7 +225,6 @@ class Wmap::UrlCrawler

   # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
   def open_url(url)
-    #url_object = nil
     begin
       puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
       if url =~ /http\:/i
@@ -262,8 +260,8 @@ class Wmap::UrlCrawler

   # Return the destination url in case of url re-direct
   def update_url_if_redirected(url, url_object)
-    #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
     begin
+      #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
       if url != url_object.base_uri.to_s
         return url_object.base_uri.to_s
       end
@@ -274,24 +272,26 @@ class Wmap::UrlCrawler
     end
   end

+=begin
   # Wrapper for the Nokogiri DOM parser
   def parse_html(html_body)
-    #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
     begin
-
+      #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
+      doc = Nokogiri::HTML(html_body)
       #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
       #puts "doc: #{doc}" if @verbose
       return doc
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
+    end
   end
+=end

-
+  # Search 'current_url' and return found URLs under the same domain
   def find_urls_on_page(doc, current_url)
-    #puts "Search and return URLs within the doc: #{doc}" if @verbose
     begin
+      puts "Search and return URLs within the doc: #{doc}" if @verbose
       urls_list = []
       # case 1 - search embedded HTML tag <a href='url'> for the url elements
       links=doc.css('a')
@@ -320,46 +320,46 @@ class Wmap::UrlCrawler
       end
       #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
       return urls_list.uniq-["",nil]
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
     end
-
+  end

   # Method to print out discovery URL result
   def print_discovered_urls_by_crawler
-    puts "Print discovered url by the crawler. " if @verbose
     begin
+      puts "Print discovered url by the crawler. " if @verbose
       puts "\nSummary Report of Discovered URLs from the Crawler:"
       @discovered_urls_by_crawler.keys.each do |url|
         puts url
       end
       puts "Total: #{@discovered_urls_by_crawler.keys.size}"
       puts "End of the summary"
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
+    end
   end
   alias_method :print, :print_discovered_urls_by_crawler

   # Method to save URL discovery result
   def save_discovered_urls (file)
-    puts "Save discovered urls by the crawler to file: #{file} "
     begin
+      puts "Save discovered urls by the crawler to file: #{file} "
       list_2_file(@discovered_urls_by_crawler.keys, file)
       puts "Done!"
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
+    end
   end
   alias_method :save, :save_discovered_urls

   # Method to retrieve discovery site result
   def get_discovered_sites_by_crawler
-    puts "Print summary report of discovered sites. " if @verbose
     begin
+      puts "Print summary report of discovered sites. " if @verbose
       puts "\nSummary Report of Discovered Sites from the Crawler:"
       sites = Hash.new
       @discovered_urls_by_crawler.keys.each do |url|
@@ -370,12 +370,12 @@ class Wmap::UrlCrawler
       puts "Total: #{sites.size}"
       puts "End of the summary"
       return sites.keys
-
+    rescue => ee
       puts "Exception on method #{__method__}: #{ee}" if @verbose
-
-
+      return nil
+    end
   end
   alias_method :get_sites, :get_discovered_sites_by_crawler

-  private :open_url, :read_url, :update_url_if_redirected, :
+  private :open_url, :read_url, :update_url_if_redirected, :find_urls_on_page
 end

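The pattern across all of these hunks is the same: each public method now opens its begin block first and prints its banner inside it, so an exception in any step is caught by the method's own rescue clause and logged instead of escaping the call. A short sketch of the crawler entry points touched above (hypothetical target and output file name):

require 'wmap'

crawler = Wmap::UrlCrawler.new
crawler.crawl("http://www.example.com")          # single-site crawl; alias: query
crawler.print_discovered_urls_by_crawler         # summary report; alias: print
crawler.save_discovered_urls("found_urls.txt")   # persist results; alias: save
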
data/lib/wmap/url_crawler/adware_tag.rb
ADDED
@@ -0,0 +1,281 @@
+#--
+# Wmap
+#
+# A pure Ruby library for Internet web application discovery and tracking.
+#
+# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+#++
+
+
+module Wmap
+  class UrlCrawler
+
+    # Class to identify and track adware within the site store
+    include Wmap::Utils
+    attr_accessor :signature_file, :tag_file, :verbose, :data_dir, :data_store
+    attr_reader :tag_store, :tag_signatures
+
+
+    class AdwareTag < Wmap::UrlCrawler
+
+      # Initialize the instance variables
+      def initialize (params = {})
+        @verbose=params.fetch(:verbose, false)
+        @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../../data/')
+        @tag_file=@data_dir + 'tag_sites'
+        # Set default instance variables
+        @signature_file=File.dirname(__FILE__) + '/../../../settings/' + 'tag_signatures'
+        file=params.fetch(:signature_file, @signature_file)
+        @tag_signatures=load_from_file(file)
+        file2=params.fetch(:tag_file, @tag_file)
+        File.write(file2, "") unless File.exist?(@tag_file)
+        # load the known tag store
+        @tag_store=load_tag_from_file(file2)
+      end
+
+
+      # load the known tag signatures into an instance variable
+      def load_from_file (file, lc=true)
+        begin
+          puts "Loading data file: #{file}" if @verbose
+          data_store=Hash.new
+          f = File.open(file, 'r')
+          f.each_line do |line|
+            puts "Processing line: #{line}" if @verbose
+            line=line.chomp.strip
+            next if line.nil?
+            next if line.empty?
+            next if line =~ /^\s*#/
+            line=line.downcase if lc==true
+            entry=line.split(',')
+            if data_store.key?(entry[0])
+              next
+            else
+              data_store[entry[0]]=entry[1].strip
+            end
+
+          end
+          f.close
+          return data_store
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+          return nil
+        end
+      end
+
+      # load the known tag store cache into an instance variable
+      def load_tag_from_file (file, lc=true)
+        begin
+          puts "Loading tag data file: #{file}" if @verbose
+          data_store=Hash.new
+          f = File.open(file, 'r')
+          f.each_line do |line|
+            puts "Processing line: #{line}" if @verbose
+            line=line.chomp.strip
+            next if line.nil?
+            next if line.empty?
+            next if line =~ /^\s*#/
+            line=line.downcase if lc==true
+            entry=line.split(',')
+            if data_store.key?(entry[0])
+              next
+            else
+              data_store[entry[0]]=[entry[1].strip, entry[2].strip, entry[3]]
+            end
+          end
+          f.close
+          return data_store
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+          return nil
+        end
+      end
+
+      # Save the current tag store hash table into a file
+      def save_to_file!(file_tag=@tag_file, tags=@tag_store)
+        begin
+          puts "Saving the current wordpress site table from memory to file: #{file_tag} ..." if @verbose
+          timestamp=Time.now
+          f=File.open(file_tag, 'w')
+          f.write "# Local tag file created by class #{self.class} method #{__method__} at: #{timestamp}\n"
+          f.write "# Site, Landing URL, Detected Adware Tag, Tag Version, Tag Description\n"
+          tags.each do |key, val|
+            f.write "#{key}, #{val[0]}, #{val[1]}, #{val[2]}, #{val[3]}\n"
+          end
+          f.close
+          puts "Tag store cache table is successfully saved: #{file_tag}"
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+        end
+      end
+      alias_method :save!, :save_to_file!
+
+      # add tag entries (from the sitetracker list)
+      def refresh (num=@max_parallel,use_cache=true)
+        #begin
+          puts "Add entries to the local cache table from site tracker: " if @verbose
+          results=Hash.new
+          tags=Wmap::SiteTracker.instance.known_sites.keys
+          if tags.size > 0
+            Parallel.map(tags, :in_processes => num) { |target|
+              check_adware(target,use_cache)
+            }.each do |process|
+              if !process
+                next
+              else
+                results.merge!(process)
+              end
+            end
+            @tag_store.merge!(results)
+            puts "Done loading entries."
+            tags=nil
+            return results
+          else
+            puts "Error: no entry is loaded. Please check your list and try again."
+          end
+          tags=nil
+          return results
+        #rescue => ee
+        #  puts "Exception on method #{__method__}: #{ee}" if @verbose
+        #end
+      end
+
+      # Give a site, locate the landing page, then sift out the adware tag if found
+      def check_adware(site,use_cache=true)
+        #begin
+          puts "Check the site for known Adware tags: #{site}" if @verbose
+          record = Hash.new
+          if use_cache && @tag_store.key?(site)
+            puts "Site entry already exist. Skipping: #{site}" if @verbose
+          else
+            url = fast_landing(site)
+            tags = find_tags(url)
+            return record if tags.size==0
+            tag_vers=tags.map do |tag|
+              get_ver(url,tag)
+            end
+            tag_descs=tags.map do |tag|
+              Base64.urlsafe_encode64(get_desc(url,tag))
+            end
+            if tags
+              record[site]=[url, tags.join("|"), tag_vers.join("|"), tag_descs.join("|")]
+              @tag_store.merge!(record)
+              puts "Tag entry loaded: #{record}" if @verbose
+            else
+              puts "No tag found. Skip site #{site}" if @verbose
+            end
+          end
+          return record
+        #rescue => ee
+        #  puts "Exception on method #{__method__}: #{ee}: #{site}" if @verbose
+        #end
+      end
+
+      # Given a site, determine the landing url
+      def fast_landing(site)
+        puts "Locate the landing url for: #{site}" if @verbose
+        my_tracker=Wmap::SiteTracker.instance
+        if my_tracker.known_sites.key?(site)
+          # looking into the cache first
+          if my_tracker.known_sites[site]['code'] >= 300 && my_tracker.known_sites[site]['code'] < 400
+            url = my_tracker.known_sites[site]['redirection']
+          else
+            url = site
+          end
+          my_tracker = nil
+        else
+          # no cache, then need to do it fresh
+          my_checker = Wmap::UrlChecker.new
+          url = my_checker.landing_location(site)
+          my_checker = nil
+        end
+        puts "Landing url found: #{url}" if @verbose
+        return url
+      end
+
+      # Search the page for known tag signatures. If found return them in an array
+      def find_tags(url)
+        begin
+          puts "Search and return tags within the url payload: #{url}" if @verbose
+          tag_list = []
+          doc = Nokogiri::HTML(open(url))
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            @tag_signatures.keys.map do |tag|
+              tag_list.push(tag) if my_line.include?(tag)
+            end
+          end
+          doc = nil
+          return tag_list
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+          return []
+        end
+      end
+
+      # Search the url payload for known tag version identifier. If found return a string, else empty string.
+      def get_ver(url,tag)
+        puts "Search and return tag version within the url payload: #{url}, #{tag}" if @verbose
+        tag_ver=""
+        doc = Nokogiri::HTML(open(url))
+        case tag
+        when "utag.js" # sample: ...,"code_release_version":"cb20190312032612",...
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            if my_line.include?("code_release_version")
+              puts "Extract tag version from line: #{my_line}" if @verbose
+              m = my_line.match(/\"code\_release\_version\"\:\"(?<ver>[a-z]+\d+)\"/)
+              tag_ver = m[:ver]
+              break
+            end
+          end
+        when "analytics.js" # sample: ga('create', 'UA-19175804-2', 'knopfdoubleday.com');
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            if my_line.include?("ga(") && my_line.include?("create")
+              puts "Extract tag version from line: #{my_line}" if @verbose
+              m = my_line.match(/[\'|\"]create[\'|\"]\s*\,\s*[\'|\"](?<ver>\w+\-\d+\-\d+)[\'|\"]\s*\,/)
+              tag_ver = m[:ver]
+              break
+            end
+          end
+        when "all.js" # sample: appId : '749936668352954',
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            if my_line.include?("appid") && my_line.include?(":")
+              puts "Extract tag version from line: #{my_line}" if @verbose
+              m = my_line.match(/appid\s+\:\s+[\'|\"](?<ver>\d+)[\'|\"]\s*\,/)
+              tag_ver = m[:ver]
+              break
+            end
+          end
+
+        else
+          puts "Unknown Adware Tag: #{tag}"
+          # do nothing
+        end
+        doc = nil
+        return tag_ver
+      end
+
+      # Search the url payload for known tag. If found return the base64 encode whole script snippet.
+      def get_desc(url,tag)
+        puts "Search and return tag script in url payload: #{url}, #{tag}" if @verbose
+        recording=false
+        tag_found=false
+        tag_desc=""
+        doc = Nokogiri::HTML(open(url))
+        doc.search('script').map do |script|
+          if script.text.include?(tag)
+            return script.text
+          end
+        end
+        doc = nil
+        return tag_desc
+      end
+
+
+
+    end
+  end
+end

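Putting the new file together: AdwareTag loads the comma-separated signature list from settings/tag_signatures (also added in this release), resolves a site's landing page via the site tracker cache or the new Wmap::UrlChecker#landing_location, greps the page text for signature names such as utag.js, analytics.js, and all.js, and caches one record per site (landing URL, tags, versions, Base64-encoded script snippets) in data/tag_sites. A usage sketch (hypothetical site; assumes network access):

require 'wmap'

tag_tracker = Wmap::UrlCrawler::AdwareTag.new(:verbose => true)
record = tag_tracker.check_adware("http://www.example.com")  # one site, cache honored
puts record.inspect
tag_tracker.save!    # flush the in-memory tag store back to the cache file

One design note: find_tags, get_ver, and get_desc each re-fetch and re-parse the page with Nokogiri::HTML(open(url)), so a site carrying several tags is downloaded multiple times per check.
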
data/lib/wmap/wp_tracker.rb
CHANGED
@@ -71,7 +71,7 @@ class Wmap::WpTracker
     end
   end

-  # Save the current
+  # Save the current hash table into a file
   def save_to_file!(file_wps=@file_wps, wps=@known_wp_sites)
     puts "Saving the current wordpress site table from memory to file: #{file_wps} ..." if @verbose
     begin
@@ -95,11 +95,11 @@ class Wmap::WpTracker
   alias_method :save!, :save_to_file!

   # 'setter' to add wordpress entry to the cache one at a time
-  def add(url)
+  def add(url, use_cache=true)
     begin
       puts "Add entry to the local cache table: #{url}" if @verbose
       site=url_2_site(url)
-      if @known_wp_sites.key?(site)
+      if use_cache && @known_wp_sites.key?(site)
         puts "Site is already exist. Skipping: #{site}"
       else
         record=Hash.new
@@ -141,14 +141,14 @@ class Wmap::WpTracker
   end

   # add wordpress site entries (from a sitetracker list)
-  def refresh (num=@max_parallel)
+  def refresh (num=@max_parallel,use_cache=true)
     #begin
       puts "Add entries to the local cache table from site tracker: " if @verbose
       results=Hash.new
       wps=Wmap::SiteTracker.instance.known_sites.keys
       if wps.size > 0
         Parallel.map(wps, :in_processes => num) { |target|
-          add(target)
+          add(target,use_cache)
         }.each do |process|
           if process.nil?
             next

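The use_cache flag introduced here threads from refresh down into add, so a bulk refresh can now force re-evaluation of sites that are already in the local cache rather than skipping them. A sketch (assuming wp is an existing Wmap::WpTracker instance):

# wp is assumed to be a Wmap::WpTracker instance
wp.add("http://blog.example.com")          # default: skip the site if already cached
wp.add("http://blog.example.com", false)   # bypass the cache and re-check the site
wp.refresh(10, false)                      # re-evaluate all tracked sites, 10 processes
wp.save!                                   # persist the updated table
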
data/logs/wmap.log
CHANGED
@@ -1550,3 +1550,4 @@
 2019-02-20 21:10:59 -0500: googleBot: Execute the command: googleBot
 2019-02-20 21:17:21 -0500: googleBot: Execute the command: googleBot
 2019-02-20 21:26:13 -0500: googleBot: Execute the command: googleBot
+2019-02-22 09:59:09 -0500: wmap: Execute the command: wmap /Users/sli/prh_wmap/shared/data/seed

data/version.txt
CHANGED
@@ -3,8 +3,8 @@
 ###############################################################################
 package = wmap
 # wmap version 2.0 == web_discovery version 1.5.3
-version = 2.5.
-date = 2019-
+version = 2.5.1
+date = 2019-03-17

 author = Sam (Yang) Li
 email = yang.li@owasp.org

data/wmap.gemspec
CHANGED
@@ -36,9 +36,9 @@ Gem::Specification.new do |s|
   s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
   s.email = info["email"]
   s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","distrust","run_tests"]
-  s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","
+  s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","data/","LICENSE.txt",
              "version.txt","README.rdoc", "wmap.gemspec"]
-  s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap
+  s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['ruby_whois_patches/*'] + Dir['dicts/*'] + Dir['logs/wmap.log']
   #s.homepage = "none"
   s.post_install_message = "*"*80 + "\n\nThank you for installing the wmap gem - a pure Ruby library for Internet web application discovery and tracking. Please refer to the README.rdoc for more information of using this gem. \n\n" + "*"*80 + "\n"
   s.require_paths = ["lib"]

metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wmap
 version: !ruby/object:Gem::Version
-  version: 2.5.
+  version: 2.5.1
 platform: ruby
 authors:
 - Sam (Yang) Li
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-03-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dnsruby
@@ -268,6 +268,7 @@ files:
 - lib/wmap/site_tracker/deactivated_site.rb
 - lib/wmap/url_checker.rb
 - lib/wmap/url_crawler.rb
+- lib/wmap/url_crawler/adware_tag.rb
 - lib/wmap/utils/domain_root.rb
 - lib/wmap/utils/logger.rb
 - lib/wmap/utils/url_magic.rb
@@ -334,6 +335,7 @@ files:
 - settings/discovery_ports
 - settings/google_keywords.txt
 - settings/google_locator.txt
+- settings/tag_signatures
 - test/domain_tracker_test.rb
 - test/utils_test.rb
 - version.txt