wmap 2.5.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8d240d91e365ae3d5d0e43cf0cc9921f334d309c963c95da7599bf2a64743142
- data.tar.gz: a64728d73c6e6a07772edd42fd1c46cb3748feb6f67705691e6c20dc6962bd2b
+ metadata.gz: 6f2042c146724dcfa9852bcb1920a2d9baded35fb8ba589d3af277277d678d36
+ data.tar.gz: d0ae5c5f90a4707eddbb91119b71ae2e9dc1c9ace6631e3f66e03d892ef3d8f1
  SHA512:
- metadata.gz: 889518a8645797863f7c9b879b46802566a8c05c9f1f18dfc26326380bb387ad7905ab0daa48fda6981c5e52d21efab6c64abb327117c0cbb5661d490947cd0b
- data.tar.gz: a2928c8147507c19571ce4294d981525848d6767696a63a58d1db016a5a81a4ec3ee9da07a3ccea927ae7a12259731730abaa66474e35b69f480cb3a05a7afd7
+ metadata.gz: 2a543f31d23e42604a0c86445eb3c2b469485527c0897a7495a8d74f593be19a1e915cf38e8ae03246012d5bb21734f8d2f93b0180863383a3a5260030fb1336
+ data.tar.gz: 830a9645c9633f0cd396cf9dfc40654a902ad3275ec37870f2fa1e06f64365bf9e0229f5acb0972af8569497c253e13d88b87c5d86710d324cb1510e3cd6679c
data/lib/wmap.rb CHANGED
@@ -20,6 +20,7 @@ require 'wmap/wp_tracker'
  require 'wmap/network_profiler'
  require 'wmap/port_scanner'
  require 'wmap/url_crawler'
+ require 'wmap/url_crawler/adware_tag'
  require 'wmap/dns_bruter'
  require 'wmap/site_tracker'
  require 'wmap/site_tracker/deactivated_site'
@@ -143,7 +143,7 @@ class Wmap::DomainTracker
  @known_internet_domains.merge!(record)
  return record
  else
- puts "Problem add domain #{host} - please use legal root domain or sub domain only."
+ puts "Problem add domain #{host}: unknown domain format - please use legal root domain or sub domain only."
  end
  end
  rescue => ee
@@ -108,7 +108,7 @@ class Wmap::SiteTracker
  # Preliminary sanity check
  site=site.strip.downcase unless site.nil?
  if site_known?(site)
- puts "Site is already exist. Skip #{site}"
+ puts "Site already exists. Skip it: #{site}"
  return nil
  end
  site=normalize_url(site) if is_url?(site)
@@ -242,7 +242,7 @@ class Wmap::SiteTracker
  puts "Exception on method #{__method__}: #{ee}"
  checker=nil
  deact=nil
- host_tracker=nil
+ host_tracker=nil
  return nil
  end
  end
@@ -32,8 +32,8 @@ class Wmap::UrlChecker
 
  # Main worker method to perform various checks on the URL / site
  def url_worker (url)
- puts "Checking out an unknown URL: #{url}" if @verbose
  begin
+ puts "Checking out an unknown URL: #{url}" if @verbose
  url=url.strip.downcase
  raise "Invalid URL format: #{url}" unless is_url?(url)
  timestamp=Time.now
@@ -46,10 +46,10 @@ class Wmap::UrlChecker
  else
  code=response_code(url)
  end
- if @url_redirection.key?(url)
- loc=@url_redirection[url]
+ if code>=300 && code < 400
+ loc=landing_location(4,url)
  else
- loc=redirect_location(url)
+ loc=nil
  end
  if @url_finger_print.key?(url)
  fp=@url_finger_print[url]
@@ -212,6 +212,26 @@ class Wmap::UrlChecker
  end
  alias_method :location, :redirect_location
 
+ # Test the URL / Site and return the landing url location (recursive with the depth = 4 )
+ def landing_location (depth=4, url)
+ begin
+ depth -= 1
+ return url if depth < 1
+ timeo = @http_timeout/1000.0
+ uri = URI.parse(url)
+ code = response_code (url)
+ if code >= 300 && code < 400
+ url = redirect_location (url)
+ url = landing_location(depth,url)
+ else
+ return url
+ end
+ return url
+ rescue Exception => ee
+ puts "Exception on method #{__method__} on URL #{url}: #{ee}" if @verbose
+ end
+ end
+
  # Test the URL / site and return the web server type from the HTTP header "server" field
  def get_server_header (url)
  begin
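The landing_location method added above resolves an HTTP 3xx redirect chain recursively, giving up after the given depth (4 by default), and url_worker now calls it whenever the response code is in the 300-399 range instead of consulting the old @url_redirection cache. A minimal usage sketch, assuming the checker is built with its defaults (the URL is illustrative only):

    require 'wmap'
    checker = Wmap::UrlChecker.new
    # follow redirects up to the given depth and print the final landing URL
    puts checker.landing_location(4, 'http://www.example.com/')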
@@ -21,7 +21,6 @@ class Wmap::UrlCrawler
  attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
  # Global variable used to store the combined result of all the forked child processes. Note that class variable
  # would not be able to pass the result due the limitation of IO Pipe communication mechanism used by 'parallel' fork manager
- # $discovered_urls=Hash.new
 
  # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
  Max_http_timeout=8000
@@ -47,8 +46,8 @@ class Wmap::UrlCrawler
 
  # Pre-crawl profiler, to be used for network profiling to maximum the crawler performance.
  def pre_crawl(url)
- puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
  begin
+ puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
  host=url_2_host(url)
  # Use the following formula to 'guess' the right http time-out threshold for the scanner
  nwk_to=Wmap::NetworkProfiler.new.profile(host).to_i
@@ -67,8 +66,8 @@ class Wmap::UrlCrawler
  # A web crawler to crawl a known website and search for html links within the same root domain. For example,
  # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
  def crawl(url)
- puts "Start web crawling on #{url}"
- #begin
+ begin
+ puts "Start web crawling on #{url}"
  result=Array.new
  url=url.chomp.strip
  result.push(url_2_site(url))
@@ -80,17 +79,17 @@ class Wmap::UrlCrawler
  }
  puts "Web crawling time-out on #{url}: #{status}" if @verbose
  return result
- #rescue => ee
- #puts "Exception on method #{__method__} for URL #{url}: #{ee}"
- #return result
- #end
+ rescue => ee
+ puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+ return result
+ end
  end
  alias_method :query, :crawl
 
  # The worker instance of crawler who perform the labour work
  def crawl_worker(url0)
- puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
- #begin
+ begin
+ puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
  # Input URL sanity check first
  if is_url?(url0)
  host=url_2_host(url0)
@@ -121,7 +120,7 @@ class Wmap::UrlCrawler
  url_stores[url]=true unless url_stores.key?(url)
  @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
  # $discovered_urls[url]=true unless $discovered_urls.key?(url)
- doc = parse_html(url_body)
+ doc = Nokogiri::HTML(url_body)
  next if doc == nil
  if url_stores.size >= @crawl_page_limit
  #@visited_urls_by_crawler.merge!(url_stores)
@@ -145,12 +144,12 @@ class Wmap::UrlCrawler
  wlog(log_info, "UrlCrawler", @log_file)
  @crawl_done[url0]=true unless @crawl_done.key?(url0)
  return url_stores
- #rescue => ee
- #puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
- #log_info[3]="Exception on #{url0}"
- #wlog(log_info,"UrlCrawler",@log_file)
- #return url_stores
- #end
+ rescue => ee
+ puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+ log_info[3]="Exception on #{url0}"
+ wlog(log_info,"UrlCrawler",@log_file)
+ return url_stores
+ end
  end
 
  # Fast crawling by utilizing fork manager parallel to spawn numbers of child processes at the same time
@@ -211,14 +210,14 @@ class Wmap::UrlCrawler
 
  # Fast crawling method - build the target pool from the input file
  def crawl_workers_on_file (file)
- puts "Web crawl the list of targets from file: #{file}"
  begin
+ puts "Web crawl the list of targets from file: #{file}"
  targets=file_2_list(file)
  sites=crawl_workers(targets,num=@max_parallel)
  return sites
  rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  end
  alias_method :query_file, :crawl_workers_on_file
@@ -226,7 +225,6 @@ class Wmap::UrlCrawler
 
  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
  def open_url(url)
- #url_object = nil
  begin
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
  if url =~ /http\:/i
@@ -262,8 +260,8 @@ class Wmap::UrlCrawler
 
  # Return the destination url in case of url re-direct
  def update_url_if_redirected(url, url_object)
- #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
  begin
+ #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
  if url != url_object.base_uri.to_s
  return url_object.base_uri.to_s
  end
@@ -274,24 +272,26 @@ class Wmap::UrlCrawler
  end
  end
 
+ =begin
  # Wrapper for the Nokogiri DOM parser
  def parse_html(html_body)
- #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
  begin
- doc = Nokogiri::HTML(html_body)
+ #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
+ doc = Nokogiri::HTML(html_body)
  #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
  #puts "doc: #{doc}" if @verbose
  return doc
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
  end
+ =end
 
- # Search 'current_url' and return found URLs under the same domain
+ # Search 'current_url' and return found URLs under the same domain
  def find_urls_on_page(doc, current_url)
- #puts "Search and return URLs within the doc: #{doc}" if @verbose
  begin
+ puts "Search and return URLs within the doc: #{doc}" if @verbose
  urls_list = []
  # case 1 - search embedded HTML tag <a href='url'> for the url elements
  links=doc.css('a')
@@ -320,46 +320,46 @@ class Wmap::UrlCrawler
  end
  #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
  return urls_list.uniq-["",nil]
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
- end
+ end
 
  # Method to print out discovery URL result
  def print_discovered_urls_by_crawler
- puts "Print discovered url by the crawler. " if @verbose
  begin
+ puts "Print discovered url by the crawler. " if @verbose
  puts "\nSummary Report of Discovered URLs from the Crawler:"
  @discovered_urls_by_crawler.keys.each do |url|
  puts url
  end
  puts "Total: #{@discovered_urls_by_crawler.keys.size}"
  puts "End of the summary"
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
  end
  alias_method :print, :print_discovered_urls_by_crawler
 
  # Method to save URL discovery result
  def save_discovered_urls (file)
- puts "Save discovered urls by the crawler to file: #{file} "
  begin
+ puts "Save discovered urls by the crawler to file: #{file} "
  list_2_file(@discovered_urls_by_crawler.keys, file)
  puts "Done!"
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
  end
  alias_method :save, :save_discovered_urls
 
  # Method to retrieve discovery site result
  def get_discovered_sites_by_crawler
- puts "Print summary report of discovered sites. " if @verbose
  begin
+ puts "Print summary report of discovered sites. " if @verbose
  puts "\nSummary Report of Discovered Sites from the Crawler:"
  sites = Hash.new
  @discovered_urls_by_crawler.keys.each do |url|
@@ -370,12 +370,12 @@ class Wmap::UrlCrawler
  puts "Total: #{sites.size}"
  puts "End of the summary"
  return sites.keys
- rescue => ee
+ rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ return nil
+ end
  end
  alias_method :get_sites, :get_discovered_sites_by_crawler
 
- private :open_url, :read_url, :update_url_if_redirected, :parse_html, :find_urls_on_page
+ private :open_url, :read_url, :update_url_if_redirected, :find_urls_on_page
  end
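Taken together, the UrlCrawler changes above re-enable the rescue blocks that had been commented out (crawl, crawl_worker, crawl_workers_on_file and the print/save/get helpers now trap exceptions and return whatever was collected), inline the Nokogiri call that used to live in parse_html, and fence the old wrapper off with =begin/=end while dropping it from the private list. A short sketch of the crawler entry points, assuming default construction (the URL and file path are illustrative only):

    require 'wmap'
    crawler = Wmap::UrlCrawler.new
    # crawl/query now rescue internally, so a failing target yields a partial result instead of raising
    result = crawler.crawl('http://www.example.com/')    # alias: crawler.query(...)
    crawler.print_discovered_urls_by_crawler             # alias: crawler.print
    crawler.save_discovered_urls('/tmp/wmap_urls.txt')   # alias: crawler.save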
@@ -0,0 +1,281 @@
+ #--
+ # Wmap
+ #
+ # A pure Ruby library for Internet web application discovery and tracking.
+ #
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+ #++
+
+
+ module Wmap
+ class UrlCrawler
+
+ # Class to identify and track adware within the site store
+ include Wmap::Utils
+ attr_accessor :signature_file, :tag_file, :verbose, :data_dir, :data_store
+ attr_reader :tag_store, :tag_signatures
+
+
+ class AdwareTag < Wmap::UrlCrawler
+
+ # Initialize the instance variables
+ def initialize (params = {})
+ @verbose=params.fetch(:verbose, false)
+ @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../../data/')
+ @tag_file=@data_dir + 'tag_sites'
+ # Set default instance variables
+ @signature_file=File.dirname(__FILE__) + '/../../../settings/' + 'tag_signatures'
+ file=params.fetch(:signature_file, @signature_file)
+ @tag_signatures=load_from_file(file)
+ file2=params.fetch(:tag_file, @tag_file)
+ File.write(file2, "") unless File.exist?(@tag_file)
+ # load the known tag store
+ @tag_store=load_tag_from_file(file2)
+ end
+
+
+ # load the known tag signatures into an instance variable
+ def load_from_file (file, lc=true)
+ begin
+ puts "Loading data file: #{file}" if @verbose
+ data_store=Hash.new
+ f = File.open(file, 'r')
+ f.each_line do |line|
+ puts "Processing line: #{line}" if @verbose
+ line=line.chomp.strip
+ next if line.nil?
+ next if line.empty?
+ next if line =~ /^\s*#/
+ line=line.downcase if lc==true
+ entry=line.split(',')
+ if data_store.key?(entry[0])
+ next
+ else
+ data_store[entry[0]]=entry[1].strip
+ end
+
+ end
+ f.close
+ return data_store
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # load the known tag store cache into an instance variable
+ def load_tag_from_file (file, lc=true)
+ begin
+ puts "Loading tag data file: #{file}" if @verbose
+ data_store=Hash.new
+ f = File.open(file, 'r')
+ f.each_line do |line|
+ puts "Processing line: #{line}" if @verbose
+ line=line.chomp.strip
+ next if line.nil?
+ next if line.empty?
+ next if line =~ /^\s*#/
+ line=line.downcase if lc==true
+ entry=line.split(',')
+ if data_store.key?(entry[0])
+ next
+ else
+ data_store[entry[0]]=[entry[1].strip, entry[2].strip, entry[3]]
+ end
+ end
+ f.close
+ return data_store
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # Save the current tag store hash table into a file
+ def save_to_file!(file_tag=@tag_file, tags=@tag_store)
+ begin
+ puts "Saving the current wordpress site table from memory to file: #{file_tag} ..." if @verbose
+ timestamp=Time.now
+ f=File.open(file_tag, 'w')
+ f.write "# Local tag file created by class #{self.class} method #{__method__} at: #{timestamp}\n"
+ f.write "# Site, Landing URL, Detected Adware Tag, Tag Version, Tag Description\n"
+ tags.each do |key, val|
+ f.write "#{key}, #{val[0]}, #{val[1]}, #{val[2]}, #{val[3]}\n"
+ end
+ f.close
+ puts "Tag store cache table is successfully saved: #{file_tag}"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ end
+ end
+ alias_method :save!, :save_to_file!
+
+ # add tag entries (from the sitetracker list)
+ def refresh (num=@max_parallel,use_cache=true)
+ #begin
+ puts "Add entries to the local cache table from site tracker: " if @verbose
+ results=Hash.new
+ tags=Wmap::SiteTracker.instance.known_sites.keys
+ if tags.size > 0
+ Parallel.map(tags, :in_processes => num) { |target|
+ check_adware(target,use_cache)
+ }.each do |process|
+ if !process
+ next
+ else
+ results.merge!(process)
+ end
+ end
+ @tag_store.merge!(results)
+ puts "Done loading entries."
+ tags=nil
+ return results
+ else
+ puts "Error: no entry is loaded. Please check your list and try again."
+ end
+ tags=nil
+ return results
+ #rescue => ee
+ # puts "Exception on method #{__method__}: #{ee}" if @verbose
+ #end
+ end
+
+ # Give a site, locate the landing page, then sift out the adware tag if found
+ def check_adware(site,use_cache=true)
+ #begin
+ puts "Check the site for known Adware tags: #{site}" if @verbose
+ record = Hash.new
+ if use_cache && @tag_store.key?(site)
+ puts "Site entry already exist. Skipping: #{site}" if @verbose
+ else
+ url = fast_landing(site)
+ tags = find_tags(url)
+ return record if tags.size==0
+ tag_vers=tags.map do |tag|
+ get_ver(url,tag)
+ end
+ tag_descs=tags.map do |tag|
+ Base64.urlsafe_encode64(get_desc(url,tag))
+ end
+ if tags
+ record[site]=[url, tags.join("|"), tag_vers.join("|"), tag_descs.join("|")]
+ @tag_store.merge!(record)
+ puts "Tag entry loaded: #{record}" if @verbose
+ else
+ puts "No tag found. Skip site #{site}" if @verbose
+ end
+ end
+ return record
+ #rescue => ee
+ # puts "Exception on method #{__method__}: #{ee}: #{site}" if @verbose
+ #end
+ end
+
+ # Given a site, determine the landing url
+ def fast_landing(site)
+ puts "Locate the landing url for: #{site}" if @verbose
+ my_tracker=Wmap::SiteTracker.instance
+ if my_tracker.known_sites.key?(site)
+ # looking into the cache first
+ if my_tracker.known_sites[site]['code'] >= 300 && my_tracker.known_sites[site]['code'] < 400
+ url = my_tracker.known_sites[site]['redirection']
+ else
+ url = site
+ end
+ my_tracker = nil
+ else
+ # no cache, then need to do it fresh
+ my_checker = Wmap::UrlChecker.new
+ url = my_checker.landing_location(site)
+ my_checker = nil
+ end
+ puts "Landing url found: #{url}" if @verbose
+ return url
+ end
+
+ # Search the page for known tag signatures. If found return them in an array
+ def find_tags(url)
+ begin
+ puts "Search and return tags within the url payload: #{url}" if @verbose
+ tag_list = []
+ doc = Nokogiri::HTML(open(url))
+ doc.text.each_line do |line|
+ my_line = line.downcase
+ @tag_signatures.keys.map do |tag|
+ tag_list.push(tag) if my_line.include?(tag)
+ end
+ end
+ doc = nil
+ return tag_list
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return []
+ end
+ end
+
+ # Search the url payload for known tag version identifier. If found return a string, else empty string.
+ def get_ver(url,tag)
+ puts "Search and return tag version within the url payload: #{url}, #{tag}" if @verbose
+ tag_ver=""
+ doc = Nokogiri::HTML(open(url))
+ case tag
+ when "utag.js" # sample: ...,"code_release_version":"cb20190312032612",...
+ doc.text.each_line do |line|
+ my_line = line.downcase
+ if my_line.include?("code_release_version")
+ puts "Extract tag version from line: #{my_line}" if @verbose
+ m = my_line.match(/\"code\_release\_version\"\:\"(?<ver>[a-z]+\d+)\"/)
+ tag_ver = m[:ver]
+ break
+ end
+ end
+ when "analytics.js" # sample: ga('create', 'UA-19175804-2', 'knopfdoubleday.com');
+ doc.text.each_line do |line|
+ my_line = line.downcase
+ if my_line.include?("ga(") && my_line.include?("create")
+ puts "Extract tag version from line: #{my_line}" if @verbose
+ m = my_line.match(/[\'|\"]create[\'|\"]\s*\,\s*[\'|\"](?<ver>\w+\-\d+\-\d+)[\'|\"]\s*\,/)
+ tag_ver = m[:ver]
+ break
+ end
+ end
+ when "all.js" # sample: appId : '749936668352954',
+ doc.text.each_line do |line|
+ my_line = line.downcase
+ if my_line.include?("appid") && my_line.include?(":")
+ puts "Extract tag version from line: #{my_line}" if @verbose
+ m = my_line.match(/appid\s+\:\s+[\'|\"](?<ver>\d+)[\'|\"]\s*\,/)
+ tag_ver = m[:ver]
+ break
+ end
+ end
+
+ else
+ puts "Unknown Adware Tag: #{tag}"
+ # do nothing
+ end
+ doc = nil
+ return tag_ver
+ end
+
+ # Search the url payload for known tag. If found return the base64 encode whole script snippet.
+ def get_desc(url,tag)
+ puts "Search and return tag script in url payload: #{url}, #{tag}" if @verbose
+ recording=false
+ tag_found=false
+ tag_desc=""
+ doc = Nokogiri::HTML(open(url))
+ doc.search('script').map do |script|
+ if script.text.include?(tag)
+ return script.text
+ end
+ end
+ doc = nil
+ return tag_desc
+ end
+
+
+
+ end
+ end
+ end
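The new Wmap::UrlCrawler::AdwareTag class defined above loads tag signatures from settings/tag_signatures, resolves a site's landing page (via the SiteTracker cache or UrlChecker#landing_location), and records any matching tag together with its version and a Base64-encoded copy of the matching script into a local tag_sites cache. A usage sketch built only from the methods shown above (the site URL is illustrative):

    require 'wmap'
    tagger = Wmap::UrlCrawler::AdwareTag.new(:verbose => true)
    record = tagger.check_adware('https://www.example.com/')  # single site, returns a Hash
    tagger.refresh                                            # sweep every site known to SiteTracker
    tagger.save!                                              # persist the tag store (alias of save_to_file!)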
@@ -71,7 +71,7 @@ class Wmap::WpTracker
  end
  end
 
- # Save the current domain hash table into a file
+ # Save the current hash table into a file
  def save_to_file!(file_wps=@file_wps, wps=@known_wp_sites)
  puts "Saving the current wordpress site table from memory to file: #{file_wps} ..." if @verbose
  begin
@@ -95,11 +95,11 @@ class Wmap::WpTracker
  alias_method :save!, :save_to_file!
 
  # 'setter' to add wordpress entry to the cache one at a time
- def add(url)
+ def add(url, use_cache=true)
  begin
  puts "Add entry to the local cache table: #{url}" if @verbose
  site=url_2_site(url)
- if @known_wp_sites.key?(site)
+ if use_cache && @known_wp_sites.key?(site)
  puts "Site is already exist. Skipping: #{site}"
  else
  record=Hash.new
@@ -141,14 +141,14 @@ class Wmap::WpTracker
  end
 
  # add wordpress site entries (from a sitetracker list)
- def refresh (num=@max_parallel)
+ def refresh (num=@max_parallel,use_cache=true)
  #begin
  puts "Add entries to the local cache table from site tracker: " if @verbose
  results=Hash.new
  wps=Wmap::SiteTracker.instance.known_sites.keys
  if wps.size > 0
  Parallel.map(wps, :in_processes => num) { |target|
- add(target)
+ add(target,use_cache)
  }.each do |process|
  if process.nil?
  next
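WpTracker#add and WpTracker#refresh now accept a use_cache flag (default true), so entries already present in the local WordPress cache can be re-checked on demand instead of being skipped. A hedged sketch, assuming the tracker is constructed with its defaults:

    require 'wmap'
    wp = Wmap::WpTracker.new
    wp.add('https://blog.example.com/', false)  # bypass the cache for one site
    wp.refresh(10, false)                       # re-check all known sites with 10 workers
    wp.save!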
data/logs/wmap.log CHANGED
@@ -1550,3 +1550,4 @@
  2019-02-20 21:10:59 -0500: googleBot: Execute the command: googleBot
  2019-02-20 21:17:21 -0500: googleBot: Execute the command: googleBot
  2019-02-20 21:26:13 -0500: googleBot: Execute the command: googleBot
+ 2019-02-22 09:59:09 -0500: wmap: Execute the command: wmap /Users/sli/prh_wmap/shared/data/seed
@@ -0,0 +1,6 @@
+ # Adware signature file: signture string, description
+ gtag.js, Google / DoubleClick Floodlight Tag
+ analytics.js, Google Universal Analytics Tag
+ ga.js, Google Analytics Tag
+ utag.js, Tealium Tag
+ all.js, Facebook Tag
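The new settings/tag_signatures file above is a small two-column CSV read by AdwareTag#load_from_file: the first field is the signature string matched (case-insensitively) against the page text, the second a description; blank lines and lines starting with # are skipped. A sketch of supplying a custom signature list via the :signature_file option (the path and the extra gtm.js entry are hypothetical):

    require 'wmap'
    # /tmp/custom_tags.csv might add a line such as:  gtm.js, Google Tag Manager
    tagger = Wmap::UrlCrawler::AdwareTag.new(:signature_file => '/tmp/custom_tags.csv')
    puts tagger.tag_signatures.inspect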
data/version.txt CHANGED
@@ -3,8 +3,8 @@
  ###############################################################################
  package = wmap
  # wmap version 2.0 == web_discovery version 1.5.3
- version = 2.5.0
- date = 2019-02-21
+ version = 2.5.1
+ date = 2019-03-17
 
  author = Sam (Yang) Li
  email = yang.li@owasp.org
data/wmap.gemspec CHANGED
@@ -36,9 +36,9 @@ Gem::Specification.new do |s|
  s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
  s.email = info["email"]
  s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","distrust","run_tests"]
- s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","settings/google_keywords.txt","settings/google_locator.txt","data/","LICENSE.txt",
+ s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","data/","LICENSE.txt",
  "version.txt","README.rdoc", "wmap.gemspec"]
- s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*.rb'] + Dir['bin/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['ruby_whois_patches/*'] + Dir['dicts/*'] + Dir['logs/wmap.log']
+ s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['ruby_whois_patches/*'] + Dir['dicts/*'] + Dir['logs/wmap.log']
  #s.homepage = "none"
  s.post_install_message = "*"*80 + "\n\nThank you for installing the wmap gem - a pure Ruby library for Internet web application discovery and tracking. Please refer to the README.rdoc for more information of using this gem. \n\n" + "*"*80 + "\n"
  s.require_paths = ["lib"]
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: wmap
  version: !ruby/object:Gem::Version
- version: 2.5.0
+ version: 2.5.1
  platform: ruby
  authors:
  - Sam (Yang) Li
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-02-21 00:00:00.000000000 Z
+ date: 2019-03-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dnsruby
@@ -268,6 +268,7 @@ files:
  - lib/wmap/site_tracker/deactivated_site.rb
  - lib/wmap/url_checker.rb
  - lib/wmap/url_crawler.rb
+ - lib/wmap/url_crawler/adware_tag.rb
  - lib/wmap/utils/domain_root.rb
  - lib/wmap/utils/logger.rb
  - lib/wmap/utils/url_magic.rb
@@ -334,6 +335,7 @@ files:
  - settings/discovery_ports
  - settings/google_keywords.txt
  - settings/google_locator.txt
+ - settings/tag_signatures
  - test/domain_tracker_test.rb
  - test/utils_test.rb
  - version.txt