wmap 2.7.0 → 2.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0306e6992ff754b34724ecd2c7e538531d747a8a0a7b75f6eabe12698f161ddd
4
- data.tar.gz: 3282d7c8a30e6526c201dcb8c3270554b2079bb675d50b16e3753c55f144881c
3
+ metadata.gz: fbbbf1b71d804f4682d5827d07aa6079a27953b04643a7479db78b2a8c68f865
4
+ data.tar.gz: 471585e726794ca17d72ec92d8f9963d8c10433f5df2fb941b0ad930880f9bcb
5
5
  SHA512:
6
- metadata.gz: 50a6e01ed24576c33592a1564116ee709ec8f9eaca5122fbe04408e546d4ea5799321dbf5f23ea4b8e3b2e90da0cdb9772733f295d28629cb05be9115c62da89
7
- data.tar.gz: 38931b0d2de732debd9e4c09fca532b8f192067e3058ceb86a1db61a86a80bffa9a43b334c79a87e0e6887da83b21af2076044ca45b2bc43bf4b5845fafb1dfb
6
+ metadata.gz: 222487917ebee32bf3f1ebed8356fcd12b2c21fead45f6ade6e95e1cc714a9bff2a70bd7fea4ce70551fdc6589cc8b92b8625e45919b18a02ff972f2e43c65d4
7
+ data.tar.gz: a96bec97937ec3ff93e6f65081e54608f80dd78916a7fb600fe82f7b41e0458f66890c551eb9f2987dc35e4f09d75cdeeb4068e91ce01139b73f3f4cdf7c11e3
@@ -119,110 +119,6 @@ class Wmap::UrlChecker
119
119
  end
120
120
  alias_method :checks, :url_workers
121
121
 
122
- =begin
123
- # Test the URL and return the response code
124
- def response_code (url)
125
- puts "Check the http response code on the url: #{url}" if @verbose
126
- code = 10000 # All unknown url connection exceptions go here
127
- raise "Invalid url: #{url}" unless is_url?(url)
128
- url=url.strip.downcase
129
- timeo = @http_timeout/1000.0
130
- uri = URI.parse(url)
131
- http = Net::HTTP.new(uri.host, uri.port)
132
- http.open_timeout = timeo
133
- http.read_timeout = timeo
134
- if (url =~ /https\:/i)
135
- http.use_ssl = true
136
- #http.ssl_version = :SSLv3
137
- # Bypass the remote web server cert validation test
138
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
139
- end
140
- request = Net::HTTP::Get.new(uri.request_uri)
141
- response = http.request(request)
142
- puts "Server response the following: #{response}" if @verbose
143
- code = response.code.to_i
144
- #response.finish if response.started?()
145
- @url_code[url]=code
146
- puts "Response code on #{url}: #{code}" if @verbose
147
- return code
148
- rescue Exception => ee
149
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
150
- case ee
151
- # rescue "Connection reset by peer" error type
152
- when Errno::ECONNRESET
153
- code=104
154
- when Errno::ECONNABORTED,Errno::ETIMEDOUT
155
- #code=10000
156
- when Timeout::Error # Quick fix
157
- if (url =~ /https\:/i) # try again for ssl timeout session, in case of default :TLSv1 failure
158
- http.ssl_version = :SSLv3
159
- response = http.request(request)
160
- code = response.code.to_i
161
- unless code.nil?
162
- @ssl_version = http.ssl_version
163
- end
164
- end
165
- else
166
- #code=10000
167
- end
168
- @url_code[url]=code
169
- return code
170
- end
171
-
172
- # Test the URL / site and return the redirection location (3xx response code only)
173
- def redirect_location (url)
174
- puts "Test the redirection location for the url: #{url}" if @verbose
175
- location=""
176
- raise "Invalid url: #{url}" unless is_url?(url)
177
- url=url.strip.downcase
178
- timeo = @http_timeout/1000.0
179
- uri = URI.parse(url)
180
- code = response_code (url)
181
- if code >= 300 && code < 400
182
- http = Net::HTTP.new(uri.host, uri.port)
183
- http.open_timeout = timeo
184
- http.read_timeout = timeo
185
- if (url =~ /https\:/i)
186
- http.use_ssl = true
187
- # Bypass the remote web server cert validation test
188
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
189
- http.ssl_version = @ssl_version
190
- end
191
- request = Net::HTTP::Get.new(uri.request_uri)
192
- response = http.request(request)
193
- puts "Response: #{response}" if @verbose
194
- case response
195
- when Net::HTTPRedirection then
196
- location = response['location']
197
- end
198
- end
199
- @url_redirection[url]=location
200
- return location
201
- rescue Exception => ee
202
- puts "Exception on method redirect_location for URL #{url}: #{ee}" if @verbose
203
- return ""
204
- end
205
- alias_method :location, :redirect_location
206
-
207
- # Test the URL / Site and return the landing url location (recursive with the depth = 4 )
208
- def landing_location (depth=5, url)
209
- depth -= 1
210
- return url if depth < 1
211
- timeo = @http_timeout/1000.0
212
- uri = URI.parse(url)
213
- code = response_code (url)
214
- if code >= 300 && code < 400
215
- url = redirect_location (url)
216
- url = landing_location(depth,url)
217
- else
218
- return url
219
- end
220
- return url
221
- rescue Exception => ee
222
- puts "Exception on method #{__method__} on URL #{url}: #{ee}" if @verbose
223
- end
224
- =end
225
-
226
122
  # Test the URL / site and return the web server type from the HTTP header "server" field
227
123
  def get_server_header (url)
228
124
  puts "Retrieve the server header field from the url: #{url}" if @verbose
@@ -14,176 +14,161 @@ module Wmap
14
14
  extend self
15
15
 
16
16
  # set hard stop limit of http time-out (value in milliseconds), in order to avoid severe performance penalty for certain 'weird' site(s)
17
- Max_http_timeout=8000
17
+ Max_http_timeout=15000
18
18
 
19
19
  # Simple sanity check on a 'claimed' URL string.
20
20
  def is_url?(url)
21
21
  puts "Validate the URL format is valid: #{url}" if @verbose
22
- begin
23
- if url =~ /(http|https)\:\/\/((.)+)/i
24
- host=$2.split('/')[0]
25
- host=host.split(':')[0]
26
- if is_ip?(host) or is_fqdn?(host)
27
- return true
28
- else
29
- return false
30
- end
22
+ if url =~ /(http|https)\:\/\/((.)+)/i
23
+ host=$2.split('/')[0]
24
+ host=host.split(':')[0]
25
+ if is_ip?(host) or is_fqdn?(host)
26
+ return true
31
27
  else
32
- puts "Unknown URL format: #{url}" if @verbose
33
28
  return false
34
29
  end
35
- rescue => ee
36
- puts "Exception on method #{__method__}: #{ee}" if @verbose
30
+ else
31
+ puts "Unknown URL format: #{url}" if @verbose
37
32
  return false
38
33
  end
34
+ rescue => ee
35
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
36
+ return false
39
37
  end
40
38
 
41
39
  # Simple sanity check on a 'claimed' SSL enabled URL string
42
40
  def is_ssl?(url)
43
41
  puts "Validate if SSL is enabled on: #{url}" if @verbose
44
- begin
45
- url=url.strip
46
- if is_url?(url) && url =~ /https/i
47
- return true
48
- else
49
- return false
50
- end
51
- rescue => ee
52
- puts "Exception on method #{__method__}: #{ee}" if @verbose
42
+ url=url.strip
43
+ if is_url?(url) && url =~ /https/i
44
+ return true
45
+ else
53
46
  return false
54
47
  end
48
+ rescue => ee
49
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
50
+ return false
55
51
  end
56
52
  alias_method :is_https?, :is_ssl?
57
53
 
58
54
  # Simple sanity check on a 'claimed' web site base string.
59
55
  def is_site?(url)
60
- puts "Validate the website string format for: #{url}" if @verbose
61
- begin
62
- url=url.strip.downcase
63
- if is_url?(url)
64
- if url == url_2_site(url)
65
- return true
66
- else
67
- return false
68
- end
56
+ puts "Validate the website string format for: #{url}" if @verbose
57
+ url=url.strip.downcase
58
+ if is_url?(url)
59
+ if url == url_2_site(url)
60
+ return true
69
61
  else
70
- puts "Unknown site format: #{url}" if @verbose
71
62
  return false
72
63
  end
73
- rescue => ee
74
- puts "Exception on method #{__method__}: #{ee}" if @verbose
75
- return nil
64
+ else
65
+ puts "Unknown site format: #{url}" if @verbose
66
+ return false
76
67
  end
68
+ rescue => ee
69
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
70
+ return nil
77
71
  end
78
72
 
79
73
  # Extract the web server host's Fully Qualified Domain Name (FQDN) from the url. For example: "https://login.yahoo.com/email/help" -> "login.yahoo.com"
80
74
  def url_2_host (url)
81
- begin
82
- url = url.strip.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
83
- record1 = url.split('/')
84
- if record1[0].nil?
85
- puts "Error process url: #{url}"
86
- return nil
87
- else
88
- record2 = record1[0].split(':')
89
- return record2[0]
90
- end
91
- rescue => ee
92
- puts "Exception on method #{__method__}: #{ee}" if @verbose
75
+ url = url.strip.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
76
+ record1 = url.split('/')
77
+ if record1[0].nil?
78
+ puts "Error process url: #{url}"
93
79
  return nil
80
+ else
81
+ record2 = record1[0].split(':')
82
+ return record2[0]
94
83
  end
84
+ rescue => ee
85
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
86
+ return nil
95
87
  end
96
88
 
97
89
  # Extract web service port from the url. For example: "https://login.yahoo.com/email/help" -> 443
98
90
  def url_2_port (url)
99
91
  puts "Retrieve service port on URL: #{url}" if @verbose
100
- begin
101
- ssl = (url =~ /https/i)
102
- url = url.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
103
- record1 = url.split('/')
104
- record2 = record1[0].split(':')
105
- if (record2.length == 2)
106
- puts "The service port: #{record2[1]}" if @verbose
107
- return record2[1].to_i
108
- elsif ssl
109
- puts "The service port: 443" if @verbose
110
- return 443
111
- else
112
- puts "The service port: 80" if @verbose
113
- return 80
114
- end
115
- rescue => ee
116
- puts "Exception on method #{__method__}: #{ee}" if @verbose
117
- return nil
92
+ ssl = (url =~ /https/i)
93
+ url = url.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
94
+ record1 = url.split('/')
95
+ record2 = record1[0].split(':')
96
+ if (record2.length == 2)
97
+ puts "The service port: #{record2[1]}" if @verbose
98
+ return record2[1].to_i
99
+ elsif ssl
100
+ puts "The service port: 443" if @verbose
101
+ return 443
102
+ else
103
+ puts "The service port: 80" if @verbose
104
+ return 80
118
105
  end
106
+ rescue => ee
107
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
108
+ return nil
119
109
  end
120
110
 
121
111
  # Extract site in (host:port) format from a url: "https://login.yahoo.com:8443/email/help" -> "https://login.yahoo.com:8443/"
122
112
  def url_2_site (url)
123
113
  puts "Retrieve the web site base for url: #{url}" if @verbose
124
- begin
125
- url = url.downcase
126
- url = url.sub(/^(.*?)http/i,'http')
127
- entry = url.split(%r{\/\/})
128
- prot=entry[0]
129
- # step 1, extract the host:port pair from the url
130
- host_port=entry[1].split(%r{\/})[0]
131
- if host_port =~ /\:/
132
- host=host_port.split(%r{\:})[0]
133
- port=host_port.split(%r{\:})[1].to_i
134
- elsif prot =~ /https/i
135
- host=host_port
136
- port=443
137
- elsif prot =~ /http/i
138
- host=host_port
139
- port=80
140
- else
141
- host=host_port
142
- #raise "Unknown url format: #{url}"
143
- end
144
- # additional logic to handle uncommon url base structures
145
- unless is_fqdn?(host)
146
- case host
147
- # "https://letmechoose.barclays.co.uk?source=btorganic/" => "https://letmechoose.barclays.co.uk"
148
- when /\?|\#/
149
- host=host.split(%r{\?|\#})[0]
150
- else
151
- #do nothing
152
- end
153
- end
154
- # step 2, put the host:port pair back to the normal site format
155
- prot="https:" if port==443
156
- if port==80 || port==443
157
- site=prot+"//"+host+"/"
158
- else
159
- site=prot+"//"+host+":"+port.to_s+"/"
160
- end
161
- if site=~ /http/i
162
- #puts "Base found: #{site}" if @verbose
163
- return site
164
- else
165
- raise "Problem encountered on method url_2_site: Unable to convert #{url}"
166
- return nil
114
+ url = url.downcase
115
+ url = url.sub(/^(.*?)http/i,'http')
116
+ entry = url.split(%r{\/\/})
117
+ prot=entry[0]
118
+ # step 1, extract the host:port pair from the url
119
+ host_port=entry[1].split(%r{\/})[0]
120
+ if host_port =~ /\:/
121
+ host=host_port.split(%r{\:})[0]
122
+ port=host_port.split(%r{\:})[1].to_i
123
+ elsif prot =~ /https/i
124
+ host=host_port
125
+ port=443
126
+ elsif prot =~ /http/i
127
+ host=host_port
128
+ port=80
129
+ else
130
+ host=host_port
131
+ #raise "Unknown url format: #{url}"
132
+ end
133
+ # additional logic to handle uncommon url base structures
134
+ unless is_fqdn?(host)
135
+ case host
136
+ # "https://letmechoose.barclays.co.uk?source=btorganic/" => "https://letmechoose.barclays.co.uk"
137
+ when /\?|\#/
138
+ host=host.split(%r{\?|\#})[0]
139
+ else
140
+ #do nothing
167
141
  end
168
- rescue => ee
169
- puts "Exception on method #{__method__}: #{ee}" if @verbose
142
+ end
143
+ # step 2, put the host:port pair back to the normal site format
144
+ prot="https:" if port==443
145
+ if port==80 || port==443
146
+ site=prot+"//"+host+"/"
147
+ else
148
+ site=prot+"//"+host+":"+port.to_s+"/"
149
+ end
150
+ if site=~ /http/i
151
+ #puts "Base found: #{site}" if @verbose
152
+ return site
153
+ else
154
+ raise "Problem encountered on method url_2_site: Unable to convert #{url}"
170
155
  return nil
171
156
  end
157
+ rescue => ee
158
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
159
+ return nil
172
160
  end
173
161
 
174
162
  # Wrapper to return relative path component of the URL. i.e. http://www.yahoo.com/login.html => /login.html
175
163
  def url_2_path(url)
176
164
  #puts "Retrieve the relative path component of the url: #{url}" if @verbose
177
- begin
178
- url.strip!
179
- base = url_2_site(url).chop
180
- path=url.sub(base,'')
181
- #puts "Path component found: #{path}" if @verbose
182
- return path
183
- rescue => ee
184
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
185
- end
186
-
165
+ url.strip!
166
+ base = url_2_site(url).chop
167
+ path=url.sub(base,'')
168
+ #puts "Path component found: #{path}" if @verbose
169
+ return path
170
+ rescue => ee
171
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
187
172
  end
188
173
 
189
174
  # Test if the two URLs are both under the same domain: http://login.yahoo.com, http://mail.yahoo.com => true
@@ -200,121 +185,111 @@ module Wmap
200
185
  # Input is host and open port, output is a URL for valid http response code or nil
201
186
  def host_2_url (host,port=80)
202
187
  puts "Perform simple http(s) service detection on host #{host}, port #{port}" if @verbose
203
- begin
204
- host=host.strip
205
- if port.to_i == 80
206
- url_1 = "http://" + host + "/"
207
- elsif port.to_i ==443
208
- url_1 = "https://" + host + "/"
209
- else
210
- url_1 = "http://" + host + ":" + port.to_s + "/"
211
- url_2 = "https://" + host + ":" + port.to_s + "/"
212
- end
213
- puts "Please ensure your internet connection is active before running this method: #{__method__}" if @verbose
214
- checker=Wmap::UrlChecker.new
215
- if checker.response_code(url_1) != 10000
216
- puts "Found URL: #{url_1}" if @verbose
217
- return url_1
218
- elsif checker.response_code(url_2) != 10000
219
- puts "Found URL: #{url_2}" if @verbose
220
- return url_2
221
- else
222
- puts "No http(s) service found on: #{host}:#{port}" if @verbose
223
- return nil
224
- end
225
- rescue => ee
226
- puts "Exception on method #{__method__}: #{ee}" if @verbose
188
+ host=host.strip
189
+ if port.to_i == 80
190
+ url_1 = "http://" + host + "/"
191
+ elsif port.to_i ==443
192
+ url_1 = "https://" + host + "/"
193
+ else
194
+ url_1 = "http://" + host + ":" + port.to_s + "/"
195
+ url_2 = "https://" + host + ":" + port.to_s + "/"
196
+ end
197
+ puts "Please ensure your internet connection is active before running this method: #{__method__}" if @verbose
198
+ checker=Wmap::UrlChecker.new
199
+ if checker.response_code(url_1) != 10000
200
+ puts "Found URL: #{url_1}" if @verbose
201
+ return url_1
202
+ elsif checker.response_code(url_2) != 10000
203
+ puts "Found URL: #{url_2}" if @verbose
204
+ return url_2
205
+ else
206
+ puts "No http(s) service found on: #{host}:#{port}" if @verbose
227
207
  return nil
228
208
  end
209
+ rescue => ee
210
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
211
+ return nil
229
212
  end
230
213
 
231
214
  # Convert a relative URL to an absolute one. For example, from URL base 'http://games.yahoo.com/' and file path '/game/the-magic-snowman-flash.html' => 'http://games.yahoo.com/game/the-magic-snowman-flash.html'
232
215
  def make_absolute(base, relative_url)
233
- puts "Determine and return the absolute URL:\n Base: #{base}, Relative: #{relative_url} " if @verbose
234
- begin
235
- absolute_url = nil;
236
- if relative_url =~ /^\//
237
- absolute_url = create_absolute_url_from_base(base, relative_url)
238
- else
239
- absolute_url = create_absolute_url_from_context(base, relative_url)
240
- end
241
- puts "Found absolute URL: #{absolute_url}" if @verbose
242
- return absolute_url
243
- rescue => ee
244
- puts "Exception on method #{__method__}: #{ee}" if @verbose
245
- return nil
246
- end
247
- end
216
+ puts "Determine and return the absolute URL:\n Base: #{base}, Relative: #{relative_url} " if @verbose
217
+ absolute_url = nil;
218
+ if relative_url =~ /^\//
219
+ absolute_url = create_absolute_url_from_base(base, relative_url)
220
+ else
221
+ absolute_url = create_absolute_url_from_context(base, relative_url)
222
+ end
223
+ puts "Found absolute URL: #{absolute_url}" if @verbose
224
+ return absolute_url
225
+ rescue => ee
226
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
227
+ return nil
228
+ end
248
229
 
249
230
  # Create / construct the absolute URL from a known URL and relative file path. For example, 'http://images.search.yahoo.com/images' + '/search/images?p=raiders' => 'http://images.search.yahoo.com/search/images?p=raiders'
250
231
  def create_absolute_url_from_base(potential_base, relative_url)
251
- begin
252
- #puts "Determine the absolute URL from potential base #{potential_base} and relative URL #{relative_url}" if @verbose
253
- naked_base = url_2_site(potential_base).strip.chop
254
- puts "Found absolute URL: #{naked_base+relative_url}" if @verbose
255
- return naked_base + relative_url
256
- rescue => ee
257
- puts "Exception on method #{__method__}: #{ee}" if @verbose
258
- return nil
259
- end
260
- end
232
+ #puts "Determine the absolute URL from potential base #{potential_base} and relative URL #{relative_url}" if @verbose
233
+ naked_base = url_2_site(potential_base).strip.chop
234
+ puts "Found absolute URL: #{naked_base+relative_url}" if @verbose
235
+ return naked_base + relative_url
236
+ rescue => ee
237
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
238
+ return nil
239
+ end
261
240
 
262
241
  # Construct the absolute URL by comparing a known URL and the relative file path
263
242
  def create_absolute_url_from_context(potential_base, relative_url)
264
- puts "Determine the absolute URL from context:\n Known base: #{potential_base}, Relative path: #{relative_url}" if @verbose
265
- begin
266
- absolute_url = nil
267
- # make relative URL naked by removing the beginning '/'
268
- relative_url.sub!(/^\//,'')
269
- if potential_base =~ /\/$/
270
- absolute_url = potential_base+relative_url.strip
243
+ puts "Determine the absolute URL from context:\n Known base: #{potential_base}, Relative path: #{relative_url}" if @verbose
244
+ absolute_url = nil
245
+ # make relative URL naked by removing the beginning '/'
246
+ relative_url.sub!(/^\//,'')
247
+ if potential_base =~ /\/$/
248
+ absolute_url = potential_base+relative_url.strip
249
+ else
250
+ last_index_of_slash = potential_base.rindex('/')
251
+ if potential_base[last_index_of_slash-2, 2] == ':/'
252
+ absolute_url = potential_base+relative_url
271
253
  else
272
- last_index_of_slash = potential_base.rindex('/')
273
- if potential_base[last_index_of_slash-2, 2] == ':/'
274
- absolute_url = potential_base+relative_url
254
+ last_index_of_dot = potential_base.rindex('.')
255
+ if last_index_of_dot < last_index_of_slash
256
+ absolute_url = potential_base.strip.chop+relative_url
275
257
  else
276
- last_index_of_dot = potential_base.rindex('.')
277
- if last_index_of_dot < last_index_of_slash
278
- absolute_url = potential_base.strip.chop+relative_url
279
- else
280
- absolute_url = potential_base[0, last_index_of_slash+1] + relative_url
281
- end
258
+ absolute_url = potential_base[0, last_index_of_slash+1] + relative_url
282
259
  end
283
260
  end
284
- puts "Found absolute URL: #{absolute_url}" if @verbose
285
- return absolute_url
286
- rescue => ee
287
- puts "Exception on method #{__method__}: #{ee}" if @verbose
288
- return nil
289
- end
290
- end
261
+ end
262
+ puts "Found absolute URL: #{absolute_url}" if @verbose
263
+ return absolute_url
264
+ rescue => ee
265
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
266
+ return nil
267
+ end
291
268
 
292
269
  # Normalize the URL to a consistent manner in order to determine if a link has been visited or cached before
293
270
  # See http://en.wikipedia.org/wiki/URL_normalization for more explanation
294
271
  def normalize_url(url)
295
- begin
296
- url.strip!
297
- # Converting the scheme and host to lower case in the process, i.e. 'HTTP://www.Example.com/' => 'http://www.example.com/'
298
- # Normalize the base
299
- base=url_2_site(url)
300
- # Case#1, remove the trailing dot after the hostname, i.e, 'http://www.yahoo.com./' => 'http://www.yahoo.com/'
301
- base=base.sub(/\.\/$/,'/')
302
- # Normalize the relative path, case#1
303
- # retrieve the file path and remove the first '/' or '.',
304
- # i.e. 'http://www.example.com/mypath' or 'http://www.example.com/./mypath' => 'mypath'
305
- path=url_2_path(url).sub(/^(\/|\.)*/,'')
306
- # Normalize the relative path, case#2
307
- # Replace dot-segments. "/../" and "/./" with "/", i.e. 'http://www.example.com/../a/b/../c/./d.html' => 'http://www.example.com/a/c/d.html'
308
- path=path.gsub(/\/\.{1,2}\//,'/')
309
- if path.nil?
310
- return base
311
- else
312
- return base+path
313
- end
314
- rescue => ee
315
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
316
- return url
272
+ url.strip!
273
+ # Converting the scheme and host to lower case in the process, i.e. 'HTTP://www.Example.com/' => 'http://www.example.com/'
274
+ # Normalize the base
275
+ base=url_2_site(url)
276
+ # Case#1, remove the trailing dot after the hostname, i.e, 'http://www.yahoo.com./' => 'http://www.yahoo.com/'
277
+ base=base.sub(/\.\/$/,'/')
278
+ # Normalize the relative path, case#1
279
+ # retrieve the file path and remove the first '/' or '.',
280
+ # i.e. 'http://www.example.com/mypath' or 'http://www.example.com/./mypath' => 'mypath'
281
+ path=url_2_path(url).sub(/^(\/|\.)*/,'')
282
+ # Normalize the relative path, case#2
283
+ # Replace dot-segments. "/../" and "/./" with "/", i.e. 'http://www.example.com/../a/b/../c/./d.html' => 'http://www.example.com/a/c/d.html'
284
+ path=path.gsub(/\/\.{1,2}\//,'/')
285
+ if path.nil?
286
+ return base
287
+ else
288
+ return base+path
317
289
  end
290
+ rescue => ee
291
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
292
+ return url
318
293
  end
319
294
 
320
295
 
@@ -3,7 +3,7 @@
3
3
  ###############################################################################
4
4
  package = wmap
5
5
  # wmap version 2.0 == web_discovery version 1.5.3
6
- version = 2.7.0
6
+ version = 2.7.1
7
7
  date = 2020-03-09
8
8
 
9
9
  author = Sam (Yang) Li
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.0
4
+ version: 2.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam (Yang) Li