wmap 2.7.0 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0306e6992ff754b34724ecd2c7e538531d747a8a0a7b75f6eabe12698f161ddd
4
- data.tar.gz: 3282d7c8a30e6526c201dcb8c3270554b2079bb675d50b16e3753c55f144881c
3
+ metadata.gz: fbbbf1b71d804f4682d5827d07aa6079a27953b04643a7479db78b2a8c68f865
4
+ data.tar.gz: 471585e726794ca17d72ec92d8f9963d8c10433f5df2fb941b0ad930880f9bcb
5
5
  SHA512:
6
- metadata.gz: 50a6e01ed24576c33592a1564116ee709ec8f9eaca5122fbe04408e546d4ea5799321dbf5f23ea4b8e3b2e90da0cdb9772733f295d28629cb05be9115c62da89
7
- data.tar.gz: 38931b0d2de732debd9e4c09fca532b8f192067e3058ceb86a1db61a86a80bffa9a43b334c79a87e0e6887da83b21af2076044ca45b2bc43bf4b5845fafb1dfb
6
+ metadata.gz: 222487917ebee32bf3f1ebed8356fcd12b2c21fead45f6ade6e95e1cc714a9bff2a70bd7fea4ce70551fdc6589cc8b92b8625e45919b18a02ff972f2e43c65d4
7
+ data.tar.gz: a96bec97937ec3ff93e6f65081e54608f80dd78916a7fb600fe82f7b41e0458f66890c551eb9f2987dc35e4f09d75cdeeb4068e91ce01139b73f3f4cdf7c11e3
@@ -119,110 +119,6 @@ class Wmap::UrlChecker
119
119
  end
120
120
  alias_method :checks, :url_workers
121
121
 
122
- =begin
123
- # Test the URL and return the response code
124
- def response_code (url)
125
- puts "Check the http response code on the url: #{url}" if @verbose
126
- code = 10000 # All unknown url connection exceptions go here
127
- raise "Invalid url: #{url}" unless is_url?(url)
128
- url=url.strip.downcase
129
- timeo = @http_timeout/1000.0
130
- uri = URI.parse(url)
131
- http = Net::HTTP.new(uri.host, uri.port)
132
- http.open_timeout = timeo
133
- http.read_timeout = timeo
134
- if (url =~ /https\:/i)
135
- http.use_ssl = true
136
- #http.ssl_version = :SSLv3
137
- # Bypass the remote web server cert validation test
138
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
139
- end
140
- request = Net::HTTP::Get.new(uri.request_uri)
141
- response = http.request(request)
142
- puts "Server response the following: #{response}" if @verbose
143
- code = response.code.to_i
144
- #response.finish if response.started?()
145
- @url_code[url]=code
146
- puts "Response code on #{url}: #{code}" if @verbose
147
- return code
148
- rescue Exception => ee
149
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
150
- case ee
151
- # rescue "Connection reset by peer" error type
152
- when Errno::ECONNRESET
153
- code=104
154
- when Errno::ECONNABORTED,Errno::ETIMEDOUT
155
- #code=10000
156
- when Timeout::Error # Quick fix
157
- if (url =~ /https\:/i) # try again for ssl timeout session, in case of default :TLSv1 failure
158
- http.ssl_version = :SSLv3
159
- response = http.request(request)
160
- code = response.code.to_i
161
- unless code.nil?
162
- @ssl_version = http.ssl_version
163
- end
164
- end
165
- else
166
- #code=10000
167
- end
168
- @url_code[url]=code
169
- return code
170
- end
171
-
172
- # Test the URL / site and return the redirection location (3xx response code only)
173
- def redirect_location (url)
174
- puts "Test the redirection location for the url: #{url}" if @verbose
175
- location=""
176
- raise "Invalid url: #{url}" unless is_url?(url)
177
- url=url.strip.downcase
178
- timeo = @http_timeout/1000.0
179
- uri = URI.parse(url)
180
- code = response_code (url)
181
- if code >= 300 && code < 400
182
- http = Net::HTTP.new(uri.host, uri.port)
183
- http.open_timeout = timeo
184
- http.read_timeout = timeo
185
- if (url =~ /https\:/i)
186
- http.use_ssl = true
187
- # Bypass the remote web server cert validation test
188
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
189
- http.ssl_version = @ssl_version
190
- end
191
- request = Net::HTTP::Get.new(uri.request_uri)
192
- response = http.request(request)
193
- puts "Response: #{response}" if @verbose
194
- case response
195
- when Net::HTTPRedirection then
196
- location = response['location']
197
- end
198
- end
199
- @url_redirection[url]=location
200
- return location
201
- rescue Exception => ee
202
- puts "Exception on method redirect_location for URL #{url}: #{ee}" if @verbose
203
- return ""
204
- end
205
- alias_method :location, :redirect_location
206
-
207
- # Test the URL / Site and return the landing url location (recursive with the depth = 4 )
208
- def landing_location (depth=5, url)
209
- depth -= 1
210
- return url if depth < 1
211
- timeo = @http_timeout/1000.0
212
- uri = URI.parse(url)
213
- code = response_code (url)
214
- if code >= 300 && code < 400
215
- url = redirect_location (url)
216
- url = landing_location(depth,url)
217
- else
218
- return url
219
- end
220
- return url
221
- rescue Exception => ee
222
- puts "Exception on method #{__method__} on URL #{url}: #{ee}" if @verbose
223
- end
224
- =end
225
-
226
122
  # Test the URL / site and return the web server type from the HTTP header "server" field
227
123
  def get_server_header (url)
228
124
  puts "Retrieve the server header field from the url: #{url}" if @verbose
@@ -14,176 +14,161 @@ module Wmap
14
14
  extend self
15
15
 
16
16
  # set hard stop limit of http time-out (value in milliseconds), in order to avoid severe performance penalty for certain 'weird' site(s)
17
- Max_http_timeout=8000
17
+ Max_http_timeout=15000
18
18
 
19
19
  # Simple sanity check on a 'claimed' URL string.
20
20
  def is_url?(url)
21
21
  puts "Validate the URL format is valid: #{url}" if @verbose
22
- begin
23
- if url =~ /(http|https)\:\/\/((.)+)/i
24
- host=$2.split('/')[0]
25
- host=host.split(':')[0]
26
- if is_ip?(host) or is_fqdn?(host)
27
- return true
28
- else
29
- return false
30
- end
22
+ if url =~ /(http|https)\:\/\/((.)+)/i
23
+ host=$2.split('/')[0]
24
+ host=host.split(':')[0]
25
+ if is_ip?(host) or is_fqdn?(host)
26
+ return true
31
27
  else
32
- puts "Unknown URL format: #{url}" if @verbose
33
28
  return false
34
29
  end
35
- rescue => ee
36
- puts "Exception on method #{__method__}: #{ee}" if @verbose
30
+ else
31
+ puts "Unknown URL format: #{url}" if @verbose
37
32
  return false
38
33
  end
34
+ rescue => ee
35
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
36
+ return false
39
37
  end
40
38
 
41
39
  # Simple sanity check on a 'claimed' SSL enabled URL string
42
40
  def is_ssl?(url)
43
41
  puts "Validate if SSL is enabled on: #{url}" if @verbose
44
- begin
45
- url=url.strip
46
- if is_url?(url) && url =~ /https/i
47
- return true
48
- else
49
- return false
50
- end
51
- rescue => ee
52
- puts "Exception on method #{__method__}: #{ee}" if @verbose
42
+ url=url.strip
43
+ if is_url?(url) && url =~ /https/i
44
+ return true
45
+ else
53
46
  return false
54
47
  end
48
+ rescue => ee
49
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
50
+ return false
55
51
  end
56
52
  alias_method :is_https?, :is_ssl?
57
53
 
58
54
  # Simple sanity check on a 'claimed' web site base string.
59
55
  def is_site?(url)
60
- puts "Validate the website string format for: #{url}" if @verbose
61
- begin
62
- url=url.strip.downcase
63
- if is_url?(url)
64
- if url == url_2_site(url)
65
- return true
66
- else
67
- return false
68
- end
56
+ puts "Validate the website string format for: #{url}" if @verbose
57
+ url=url.strip.downcase
58
+ if is_url?(url)
59
+ if url == url_2_site(url)
60
+ return true
69
61
  else
70
- puts "Unknown site format: #{url}" if @verbose
71
62
  return false
72
63
  end
73
- rescue => ee
74
- puts "Exception on method #{__method__}: #{ee}" if @verbose
75
- return nil
64
+ else
65
+ puts "Unknown site format: #{url}" if @verbose
66
+ return false
76
67
  end
68
+ rescue => ee
69
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
70
+ return nil
77
71
  end
78
72
 
79
73
  # Extract the web server host's Fully Qualified Domain Name (FQDN) from the url. For example: "https://login.yahoo.com/email/help" -> "login.yahoo.com"
80
74
  def url_2_host (url)
81
- begin
82
- url = url.strip.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
83
- record1 = url.split('/')
84
- if record1[0].nil?
85
- puts "Error process url: #{url}"
86
- return nil
87
- else
88
- record2 = record1[0].split(':')
89
- return record2[0]
90
- end
91
- rescue => ee
92
- puts "Exception on method #{__method__}: #{ee}" if @verbose
75
+ url = url.strip.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
76
+ record1 = url.split('/')
77
+ if record1[0].nil?
78
+ puts "Error process url: #{url}"
93
79
  return nil
80
+ else
81
+ record2 = record1[0].split(':')
82
+ return record2[0]
94
83
  end
84
+ rescue => ee
85
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
86
+ return nil
95
87
  end
96
88
 
97
89
  # Extract web service port from the url. For example: "https://login.yahoo.com/email/help" -> 443
98
90
  def url_2_port (url)
99
91
  puts "Retrieve service port on URL: #{url}" if @verbose
100
- begin
101
- ssl = (url =~ /https/i)
102
- url = url.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
103
- record1 = url.split('/')
104
- record2 = record1[0].split(':')
105
- if (record2.length == 2)
106
- puts "The service port: #{record2[1]}" if @verbose
107
- return record2[1].to_i
108
- elsif ssl
109
- puts "The service port: 443" if @verbose
110
- return 443
111
- else
112
- puts "The service port: 80" if @verbose
113
- return 80
114
- end
115
- rescue => ee
116
- puts "Exception on method #{__method__}: #{ee}" if @verbose
117
- return nil
92
+ ssl = (url =~ /https/i)
93
+ url = url.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
94
+ record1 = url.split('/')
95
+ record2 = record1[0].split(':')
96
+ if (record2.length == 2)
97
+ puts "The service port: #{record2[1]}" if @verbose
98
+ return record2[1].to_i
99
+ elsif ssl
100
+ puts "The service port: 443" if @verbose
101
+ return 443
102
+ else
103
+ puts "The service port: 80" if @verbose
104
+ return 80
118
105
  end
106
+ rescue => ee
107
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
108
+ return nil
119
109
  end
120
110
 
121
111
  # Extract site in (host:port) format from a url: "https://login.yahoo.com:8443/email/help" -> "https://login.yahoo.com:8443/"
122
112
  def url_2_site (url)
123
113
  puts "Retrieve the web site base for url: #{url}" if @verbose
124
- begin
125
- url = url.downcase
126
- url = url.sub(/^(.*?)http/i,'http')
127
- entry = url.split(%r{\/\/})
128
- prot=entry[0]
129
- # step 1, extract the host:port pair from the url
130
- host_port=entry[1].split(%r{\/})[0]
131
- if host_port =~ /\:/
132
- host=host_port.split(%r{\:})[0]
133
- port=host_port.split(%r{\:})[1].to_i
134
- elsif prot =~ /https/i
135
- host=host_port
136
- port=443
137
- elsif prot =~ /http/i
138
- host=host_port
139
- port=80
140
- else
141
- host=host_port
142
- #raise "Unknown url format: #{url}"
143
- end
144
- # additional logic to handle uncommon url base structures
145
- unless is_fqdn?(host)
146
- case host
147
- # "https://letmechoose.barclays.co.uk?source=btorganic/" => "https://letmechoose.barclays.co.uk"
148
- when /\?|\#/
149
- host=host.split(%r{\?|\#})[0]
150
- else
151
- #do nothing
152
- end
153
- end
154
- # step 2, put the host:port pair back to the normal site format
155
- prot="https:" if port==443
156
- if port==80 || port==443
157
- site=prot+"//"+host+"/"
158
- else
159
- site=prot+"//"+host+":"+port.to_s+"/"
160
- end
161
- if site=~ /http/i
162
- #puts "Base found: #{site}" if @verbose
163
- return site
164
- else
165
- raise "Problem encountered on method url_2_site: Unable to convert #{url}"
166
- return nil
114
+ url = url.downcase
115
+ url = url.sub(/^(.*?)http/i,'http')
116
+ entry = url.split(%r{\/\/})
117
+ prot=entry[0]
118
+ # step 1, extract the host:port pair from the url
119
+ host_port=entry[1].split(%r{\/})[0]
120
+ if host_port =~ /\:/
121
+ host=host_port.split(%r{\:})[0]
122
+ port=host_port.split(%r{\:})[1].to_i
123
+ elsif prot =~ /https/i
124
+ host=host_port
125
+ port=443
126
+ elsif prot =~ /http/i
127
+ host=host_port
128
+ port=80
129
+ else
130
+ host=host_port
131
+ #raise "Unknown url format: #{url}"
132
+ end
133
+ # additional logic to handle uncommon url base structures
134
+ unless is_fqdn?(host)
135
+ case host
136
+ # "https://letmechoose.barclays.co.uk?source=btorganic/" => "https://letmechoose.barclays.co.uk"
137
+ when /\?|\#/
138
+ host=host.split(%r{\?|\#})[0]
139
+ else
140
+ #do nothing
167
141
  end
168
- rescue => ee
169
- puts "Exception on method #{__method__}: #{ee}" if @verbose
142
+ end
143
+ # step 2, put the host:port pair back to the normal site format
144
+ prot="https:" if port==443
145
+ if port==80 || port==443
146
+ site=prot+"//"+host+"/"
147
+ else
148
+ site=prot+"//"+host+":"+port.to_s+"/"
149
+ end
150
+ if site=~ /http/i
151
+ #puts "Base found: #{site}" if @verbose
152
+ return site
153
+ else
154
+ raise "Problem encountered on method url_2_site: Unable to convert #{url}"
170
155
  return nil
171
156
  end
157
+ rescue => ee
158
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
159
+ return nil
172
160
  end
173
161
 
174
162
  # Wrapper to return relative path component of the URL. i.e. http://www.yahoo.com/login.html => /login.html
175
163
  def url_2_path(url)
176
164
  #puts "Retrieve the relative path component of the url: #{url}" if @verbose
177
- begin
178
- url.strip!
179
- base = url_2_site(url).chop
180
- path=url.sub(base,'')
181
- #puts "Path component found: #{path}" if @verbose
182
- return path
183
- rescue => ee
184
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
185
- end
186
-
165
+ url.strip!
166
+ base = url_2_site(url).chop
167
+ path=url.sub(base,'')
168
+ #puts "Path component found: #{path}" if @verbose
169
+ return path
170
+ rescue => ee
171
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
187
172
  end
188
173
 
189
174
  # Test if the two URLs are both under the same domain: http://login.yahoo.com, http://mail.yahoo.com => true
@@ -200,121 +185,111 @@ module Wmap
200
185
  # Input is host and open port, output is a URL for valid http response code or nil
201
186
  def host_2_url (host,port=80)
202
187
  puts "Perform simple http(s) service detection on host #{host}, port #{port}" if @verbose
203
- begin
204
- host=host.strip
205
- if port.to_i == 80
206
- url_1 = "http://" + host + "/"
207
- elsif port.to_i ==443
208
- url_1 = "https://" + host + "/"
209
- else
210
- url_1 = "http://" + host + ":" + port.to_s + "/"
211
- url_2 = "https://" + host + ":" + port.to_s + "/"
212
- end
213
- puts "Please ensure your internet connection is active before running this method: #{__method__}" if @verbose
214
- checker=Wmap::UrlChecker.new
215
- if checker.response_code(url_1) != 10000
216
- puts "Found URL: #{url_1}" if @verbose
217
- return url_1
218
- elsif checker.response_code(url_2) != 10000
219
- puts "Found URL: #{url_2}" if @verbose
220
- return url_2
221
- else
222
- puts "No http(s) service found on: #{host}:#{port}" if @verbose
223
- return nil
224
- end
225
- rescue => ee
226
- puts "Exception on method #{__method__}: #{ee}" if @verbose
188
+ host=host.strip
189
+ if port.to_i == 80
190
+ url_1 = "http://" + host + "/"
191
+ elsif port.to_i ==443
192
+ url_1 = "https://" + host + "/"
193
+ else
194
+ url_1 = "http://" + host + ":" + port.to_s + "/"
195
+ url_2 = "https://" + host + ":" + port.to_s + "/"
196
+ end
197
+ puts "Please ensure your internet connection is active before running this method: #{__method__}" if @verbose
198
+ checker=Wmap::UrlChecker.new
199
+ if checker.response_code(url_1) != 10000
200
+ puts "Found URL: #{url_1}" if @verbose
201
+ return url_1
202
+ elsif checker.response_code(url_2) != 10000
203
+ puts "Found URL: #{url_2}" if @verbose
204
+ return url_2
205
+ else
206
+ puts "No http(s) service found on: #{host}:#{port}" if @verbose
227
207
  return nil
228
208
  end
209
+ rescue => ee
210
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
211
+ return nil
229
212
  end
230
213
 
231
214
  # Convert a relative URL to an absolute one. For example, from URL base 'http://games.yahoo.com/' and file path '/game/the-magic-snowman-flash.html' => 'http://games.yahoo.com/game/the-magic-snowman-flash.html'
232
215
  def make_absolute(base, relative_url)
233
- puts "Determine and return the absolute URL:\n Base: #{base}, Relative: #{relative_url} " if @verbose
234
- begin
235
- absolute_url = nil;
236
- if relative_url =~ /^\//
237
- absolute_url = create_absolute_url_from_base(base, relative_url)
238
- else
239
- absolute_url = create_absolute_url_from_context(base, relative_url)
240
- end
241
- puts "Found absolute URL: #{absolute_url}" if @verbose
242
- return absolute_url
243
- rescue => ee
244
- puts "Exception on method #{__method__}: #{ee}" if @verbose
245
- return nil
246
- end
247
- end
216
+ puts "Determine and return the absolute URL:\n Base: #{base}, Relative: #{relative_url} " if @verbose
217
+ absolute_url = nil;
218
+ if relative_url =~ /^\//
219
+ absolute_url = create_absolute_url_from_base(base, relative_url)
220
+ else
221
+ absolute_url = create_absolute_url_from_context(base, relative_url)
222
+ end
223
+ puts "Found absolute URL: #{absolute_url}" if @verbose
224
+ return absolute_url
225
+ rescue => ee
226
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
227
+ return nil
228
+ end
248
229
 
249
230
  # Create / construct the absolute URL from a known URL and relative file path. For example, 'http://images.search.yahoo.com/images' + '/search/images?p=raiders' => 'http://images.search.yahoo.com/search/images?p=raiders'
250
231
  def create_absolute_url_from_base(potential_base, relative_url)
251
- begin
252
- #puts "Determine the absolute URL from potential base #{potential_base} and relative URL #{relative_url}" if @verbose
253
- naked_base = url_2_site(potential_base).strip.chop
254
- puts "Found absolute URL: #{naked_base+relative_url}" if @verbose
255
- return naked_base + relative_url
256
- rescue => ee
257
- puts "Exception on method #{__method__}: #{ee}" if @verbose
258
- return nil
259
- end
260
- end
232
+ #puts "Determine the absolute URL from potential base #{potential_base} and relative URL #{relative_url}" if @verbose
233
+ naked_base = url_2_site(potential_base).strip.chop
234
+ puts "Found absolute URL: #{naked_base+relative_url}" if @verbose
235
+ return naked_base + relative_url
236
+ rescue => ee
237
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
238
+ return nil
239
+ end
261
240
 
262
241
  # Construct the absolute URL by comparing a known URL and the relative file path
263
242
  def create_absolute_url_from_context(potential_base, relative_url)
264
- puts "Determine the absolute URL from context:\n Known base: #{potential_base}, Relative path: #{relative_url}" if @verbose
265
- begin
266
- absolute_url = nil
267
- # make relative URL naked by removing the beginning '/'
268
- relative_url.sub!(/^\//,'')
269
- if potential_base =~ /\/$/
270
- absolute_url = potential_base+relative_url.strip
243
+ puts "Determine the absolute URL from context:\n Known base: #{potential_base}, Relative path: #{relative_url}" if @verbose
244
+ absolute_url = nil
245
+ # make relative URL naked by removing the beginning '/'
246
+ relative_url.sub!(/^\//,'')
247
+ if potential_base =~ /\/$/
248
+ absolute_url = potential_base+relative_url.strip
249
+ else
250
+ last_index_of_slash = potential_base.rindex('/')
251
+ if potential_base[last_index_of_slash-2, 2] == ':/'
252
+ absolute_url = potential_base+relative_url
271
253
  else
272
- last_index_of_slash = potential_base.rindex('/')
273
- if potential_base[last_index_of_slash-2, 2] == ':/'
274
- absolute_url = potential_base+relative_url
254
+ last_index_of_dot = potential_base.rindex('.')
255
+ if last_index_of_dot < last_index_of_slash
256
+ absolute_url = potential_base.strip.chop+relative_url
275
257
  else
276
- last_index_of_dot = potential_base.rindex('.')
277
- if last_index_of_dot < last_index_of_slash
278
- absolute_url = potential_base.strip.chop+relative_url
279
- else
280
- absolute_url = potential_base[0, last_index_of_slash+1] + relative_url
281
- end
258
+ absolute_url = potential_base[0, last_index_of_slash+1] + relative_url
282
259
  end
283
260
  end
284
- puts "Found absolute URL: #{absolute_url}" if @verbose
285
- return absolute_url
286
- rescue => ee
287
- puts "Exception on method #{__method__}: #{ee}" if @verbose
288
- return nil
289
- end
290
- end
261
+ end
262
+ puts "Found absolute URL: #{absolute_url}" if @verbose
263
+ return absolute_url
264
+ rescue => ee
265
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
266
+ return nil
267
+ end
291
268
 
292
269
  # Normalize the URL in a consistent manner in order to determine whether a link has been visited or cached before
293
270
  # See http://en.wikipedia.org/wiki/URL_normalization for more explanation
294
271
  def normalize_url(url)
295
- begin
296
- url.strip!
297
- # Converting the scheme and host to lower case in the process, i.e. 'HTTP://www.Example.com/' => 'http://www.example.com/'
298
- # Normalize the base
299
- base=url_2_site(url)
300
- # Case#1, remove the trailing dot after the hostname, i.e, 'http://www.yahoo.com./' => 'http://www.yahoo.com/'
301
- base=base.sub(/\.\/$/,'/')
302
- # Normalize the relative path, case#1
303
- # retrieve the file path and remove the first '/' or '.',
304
- # i.e. 'http://www.example.com/mypath' or 'http://www.example.com/./mypath' => 'mypath'
305
- path=url_2_path(url).sub(/^(\/|\.)*/,'')
306
- # Normalize the relative path, case#2
307
- # Replace dot-segments. "/../" and "/./" with "/", i.e. 'http://www.example.com/../a/b/../c/./d.html" => 'http://www.example.com/a/c/d.html'
308
- path=path.gsub(/\/\.{1,2}\//,'/')
309
- if path.nil?
310
- return base
311
- else
312
- return base+path
313
- end
314
- rescue => ee
315
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
316
- return url
272
+ url.strip!
273
+ # Converting the scheme and host to lower case in the process, i.e. 'HTTP://www.Example.com/' => 'http://www.example.com/'
274
+ # Normalize the base
275
+ base=url_2_site(url)
276
+ # Case#1, remove the trailing dot after the hostname, i.e, 'http://www.yahoo.com./' => 'http://www.yahoo.com/'
277
+ base=base.sub(/\.\/$/,'/')
278
+ # Normalize the relative path, case#1
279
+ # retrieve the file path and remove the first '/' or '.',
280
+ # i.e. 'http://www.example.com/mypath' or 'http://www.example.com/./mypath' => 'mypath'
281
+ path=url_2_path(url).sub(/^(\/|\.)*/,'')
282
+ # Normalize the relative path, case#2
283
+ # Replace dot-segments "/../" and "/./" with "/" (the parent segment is NOT removed), i.e. 'http://www.example.com/../a/b/../c/./d.html' => 'http://www.example.com/a/b/c/d.html'
284
+ path=path.gsub(/\/\.{1,2}\//,'/')
285
+ if path.nil?
286
+ return base
287
+ else
288
+ return base+path
317
289
  end
290
+ rescue => ee
291
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
292
+ return url
318
293
  end
319
294
 
320
295
 
@@ -3,7 +3,7 @@
3
3
  ###############################################################################
4
4
  package = wmap
5
5
  # wmap version 2.0 == web_discovery version 1.5.3
6
- version = 2.7.0
6
+ version = 2.7.1
7
7
  date = 2020-03-09
8
8
 
9
9
  author = Sam (Yang) Li
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.0
4
+ version: 2.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam (Yang) Li