google_safe_browsing 0.3.9 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/google_safe_browsing/canonicalize.rb +219 -173
- data/lib/google_safe_browsing/version.rb +1 -1
- metadata +5 -5
@@ -33,6 +33,7 @@ module GoogleSafeBrowsing
|
|
33
33
|
|
34
34
|
#split into host and path components
|
35
35
|
splits = split_host_path(cann)
|
36
|
+
|
36
37
|
cann = fix_host( splits[:host] ) + '/' + fix_path( splits[:path] )
|
37
38
|
|
38
39
|
# add leading protocol
|
@@ -48,6 +49,8 @@ module GoogleSafeBrowsing
|
|
48
49
|
# @return (Array) array of canonicalized url permutation strings
|
49
50
|
def self.urls_for_lookup(lookup_url)
|
50
51
|
lookup_url = url(lookup_url)
|
52
|
+
#return empty array if url returns nil; for invalid url
|
53
|
+
return [] if lookup_url.blank?
|
51
54
|
|
52
55
|
lookup_url = remove_protocol(lookup_url)
|
53
56
|
|
@@ -71,219 +74,262 @@ module GoogleSafeBrowsing
|
|
71
74
|
cart_prod(host_strings, path_strings)
|
72
75
|
end
|
73
76
|
|
74
|
-
|
77
|
+
# private
|
75
78
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
79
|
+
# Generates the path permutations from the raw path string.
#
# The result always contains the root path '/', followed by one entry for
# each leading prefix of (at most) the first three path components, longest
# first. Entries that contain no '.' get a trailing slash appended. When a
# query string is present, every entry that does not end in '/' is returned
# a second time with "?query" appended.
#
# @param (String) raw_path path split from the full url string
# @return (Array) array of path permutation strings
def self.generate_path_strings(raw_path)
  return [ '/', '' ] if raw_path == ''

  # Split on the first '?' only, so a query that itself contains '?' stays whole.
  path, params = raw_path.split('?', 2)
  path ||= ''
  params ||= ''

  path_components = path.split('/').first(3)
  path_strings = [ '/' ]
  path_components.length.times do
    path_strings << '/' + path_components.join('/')
    path_components.pop
  end

  # Entries without a '.' get a trailing slash; the others are kept as they are.
  path_strings.map! { |entry| entry.index('.') ? entry : entry + '/' }
  # Collapse runs of '/'. Non-bang gsub is used on purpose: gsub! returns nil
  # when nothing was replaced, which would silently drop the entry.
  path_strings.map! { |entry| entry.gsub(/\/+/, '/') }
  path_strings.uniq!

  return path_strings if params.blank?

  with_params = path_strings.map do |entry|
    entry.end_with?('/') ? entry : "#{entry}?#{params}"
  end
  path_strings | with_params
end
|
133
121
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
122
|
+
# Returns the cartesian product of two arrays by concatenation of the
# string representation of the elements.
#
# Results are ordered by a_one first, then a_two.
#
# @param (Array) a_one array of strings
# @param (Array) a_two array of strings
# @return (Array) cartesian product of arrays with elements concatenated
def self.cart_prod(a_one, a_two)
  a_one.flat_map do |left|
    a_two.map { |right| "#{left}#{right}" }
  end
end
|
148
136
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
137
|
+
# Takes the canonicalized url and splits the host and the path apart
# at the first '/'. The separating slash itself is not part of either piece.
#
# @param (String) cann canonicalized url string
# @return (Hash) !{ :host => host_part, :path => path_part }; :path is ''
#   when the string contains no '/'
def self.split_host_path(cann)
  split_point = cann.index('/')
  return { :host => cann, :path => '' } unless split_point

  # Exclusive range: an inclusive 0..split_point-1 turns into 0..-1 when the
  # string starts with '/', which would return the whole string as the host.
  { :host => cann[0...split_point], :path => cann[(split_point + 1)..-1] }
end
|
151
|
+
|
152
|
+
# Strips the fragment portion of the url string (the first '#' and
# everything after it).
#
# @param (String) string url
# @return (String) parameter with the fragment removed
def self.remove_fragment(string)
  fragment_start = string.index('#')
  return string unless fragment_start

  # Exclusive range so a '#' at index 0 yields '' rather than the whole string
  # (0..-1 would select everything).
  string[0...fragment_start]
end
|
160
|
+
|
161
|
+
# Continues to unescape the url until unescaping has no effect
#
# @param (String) url url string
# @return (String) fully unescaped url string
def self.recursively_unescape(url)
  loop do
    # URI.unescape was removed in Ruby 3.0; it delegated to this parser method.
    unescaped = URI::DEFAULT_PARSER.unescape(url)
    return unescaped if unescaped == url
    url = unescaped
  end
end
|
171
174
|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
175
|
+
# Apply initial fixes to host string: strips leading/trailing dots,
# collapses runs of dots, downcases, and normalizes IP-address hosts.
# Credentials and port, when present, are re-attached around the
# normalized host. The argument itself is not modified.
#
# @param (String) host host string
# @return (String) standardized host string
def self.fix_host(host)
  # remove leading and trailing dots, multiple dots to one
  host = host.gsub(/\A\.+|\.+\Z/, '').gsub(/\.+/, '.').downcase

  host_splits = self.split_username_password_and_port(host)
  # Only the bare host is fed to the IP normalization below; passing the
  # full string would make credentials or a port corrupt the address.
  bare_host = host_splits[:host]

  if bare_host =~ /\A\d+\z/
    # host written as a single integer
    host_splits[:host] = IP::V4.new(bare_host.to_i).to_addr
  elsif bare_host =~ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
    begin
      host_splits[:host] = IP.new(bare_host).to_addr
    rescue ArgumentError
      # not parsable as an address after all; keep the host text unchanged
    end
  end

  result = host_splits[:host]
  result = "#{host_splits[:creds]}@#{result}" unless host_splits[:creds].blank?
  result = "#{result}:#{host_splits[:port]}" unless host_splits[:port].blank?
  result
end
|
202
203
|
|
203
|
-
|
204
|
+
# Apply initial fixes to path string: drops one leading slash, collapses
# repeated slashes and resolves '.' and '..' segments. A query string
# (everything from the first '?') is left untouched and re-appended.
# The argument itself is not modified.
#
# @param (String) path path string
# @return (String) standardized path string
def self.fix_path(path)
  #remove leading slash
  path = path[1..-1] if path[0..0] == '/'

  # Detach the query first so that its characters cannot influence the
  # trailing-slash or dot-segment handling below.
  params = nil
  first_ques = path.index('?')
  if first_ques
    params = path[first_ques..-1]
    # exclusive range: 0..first_ques-1 would select everything when '?' is at index 0
    path = path[0...first_ques]
  end

  preserve_trailing_slash = path.end_with?('/')

  # remove multiple '/' and resolve '.' / '..'
  new_path_array = []
  path.gsub(/\/+/, '/').split('/').each do |segment|
    case segment
    when '.'  then next
    when '..' then new_path_array.pop
    else new_path_array << segment
    end
  end

  path = new_path_array.join('/')
  # An empty path needs no slash of its own.
  path += '/' if preserve_trailing_slash && !path.empty?
  path += params if params
  path
end
|
236
237
|
|
237
|
-
|
238
|
-
|
238
|
+
# Escape the url, but do not escape certain characters; such as the caret
#
# @param (String) url url string
# @return (String) escaped url string
def self.strict_escape(url)
  # URI.escape was removed in Ruby 3.0; it delegated to this parser method.
  escaped = URI::DEFAULT_PARSER.escape(url)

  # unescape caret, may need other optionally escapable chars
  escaped.gsub('%5E', '^')
end
|
253
250
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
251
|
+
# Strip the leading protocol from the url string. When a protocol is
# found it is remembered in @protocol; otherwise the string is returned
# unchanged and @protocol is left as it was.
#
# @param (String) cann url string
# @return (String) url string without the protocol
def self.remove_protocol(cann)
  delimiting_index = cann.index(PROTOCOL_DELIMITER)
  return cann unless delimiting_index

  # Exclusive range: 0..delimiting_index-1 would capture the whole string
  # when the delimiter sits at index 0.
  @protocol = cann[0...delimiting_index]
  cann[(delimiting_index + PROTOCOL_DELIMITER.length)..-1]
end
|
262
264
|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
265
|
+
# Strip the user name, password and port number from the url
#
# @param (String) host_string host portion of the url
# @return (String) host portion of the url without the username, password and port
def self.strip_username_password_and_port_from_host(host_string)
  # Credentials go first: they may contain ':' ("user:pass@host:80"), which
  # would otherwise be mistaken for the port separator.
  remove_port(remove_username_and_password(host_string))
end
|
273
|
+
|
274
|
+
# Drops the port number from the host string, returning the :host entry
# produced by split_port.
#
# @param (see strip_username_password_and_port_from_host)
# @return (String) host part without the port number
def self.remove_port(host_string)
  host_and_port = split_port(host_string)
  host_and_port[:host]
end
|
281
|
+
|
282
|
+
# Drops the user name and password from the host part of the url,
# returning the :host entry produced by split_username_and_password.
#
# @param (see remove_port)
# @return (String) host part of url without user name or password
def self.remove_username_and_password(host_string)
  creds_and_host = split_username_and_password(host_string)
  creds_and_host[:host]
end
|
289
|
+
|
290
|
+
# Split user name and password from the host. The split happens at the
# last '@', so credentials that themselves contain '@' stay intact and a
# trailing '@' yields an empty host string rather than nil.
#
# @param (see remove_port)
# @return (Hash) :host has the host string, :creds holds the username and
#   password string (nil when the string contains no '@')
def self.split_username_and_password(host_string)
  return { :host => host_string, :creds => nil } unless host_string.include?('@')

  creds, _separator, host = host_string.rpartition('@')
  { :host => host, :creds => creds }
end
|
275
307
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
308
|
+
# Split port number and host string into a hash. The split happens at the
# last ':' in the string.
#
# @param (see remove_port)
# @return (Hash) :host has the host string, :port holds the port number
#   (nil when there is no ':' or nothing follows it)
def self.split_port(host_string)
  return { :host => host_string, :port => nil } unless host_string.include?(':')

  host, _separator, port = host_string.rpartition(':')
  # A dangling ':' means "no port"; report it as nil, the same as a missing separator.
  { :host => host, :port => (port.empty? ? nil : port) }
end
|
325
|
+
|
326
|
+
# Breaks the host string into credentials, host and port. Credentials are
# separated first; the port is then split from the remaining host part.
#
# @param (see remove_port)
# @return (Hash) :host as the host string; :creds has the username and password; :port holds the port number
def self.split_username_password_and_port(host_string)
  creds_split = split_username_and_password(host_string)
  port_split = split_port(creds_split[:host])
  creds_split.merge(port_split)
end
|
288
334
|
end
|
289
335
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_safe_browsing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rails
|
@@ -151,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
151
151
|
version: '0'
|
152
152
|
segments:
|
153
153
|
- 0
|
154
|
-
hash: -
|
154
|
+
hash: -4373468862577111822
|
155
155
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
156
|
none: false
|
157
157
|
requirements:
|
@@ -160,10 +160,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
160
160
|
version: '0'
|
161
161
|
segments:
|
162
162
|
- 0
|
163
|
-
hash: -
|
163
|
+
hash: -4373468862577111822
|
164
164
|
requirements: []
|
165
165
|
rubyforge_project:
|
166
|
-
rubygems_version: 1.8.
|
166
|
+
rubygems_version: 1.8.24
|
167
167
|
signing_key:
|
168
168
|
specification_version: 3
|
169
169
|
summary: Rails 3 plugin for Google's Safe Browsing API v2
|