google_safe_browsing 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.mkd +6 -8
- data/lib/google_safe_browsing.rb +9 -0
- data/lib/google_safe_browsing/api_v2.rb +12 -0
- data/lib/google_safe_browsing/binary_helper.rb +29 -0
- data/lib/google_safe_browsing/canonicalize.rb +90 -29
- data/lib/google_safe_browsing/top_level_domain.rb +3 -0
- data/lib/google_safe_browsing/version.rb +1 -1
- metadata +5 -5
data/README.mkd
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
# Google Safe Browsing
|
1
|
+
# Google Safe Browsing Gem
|
2
2
|
|
3
|
-
This gem allows easy Google Safe Browsing integration
|
4
|
-
|
3
|
+
This gem allows easy Google Safe Browsing APIv2 usage with optional integration
|
4
|
+
into Rails 3 apps.
|
5
5
|
|
6
6
|
It includes:
|
7
7
|
|
8
8
|
* a migration generator for database schema
|
9
|
-
* method to update
|
9
|
+
* method to update hash database
|
10
10
|
* method to lookup a url
|
11
11
|
* rake tasks to update hash database
|
12
|
-
* Autonomous updating via Resque and Resque Scheduler
|
12
|
+
* Autonomous updating via Resque and Resque Scheduler (optional)
|
13
13
|
|
14
14
|
----------------------
|
15
15
|
|
@@ -96,6 +96,4 @@ https://github.com/codelux/malware_api
|
|
96
96
|
|
97
97
|
------------------
|
98
98
|
|
99
|
-
Thank you for using
|
100
|
-
|
101
|
-
-- Chris Marshall
|
99
|
+
Thank you for using this gem! Please report any bugs or issues.
|
data/lib/google_safe_browsing.rb
CHANGED
@@ -20,6 +20,8 @@ require File.dirname(__FILE__) + '/google_safe_browsing/full_hash'
|
|
20
20
|
require File.dirname(__FILE__) + '/google_safe_browsing/rescheduler'
|
21
21
|
|
22
22
|
module GoogleSafeBrowsing
|
23
|
+
|
24
|
+
# Handles the configuration values for the module
|
23
25
|
class Config
|
24
26
|
attr_accessor :client, :app_ver, :p_ver, :host, :current_lists, :api_key
|
25
27
|
|
@@ -32,19 +34,26 @@ module GoogleSafeBrowsing
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
37
|
+
# Returns or initializes the Module configuration
|
35
38
|
def self.config
|
36
39
|
@@config ||= Config.new
|
37
40
|
end
|
38
41
|
|
42
|
+
# Allows for setting config values via a block
|
39
43
|
def self.configure
|
40
44
|
yield self.config
|
41
45
|
end
|
42
46
|
|
47
|
+
# Adds the Rescheduler job to Resque
|
43
48
|
def self.kick_off
|
44
49
|
Resque.enqueue(Rescheduler)
|
45
50
|
end
|
46
51
|
|
47
52
|
|
53
|
+
# Converts the official Google list name into the name to return
|
54
|
+
#
|
55
|
+
# @param (String) list the 'official' list name
|
56
|
+
# @return (String) the friendly list name
|
48
57
|
def self.friendly_list_name(list)
|
49
58
|
case list
|
50
59
|
when 'goog-malware-shavar'
|
@@ -1,5 +1,9 @@
|
|
1
1
|
module GoogleSafeBrowsing
|
2
|
+
# Main Interface for Module
|
2
3
|
class APIv2
|
4
|
+
# Completes an update
|
5
|
+
#
|
6
|
+
# @return (Integer) the number of seconds before this method should be called again
|
3
7
|
def self.update
|
4
8
|
data_response = HttpHelper.get_data
|
5
9
|
|
@@ -14,8 +18,13 @@ module GoogleSafeBrowsing
|
|
14
18
|
to_do_array[:delay_seconds]
|
15
19
|
end
|
16
20
|
|
21
|
+
# Performs a lookup of the given url
|
22
|
+
#
|
23
|
+
# @param (String) url a url string to be looked up
|
24
|
+
# @return (String, nil) the friendly list name if found, `nil` if not found, or an empty string when the url yields no lookup candidates
|
17
25
|
def self.lookup(url)
|
18
26
|
urls = Canonicalize.urls_for_lookup(url)
|
27
|
+
return '' if urls.empty?
|
19
28
|
|
20
29
|
hashes = HashHelper.urls_to_hashes(urls)
|
21
30
|
raw_hash_array = hashes.collect{ |h| h.to_s }
|
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
|
|
46
55
|
nil
|
47
56
|
end
|
48
57
|
|
58
|
+
# Can be used to force a delay into a script running updates
|
59
|
+
#
|
60
|
+
# @param (Integer) delay_seconds the number of seconds to delay, should be the return value of {update}
|
49
61
|
def self.delay(delay_seconds)
|
50
62
|
puts "Google told us to wait for #{delay_seconds} seconds"
|
51
63
|
puts "We will wait...."
|
@@ -1,15 +1,29 @@
|
|
1
1
|
module GoogleSafeBrowsing
|
2
|
+
# Helper methods for working with binary encoded data from Forwarding URLs
|
2
3
|
class BinaryHelper
|
3
4
|
|
5
|
+
# Reads `count` bytes from byte iterator `iter` and returns the hex string representation
|
6
|
+
#
|
7
|
+
# @param [ByteIterator] iter byte iterator already at correct position
|
8
|
+
# @param [Integer] count number of bytes to read
|
9
|
+
# @return [String] hexadecimal string
|
4
10
|
def self.read_bytes_as_hex(iter, count)
|
5
11
|
read_bytes_from(iter, count).unpack("H#{count * 2}")[0]
|
6
12
|
end
|
7
13
|
|
14
|
+
# Returns the first four bytes of `string` as hexadecimal
|
15
|
+
#
|
16
|
+
# @param [String] string to unpack the first four bytes as hex
|
17
|
+
# @return (see read_bytes_as_hex)
|
8
18
|
def self.four_as_hex(string)
|
9
19
|
string.unpack('H8')[0]
|
10
20
|
end
|
11
21
|
|
12
22
|
|
23
|
+
# Read `count` bytes from `iter` without unpacking the result
|
24
|
+
#
|
25
|
+
# @param (see read_bytes_as_hex)
|
26
|
+
# @return (String) not unpacked string from `iter`
|
13
27
|
def self.read_bytes_from(iter, count)
|
14
28
|
ret = ''
|
15
29
|
count.to_i.times { ret << iter.next }
|
@@ -19,19 +33,34 @@ module GoogleSafeBrowsing
|
|
19
33
|
# return nil
|
20
34
|
end
|
21
35
|
|
36
|
+
# Returns the first four bytes of `string` as hexadecimal; for host key
|
37
|
+
# @param (String) bin string to unpack
|
38
|
+
# @return (String) unpacked string
|
22
39
|
def self.unpack_host_key(bin)
|
23
40
|
bin.unpack('H8')[0]
|
24
41
|
end
|
25
42
|
|
43
|
+
# Unpack string as an unsigned integer; for count
|
44
|
+
#
|
45
|
+
# @param (see unpack_host_key)
|
46
|
+
# @return (Integer) unpacked count value
|
26
47
|
def self.unpack_count(bin)
|
27
48
|
# this may not be correct
|
28
49
|
bin.unpack('U')[0]
|
29
50
|
end
|
30
51
|
|
52
|
+
# Unpack string as big-endian network byte order
|
53
|
+
#
|
54
|
+
# @param (see unpack_count)
|
55
|
+
# @return (Integer) unpacked chunk number
|
31
56
|
def self.unpack_add_chunk_num(bin)
|
32
57
|
bin.unpack('N')[0]
|
33
58
|
end
|
34
59
|
|
60
|
+
# Pack a Hex String into binary
|
61
|
+
#
|
62
|
+
# @param (String) hex string to encode
|
63
|
+
# @return (String) encoded string
|
35
64
|
def self.hex_to_bin(hex)
|
36
65
|
hex.to_a.pack('H*')
|
37
66
|
end
|
@@ -3,11 +3,16 @@ require 'ip'
|
|
3
3
|
require File.dirname(__FILE__) + '/top_level_domain.rb'
|
4
4
|
|
5
5
|
module GoogleSafeBrowsing
|
6
|
+
# Helpers to Canonicalize urls and generate url permutations for lookups
|
6
7
|
class Canonicalize
|
7
8
|
|
8
9
|
PROTOCOL_DELIMITER = '://'
|
9
10
|
DEFAULT_PROTOCOL = 'http'
|
10
11
|
|
12
|
+
# Base Canonicalizer method
|
13
|
+
#
|
14
|
+
# @param (String) raw_url uncanonicalized url string
|
15
|
+
# @return (String) canonicalized url string
|
11
16
|
def self.url(raw_url)
|
12
17
|
#puts raw_url
|
13
18
|
#remove tabs, carriage returns and line feeds
|
@@ -37,6 +42,10 @@ module GoogleSafeBrowsing
|
|
37
42
|
strict_escape(cann)
|
38
43
|
end
|
39
44
|
|
45
|
+
# Generate the url permutations for lookup
|
46
|
+
#
|
47
|
+
# @param (String) lookup_url uncanonicalized url string
|
48
|
+
# @return (Array) array of canonicalized url permutation strings
|
40
49
|
def self.urls_for_lookup(lookup_url)
|
41
50
|
lookup_url = url(lookup_url)
|
42
51
|
|
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
|
|
46
55
|
|
47
56
|
host_string = strip_username_password_and_port_from_host(splits[:host])
|
48
57
|
|
58
|
+
#return empty array unless host_string has at least one period
|
59
|
+
return [] unless host_string.include?('.')
|
60
|
+
|
49
61
|
host_strings = [host_string]
|
50
62
|
host = TopLevelDomain.split_from_host(host_string).last(5)
|
51
63
|
( host.length - 1 ).times do
|
@@ -61,45 +73,54 @@ module GoogleSafeBrowsing
|
|
61
73
|
|
62
74
|
private
|
63
75
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
76
|
+
# Generates the path permutations from the raw path string
|
77
|
+
#
|
78
|
+
# @param (String) raw_path path split from the full url string
|
79
|
+
# @return (Array) array of path permutation strings
|
80
|
+
def self.generate_path_strings(raw_path)
|
81
|
+
return [ '/', '' ] if raw_path == ''
|
70
82
|
|
83
|
+
path_split = raw_path.split('?')
|
84
|
+
path = path_split[0]
|
85
|
+
params = path_split[1]
|
71
86
|
|
72
|
-
path_components = path.split('/').first(3)
|
73
|
-
path_strings = [ '/' ]
|
74
|
-
path_components.length.times do
|
75
|
-
path_strings << '/' + path_components.join('/')
|
76
|
-
path_components.pop
|
77
|
-
end
|
78
87
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
88
|
+
path_components = path.split('/').first(3)
|
89
|
+
path_strings = [ '/' ]
|
90
|
+
path_components.length.times do
|
91
|
+
path_strings << '/' + path_components.join('/')
|
92
|
+
path_components.pop
|
84
93
|
end
|
85
|
-
end
|
86
|
-
path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
|
87
|
-
path_strings.compact!
|
88
|
-
path_strings.uniq!
|
89
94
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
p
|
95
|
+
path_strings.map! do |p|
|
96
|
+
unless p.index('.')
|
97
|
+
p + '/'
|
94
98
|
else
|
95
|
-
|
99
|
+
p
|
96
100
|
end
|
97
101
|
end
|
98
|
-
|
99
|
-
|
102
|
+
path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
|
103
|
+
path_strings.compact!
|
104
|
+
path_strings.uniq!
|
105
|
+
|
106
|
+
if params
|
107
|
+
path_strings | path_strings.map do |p|
|
108
|
+
if p[-1..-1] == '/'
|
109
|
+
p
|
110
|
+
else
|
111
|
+
"#{p}?#{params}"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
else
|
115
|
+
return path_strings
|
116
|
+
end
|
100
117
|
end
|
101
|
-
end
|
102
118
|
|
119
|
+
# Returns the cartesian product of two arrays by concatenation of the string representation of the elements
|
120
|
+
#
|
121
|
+
# @param (Array) a_one array of strings
|
122
|
+
# @param (Array) a_two array of strings
|
123
|
+
# @return (Array) cartesian product of arrays with elements concatenated
|
103
124
|
def self.cart_prod(a_one, a_two)
|
104
125
|
result = []
|
105
126
|
a_one.each do |i|
|
@@ -110,6 +131,10 @@ module GoogleSafeBrowsing
|
|
110
131
|
result
|
111
132
|
end
|
112
133
|
|
134
|
+
# Takes the canonicalized url and splits the host and the path apart
|
135
|
+
#
|
136
|
+
# @param (String) cann canonicalized url string
|
137
|
+
# @return (Hash) !{ :host => host_part, :path => path_part }
|
113
138
|
def self.split_host_path(cann)
|
114
139
|
ret= { :host => cann, :path => '' }
|
115
140
|
split_point = cann.index('/')
|
@@ -121,11 +146,19 @@ module GoogleSafeBrowsing
|
|
121
146
|
ret
|
122
147
|
end
|
123
148
|
|
149
|
+
# Strips the fragment portion of the url string (the first '#' and everything after)
|
150
|
+
#
|
151
|
+
# @param (String) string url
|
152
|
+
# @return (String) parameter with the fragment removed
|
124
153
|
def self.remove_fragment(string)
|
125
154
|
string = string[0..string.index('#')-1] if string.index('#')
|
126
155
|
string
|
127
156
|
end
|
128
157
|
|
158
|
+
# Continues to unescape the url until unescaping has no effect
|
159
|
+
#
|
160
|
+
# @param (String) url url string
|
161
|
+
# @return (String) fully unescaped url string
|
129
162
|
def self.recursively_unescape(url)
|
130
163
|
compare_url = url.clone
|
131
164
|
url = URI.unescape(url)
|
@@ -136,6 +169,10 @@ module GoogleSafeBrowsing
|
|
136
169
|
url
|
137
170
|
end
|
138
171
|
|
172
|
+
# Apply initial fixes to host string
|
173
|
+
#
|
174
|
+
# @param (String) host host string
|
175
|
+
# @return (String) standardized host string
|
139
176
|
def self.fix_host(host)
|
140
177
|
#puts "In Host: #{host}"
|
141
178
|
# remove leading and trailing dots, multiple dots to one
|
@@ -149,6 +186,10 @@ module GoogleSafeBrowsing
|
|
149
186
|
host
|
150
187
|
end
|
151
188
|
|
189
|
+
# Apply initial fixes to path string
|
190
|
+
#
|
191
|
+
# @param (String) path path string
|
192
|
+
# @return (String) standardized path string
|
152
193
|
def self.fix_path(path)
|
153
194
|
#puts "In Path: #{path}"
|
154
195
|
|
@@ -179,6 +220,10 @@ module GoogleSafeBrowsing
|
|
179
220
|
path
|
180
221
|
end
|
181
222
|
|
223
|
+
# Escape the url, but do not escape certain characters, such as the caret
|
224
|
+
#
|
225
|
+
# @param (String) url url string
|
226
|
+
# @return (String) escaped url string
|
182
227
|
def self.strict_escape(url)
|
183
228
|
url = URI.escape url
|
184
229
|
|
@@ -188,6 +233,10 @@ module GoogleSafeBrowsing
|
|
188
233
|
url
|
189
234
|
end
|
190
235
|
|
236
|
+
# Strip the leading protocol from the url string
|
237
|
+
#
|
238
|
+
# @param (String) cann url string
|
239
|
+
# @return (String) url string without the protocol
|
191
240
|
def self.remove_protocol(cann)
|
192
241
|
if cann.index(PROTOCOL_DELIMITER)
|
193
242
|
delimiting_index = cann.index(PROTOCOL_DELIMITER)
|
@@ -198,11 +247,19 @@ module GoogleSafeBrowsing
|
|
198
247
|
cann
|
199
248
|
end
|
200
249
|
|
250
|
+
# Strip the user name, password and port number from the url
|
251
|
+
#
|
252
|
+
# @param (String) host_string host portion of the url
|
253
|
+
# @return (String) host portion of the url without the username, password and port
|
201
254
|
def self.strip_username_password_and_port_from_host(host_string)
|
202
255
|
host_string = remove_port(host_string)
|
203
256
|
remove_username_and_password(host_string)
|
204
257
|
end
|
205
258
|
|
259
|
+
# Strip port number from host string
|
260
|
+
#
|
261
|
+
# @param (see strip_username_password_and_port_from_host)
|
262
|
+
# @return (String) host part without the port number
|
206
263
|
def self.remove_port(host_string)
|
207
264
|
port_sep = host_string.rindex(':')
|
208
265
|
if port_sep
|
@@ -212,6 +269,10 @@ module GoogleSafeBrowsing
|
|
212
269
|
end
|
213
270
|
end
|
214
271
|
|
272
|
+
# Strip user name and password from host part of url
|
273
|
+
#
|
274
|
+
# @param (see remove_port)
|
275
|
+
# @return (String) host part of url without user name or password
|
215
276
|
def self.remove_username_and_password(host_string)
|
216
277
|
un_sep = host_string.index('@')
|
217
278
|
if un_sep
|
@@ -20,6 +20,9 @@ module GoogleSafeBrowsing
|
|
20
20
|
tld
|
21
21
|
end
|
22
22
|
|
23
|
+
# return array of host components (www, example, com from www.example.com)
|
24
|
+
# taking top level domains into account
|
25
|
+
# e.g. 'sub.domain.example.co.uk' => [ 'sub', 'domain', 'example', 'co.uk' ]
|
23
26
|
def self.split_from_host(host)
|
24
27
|
components = host.split('.')
|
25
28
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_safe_browsing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Chris Marshall
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-04-09 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
prerelease: false
|
@@ -136,7 +136,7 @@ files:
|
|
136
136
|
- MIT-LICENSE
|
137
137
|
- Rakefile
|
138
138
|
- README.mkd
|
139
|
-
homepage: https://github.com/
|
139
|
+
homepage: https://github.com/mobiledefense/mobiledefense_google_safe_browsing
|
140
140
|
licenses: []
|
141
141
|
|
142
142
|
post_install_message:
|