google_safe_browsing 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.mkd +6 -8
- data/lib/google_safe_browsing.rb +9 -0
- data/lib/google_safe_browsing/api_v2.rb +12 -0
- data/lib/google_safe_browsing/binary_helper.rb +29 -0
- data/lib/google_safe_browsing/canonicalize.rb +90 -29
- data/lib/google_safe_browsing/top_level_domain.rb +3 -0
- data/lib/google_safe_browsing/version.rb +1 -1
- metadata +5 -5
data/README.mkd
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
# Google Safe Browsing
|
1
|
+
# Google Safe Browsing Gem
|
2
2
|
|
3
|
-
This gem allows easy Google Safe Browsing integration
|
4
|
-
|
3
|
+
This gem allows easy Google Safe Browsing APIv2 usage with optional integration
|
4
|
+
into Rails 3 apps.
|
5
5
|
|
6
6
|
It includes:
|
7
7
|
|
8
8
|
* a migration generator for database schema
|
9
|
-
* method to update
|
9
|
+
* method to update hash database
|
10
10
|
* method to lookup a url
|
11
11
|
* rake tasks to update hash database
|
12
|
-
* Autonomous updating via Resque and Resque Scheduler
|
12
|
+
* Autonomous updating via Resque and Resque Scheduler (optional)
|
13
13
|
|
14
14
|
----------------------
|
15
15
|
|
@@ -96,6 +96,4 @@ https://github.com/codelux/malware_api
|
|
96
96
|
|
97
97
|
------------------
|
98
98
|
|
99
|
-
Thank you for using
|
100
|
-
|
101
|
-
-- Chris Marshall
|
99
|
+
Thank you for using this gem! Please report any bugs or issues.
|
data/lib/google_safe_browsing.rb
CHANGED
@@ -20,6 +20,8 @@ require File.dirname(__FILE__) + '/google_safe_browsing/full_hash'
|
|
20
20
|
require File.dirname(__FILE__) + '/google_safe_browsing/rescheduler'
|
21
21
|
|
22
22
|
module GoogleSafeBrowsing
|
23
|
+
|
24
|
+
# Handles the configuration values for the module
|
23
25
|
class Config
|
24
26
|
attr_accessor :client, :app_ver, :p_ver, :host, :current_lists, :api_key
|
25
27
|
|
@@ -32,19 +34,26 @@ module GoogleSafeBrowsing
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
37
|
+
# Returns or initializes the Module configuration
|
35
38
|
def self.config
|
36
39
|
@@config ||= Config.new
|
37
40
|
end
|
38
41
|
|
42
|
+
# Allows for setting config values via a block
|
39
43
|
def self.configure
|
40
44
|
yield self.config
|
41
45
|
end
|
42
46
|
|
47
|
+
# Adds the Rescheduler job to Resque
|
43
48
|
def self.kick_off
|
44
49
|
Resque.enqueue(Rescheduler)
|
45
50
|
end
|
46
51
|
|
47
52
|
|
53
|
+
# Converts the official Google list name into the name to return
|
54
|
+
#
|
55
|
+
# @param (String) list the 'official' list name
|
56
|
+
# @return (String) the friendly list name
|
48
57
|
def self.friendly_list_name(list)
|
49
58
|
case list
|
50
59
|
when 'goog-malware-shavar'
|
@@ -1,5 +1,9 @@
|
|
1
1
|
module GoogleSafeBrowsing
|
2
|
+
# Main Interface for Module
|
2
3
|
class APIv2
|
4
|
+
# Completes an update
|
5
|
+
#
|
6
|
+
# @return (Integer) the number of seconds before this method should be called again
|
3
7
|
def self.update
|
4
8
|
data_response = HttpHelper.get_data
|
5
9
|
|
@@ -14,8 +18,13 @@ module GoogleSafeBrowsing
|
|
14
18
|
to_do_array[:delay_seconds]
|
15
19
|
end
|
16
20
|
|
21
|
+
# Performs a lookup of the given url
|
22
|
+
#
|
23
|
+
# @param (String) url a url string to be looked up
|
24
|
+
# @return (String, nil) the friendly list name if found, `nil` if not found, or an empty string when no lookup urls could be generated
|
17
25
|
def self.lookup(url)
|
18
26
|
urls = Canonicalize.urls_for_lookup(url)
|
27
|
+
return '' if urls.empty?
|
19
28
|
|
20
29
|
hashes = HashHelper.urls_to_hashes(urls)
|
21
30
|
raw_hash_array = hashes.collect{ |h| h.to_s }
|
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
|
|
46
55
|
nil
|
47
56
|
end
|
48
57
|
|
58
|
+
# Can be used to force a delay into a script running updates
|
59
|
+
#
|
60
|
+
# @param (Integer) delay_seconds the number of seconds to delay, should be the return value of {update}
|
49
61
|
def self.delay(delay_seconds)
|
50
62
|
puts "Google told us to wait for #{delay_seconds} seconds"
|
51
63
|
puts "We will wait...."
|
@@ -1,15 +1,29 @@
|
|
1
1
|
module GoogleSafeBrowsing
|
2
|
+
# Helper methods for working with binary encoded data from Forwarding URLs
|
2
3
|
class BinaryHelper
|
3
4
|
|
5
|
+
# Reads `count` bytes from byte iterator `iter` and returns the hex string representation
|
6
|
+
#
|
7
|
+
# @param [ByteIterator] iter byte iterator already at correct position
|
8
|
+
# @param [Integer] count number of bytes to read
|
9
|
+
# @return [String] hexadecimal string
|
4
10
|
def self.read_bytes_as_hex(iter, count)
|
5
11
|
read_bytes_from(iter, count).unpack("H#{count * 2}")[0]
|
6
12
|
end
|
7
13
|
|
14
|
+
# Returns the first four bytes of `string` as hexadecimal
|
15
|
+
#
|
16
|
+
# @param [String] string to unpack the first four bytes as hex
|
17
|
+
# @return (see read_bytes_as_hex)
|
8
18
|
def self.four_as_hex(string)
|
9
19
|
string.unpack('H8')[0]
|
10
20
|
end
|
11
21
|
|
12
22
|
|
23
|
+
# Read `count` bytes from `iter` without unpacking the result
|
24
|
+
#
|
25
|
+
# @param (see read_bytes_as_hex)
|
26
|
+
# @return (String) not unpacked string from `iter`
|
13
27
|
def self.read_bytes_from(iter, count)
|
14
28
|
ret = ''
|
15
29
|
count.to_i.times { ret << iter.next }
|
@@ -19,19 +33,34 @@ module GoogleSafeBrowsing
|
|
19
33
|
# return nil
|
20
34
|
end
|
21
35
|
|
36
|
+
# Returns the first four bytes of `string` as hexadecimal; for host key
|
37
|
+
# @param (String) bin string to unpack
|
38
|
+
# @return (String) unpacked string
|
22
39
|
def self.unpack_host_key(bin)
|
23
40
|
bin.unpack('H8')[0]
|
24
41
|
end
|
25
42
|
|
43
|
+
# Unpack string as an unsigned integer; for count
|
44
|
+
#
|
45
|
+
# @param (see unpack_host_key)
|
46
|
+
# @return (see unpack_host_key)
|
26
47
|
def self.unpack_count(bin)
|
27
48
|
# this may not be correct
|
28
49
|
bin.unpack('U')[0]
|
29
50
|
end
|
30
51
|
|
52
|
+
# Unpack string as a 32-bit unsigned integer in big-endian (network) byte order
|
53
|
+
#
|
54
|
+
# @param (see unpack_count)
|
55
|
+
# @return (see unpack_count)
|
31
56
|
def self.unpack_add_chunk_num(bin)
|
32
57
|
bin.unpack('N')[0]
|
33
58
|
end
|
34
59
|
|
60
|
+
# Pack a Hex String into binary
|
61
|
+
#
|
62
|
+
# @param (String) hex string to encode
|
63
|
+
# @return (String) encoded string
|
35
64
|
def self.hex_to_bin(hex)
|
36
65
|
hex.to_a.pack('H*')
|
37
66
|
end
|
@@ -3,11 +3,16 @@ require 'ip'
|
|
3
3
|
require File.dirname(__FILE__) + '/top_level_domain.rb'
|
4
4
|
|
5
5
|
module GoogleSafeBrowsing
|
6
|
+
# Helpers to Canonicalize urls and generate url permutations for lookups
|
6
7
|
class Canonicalize
|
7
8
|
|
8
9
|
PROTOCOL_DELIMITER = '://'
|
9
10
|
DEFAULT_PROTOCOL = 'http'
|
10
11
|
|
12
|
+
# Base Canonicalizer method
|
13
|
+
#
|
14
|
+
# @param (String) raw_url uncanonicalized url string
|
15
|
+
# @return (String) canonicalized url string
|
11
16
|
def self.url(raw_url)
|
12
17
|
#puts raw_url
|
13
18
|
#remove tabs, carriage returns and line feeds
|
@@ -37,6 +42,10 @@ module GoogleSafeBrowsing
|
|
37
42
|
strict_escape(cann)
|
38
43
|
end
|
39
44
|
|
45
|
+
# Generate the url permutations for lookup
|
46
|
+
#
|
47
|
+
# @param (String) lookup_url uncanonicalized url string
|
48
|
+
# @return (Array) array of canonicalized url permutation strings
|
40
49
|
def self.urls_for_lookup(lookup_url)
|
41
50
|
lookup_url = url(lookup_url)
|
42
51
|
|
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
|
|
46
55
|
|
47
56
|
host_string = strip_username_password_and_port_from_host(splits[:host])
|
48
57
|
|
58
|
+
#return empty array unless host_string has at least one period
|
59
|
+
return [] unless host_string.include?('.')
|
60
|
+
|
49
61
|
host_strings = [host_string]
|
50
62
|
host = TopLevelDomain.split_from_host(host_string).last(5)
|
51
63
|
( host.length - 1 ).times do
|
@@ -61,45 +73,54 @@ module GoogleSafeBrowsing
|
|
61
73
|
|
62
74
|
private
|
63
75
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
76
|
+
# Generates the path permutations from the raw path string
|
77
|
+
#
|
78
|
+
# @param (String) raw_path path split from the full url string
|
79
|
+
# @return (Array) array of path permutation strings
|
80
|
+
def self.generate_path_strings(raw_path)
|
81
|
+
return [ '/', '' ] if raw_path == ''
|
70
82
|
|
83
|
+
path_split = raw_path.split('?')
|
84
|
+
path = path_split[0]
|
85
|
+
params = path_split[1]
|
71
86
|
|
72
|
-
path_components = path.split('/').first(3)
|
73
|
-
path_strings = [ '/' ]
|
74
|
-
path_components.length.times do
|
75
|
-
path_strings << '/' + path_components.join('/')
|
76
|
-
path_components.pop
|
77
|
-
end
|
78
87
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
88
|
+
path_components = path.split('/').first(3)
|
89
|
+
path_strings = [ '/' ]
|
90
|
+
path_components.length.times do
|
91
|
+
path_strings << '/' + path_components.join('/')
|
92
|
+
path_components.pop
|
84
93
|
end
|
85
|
-
end
|
86
|
-
path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
|
87
|
-
path_strings.compact!
|
88
|
-
path_strings.uniq!
|
89
94
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
p
|
95
|
+
path_strings.map! do |p|
|
96
|
+
unless p.index('.')
|
97
|
+
p + '/'
|
94
98
|
else
|
95
|
-
|
99
|
+
p
|
96
100
|
end
|
97
101
|
end
|
98
|
-
|
99
|
-
|
102
|
+
path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
|
103
|
+
path_strings.compact!
|
104
|
+
path_strings.uniq!
|
105
|
+
|
106
|
+
if params
|
107
|
+
path_strings | path_strings.map do |p|
|
108
|
+
if p[-1..-1] == '/'
|
109
|
+
p
|
110
|
+
else
|
111
|
+
"#{p}?#{params}"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
else
|
115
|
+
return path_strings
|
116
|
+
end
|
100
117
|
end
|
101
|
-
end
|
102
118
|
|
119
|
+
# Returns the cartesian product of two arrays by concatenation of the string representation of the elements
|
120
|
+
#
|
121
|
+
# @param (Array) a_one array of strings
|
122
|
+
# @param (Array) a_two array of strings
|
123
|
+
# @return (Array) cartesian product of arrays with elements concatenated
|
103
124
|
def self.cart_prod(a_one, a_two)
|
104
125
|
result = []
|
105
126
|
a_one.each do |i|
|
@@ -110,6 +131,10 @@ module GoogleSafeBrowsing
|
|
110
131
|
result
|
111
132
|
end
|
112
133
|
|
134
|
+
# Takes the canonicalized url and splits the host and the path apart
|
135
|
+
#
|
136
|
+
# @param (String) cann canonicalized url string
|
137
|
+
# @return (Hash) !{ :host => host_part, :path => path_part }
|
113
138
|
def self.split_host_path(cann)
|
114
139
|
ret= { :host => cann, :path => '' }
|
115
140
|
split_point = cann.index('/')
|
@@ -121,11 +146,19 @@ module GoogleSafeBrowsing
|
|
121
146
|
ret
|
122
147
|
end
|
123
148
|
|
149
|
+
# Strips the fragment portion of the url string (the last '#' and everything after)
|
150
|
+
#
|
151
|
+
# @param (String) string url
|
152
|
+
# @return (String) parameter with the fragment removed
|
124
153
|
def self.remove_fragment(string)
|
125
154
|
string = string[0..string.index('#')-1] if string.index('#')
|
126
155
|
string
|
127
156
|
end
|
128
157
|
|
158
|
+
# Continues to unescape the url until unescaping has no effect
|
159
|
+
#
|
160
|
+
# @param (String) url url string
|
161
|
+
# @return (String) fully unescaped url string
|
129
162
|
def self.recursively_unescape(url)
|
130
163
|
compare_url = url.clone
|
131
164
|
url = URI.unescape(url)
|
@@ -136,6 +169,10 @@ module GoogleSafeBrowsing
|
|
136
169
|
url
|
137
170
|
end
|
138
171
|
|
172
|
+
# Apply initial fixes to host string
|
173
|
+
#
|
174
|
+
# @param (String) host host string
|
175
|
+
# @return (String) standardized host string
|
139
176
|
def self.fix_host(host)
|
140
177
|
#puts "In Host: #{host}"
|
141
178
|
# remove leading and trailing dots, multiple dots to one
|
@@ -149,6 +186,10 @@ module GoogleSafeBrowsing
|
|
149
186
|
host
|
150
187
|
end
|
151
188
|
|
189
|
+
# Apply initial fixes to path string
|
190
|
+
#
|
191
|
+
# @param (String) path path string
|
192
|
+
# @return (String) standardized path string
|
152
193
|
def self.fix_path(path)
|
153
194
|
#puts "In Path: #{path}"
|
154
195
|
|
@@ -179,6 +220,10 @@ module GoogleSafeBrowsing
|
|
179
220
|
path
|
180
221
|
end
|
181
222
|
|
223
|
+
# Escape the url, but do not escape certain characters, such as the caret
|
224
|
+
#
|
225
|
+
# @param (String) url url string
|
226
|
+
# @return (String) escaped url string
|
182
227
|
def self.strict_escape(url)
|
183
228
|
url = URI.escape url
|
184
229
|
|
@@ -188,6 +233,10 @@ module GoogleSafeBrowsing
|
|
188
233
|
url
|
189
234
|
end
|
190
235
|
|
236
|
+
# Strip the leading protocol from the url string
|
237
|
+
#
|
238
|
+
# @param (String) cann url string
|
239
|
+
# @return (String) url string without the protocol
|
191
240
|
def self.remove_protocol(cann)
|
192
241
|
if cann.index(PROTOCOL_DELIMITER)
|
193
242
|
delimiting_index = cann.index(PROTOCOL_DELIMITER)
|
@@ -198,11 +247,19 @@ module GoogleSafeBrowsing
|
|
198
247
|
cann
|
199
248
|
end
|
200
249
|
|
250
|
+
# Strip the user name, password and port number from the url
|
251
|
+
#
|
252
|
+
# @param (String) host_string host portion of the url
|
253
|
+
# @return (String) host portion of the url without the username, password and port
|
201
254
|
def self.strip_username_password_and_port_from_host(host_string)
|
202
255
|
host_string = remove_port(host_string)
|
203
256
|
remove_username_and_password(host_string)
|
204
257
|
end
|
205
258
|
|
259
|
+
# Strip port number from host string
|
260
|
+
#
|
261
|
+
# @param (see strip_username_password_and_port_from_host)
|
262
|
+
# @return (String) host part without the port number
|
206
263
|
def self.remove_port(host_string)
|
207
264
|
port_sep = host_string.rindex(':')
|
208
265
|
if port_sep
|
@@ -212,6 +269,10 @@ module GoogleSafeBrowsing
|
|
212
269
|
end
|
213
270
|
end
|
214
271
|
|
272
|
+
# Strip user name and password from host part of url
|
273
|
+
#
|
274
|
+
# @param (see remove_port)
|
275
|
+
# @return (String) host part of url without user name or password
|
215
276
|
def self.remove_username_and_password(host_string)
|
216
277
|
un_sep = host_string.index('@')
|
217
278
|
if un_sep
|
@@ -20,6 +20,9 @@ module GoogleSafeBrowsing
|
|
20
20
|
tld
|
21
21
|
end
|
22
22
|
|
23
|
+
# return array of host components (www, example, com from www.example.com)
|
24
|
+
# taking top level domains into account
|
25
|
+
# e.g. 'sub.domain.example.co.uk' => [ 'sub', 'domain', 'example', 'co.uk' ]
|
23
26
|
def self.split_from_host(host)
|
24
27
|
components = host.split('.')
|
25
28
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_safe_browsing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Chris Marshall
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-04-09 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
prerelease: false
|
@@ -136,7 +136,7 @@ files:
|
|
136
136
|
- MIT-LICENSE
|
137
137
|
- Rakefile
|
138
138
|
- README.mkd
|
139
|
-
homepage: https://github.com/
|
139
|
+
homepage: https://github.com/mobiledefense/mobiledefense_google_safe_browsing
|
140
140
|
licenses: []
|
141
141
|
|
142
142
|
post_install_message:
|