google_safe_browsing_redis 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/google_safe_browsing.rb +52 -8
- metadata +1 -1
data/lib/google_safe_browsing.rb
CHANGED
@@ -54,21 +54,29 @@ class GoogleSafeBrowsing
|
|
54
54
|
hosts, paths = get_possible_hosts_paths(parts)
|
55
55
|
|
56
56
|
# get all possible host+path combination hash prefixes
|
57
|
-
|
57
|
+
full_urls = hosts.product(paths).collect{|a, b| a + b}
|
58
|
+
prefixes = get_hash_prefixes(full_urls)
|
59
|
+
full_url_hashes = get_hashes(full_urls)
|
58
60
|
|
59
61
|
# add a trailing slash to all hosts, and get their hash prefixes
|
60
|
-
|
62
|
+
host_hash_prefixes = get_hash_prefixes(hosts.collect{|a| a + '/'})
|
61
63
|
|
64
|
+
host_num = 0
|
62
65
|
$lists.each do |list|
|
63
|
-
|
66
|
+
host_hash_prefixes.each do |host|
|
64
67
|
is_member = $redis.sismember("#{list}:hosts", host)
|
65
68
|
if(is_member)
|
66
69
|
suffixes = $redis.smembers("#{list}:host_#{host}")
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
+
hits = suffixes & prefixes
|
71
|
+
if(suffixes.length == 0 || hits != [])
|
72
|
+
full_hashes = get_full_hashes(hits)
|
73
|
+
if(full_url_hashes & full_hashes != [])
|
74
|
+
say("URL matches a list: #{list} (#{url})")
|
75
|
+
return list
|
76
|
+
end
|
70
77
|
end
|
71
78
|
end
|
79
|
+
host_num += 1
|
72
80
|
end
|
73
81
|
end
|
74
82
|
|
@@ -81,6 +89,31 @@ class GoogleSafeBrowsing
|
|
81
89
|
return Canonicalize::canonicalize(url)
|
82
90
|
end
|
83
91
|
|
92
|
+
# Ask the "gethash" API endpoint for the full-length hashes behind a set of
# hash prefixes.
#
# @param prefixes [Array<String>] hex-encoded hash prefixes; the request
#   header declares 4 bytes per prefix, so each entry should be 8 hex digits
# @return [Array<String>] hex-encoded full hashes parsed from the response;
#   empty when there is nothing to ask for or api_request returns nil
def get_full_hashes(prefixes)
  # nothing to look up, so skip the network round trip entirely
  return [] if prefixes.empty?

  # request body: "<prefix size>:<total byte length>\n" followed by the raw prefix bytes
  packed_prefixes = prefixes.map { |prefix| [prefix].pack('H*') }.join
  body = "4:#{prefixes.length * 4}\n" + packed_prefixes

  response = api_request("gethash", body)
  return [] unless response

  # a full hash is 32 raw bytes, i.e. 64 hex digits
  hex_digits_per_hash = 64

  response = StringIO.new(response)
  full_hashes = []
  while (line = response.gets)
    # entry header is "<list>:<chunk number>:<data length>"; only the length is used here
    chunk_len = line.split(':')[2].to_i
    data = response.read(chunk_len)
    # read returns nil once the response is exhausted (truncated entry): stop instead of raising
    break unless data

    # one entry may carry several full hashes back to back, so emit each one
    # separately; otherwise a multi-hash entry could never equal a single URL hash
    full_hashes.concat(data.unpack('H*').join.scan(/.{1,#{hex_digits_per_hash}}/))
  end

  full_hashes
end
|
116
|
+
|
84
117
|
# convert an array of strings into an array of 32 bit hash prefixes
|
85
118
|
def get_hash_prefixes(items)
|
86
119
|
prefixes = []
|
@@ -91,6 +124,16 @@ class GoogleSafeBrowsing
|
|
91
124
|
return prefixes
|
92
125
|
end
|
93
126
|
|
127
|
+
# convert an array of strings into an array of full-length hashes
#
# @param items [Array<String>] strings to hash
# @return [Array<String>] hex-encoded Digest::SHA2 digest (default bit length)
#   of each item, in the same order as the input
def get_hashes(items)
  items.map { |item| Digest::SHA2.hexdigest(item) }
end
|
136
|
+
|
94
137
|
# expand a url into its possible host-path combinations according to the Google API
|
95
138
|
def get_possible_hosts_paths(parts)
|
96
139
|
case parts['host']
|
@@ -174,6 +217,7 @@ class GoogleSafeBrowsing
|
|
174
217
|
request_body += "\n"
|
175
218
|
end
|
176
219
|
|
220
|
+
say "Request body: #{request_body}"
|
177
221
|
response = api_request("downloads", request_body)
|
178
222
|
response = response.split("\n")
|
179
223
|
|
@@ -429,8 +473,8 @@ class GoogleSafeBrowsing
|
|
429
473
|
http = Net::HTTP.new(uri.host, uri.port)
|
430
474
|
request = Net::HTTP::Post.new(uri.request_uri)
|
431
475
|
request.body = body || ''
|
432
|
-
response = http.request(request)
|
433
|
-
return response
|
476
|
+
response = http.request(request)
|
477
|
+
return response.body
|
434
478
|
end
|
435
479
|
|
436
480
|
def say(msg)
|