google_safe_browsing_redis 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/google_safe_browsing.rb +52 -8
- metadata +1 -1
data/lib/google_safe_browsing.rb
CHANGED
@@ -54,21 +54,29 @@ class GoogleSafeBrowsing
|
|
54
54
|
hosts, paths = get_possible_hosts_paths(parts)
|
55
55
|
|
56
56
|
# get all possible host+path combination hash prefixes
|
57
|
-
|
57
|
+
full_urls = hosts.product(paths).collect{|a, b| a + b}
|
58
|
+
prefixes = get_hash_prefixes(full_urls)
|
59
|
+
full_url_hashes = get_hashes(full_urls)
|
58
60
|
|
59
61
|
# add a trailing slash to all hosts, and get their hash prefixes
|
60
|
-
|
62
|
+
host_hash_prefixes = get_hash_prefixes(hosts.collect{|a| a + '/'})
|
61
63
|
|
64
|
+
host_num = 0
|
62
65
|
$lists.each do |list|
|
63
|
-
|
66
|
+
host_hash_prefixes.each do |host|
|
64
67
|
is_member = $redis.sismember("#{list}:hosts", host)
|
65
68
|
if(is_member)
|
66
69
|
suffixes = $redis.smembers("#{list}:host_#{host}")
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
+
hits = suffixes & prefixes
|
71
|
+
if(suffixes.length == 0 || hits != [])
|
72
|
+
full_hashes = get_full_hashes(hits)
|
73
|
+
if(full_url_hashes & full_hashes != [])
|
74
|
+
say("URL matches a list: #{list} (#{url})")
|
75
|
+
return list
|
76
|
+
end
|
70
77
|
end
|
71
78
|
end
|
79
|
+
host_num += 1
|
72
80
|
end
|
73
81
|
end
|
74
82
|
|
@@ -81,6 +89,31 @@ class GoogleSafeBrowsing
|
|
81
89
|
return Canonicalize::canonicalize(url)
|
82
90
|
end
|
83
91
|
|
92
|
+
# Ask the "gethash" API endpoint for the full hashes behind a set of hash
# prefixes.
#
# prefixes - Array of hex-encoded prefix strings. The request header declares
#            4 bytes per prefix, so each entry is expected to be 8 hex chars.
#
# Returns an Array of hex-encoded full hashes, one entry per 32-byte hash found
# in the response. Returns [] when there are no prefixes to ask about or when
# api_request yields nil.
def get_full_hashes(prefixes)
  # nothing to look up, so skip the network round trip entirely
  return [] if prefixes.empty?

  # request format: "PREFIXSIZE:TOTALBYTES\n" followed by the raw prefix bytes
  payload = prefixes.map { |prefix| [prefix].pack('H*') }.join
  body = "4:#{prefixes.length * 4}\n" + payload

  response = api_request("gethash", body)
  return [] if response.nil?

  response = StringIO.new(response)
  full_hashes = []
  while (line = response.gets)
    # header line is "LIST:CHUNKNUM:LENGTH"; only LENGTH is needed to read the
    # hash data that follows it
    chunk_len = line.split(':')[2].to_i
    data = response.read(chunk_len)
    break if data.nil? # response ended before the announced data arrived

    # a chunk can carry several hashes back to back; split the hex dump into
    # 32-byte (64 hex character) entries so each one can be compared with a
    # single URL hash
    full_hashes.concat(data.unpack('H*').first.scan(/.{1,64}/))
  end

  full_hashes
end
|
116
|
+
|
84
117
|
# convert an array of strings into an array of 32 bit hash prefixes
|
85
118
|
def get_hash_prefixes(items)
|
86
119
|
prefixes = []
|
@@ -91,6 +124,16 @@ class GoogleSafeBrowsing
|
|
91
124
|
return prefixes
|
92
125
|
end
|
93
126
|
|
127
|
+
# convert an array of strings into an array of hashes
|
128
|
+
def get_hashes(items)
  # one hex digest string per item, in the same order as the input
  items.map { |entry| Digest::SHA2.new.update(entry).to_s }
end
|
136
|
+
|
94
137
|
# expand a url into its possible host-path combinations according to the Google API
|
95
138
|
def get_possible_hosts_paths(parts)
|
96
139
|
case parts['host']
|
@@ -174,6 +217,7 @@ class GoogleSafeBrowsing
|
|
174
217
|
request_body += "\n"
|
175
218
|
end
|
176
219
|
|
220
|
+
say "Request body: #{request_body}"
|
177
221
|
response = api_request("downloads", request_body)
|
178
222
|
response = response.split("\n")
|
179
223
|
|
@@ -429,8 +473,8 @@ class GoogleSafeBrowsing
|
|
429
473
|
http = Net::HTTP.new(uri.host, uri.port)
|
430
474
|
request = Net::HTTP::Post.new(uri.request_uri)
|
431
475
|
request.body = body || ''
|
432
|
-
response = http.request(request)
|
433
|
-
return response
|
476
|
+
response = http.request(request)
|
477
|
+
return response.body
|
434
478
|
end
|
435
479
|
|
436
480
|
def say(msg)
|