google_safe_browsing_redis 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/google_safe_browsing.rb +52 -8
  2. metadata +1 -1
@@ -54,21 +54,29 @@ class GoogleSafeBrowsing
54
54
  hosts, paths = get_possible_hosts_paths(parts)
55
55
 
56
56
  # get all possible host+path combination hash prefixes
57
- hostpaths = get_hash_prefixes(hosts.product(paths).collect{|a, b| a + b})
57
+ full_urls = hosts.product(paths).collect{|a, b| a + b}
58
+ prefixes = get_hash_prefixes(full_urls)
59
+ full_url_hashes = get_hashes(full_urls)
58
60
 
59
61
  # add a trailing slash to all hosts, and get their hash prefixes
60
- hosts = get_hash_prefixes(hosts.collect{|a| a + '/'})
62
+ host_hash_prefixes = get_hash_prefixes(hosts.collect{|a| a + '/'})
61
63
 
64
+ host_num = 0
62
65
  $lists.each do |list|
63
- hosts.each do |host|
66
+ host_hash_prefixes.each do |host|
64
67
  is_member = $redis.sismember("#{list}:hosts", host)
65
68
  if(is_member)
66
69
  suffixes = $redis.smembers("#{list}:host_#{host}")
67
- if(suffixes.length == 0 || suffixes & hostpaths != [])
68
- say("URL matches a list: #{list} (#{url})")
69
- return list
70
+ hits = suffixes & prefixes
71
+ if(suffixes.length == 0 || hits != [])
72
+ full_hashes = get_full_hashes(hits)
73
+ if(full_url_hashes & full_hashes != [])
74
+ say("URL matches a list: #{list} (#{url})")
75
+ return list
76
+ end
70
77
  end
71
78
  end
79
+ host_num += 1
72
80
  end
73
81
  end
74
82
 
@@ -81,6 +89,31 @@ class GoogleSafeBrowsing
81
89
  return Canonicalize::canonicalize(url)
82
90
  end
83
91
 
92
# Ask the "gethash" API endpoint for the full-length hashes behind a set of
# hash prefixes.
#
# The request body is a "4:<total bytes>" header line followed by the raw
# (binary) prefixes. The response is read as a sequence of entries, each a
# "list:chunk:length" header line followed by <length> bytes of hash data.
# That data may hold several 32-byte hashes back to back, so it is split
# into individual hashes rather than treated as one value.
#
# @param prefixes [Array<String>] hex-encoded hash prefixes; the request
#   header assumes each one is 4 bytes (8 hex digits) long
# @return [Array<String>] hex-encoded 32-byte hashes from the response;
#   empty when there are no prefixes or api_request returns nil
def get_full_hashes(prefixes)
  # Nothing to look up, so skip the network round trip entirely.
  return [] if prefixes.empty?

  packed_prefixes = prefixes.map { |prefix| [prefix].pack('H*') }.join
  body = "4:#{prefixes.length * 4}\n" + packed_prefixes

  response = api_request("gethash", body)
  return [] if response.nil?

  reader = StringIO.new(response)
  full_hashes = []
  while (header = reader.gets)
    # Third header field is the byte length of the hash data that follows;
    # a malformed header yields 0 and contributes no hashes.
    chunk_len = header.split(':')[2].to_i
    data = reader.read(chunk_len)
    # read returns nil when the response ends before the announced data.
    break if data.nil?

    # 'H64' decodes 32 bytes into 64 hex digits; one directive per whole
    # hash in the chunk. Trailing bytes short of a full hash are ignored.
    full_hashes.concat(data.unpack('H64' * (data.bytesize / 32)))
  end

  full_hashes
end
116
+
84
117
  # convert an array of strings into an array of 32 bit hash prefixes
85
118
  def get_hash_prefixes(items)
86
119
  prefixes = []
@@ -91,6 +124,16 @@ class GoogleSafeBrowsing
91
124
  return prefixes
92
125
  end
93
126
 
127
+ # convert an array of strings into an array of hashes
128
# Compute the full SHA-256 digest of every string, in input order.
#
# @param items [Array<String>] strings to hash
# @return [Array<String>] lowercase hex digests, one per item
def get_hashes(items)
  items.map { |item| Digest::SHA2.new.hexdigest(item) }
end
136
+
94
137
  # expand a url into its possible host-path combinations according to the Google API
95
138
  def get_possible_hosts_paths(parts)
96
139
  case parts['host']
@@ -174,6 +217,7 @@ class GoogleSafeBrowsing
174
217
  request_body += "\n"
175
218
  end
176
219
 
220
+ say "Request body: #{request_body}"
177
221
  response = api_request("downloads", request_body)
178
222
  response = response.split("\n")
179
223
 
@@ -429,8 +473,8 @@ class GoogleSafeBrowsing
429
473
  http = Net::HTTP.new(uri.host, uri.port)
430
474
  request = Net::HTTP::Post.new(uri.request_uri)
431
475
  request.body = body || ''
432
- response = http.request(request).body
433
- return response
476
+ response = http.request(request)
477
+ return response.body
434
478
  end
435
479
 
436
480
  def say(msg)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_safe_browsing_redis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: