google_safe_browsing_redis 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/google_safe_browsing.rb +52 -8
  2. metadata +1 -1
@@ -54,21 +54,29 @@ class GoogleSafeBrowsing
54
54
  hosts, paths = get_possible_hosts_paths(parts)
55
55
 
56
56
  # get all possible host+path combination hash prefixes
57
- hostpaths = get_hash_prefixes(hosts.product(paths).collect{|a, b| a + b})
57
+ full_urls = hosts.product(paths).collect{|a, b| a + b}
58
+ prefixes = get_hash_prefixes(full_urls)
59
+ full_url_hashes = get_hashes(full_urls)
58
60
 
59
61
  # add a trailing slash to all hosts, and get their hash prefixes
60
- hosts = get_hash_prefixes(hosts.collect{|a| a + '/'})
62
+ host_hash_prefixes = get_hash_prefixes(hosts.collect{|a| a + '/'})
61
63
 
64
+ host_num = 0
62
65
  $lists.each do |list|
63
- hosts.each do |host|
66
+ host_hash_prefixes.each do |host|
64
67
  is_member = $redis.sismember("#{list}:hosts", host)
65
68
  if(is_member)
66
69
  suffixes = $redis.smembers("#{list}:host_#{host}")
67
- if(suffixes.length == 0 || suffixes & hostpaths != [])
68
- say("URL matches a list: #{list} (#{url})")
69
- return list
70
+ hits = suffixes & prefixes
71
+ if(suffixes.length == 0 || hits != [])
72
+ full_hashes = get_full_hashes(hits)
73
+ if(full_url_hashes & full_hashes != [])
74
+ say("URL matches a list: #{list} (#{url})")
75
+ return list
76
+ end
70
77
  end
71
78
  end
79
+ host_num += 1
72
80
  end
73
81
  end
74
82
 
@@ -81,6 +89,31 @@ class GoogleSafeBrowsing
81
89
  return Canonicalize::canonicalize(url)
82
90
  end
83
91
 
92
# Ask the "gethash" API endpoint for the full-length hashes that correspond
# to a set of hash prefixes.
#
# prefixes - Array of hex-encoded hash prefixes. The request header
#            advertises 4 bytes per prefix, so each entry is expected to be
#            8 hex digits (as produced by get_hash_prefixes -- presumably;
#            confirm against that method).
#
# Returns an Array of lowercase hex strings, one per complete 32-byte hash
# found in the response. Returns [] when there is nothing to look up or when
# api_request returns nil.
def get_full_hashes(prefixes)
  # No prefixes means no possible hits; skip the network round trip.
  return [] if prefixes.empty?

  # Request body: "PREFIXSIZE:TOTAL_LENGTH\n" followed by the raw prefix bytes.
  body = "4:#{prefixes.length * 4}\n" + prefixes.map { |prefix| [prefix].pack('H*') }.join

  response = api_request("gethash", body)
  return [] if response.nil?

  stream = StringIO.new(response)
  full_hashes = []

  # Each entry is a "LIST:CHUNKNUM:DATALEN\n" header followed by DATALEN raw
  # bytes. Only the length field is needed to consume the entry.
  while (header = stream.gets)
    chunk_len = header.split(':')[2].to_i
    data = stream.read(chunk_len)

    # read returns nil at EOF: the response was cut off after a header line.
    break if data.nil?

    # One entry may carry several 32-byte hashes back to back. Split them so
    # each can be compared individually against a 64-hex-digit SHA-256
    # digest; a trailing partial hash can never match and is dropped.
    full_hashes.concat(data.unpack('H*').first.scan(/[0-9a-f]{64}/))
  end

  full_hashes
end
116
+
84
117
  # convert an array of strings into an array of 32 bit hash prefixes
85
118
  def get_hash_prefixes(items)
86
119
  prefixes = []
@@ -91,6 +124,16 @@ class GoogleSafeBrowsing
91
124
  return prefixes
92
125
  end
93
126
 
127
# Map every string in +items+ to its SHA-2 (256-bit default) digest,
# rendered as a lowercase hex string. Order is preserved; an empty input
# produces an empty array.
def get_hashes(items)
  items.map { |entry| Digest::SHA2.hexdigest(entry) }
end
136
+
94
137
  # expand a url into its possible host-path combinations according to the Google API
95
138
  def get_possible_hosts_paths(parts)
96
139
  case parts['host']
@@ -174,6 +217,7 @@ class GoogleSafeBrowsing
174
217
  request_body += "\n"
175
218
  end
176
219
 
220
+ say "Request body: #{request_body}"
177
221
  response = api_request("downloads", request_body)
178
222
  response = response.split("\n")
179
223
 
@@ -429,8 +473,8 @@ class GoogleSafeBrowsing
429
473
  http = Net::HTTP.new(uri.host, uri.port)
430
474
  request = Net::HTTP::Post.new(uri.request_uri)
431
475
  request.body = body || ''
432
- response = http.request(request).body
433
- return response
476
+ response = http.request(request)
477
+ return response.body
434
478
  end
435
479
 
436
480
  def say(msg)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_safe_browsing_redis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: