google-safe-browsing-plugin 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +20 -0
- data/README.md +88 -0
- data/lib/faraday/response/safe_browsing_update_parser.rb +119 -0
- data/lib/google/safe_browsing_client.rb +211 -0
- data/lib/google/safe_browsing_parser.rb +214 -0
- data/lib/google/safe_browsing_update_helper.rb +171 -0
- data/lib/google/sha_util.rb +22 -0
- data/lib/google/url_canonicalizer.rb +36 -0
- data/lib/google/url_scramble.rb +54 -0
- data/lib/google_safe_browsing_plugin.rb +29 -0
- data/lib/rails/generators/google/config/config_generator.rb +16 -0
- data/lib/rails/generators/google/config/templates/google_safe_browsing.yml +16 -0
- data/lib/rails/generators/google/helper/helper_generator.rb +16 -0
- data/lib/rails/generators/google/helper/templates/safe_browsing_helper.rb +168 -0
- data/lib/rails/generators/google/install_generator.rb +20 -0
- data/lib/rails/generators/google/model/model_generator.rb +47 -0
- data/lib/rails/generators/google/model/templates/create_google_functions.rb +18 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_full_hash_requests.rb +22 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_full_hashes.rb +20 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_list.rb +15 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_redirect_urls.rb +26 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_shavar.rb +27 -0
- data/lib/rails/generators/google/model/templates/google.rb +2 -0
- data/lib/rails/generators/google/model/templates/google/error.rb +11 -0
- data/lib/rails/generators/google/model/templates/google/function.rb +6 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_full_hash.rb +7 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_full_hash_request.rb +19 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_list.rb +41 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_redirect_url.rb +36 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_shavar.rb +38 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_update.rb +77 -0
- data/lib/rails/generators/google/rspec/rspec_generator.rb +28 -0
- data/lib/rails/generators/google/rspec/templates/bin_sample_1.data +0 -0
- data/lib/rails/generators/google/rspec/templates/bin_sample_2.data +0 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_parse_spec.rb +58 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_0.data +0 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_1.data +0 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_2.data +3 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_3.data +3 -0
- data/lib/rails/generators/google/rspec/templates/shavar_encode_data_parse_spec.rb +56 -0
- data/lib/rails/generators/google/rspec/templates/shavar_list_info_parse_spec.rb +48 -0
- data/lib/safe_browsing_task.rb +5 -0
- data/lib/tasks/google.rake +122 -0
- metadata +222 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 stonelonely
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
## Google Safe Browsing Plugin
|
2
|
+
|
3
|
+
A Rails 3 plugin for [Google Safe Browsing API v2](https://developers.google.com/safe-browsing/developers_guide_v2).
|
4
|
+
It supports Google's malware and phishing lists.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add the plugin to your Gemfile
|
9
|
+
|
10
|
+
gem 'google_safe_browsing_plugin', '~> 0.1'
|
11
|
+
|
12
|
+
After bundle install, run the following to generate the db migration, model classes and other code
|
13
|
+
|
14
|
+
bundle install
|
15
|
+
bundle exec rails g google:install
|
16
|
+
|
17
|
+
Run the migrations generated in the previous step, and then seed the database
|
18
|
+
|
19
|
+
bundle exec rake db:migrate
|
20
|
+
bundle exec rake google:safe_browsing:db_seed
|
21
|
+
|
22
|
+
Edit the configuration file with your Google API key
|
23
|
+
|
24
|
+
# Edit config/google_safe_browsing.yml, and put your real API key in line 2.
|
25
|
+
|
26
|
+
|
27
|
+
## Build the hash prefix data locally
|
28
|
+
|
29
|
+
The plugin stores the hash prefixes in the relational database. The following rake task needs to be run under a _cron schedule_ to keep the local data in sync with the Google server lists. Several runs may be required initially before the local hash prefix set is complete enough for any meaningful full hash lookup. The first run may take a while because it needs to download quite a bit of data and store it in the local database. The initial run of the rake task could generate a quarter of a million shavar records in the database.
|
30
|
+
|
31
|
+
bundle exec rake google:safe_browsing:load_remote
|
32
|
+
|
33
|
+
## Url lookup
|
34
|
+
|
35
|
+
After you run the _'load_remote'_ rake task several times, your local cache of the hash prefixes will be ready. Now you can start to do url lookup. Start the Rails console, and then try the following
|
36
|
+
|
37
|
+
url = 'financestudyhelp.com'
|
38
|
+
r = Google::SafeBrowsingHelper.lookup_url url
|
39
|
+
|
40
|
+
Since the Google Safe Browsing data gets updated frequently, the previous query may not necessarily generate a hit on malware. Other urls you can try are 'http://gumblar.cn' and 'http://ianfette.org'.
|
41
|
+
|
42
|
+
Upon a match on full-length hash lookup, the _lookup\_url_ call will return a hash object that contains the match. E.g.
|
43
|
+
|
44
|
+
{"financestudyhelp.com"=>["goog-malware-shavar"]}
|
45
|
+
|
46
|
+
The key of the hash is the url that was queried. The array value holds the names of the lists that matched. If the url is both a malware and
|
47
|
+
a phishing link, the value will be
|
48
|
+
|
49
|
+
["goog-malware-shavar","googpub-phish-shavar"]
|
50
|
+
|
51
|
+
If the url is neither a malware nor phishing link, the lookup result will be an empty array [].
|
52
|
+
|
53
|
+
|
54
|
+
## Uninstall
|
55
|
+
|
56
|
+
If you want to uninstall the gem and remove the generated files
|
57
|
+
|
58
|
+
rails d google:install
|
59
|
+
|
60
|
+
## Features and limitations
|
61
|
+
|
62
|
+
* The plugin does not hide the ActiveRecord models and the helper inside the Gem. It instead uses the generator function provided by Rails and Thor to copy the template model/migration/helper code to the Rails application source tree. The generated code is under the _Google_ namespace.
|
63
|
+
* The plugin uses the relational database as the data store. But it doesn't have to. Redis may be a better choice considering lookup speed and cache expiry. But the local data has to be kept under a certain limit. The tables generated from the migration have _google_ as the prefix.
|
64
|
+
* The hash prefix is stored in the database as plain text rather than in a binary/encoded format.
|
65
|
+
* The config parameters, in _config/google\_safe\_browsing.yml_, can be changed to your need. E.g. _full\_length\_hash\_expires_ is for how long the full-length hash will be cached locally.
|
66
|
+
* The plugin has a backoff strategy built in for when errors happen. So if you notice that the full-length hash request doesn't go to Google, it is probably still in the backoff mode that prevents the request from being sent to Google.
|
67
|
+
* Some RSpec tests are provided in the spec folder.
|
68
|
+
* The _google:safe\_browsing:load\_remote_ rake task cannot be run repeatedly within a short time span. It honors the _NEXT_ instruction from Google's response. This value is kept in _google\_functions#next\_updated\_at_. You have to change the value if you want to run it immediately after the previous run.
|
69
|
+
* When Google responds to a request with a _'reset'_ instruction, the plugin will _not_ clean up the local data by default. You can change the behavior to reset the data by adding the following configuration in application.rb or the environment file in your Rails app:
|
70
|
+
<pre><code>
|
71
|
+
# This will reset your local data
|
72
|
+
config.google_safe_browsing_upon_reset = lambda {
|
73
|
+
Google::SafeBrowsingShavar.delete_all
|
74
|
+
}
|
75
|
+
</code></pre>
|
76
|
+
|
77
|
+
* _'rekey'_ instruction is not supported.
|
78
|
+
* The plugin works for Rails 3.2. But it should be relatively trivial to make it work with other Rails versions. The reason for using Rails is mostly because of the value ActiveRecord provides for the data mapping and store. The download, parsing and encoding/decoding of the Google Safe Browsing data do not have to use Rails.
|
79
|
+
|
80
|
+
|
81
|
+
## Reference
|
82
|
+
|
83
|
+
* [Safe Browsing API v2](https://developers.google.com/safe-browsing/developers_guide_v2)
|
84
|
+
* [google-safe-browsing](http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec) project on Google code.
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
Copyright (c) 2013 stonelonely and contributors, released under the MIT license.
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
|
3
|
+
module Faraday
|
4
|
+
class Response
|
5
|
+
|
6
|
+
class SafeBrowsingUpdateParser < ::Faraday::Response::Middleware
|
7
|
+
|
8
|
+
REKEY ||= /(e):(pleaserekey)/
|
9
|
+
NEXT ||= /(n):(\d+)/
|
10
|
+
RESET ||= /(r):(pleasereset)/
|
11
|
+
LIST ||= /(i):(.+)/
|
12
|
+
|
13
|
+
MIX_LINE ||= /(i|u|ad|sd):(.+)/
|
14
|
+
CHUNK_LIST ||= /(\d+-\d+|\d+)/
|
15
|
+
|
16
|
+
# define_parser do |body|
|
17
|
+
# Parses the body of a data-update response.
#
# A new Google::SafeBrowsingUpdate is stored in @update_obj, filled in by
# parse_data_response, and returned.
#
# @param body [String] raw response text
# @return [Google::SafeBrowsingUpdate] the populated update object
def parse(body)
  (@update_obj = Google::SafeBrowsingUpdate.new).tap do
    parse_data_response(body)
  end
end
|
22
|
+
|
23
|
+
#
|
24
|
+
# BODY = [(REKEY | MAC) LF] NEXT LF (RESET | (LIST LF)+) EOF
|
25
|
+
# NEXT = "n:" DIGIT+ # Minimum delay before polling again in seconds
|
26
|
+
# REKEY = "e:pleaserekey"
|
27
|
+
# RESET = "r:pleasereset"
|
28
|
+
# LIST = "i:" LISTNAME [MAC] (LF LISTDATA)+
|
29
|
+
# LISTNAME = (LOALPHA | DIGIT | "-")+ # e.g. "goog-phish-sha128"
|
30
|
+
# MAC = "," (LOALPHA | DIGIT)+
|
31
|
+
# LISTDATA = ((REDIRECT_URL | ADDDEL-HEAD | SUBDEL-HEAD) LF)+
|
32
|
+
# REDIRECT_URL = "u:" URL [MAC]
|
33
|
+
# URL = Defined in RFC 1738
|
34
|
+
# ADDDEL-HEAD = "ad:" CHUNKLIST
|
35
|
+
# SUBDEL-HEAD = "sd:" CHUNKLIST
|
36
|
+
# CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
|
37
|
+
# NUMBER = DIGIT+ # Chunk number >= 1
|
38
|
+
# RANGE = NUMBER "-" NUMBER
|
39
|
+
#
|
40
|
+
# Walks the response line by line and records what it finds on @update_obj.
#
# Expected order of lines: an optional REKEY line, the NEXT line, then either
# a RESET line or a LIST line followed by any number of i:/u:/ad:/sd: lines.
# Parsing stops right after NEXT when a rekey was requested, and right after
# RESET when a reset was requested.
#
# @param body [String] raw response text
# @return [nil]
def parse_data_response(body)
  remaining = body.split("\n")
  current = remaining.shift

  # An optional rekey instruction precedes the NEXT line.
  if REKEY =~ current
    @update_obj.rekey = true
    current = remaining.shift
  end

  # Whatever line we are on now has to be the NEXT line.
  parse_next(current)
  return if @update_obj.rekey

  current = remaining.shift
  if RESET =~ current
    @update_obj.reset = true
    return
  end

  # Not a reset, so this line must open the first LIST section.
  parse_list(current)
  remaining.each { |entry| parse_mix_line(entry) }
  nil
end
|
62
|
+
|
63
|
+
|
64
|
+
# Reads the polling delay from a NEXT line ("n:<digits>") and stores it,
# as an Integer, in @update_obj.next.
#
# @param line [#to_s] the candidate NEXT line (nil is tolerated via to_s)
# @raise [Google::Error::ParserError] when the line does not match NEXT
def parse_next(line)
  matched = NEXT.match(line.to_s)
  raise Google::Error::ParserError unless matched

  @update_obj.next = matched[2].to_i
end
|
72
|
+
|
73
|
+
# Reads a LIST line ("i:<name>") and makes that list the current one on
# @update_obj.
#
# @param line [#to_s] the candidate LIST line (nil is tolerated via to_s)
# @raise [Google::Error::ParserError] when the line does not match LIST
def parse_list(line)
  matched = LIST.match(line.to_s)
  raise Google::Error::ParserError unless matched

  @update_obj.set_current_list(matched[2])
end
|
81
|
+
|
82
|
+
# Handles one line that follows the first LIST line. The line's tag decides
# what happens:
#   i:  switch the current list on @update_obj
#   u:  append the (stripped) redirect URL to the current list's :u array
#   sd: parse a chunk list into the current list's :sd array
#   ad: parse a chunk list into the current list's :ad array
#
# @param line [#to_s] one response line
# @raise [Google::Error::ParserError] when the line does not match MIX_LINE
def parse_mix_line(line)
  matched = MIX_LINE.match(line.to_s)
  raise Google::Error::ParserError unless matched

  tag = matched[1]
  payload = matched[2]
  # Looked up before dispatching, i.e. the list that was current on entry.
  active_list = @update_obj.get_current_list

  case tag
  when 'i' then @update_obj.set_current_list(payload)
  when 'u' then active_list[:u] << payload.strip
  when 'sd', 'ad' then parse_chunk_list(payload, active_list[tag.to_sym])
  else raise Google::Error::ParserError
  end
end
|
100
|
+
|
101
|
+
# Expands a comma-separated chunk list such as "1-2,4-5,7" and appends each
# entry to cached_chunks: "low-high" entries become a Range, single numbers
# become an Integer.
#
# @param chunk_list [String] comma-separated chunk numbers and ranges
# @param cached_chunks [Array] collection the parsed entries are appended to
# @return [nil]
# @raise [Google::Error::ParserError] when an entry contains no chunk number
def parse_chunk_list(chunk_list, cached_chunks)
  chunk_list.split(',').each do |token|
    matched = CHUNK_LIST.match(token)
    raise Google::Error::ParserError unless matched

    spec = matched[0]
    entry =
      if spec.include?('-')
        first, last = spec.split('-').map(&:to_i)
        (first..last)
      else
        spec.to_i
      end
    cached_chunks << entry
  end
  nil
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
|
3
|
+
module Google
|
4
|
+
class SafeBrowsingClient
|
5
|
+
|
6
|
+
include Google::SafeBrowsingUpdateHelper
|
7
|
+
|
8
|
+
attr_accessor :headers
|
9
|
+
|
10
|
+
CLIENT ||= 'api'
|
11
|
+
API_KEY ||= Google::CONFIG['api_key']
|
12
|
+
APP_VER ||= Google::CONFIG['app_ver']
|
13
|
+
P_VER ||= Google::CONFIG['p_ver']
|
14
|
+
|
15
|
+
SEMI_COLON ||= ';'
|
16
|
+
COLON ||= ':'
|
17
|
+
NL ||= %Q(\n)
|
18
|
+
|
19
|
+
FULL_HASH_TIMEOUT = 2 # secs
|
20
|
+
|
21
|
+
# Creates a client whose default request headers contain only a User-Agent.
# The hash is exposed through the `headers` accessor and is passed to every
# Faraday connection this client builds.
def initialize
  @headers = { :'User-Agent' => 'Faraday Ruby Client' }
end
|
26
|
+
|
27
|
+
# Base URL (scheme and host) that the API request paths built by this client
# are resolved against.
#
# @return [String]
def api_server
  'https://safebrowsing.clients.google.com'
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# goog-malware-shavar
|
33
|
+
# goog-regtest-shavar
|
34
|
+
# goog-whitedomain-shavar
|
35
|
+
# googpub-phish-shavar
|
36
|
+
#
|
37
|
+
# Request path (with query string) of the "list" endpoint, filled in with the
# client id, API key, application version and protocol version constants.
#
# @return [String]
def list_url
  format('/safebrowsing/list?client=%s&apikey=%s&appver=%s&pver=%s',
         CLIENT, API_KEY, APP_VER, P_VER)
end
|
40
|
+
|
41
|
+
# Request path (with query string) of the "downloads" endpoint, filled in
# with the client id, API key, application version and protocol version
# constants.
#
# @return [String]
def download_url
  format('/safebrowsing/downloads?client=%s&apikey=%s&appver=%s&pver=%s',
         CLIENT, API_KEY, APP_VER, P_VER)
end
|
44
|
+
|
45
|
+
# Request path (with query string) of the "gethash" endpoint, filled in with
# the client id, API key, application version and protocol version constants.
#
# @return [String]
def full_hash_url
  format('/safebrowsing/gethash?client=%s&apikey=%s&appver=%s&pver=%s',
         CLIENT, API_KEY, APP_VER, P_VER)
end
|
48
|
+
|
49
|
+
#
|
50
|
+
# resp = <<-END_OF_SAMPLE
|
51
|
+
# n:1200
|
52
|
+
# i:googpub-phish-shavar
|
53
|
+
# u:cache.google.com/first_redirect_example
|
54
|
+
# u:cache.google.com/second_redirect_example
|
55
|
+
# sd:1,2
|
56
|
+
# i:acme-white-shavar
|
57
|
+
# u:cache.google.com/second_redirect_example
|
58
|
+
# ad:1-2,4-5,7
|
59
|
+
# sd:2-6
|
60
|
+
# END_OF_SAMPLE
|
61
|
+
#
|
62
|
+
# parser = Faraday::Response::SafeBrowsingUpdateParser.new
|
63
|
+
# r = parser.parse resp
|
64
|
+
#
|
65
|
+
# POSTs a data-update request for one list to the downloads endpoint.
#
# The connection runs the response through
# Faraday::Response::SafeBrowsingUpdateParser, so on HTTP 200 the returned
# body is whatever that middleware produced. Any other status is handed to
# process_error_response, which raises.
#
# @param list_name [String] name of the list to request updates for
# @param options [Hash] accepted for interface compatibility; not read here
# @return [Object] the parsed response body
def shavar_data_update(list_name, options = {})
  connection = Faraday.new(url: api_server, headers: headers) do |stack|
    stack.use Faraday::Request::UrlEncoded
    stack.use Faraday::Response::SafeBrowsingUpdateParser
    stack.adapter ::Faraday.default_adapter
  end

  payload = download_data_request_body(list_name)

  response = connection.post do |request|
    request.url download_url
    request.body = payload
  end

  return process_error_response(response.status) unless successful_status_code?(response.status)

  response.body
end
|
85
|
+
|
86
|
+
#
|
87
|
+
# To get shavar chunk data from the redirect url
|
88
|
+
#
|
89
|
+
# Fetches the shavar chunk data behind a redirect URL.
#
# "https://" is prepended only when the URL carries no http(s) scheme of its
# own. Previously `url` was assigned solely in that branch, so a redirect URL
# that already included a scheme left `url` nil and the request was sent to a
# nil URL. The scheme test now also requires the "://" separator, so a bare
# host that merely starts with "http" still gets a scheme.
#
# @param redirect_url [String] redirect URL, with or without a scheme
# @param options [Hash] accepted for interface compatibility; not read here
# @return [String] the raw response body on HTTP 200
# @raise whatever process_error_response raises for any other status
def chunk_data(redirect_url, options = {})
  url =
    if redirect_url =~ %r{\Ahttps?://}i
      redirect_url
    else
      "https://#{redirect_url}"
    end

  conn = Faraday.new(headers: headers) do |builder|
    builder.use Faraday::Request::UrlEncoded
    builder.adapter ::Faraday.default_adapter
  end

  r = conn.post do |req|
    req.url url
  end

  if successful_status_code?(r.status)
    r.body
  else
    process_error_response(r.status)
  end
end
|
108
|
+
|
109
|
+
#
|
110
|
+
# hash_prefixes: ['5b3583c0', 'b3e357a6']
|
111
|
+
# @return
|
112
|
+
# # { 'goog-malware-shavar' (list_name)
|
113
|
+
# => {
|
114
|
+
# :add_chunk_num1 => [full_hash0, full_hash1, ...]
|
115
|
+
# :add_chunk_num2 => [full_hash0]
|
116
|
+
# }
|
117
|
+
# }
|
118
|
+
#
|
119
|
+
# POSTs a full-length hash request for the given hash prefixes to the gethash
# endpoint, using a FULL_HASH_TIMEOUT second request timeout.
#
# On HTTP 200 the body is decoded by
# SafeBrowsingParser.parse_full_hash_entries; any other status is handed to
# process_error_response, which raises.
#
# @param hash_prefixes [Array<String>] hex prefixes, e.g. ['5b3583c0', 'b3e357a6']
# @return [Hash] list name => { add chunk number => [full hashes] }
def full_hash(hash_prefixes)
  connection = Faraday.new(url: api_server, headers: headers) do |stack|
    stack.use Faraday::Request::UrlEncoded
    stack.adapter ::Faraday.default_adapter
  end

  payload = full_hash_request_body(hash_prefixes)

  response = connection.post do |request|
    request.url full_hash_url
    request.body = payload
    request.options[:timeout] = FULL_HASH_TIMEOUT
  end

  return process_error_response(response.status) unless successful_status_code?(response.status)

  SafeBrowsingParser.parse_full_hash_entries(response.body)
end
|
139
|
+
|
140
|
+
#
|
141
|
+
# s;200
|
142
|
+
# googpub-phish-shavar;a:1-3,5,8:s:4-5
|
143
|
+
# acme-white-shavar;a:1-7:s:1-2
|
144
|
+
#
|
145
|
+
# Builds the POST body of a data-update request for one list by delegating
# to gen_list_request.
#
# @param list_name [String] name of the list
# @return [String] the request body
def download_data_request_body(list_name)
  gen_list_request list_name
end
|
148
|
+
|
149
|
+
# Builds one request line describing which chunks of a list are already
# stored locally, in the form
#   "<name>;<ADD>:<add chunk ids>:<SUB>:<sub chunk ids>\n"
# where the add section, the sub section, and the colon joining them are each
# left out when the corresponding chunk id string is blank.
#
# @param list [String] list name, looked up through SafeBrowsingList
# @return [String] the newline-terminated request line
def gen_list_request(list)
  record = SafeBrowsingList.where(name: list).first
  list_name = record.name

  add_ids = gen_chunk_nums_string(
    SafeBrowsingShavar.add_chunk_nums_for_list(list_name).map(&:chunk_num)
  )
  sub_ids = gen_chunk_nums_string(
    SafeBrowsingShavar.sub_chunk_nums_for_list(list_name).map(&:chunk_num)
  )

  # Only non-blank sections are emitted; joining them supplies the separator
  # colon exactly when both are present.
  sections = []
  sections << "#{SafeBrowsingShavar::CHUNK_TYPE_ADD}#{COLON}#{add_ids}" unless add_ids.blank?
  sections << "#{SafeBrowsingShavar::CHUNK_TYPE_SUB}#{COLON}#{sub_ids}" unless sub_ids.blank?

  "#{list_name}#{SEMI_COLON}#{sections.join(COLON)}#{NL}"
end
|
164
|
+
|
165
|
+
#
|
166
|
+
# BODY = HEADER LF PREFIXES EOF
|
167
|
+
# HEADER = PREFIXSIZE ":" LENGTH
|
168
|
+
# PREFIXSIZE = DIGIT+ # Size of each prefix in bytes
|
169
|
+
# LENGTH = DIGIT+ # Size of PREFIXES in bytes
|
170
|
+
#
|
171
|
+
# The prefixes should all have the same length of bytes
|
172
|
+
#
|
173
|
+
# Builds the body of a full-length hash request:
#   "<bytes per prefix>:<total prefix bytes>\n<packed binary prefixes>"
#
# The byte size is derived from the first prefix only (two hex digits per
# byte), so every prefix is expected to have the same length.
#
# @param prefixes [Array<String>] hex-encoded hash prefixes
# @return [String] the request body, or "" when there are no prefixes
def full_hash_request_body(prefixes)
  return "" if prefixes.empty?

  bytes_per_prefix = prefixes.first.size / 2

  buffer = StringIO.new("")
  buffer.write("#{bytes_per_prefix}:#{bytes_per_prefix * prefixes.size}#{NL}")
  buffer.write(pack_hash_prefix(prefixes, bytes_per_prefix))
  buffer.string
end
|
184
|
+
|
185
|
+
# Converts hex-encoded prefixes into their raw bytes and concatenates them.
#
# @param prefixes [Array<String>] hex-encoded hash prefixes
# @param prefix_size [Integer] size of one prefix in bytes
# @return [String] the packed bytes of all prefixes, in order
def pack_hash_prefix(prefixes, prefix_size)
  template = "H#{prefix_size * 2}"
  prefixes.each_with_object(StringIO.new("")) do |hex, io|
    io << [hex].pack(template)
  end.string
end
|
192
|
+
|
193
|
+
# Raises the error class that corresponds to a non-successful HTTP status.
# The status is compared as a string by its leading characters:
#   "204..." -> Error::NoContent
#   "4..."   -> Error::InvalidRequest
#   "5..."   -> Error::ServiceUnavailable
#   other    -> Error::UnknownError
#
# @param status [#to_s] HTTP status code
# @raise always
def process_error_response(status)
  code = status.to_s

  raise Error::NoContent if code.start_with?('204')
  raise Error::InvalidRequest if code.start_with?('4')
  raise Error::ServiceUnavailable if code.start_with?('5')

  raise Error::UnknownError
end
|
205
|
+
|
206
|
+
# Tells whether a status code is exactly 200. The comparison is made on the
# string form, so both 200 and "200" qualify.
#
# @param status_code [#to_s] HTTP status code
# @return [Boolean]
def successful_status_code?(status_code)
  '200' == status_code.to_s
end
|
209
|
+
|
210
|
+
end
|
211
|
+
end
|
@@ -0,0 +1,214 @@
|
|
1
|
+
module Google
|
2
|
+
module SafeBrowsingParser
|
3
|
+
extend self
|
4
|
+
|
5
|
+
ADD ||= SafeBrowsingShavar::CHUNK_TYPE_ADD
|
6
|
+
SUB ||= SafeBrowsingShavar::CHUNK_TYPE_SUB
|
7
|
+
|
8
|
+
ADD_SUB_HEAD ||= /(?<add_sub>a|s):(?<chunk_num>\d+):(?<hash_len>\d+):(?<chunk_len>\d+)(\n)/
|
9
|
+
FULL_HASH_HEAD ||= /(?<rekey>e:pleaserekey)|((?<list>[-_\w]+):(?<chunk_num>\d+):(?<chunk_len>\d+))(\n)/
|
10
|
+
|
11
|
+
CHUNKNUM_SIZE = HOST_KEY_SIZE = 4 # Bytes
|
12
|
+
|
13
|
+
FULL_HASH_SIZE = 32 # Bytes, 256 bit
|
14
|
+
|
15
|
+
#
|
16
|
+
# @params str A clob of characters returned from the redirect download url
|
17
|
+
# @returns Two arrays of shavar list data decoded: one for ADD, the other for SUB
|
18
|
+
#
|
19
|
+
# The shavar list data has the following structure
|
20
|
+
# For ADD
|
21
|
+
# { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,
|
22
|
+
# :chunk_data => {
|
23
|
+
# :host_key_one => [prefix0, prefix1, ...],
|
24
|
+
# :host_key_two => []
|
25
|
+
# }
|
26
|
+
# }
|
27
|
+
#
|
28
|
+
# For SUB
|
29
|
+
# { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,
|
30
|
+
# :chunk_data => {
|
31
|
+
# :host_key_one => {
|
32
|
+
# :add_chunknum_one => [prefix0, prefix1, ...],
|
33
|
+
# :add_chunknum_two => []
|
34
|
+
# }
|
35
|
+
# :host_key_two => {
|
36
|
+
# :add_chunknum_three => [prefix0, prefix1, ...],
|
37
|
+
# :add_chunknum_four => []
|
38
|
+
# }
|
39
|
+
# }
|
40
|
+
# }
|
41
|
+
#
|
42
|
+
# Decodes the data downloaded from a redirect URL into ADD and SUB chunks.
#
# The input is scanned for chunk headers ("a|s:<chunk num>:<hash len>:<chunk
# len>\n"). After each header, <chunk len> bytes are read and decoded by
# parse_add_data or parse_sub_data. Each chunk becomes a hash with the keys
# :chunk_num, :hash_len, :chunk_len and :chunk_data.
#
# @param str [String] raw chunk data
# @param test_mode [Boolean] when true, only the first chunk is processed
# @return [Array(Array<Hash>, Array<Hash>)] the ADD chunks and the SUB chunks
def parse_shavar_list(str, test_mode = false)
  adds = []
  subs = []
  scanner = StringScanner.new(str)
  scanner.pos = 0
  count = 0

  loop do
    head = scanner.scan_until(ADD_SUB_HEAD)
    break if head.nil?
    # In test mode stop as soon as a second header shows up.
    break if test_mode && count > 0

    count += 1
    m = ADD_SUB_HEAD.match(head)
    kind = m[:add_sub]
    chunk_num = m[:chunk_num].to_i
    hash_len = m[:hash_len].to_i
    chunk_len = m[:chunk_len].to_i

    # The payload is exactly chunk_len bytes, collected one byte per element.
    chunk_data = Array.new(chunk_len) { scanner.get_byte }

    data_arr =
      case kind
      when 'a' then parse_add_data(chunk_data, hash_len)
      when 's' then parse_sub_data(chunk_data, hash_len)
      end

    obj = {
      chunk_num: chunk_num,
      hash_len: hash_len,
      chunk_len: chunk_len,
      chunk_data: data_arr
    }

    case kind
    when ADD then adds << obj
    when SUB then subs << obj
    end
  end

  Rails.logger.info "Total # of ADD/SUB section is #{count}, #{adds.size} adds, #{subs.size} subs"

  [adds, subs]
end
|
87
|
+
|
88
|
+
|
89
|
+
# Decodes the payload of one ADD chunk.
#
# The payload is a sequence of records: a HOST_KEY_SIZE-byte host key, a
# one-byte prefix count, then that many prefixes of hash_len bytes each.
# A host key with a count of zero maps to an empty array.
#
# The `total_chars` counter that was incremented but never read has been
# removed, and the hand-maintained inner counter is replaced by `times`.
#
# @param byte_arr [Array<String>] payload, one single-byte string per element
# @param hash_len [Integer] size of each hash prefix in bytes
# @return [Hash{String => Array<String>}] hex host key => hex hash prefixes
def parse_add_data(byte_arr, hash_len)
  ret = {}
  pointer = 0

  while pointer < byte_arr.size
    host_key = parse_host_key(byte_arr[pointer...pointer + HOST_KEY_SIZE])
    pointer += HOST_KEY_SIZE
    prefixes = (ret[host_key] ||= [])

    count = parse_count_number(byte_arr[pointer])
    pointer += 1

    count.times do
      prefixes << parse_hash_prefix(byte_arr[pointer...pointer + hash_len], hash_len)
      pointer += hash_len
    end
  end

  ret
end
|
115
|
+
|
116
|
+
# Decodes the payload of one SUB chunk.
#
# The payload is a sequence of records: a HOST_KEY_SIZE-byte host key and a
# one-byte count, followed by
#   * count > 0: `count` pairs of (CHUNKNUM_SIZE-byte add chunk number,
#     hash_len-byte prefix);
#   * count == 0: a single add chunk number, stored with an empty prefix list.
#
# The `total_chars` counter that was incremented but never read has been
# removed, and the add-chunk-number decoding that was duplicated in both
# branches now lives in one local lambda.
#
# @param byte_arr [Array<String>] payload, one single-byte string per element
# @param hash_len [Integer] size of each hash prefix in bytes
# @return [Hash{String => Hash{Integer => Array<String>}}]
#   hex host key => { add chunk number => hex hash prefixes }
def parse_sub_data(byte_arr, hash_len)
  ret = {}
  pointer = 0

  # Reads the big-endian 32-bit add chunk number at the cursor and advances
  # the cursor past it.
  read_add_chunknum = lambda do
    number = byte_arr[pointer...pointer + CHUNKNUM_SIZE].join('').unpack('L>').first
    pointer += CHUNKNUM_SIZE
    number
  end

  while pointer < byte_arr.size
    host_key = parse_host_key(byte_arr[pointer...pointer + HOST_KEY_SIZE])
    pointer += HOST_KEY_SIZE

    count = parse_count_number(byte_arr[pointer])
    pointer += 1

    by_add_chunk = (ret[host_key] ||= {})

    if count > 0
      count.times do
        prefixes = (by_add_chunk[read_add_chunknum.call] ||= [])
        prefixes << parse_hash_prefix(byte_arr[pointer...pointer + hash_len], hash_len)
        pointer += hash_len
      end
    else
      # No prefixes follow: only the add chunk number is present.
      by_add_chunk[read_add_chunknum.call] = []
    end
  end

  ret
end
|
149
|
+
|
150
|
+
# Renders a host key as hex: the bytes are joined and at most the first 8 hex
# digits (4 bytes) are returned.
#
# @param char_arr [Array<String>] host key bytes, one per element
# @return [String] lowercase hex string
def parse_host_key(char_arr)
  raw = char_arr.join('')
  raw.unpack('H8')[0]
end
|
153
|
+
|
154
|
+
# Interprets the first byte of a string as an unsigned 8-bit count.
#
# @param char [String] a single-byte string
# @return [Integer, nil] the byte value, or nil for an empty string
def parse_count_number(char)
  char.getbyte(0)
end
|
157
|
+
|
158
|
+
# Renders a hash prefix as hex: the bytes are joined and at most
# hash_len * 2 hex digits are returned.
#
# @param char_arr [Array<String>] prefix bytes, one per element
# @param hash_len [Integer] prefix size in bytes
# @return [String] lowercase hex string
def parse_hash_prefix(char_arr, hash_len)
  hex_digits = hash_len * 2
  char_arr.join('').unpack("H#{hex_digits}")[0]
end
|
161
|
+
|
162
|
+
#
|
163
|
+
# Returns
|
164
|
+
# { 'goog-malware-shavar' (list_name)
|
165
|
+
# => {
|
166
|
+
# :add_chunk_num1 => [full_hash0, full_hash1, ...]
|
167
|
+
# :add_chunk_num2 => [full_hash0]
|
168
|
+
# }
|
169
|
+
# }
|
170
|
+
#
|
171
|
+
# BODY = ([MAC LF] HASHENTRY+) | (REKEY LF) EOF
|
172
|
+
# HASHENTRY = LISTNAME ":" ADDCHUNK ":" HASHDATALEN LF HASHDATA
|
173
|
+
# ADDCHUNK = DIGIT+ # Add chunk number
|
174
|
+
# HASHDATALEN = DIGIT+ # Length of HASHDATA
|
175
|
+
# HASHDATA = <HASHDATALEN number of unsigned bytes> # Full length hashes in binary
|
176
|
+
# MAC = (LOALPHA | DIGIT)+
|
177
|
+
#
|
178
|
+
# Ignore rekey response for now
|
179
|
+
#
|
180
|
+
# Decodes a full-length hash (gethash) response.
#
# The input is scanned for entry headers ("<list>:<add chunk>:<data len>\n").
# After each header, <data len> bytes are read and split into full hashes by
# parse_full_hash_data. A rekey instruction ("e:pleaserekey") ends parsing
# and returns whatever has been collected so far.
#
# The unused `count` local and the redundant scanner position reset have been
# removed, and the manual byte-copy loop is replaced by Array.new with a
# block.
#
# @param str [String] raw response body
# @return [Hash{Symbol => Hash{Integer => Array<String>}}]
#   list name (as a Symbol) => { add chunk number => hex full hashes }
def parse_full_hash_entries(str)
  full_list = {}
  scanner = StringScanner.new(str)

  while (head = scanner.scan_until(FULL_HASH_HEAD))
    m = FULL_HASH_HEAD.match(head)
    return full_list if m[:rekey]

    list_name = m[:list].to_s.to_sym
    chunk_num = m[:chunk_num].to_i
    chunk_len = m[:chunk_len].to_i

    hashes = ((full_list[list_name] ||= {})[chunk_num] ||= [])

    # The hash data is exactly chunk_len bytes, one byte per element.
    chunk_data = Array.new(chunk_len) { scanner.get_byte }
    parse_full_hash_data(chunk_data, hashes)
  end

  full_list
end
|
206
|
+
|
207
|
+
# Splits raw hash data into FULL_HASH_SIZE-byte groups and appends each group,
# hex-encoded, to full_hash_arr.
#
# @param byte_arr [Array<String>] hash data, one single-byte string per element
# @param full_hash_arr [Array<String>] collection the hex hashes are appended to
def parse_full_hash_data(byte_arr, full_hash_arr)
  template = "H#{FULL_HASH_SIZE * 2}"
  byte_arr.each_slice(FULL_HASH_SIZE) do |digest_bytes|
    full_hash_arr << digest_bytes.join('').unpack(template).first
  end
end
|
212
|
+
|
213
|
+
end
|
214
|
+
end
|