google-safe-browsing-plugin 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/LICENSE.txt +20 -0
  2. data/README.md +88 -0
  3. data/lib/faraday/response/safe_browsing_update_parser.rb +119 -0
  4. data/lib/google/safe_browsing_client.rb +211 -0
  5. data/lib/google/safe_browsing_parser.rb +214 -0
  6. data/lib/google/safe_browsing_update_helper.rb +171 -0
  7. data/lib/google/sha_util.rb +22 -0
  8. data/lib/google/url_canonicalizer.rb +36 -0
  9. data/lib/google/url_scramble.rb +54 -0
  10. data/lib/google_safe_browsing_plugin.rb +29 -0
  11. data/lib/rails/generators/google/config/config_generator.rb +16 -0
  12. data/lib/rails/generators/google/config/templates/google_safe_browsing.yml +16 -0
  13. data/lib/rails/generators/google/helper/helper_generator.rb +16 -0
  14. data/lib/rails/generators/google/helper/templates/safe_browsing_helper.rb +168 -0
  15. data/lib/rails/generators/google/install_generator.rb +20 -0
  16. data/lib/rails/generators/google/model/model_generator.rb +47 -0
  17. data/lib/rails/generators/google/model/templates/create_google_functions.rb +18 -0
  18. data/lib/rails/generators/google/model/templates/create_google_safe_browsing_full_hash_requests.rb +22 -0
  19. data/lib/rails/generators/google/model/templates/create_google_safe_browsing_full_hashes.rb +20 -0
  20. data/lib/rails/generators/google/model/templates/create_google_safe_browsing_list.rb +15 -0
  21. data/lib/rails/generators/google/model/templates/create_google_safe_browsing_redirect_urls.rb +26 -0
  22. data/lib/rails/generators/google/model/templates/create_google_safe_browsing_shavar.rb +27 -0
  23. data/lib/rails/generators/google/model/templates/google.rb +2 -0
  24. data/lib/rails/generators/google/model/templates/google/error.rb +11 -0
  25. data/lib/rails/generators/google/model/templates/google/function.rb +6 -0
  26. data/lib/rails/generators/google/model/templates/google/safe_browsing_full_hash.rb +7 -0
  27. data/lib/rails/generators/google/model/templates/google/safe_browsing_full_hash_request.rb +19 -0
  28. data/lib/rails/generators/google/model/templates/google/safe_browsing_list.rb +41 -0
  29. data/lib/rails/generators/google/model/templates/google/safe_browsing_redirect_url.rb +36 -0
  30. data/lib/rails/generators/google/model/templates/google/safe_browsing_shavar.rb +38 -0
  31. data/lib/rails/generators/google/model/templates/google/safe_browsing_update.rb +77 -0
  32. data/lib/rails/generators/google/rspec/rspec_generator.rb +28 -0
  33. data/lib/rails/generators/google/rspec/templates/bin_sample_1.data +0 -0
  34. data/lib/rails/generators/google/rspec/templates/bin_sample_2.data +0 -0
  35. data/lib/rails/generators/google/rspec/templates/full_hash_parse_spec.rb +58 -0
  36. data/lib/rails/generators/google/rspec/templates/full_hash_response_0.data +0 -0
  37. data/lib/rails/generators/google/rspec/templates/full_hash_response_1.data +0 -0
  38. data/lib/rails/generators/google/rspec/templates/full_hash_response_2.data +3 -0
  39. data/lib/rails/generators/google/rspec/templates/full_hash_response_3.data +3 -0
  40. data/lib/rails/generators/google/rspec/templates/shavar_encode_data_parse_spec.rb +56 -0
  41. data/lib/rails/generators/google/rspec/templates/shavar_list_info_parse_spec.rb +48 -0
  42. data/lib/safe_browsing_task.rb +5 -0
  43. data/lib/tasks/google.rake +122 -0
  44. metadata +222 -0
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 stonelonely
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ ## Google Safe Browsing Plugin
2
+
3
+ A Rails 3 plugin for [Google Safe Browsing API v2](https://developers.google.com/safe-browsing/developers_guide_v2).
4
+ It supports Google malware and phishing list.
5
+
6
+ ## Installation
7
+
8
+ Add the plugin to your Gemfile
9
+
10
+ gem 'google_safe_browsing_plugin', '~> 0.1'
11
+
12
+ After bundle install, run the following to generate the db migration, model classes and other code
13
+
14
+ bundle install
15
+ bundle exec rails g google:install
16
+
17
+ Run the migrations generated in the previous step, and then seed the database
18
+
19
+ bundle exec rake db:migrate
20
+ bundle exec rake google:safe_browsing:db_seed
21
+
22
+ Edit the configuration file with your Google API key
23
+
24
+ # Edit config/google_safe_browsing.yml, and put your real API key on line 2.
25
+
26
+
27
+ ## Build the hash prefix data locally
28
+
29
+ The plugin stores the hash prefixes in the relational database. The following rake task needs to be run on a _cron schedule_ to keep the local data in sync with the Google server lists. It may take several runs before the local hash prefix set is complete enough for meaningful full-length hash lookups. The first run may take a while because it needs to download quite a bit of data and store it in the local database; it can generate about a quarter of a million shavar records.
30
+
31
+ bundle exec rake google:safe_browsing:load_remote
32
+
33
+ ## Url lookup
34
+
35
+ After you run the _'load_remote'_ rake task several times, your local cache of the hash prefixes will be ready. Now you can start to do url lookup. Start the Rails console, and then try the following
36
+
37
+ url = 'financestudyhelp.com'
38
+ r = Google::SafeBrowsingHelper.lookup_url url
39
+
40
+ Since the Google Safe Browsing data is updated frequently, the previous query may not necessarily produce a malware hit. Other URLs you can try are 'http://gumblar.cn' and 'http://ianfette.org'.
41
+
42
+ Upon a match on full-length hash lookup, the _lookup\_url_ call will return a hash object that contains the match. E.g.
43
+
44
+ {"financestudyhelp.com"=>["goog-malware-shavar"]}
45
+
46
+ The key of the hash is the url that was queried. The array contains the names of the lists that matched. If the url is both a malware and
47
+ a phishing link, the value will be
48
+
49
+ ["goog-malware-shavar","googpub-phish-shavar"]
50
+
51
+ If the url is neither a malware nor phishing link, the lookup result will be an empty array [].
52
+
53
+
54
+ ## Uninstall
55
+
56
+ If you want to uninstall the gem and remove the generated files
57
+
58
+ rails d google:install
59
+
60
+ ## Features and limitations
61
+
62
+ * The plugin does not hide the ActiveRecord models and the helper inside the Gem. It instead uses the generator function provided by Rails and Thor to copy the template model/migration/helper code to the Rails application source tree. The generated code is under the _Google_ namespace.
63
+ * The plugin uses the relational database as the data store, but it doesn't have to. Redis may be a better choice considering lookup speed and cache expiry, although the local data would then have to be kept under a certain size limit. The tables generated from the migration have _google_ as the prefix.
64
+ * The hash prefix is stored in the database as plain text but not in binary/encoded format.
65
+ * The config parameters, in _config/google\_safe\_browsing.yml_, can be changed to your need. E.g. _full\_length\_hash\_expires_ is for how long the full-length hash will be cached locally.
66
+ * The plugin has a built-in backoff strategy for when errors occur. So if you notice that the full-length hash request doesn't go to Google, the plugin is probably still in backoff mode, which prevents the request from being sent to Google.
67
+ * Some RSpec tests are provided in the spec folder.
68
+ * The _google:safe\_browsing:load\_remote_ rake task cannot be run repeatedly within a short time span. It honors the _NEXT_ instruction from Google's response. This value is kept in _google\_functions#next\_updated\_at_. You have to change the value if you want to run it immediately after the previous run.
69
+ * When Google sends out _'reset'_ instruction to the request, the plugin will _not_ clean up the local data by default. You can change the behavior to reset the data by adding the following configuration in application.rb or the environment file in your Rails app:
70
+ <pre><code>
71
+ # This will reset your local data
72
+ config.google_safe_browsing_upon_reset = lambda {
73
+ Google::SafeBrowsingShavar.delete_all
74
+ }
75
+ </code></pre>
76
+
77
+ * _'rekey'_ instruction is not supported.
78
+ * The plugin works with Rails 3.2, but it should be relatively trivial to make it work with other Rails versions. Rails is used mostly for the value ActiveRecord provides for data mapping and storage. The download, parsing and encoding/decoding of the Google Safe Browsing data do not have to use Rails.
79
+
80
+
81
+ ## Reference
82
+
83
+ * [Safe Browsing API v2](https://developers.google.com/safe-browsing/developers_guide_v2)
84
+ * [google-safe-browsing](http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec) project on Google code.
85
+
86
+
87
+
88
+ Copyright (c) 2013 stonelonely and contributors, released under the MIT license.
@@ -0,0 +1,119 @@
1
+ require 'faraday'
2
+
3
module Faraday
  class Response

    # Response middleware that parses the plain-text body of a Safe Browsing
    # update response (grammar documented on #parse_data_response) into a
    # Google::SafeBrowsingUpdate object.
    class SafeBrowsingUpdateParser < ::Faraday::Response::Middleware

      REKEY ||= /(e):(pleaserekey)/
      NEXT ||= /(n):(\d+)/
      RESET ||= /(r):(pleasereset)/
      LIST ||= /(i):(.+)/

      MIX_LINE ||= /(i|u|ad|sd):(.+)/
      # Anchored on both ends so that a token such as "abc7" or "1-2-3" is
      # rejected instead of being silently read as 7 or 1..2. Surrounding
      # whitespace (e.g. a trailing "\r") is tolerated.
      CHUNK_LIST ||= /\A\s*(\d+-\d+|\d+)\s*\z/

      # Parses a raw response body.
      #
      # NOTE(review): presumably invoked by Faraday::Response::Middleware with
      # the response body -- confirm against the Faraday version in use.
      #
      # @param body [String, nil] raw response text
      # @return [Google::SafeBrowsingUpdate] the populated update object
      # @raise [Google::Error::ParserError] when the body does not follow the grammar
      def parse(body)
        @update_obj = Google::SafeBrowsingUpdate.new
        parse_data_response(body)
        @update_obj
      end

      #
      # BODY = [(REKEY | MAC) LF] NEXT LF (RESET | (LIST LF)+) EOF
      # NEXT = "n:" DIGIT+ # Minimum delay before polling again in seconds
      # REKEY = "e:pleaserekey"
      # RESET = "r:pleasereset"
      # LIST = "i:" LISTNAME [MAC] (LF LISTDATA)+
      # LISTNAME = (LOALPHA | DIGIT | "-")+ # e.g. "goog-phish-sha128"
      # MAC = "," (LOALPHA | DIGIT)+
      # LISTDATA = ((REDIRECT_URL | ADDDEL-HEAD | SUBDEL-HEAD) LF)+
      # REDIRECT_URL = "u:" URL [MAC]
      # URL = Defined in RFC 1738
      # ADDDEL-HEAD = "ad:" CHUNKLIST
      # SUBDEL-HEAD = "sd:" CHUNKLIST
      # CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
      # NUMBER = DIGIT+ # Chunk number >= 1
      # RANGE = NUMBER "-" NUMBER
      #
      # Walks the body line by line and fills @update_obj. Parsing stops after
      # the NEXT line when a rekey was requested, and after a RESET line.
      #
      # @param body [String, nil] raw response text
      # @raise [Google::Error::ParserError] on a missing/malformed NEXT, LIST or data line
      def parse_data_response(body)
        # to_s: a nil body becomes "no lines", which parse_next reports as a
        # ParserError instead of failing with NoMethodError on nil.split.
        lines = body.to_s.split(%Q(\n))

        line = lines.shift
        if REKEY =~ line
          @update_obj.rekey = true
          line = lines.shift
        end

        # The current line must be the NEXT line.
        parse_next(line)
        return if @update_obj.rekey

        line = lines.shift
        if RESET =~ line
          @update_obj.reset = true
          return
        end

        # The current line must be the first LIST line; everything after it is
        # a mix of list, redirect-url and chunk-list lines.
        parse_list(line)
        lines.each { |rest| parse_mix_line(rest) }
      end

      # Stores the polling delay taken from the "n:<seconds>" line.
      #
      # @param line [String, nil]
      # @raise [Google::Error::ParserError] when the line is not a NEXT line
      def parse_next(line)
        m = NEXT.match(line.to_s)
        raise Google::Error::ParserError if m.nil?

        @update_obj.next = m[2].to_i
      end

      # Makes the list named on an "i:<name>" line the current list.
      #
      # @param line [String, nil]
      # @raise [Google::Error::ParserError] when the line is not a LIST line
      def parse_list(line)
        m = LIST.match(line.to_s)
        raise Google::Error::ParserError if m.nil?

        @update_obj.set_current_list(m[2])
      end

      # Handles one line following the first LIST line: another list header
      # ("i:"), a redirect url ("u:"), or an add-del / sub-del chunk list
      # ("ad:" / "sd:").
      #
      # @param line [String, nil]
      # @raise [Google::Error::ParserError] when the line matches none of these
      def parse_mix_line(line)
        m = MIX_LINE.match(line.to_s)
        raise Google::Error::ParserError if m.nil?

        # Fetched before dispatching, as the original did, so that the lookup
        # happens for every line type.
        my_list = @update_obj.get_current_list

        case m[1]
        when 'i'
          @update_obj.set_current_list m[2]
        when 'u'
          my_list[:u] << m[2].strip
        when 'sd'
          parse_chunk_list m[2], my_list[:sd]
        when 'ad'
          parse_chunk_list m[2], my_list[:ad]
        else
          raise Google::Error::ParserError
        end
      end

      # Appends every entry of a comma-separated chunk list to cached_chunks:
      # "N" is appended as an Integer and "N-M" as the Range N..M.
      #
      # @param chunk_list [String] e.g. "1-2,4-5,7"
      # @param cached_chunks [Array] collection the parsed entries are pushed onto
      # @raise [Google::Error::ParserError] when an entry is neither a number nor a range
      def parse_chunk_list(chunk_list, cached_chunks)
        chunk_list.split(',').each do |chunk|
          m = CHUNK_LIST.match(chunk)
          raise Google::Error::ParserError if m.nil?

          token = m[1]
          if token.include?('-')
            low, upper = token.split('-').map { |x| x.to_i }
            cached_chunks << (low..upper) # Range
          else
            cached_chunks << token.to_i
          end
        end
      end

    end

  end
end
119
+
@@ -0,0 +1,211 @@
1
+ require 'faraday'
2
+
3
module Google
  # HTTP client for the Google Safe Browsing endpoints used by this plugin:
  # list updates (/safebrowsing/downloads), redirect-url chunk downloads and
  # full-length hash lookups (/safebrowsing/gethash).
  class SafeBrowsingClient

    include Google::SafeBrowsingUpdateHelper

    attr_accessor :headers

    CLIENT ||= 'api'
    API_KEY ||= Google::CONFIG['api_key']
    APP_VER ||= Google::CONFIG['app_ver']
    P_VER ||= Google::CONFIG['p_ver']

    SEMI_COLON ||= ';'
    COLON ||= ':'
    NL ||= %Q(\n)

    FULL_HASH_TIMEOUT = 2 # secs

    def initialize
      @headers = {
        :'User-Agent' => 'Faraday Ruby Client'
      }
    end

    # Base url of the Safe Browsing service.
    def api_server
      'https://safebrowsing.clients.google.com'
    end

    #
    # goog-malware-shavar
    # goog-regtest-shavar
    # goog-whitedomain-shavar
    # googpub-phish-shavar
    #
    # Path and query string of the "list" endpoint.
    def list_url
      api_path('list')
    end

    # Path and query string of the "downloads" endpoint.
    def download_url
      api_path('downloads')
    end

    # Path and query string of the "gethash" endpoint.
    def full_hash_url
      api_path('gethash')
    end

    #
    # resp = <<-END_OF_SAMPLE
    # n:1200
    # i:googpub-phish-shavar
    # u:cache.google.com/first_redirect_example
    # u:cache.google.com/second_redirect_example
    # sd:1,2
    # i:acme-white-shavar
    # u:cache.google.com/second_redirect_example
    # ad:1-2,4-5,7
    # sd:2-6
    # END_OF_SAMPLE
    #
    # parser = Faraday::Response::SafeBrowsingUpdateParser.new
    # r = parser.parse resp
    #
    # Posts the download request for list_name and returns the response body
    # as produced by the SafeBrowsingUpdateParser middleware.
    #
    # @param list_name [String] name of the list to update
    # @param options [Hash] currently unused
    # @raise [Error::NoContent, Error::InvalidRequest, Error::ServiceUnavailable,
    #   Error::UnknownError] when the status is not 200
    def shavar_data_update(list_name, options = {})
      conn = Faraday.new(url: api_server, headers: headers) do |builder|
        builder.use Faraday::Request::UrlEncoded
        builder.use Faraday::Response::SafeBrowsingUpdateParser
        builder.adapter ::Faraday.default_adapter
      end

      request_body = download_data_request_body(list_name)

      r = conn.post do |req|
        req.url download_url
        req.body = request_body
      end

      checked_body(r)
    end

    #
    # To get shavar chunk data from the redirect url
    #
    # @param redirect_url [String] redirect url, with or without a scheme
    # @param options [Hash] currently unused
    # @return the raw response body
    # @raise [Error::NoContent, Error::InvalidRequest, Error::ServiceUnavailable,
    #   Error::UnknownError] when the status is not 200
    def chunk_data(redirect_url, options = {})
      # Always produce a url: previously it was only assigned when the scheme
      # was missing, so a redirect url that already carried one was requested
      # as nil. The "://" is required so that hosts that merely start with
      # "http" still get a scheme.
      url = if %r{\Ahttps?://} =~ redirect_url
              redirect_url
            else
              'https://' + redirect_url
            end

      conn = Faraday.new(headers: headers) do |builder|
        builder.use Faraday::Request::UrlEncoded
        builder.adapter ::Faraday.default_adapter
      end

      r = conn.post do |req|
        req.url url
      end

      checked_body(r)
    end

    #
    # hash_prefixes: ['5b3583c0', 'b3e357a6']
    # @return the result of SafeBrowsingParser.parse_full_hash_entries, i.e.
    # # { list_name (e.g. goog-malware-shavar)
    # => {
    # :add_chunk_num1 => [full_hash0, full_hash1, ...]
    # :add_chunk_num2 => [full_hash0]
    # }
    # }
    #
    # @raise [Error::NoContent, Error::InvalidRequest, Error::ServiceUnavailable,
    #   Error::UnknownError] when the status is not 200
    def full_hash(hash_prefixes)
      conn = Faraday.new(url: api_server, headers: headers) do |builder|
        builder.use Faraday::Request::UrlEncoded
        builder.adapter ::Faraday.default_adapter
      end

      request_body = full_hash_request_body hash_prefixes

      r = conn.post do |req|
        req.url full_hash_url
        req.body = request_body
        req.options[:timeout] = FULL_HASH_TIMEOUT
      end

      SafeBrowsingParser.parse_full_hash_entries checked_body(r)
    end

    #
    # s;200
    # googpub-phish-shavar;a:1-3,5,8:s:4-5
    # acme-white-shavar;a:1-7:s:1-2
    #
    # Request body for the downloads endpoint; currently a single list line.
    def download_data_request_body(list_name)
      gen_list_request(list_name)
    end

    # Builds the request line for one list:
    # "<name>;<add type>:<add chunks>:<sub type>:<sub chunks>\n", leaving out
    # whichever of the add / sub sections has no chunk numbers.
    #
    # @param list [String] list name, looked up through SafeBrowsingList
    # @return [String]
    def gen_list_request(list)
      list = SafeBrowsingList.where(name: list).first

      add_chunk_ids = gen_chunk_nums_string(
        SafeBrowsingShavar.add_chunk_nums_for_list(list.name).map(&:chunk_num))
      sub_chunk_ids = gen_chunk_nums_string(
        SafeBrowsingShavar.sub_chunk_nums_for_list(list.name).map(&:chunk_num))

      sections = []
      sections << "#{SafeBrowsingShavar::CHUNK_TYPE_ADD}#{COLON}#{add_chunk_ids}" unless add_chunk_ids.blank?
      sections << "#{SafeBrowsingShavar::CHUNK_TYPE_SUB}#{COLON}#{sub_chunk_ids}" unless sub_chunk_ids.blank?

      "#{list.name}#{SEMI_COLON}#{sections.join(COLON)}#{NL}"
    end

    #
    # BODY = HEADER LF PREFIXES EOF
    # HEADER = PREFIXSIZE ":" LENGTH
    # PREFIXSIZE = DIGIT+ # Size of each prefix in bytes
    # LENGTH = DIGIT+ # Size of PREFIXES in bytes
    #
    # The prefixes should all have the same length of bytes
    #
    # @param prefixes [Array<String>] hex-encoded hash prefixes
    # @return [String] the request body, or "" when there are no prefixes
    def full_hash_request_body(prefixes)
      return "" if prefixes.empty?

      # Two hex characters encode one byte.
      prefix_size = prefixes.first.size / 2

      s = StringIO.new("")
      s << prefix_size.to_s << ":"
      s << (prefix_size * prefixes.size).to_s << NL
      s << pack_hash_prefix(prefixes, prefix_size)
      s.string
    end

    # Concatenates the binary form of each hex prefix.
    #
    # @param prefixes [Array<String>] hex-encoded hash prefixes
    # @param prefix_size [Integer] size of each prefix in bytes
    # @return [String]
    def pack_hash_prefix(prefixes, prefix_size)
      s = StringIO.new("")
      prefixes.each do |pre|
        s << [pre].pack("H#{prefix_size*2}")
      end
      s.string
    end

    # Raises the error class that corresponds to a non-200 status.
    #
    # @param status [#to_s] HTTP status code
    # @raise [Error::NoContent] for 204
    # @raise [Error::InvalidRequest] for 4xx
    # @raise [Error::ServiceUnavailable] for 5xx
    # @raise [Error::UnknownError] for anything else
    def process_error_response(status)
      case status.to_s
      when /\A204/
        raise Error::NoContent
      when /\A4/
        raise Error::InvalidRequest
      when /\A5/
        raise Error::ServiceUnavailable
      else
        raise Error::UnknownError
      end
    end

    # Only a plain 200 counts as success.
    def successful_status_code?(status_code)
      status_code.to_s == '200'
    end

    private

    # Path plus the query string shared by all three API endpoints.
    def api_path(endpoint)
      '/safebrowsing/%s?client=%s&apikey=%s&appver=%s&pver=%s' %
        [endpoint, CLIENT, API_KEY, APP_VER, P_VER]
    end

    # Returns the response body for a 200 response; otherwise raises through
    # process_error_response (which always raises).
    def checked_body(response)
      process_error_response(response.status) unless successful_status_code?(response.status)
      response.body
    end

  end
end
@@ -0,0 +1,214 @@
1
module Google
  # Decoders for the binary Safe Browsing payloads: shavar ADD/SUB chunk data
  # and full-length hash responses.
  module SafeBrowsingParser
    extend self

    ADD ||= SafeBrowsingShavar::CHUNK_TYPE_ADD
    SUB ||= SafeBrowsingShavar::CHUNK_TYPE_SUB

    ADD_SUB_HEAD ||= /(?<add_sub>a|s):(?<chunk_num>\d+):(?<hash_len>\d+):(?<chunk_len>\d+)(\n)/
    FULL_HASH_HEAD ||= /(?<rekey>e:pleaserekey)|((?<list>[-_\w]+):(?<chunk_num>\d+):(?<chunk_len>\d+))(\n)/

    CHUNKNUM_SIZE = HOST_KEY_SIZE = 4 # Bytes

    FULL_HASH_SIZE = 32 # Bytes, 256 bit

    #
    # @params str A clob of characters returned from the redirect download url
    # @params test_mode When true, only the first ADD/SUB section is parsed
    # @returns Two arrays of shavar list data decoded: one for ADD, the other for SUB
    # @raise [Google::Error::ParserError] when a section is shorter than its
    #   header announces
    #
    # The shavar list data has the following structure
    # For ADD
    # { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,
    # :chunk_data => {
    # :host_key_one => [prefix0, prefix1, ...],
    # :host_key_two => []
    # }
    # }
    #
    # For SUB
    # { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,
    # :chunk_data => {
    # :host_key_one => {
    # :add_chunknum_one => [prefix0, prefix1, ...],
    # :add_chunknum_two => []
    # }
    # :host_key_two => {
    # :add_chunknum_three => [prefix0, prefix1, ...],
    # :add_chunknum_four => []
    # }
    # }
    # }
    #
    def parse_shavar_list(str, test_mode = false)
      adds = []
      subs = []
      scanner = StringScanner.new(str)
      count = 0

      while (head = scanner.scan_until(ADD_SUB_HEAD))
        break if test_mode && count > 0

        count += 1
        m = ADD_SUB_HEAD.match head
        chunk_num = m[:chunk_num].to_i
        hash_len = m[:hash_len].to_i
        chunk_len = m[:chunk_len].to_i

        # The header is followed by exactly chunk_len bytes of binary data.
        chunk_data = read_chunk(scanner, chunk_len)

        if m[:add_sub] == 'a'
          data_arr = parse_add_data(chunk_data, hash_len)
        elsif m[:add_sub] == 's'
          data_arr = parse_sub_data(chunk_data, hash_len)
        end

        obj = {
          chunk_num: chunk_num,
          hash_len: hash_len,
          chunk_len: chunk_len,
          chunk_data: data_arr
        }

        if m[:add_sub] == ADD
          adds << obj
        elsif m[:add_sub] == SUB
          subs << obj
        end
      end

      Rails.logger.info "Total # of ADD/SUB section is #{count}, #{adds.size} adds, #{subs.size} subs"

      [adds, subs]
    end

    # Decodes the data of one ADD chunk: a sequence of
    # <4-byte host key><1-byte count><count prefixes of hash_len bytes>.
    #
    # @param byte_arr [Array<String>] the chunk data, one single-byte string per element
    # @param hash_len [Integer] size of each hash prefix in bytes
    # @return [Hash] hex host key => array of hex hash prefixes
    # @raise [Google::Error::ParserError] when the data ends in the middle of a record
    def parse_add_data(byte_arr, hash_len)
      ret = {}
      pointer = 0

      while pointer < byte_arr.size
        host_key = parse_host_key take_bytes(byte_arr, pointer, HOST_KEY_SIZE)
        pointer += HOST_KEY_SIZE
        ret[host_key] ||= []

        count = parse_count_number take_bytes(byte_arr, pointer, 1).first
        pointer += 1

        count.times do
          ret[host_key] << parse_hash_prefix(take_bytes(byte_arr, pointer, hash_len), hash_len)
          pointer += hash_len
        end
      end

      ret
    end

    # Decodes the data of one SUB chunk: a sequence of
    # <4-byte host key><1-byte count> followed by count pairs of
    # <4-byte add chunk number><prefix of hash_len bytes>, or by a single add
    # chunk number when count is zero.
    #
    # @param byte_arr [Array<String>] the chunk data, one single-byte string per element
    # @param hash_len [Integer] size of each hash prefix in bytes
    # @return [Hash] hex host key => { add chunk number => array of hex hash prefixes }
    # @raise [Google::Error::ParserError] when the data ends in the middle of a record
    def parse_sub_data(byte_arr, hash_len)
      ret = {}
      pointer = 0

      while pointer < byte_arr.size
        host_key = parse_host_key take_bytes(byte_arr, pointer, HOST_KEY_SIZE)
        pointer += HOST_KEY_SIZE

        count = parse_count_number take_bytes(byte_arr, pointer, 1).first
        pointer += 1

        ret[host_key] ||= {}
        if count > 0
          count.times do
            add_chunknum = parse_add_chunknum take_bytes(byte_arr, pointer, CHUNKNUM_SIZE)
            pointer += CHUNKNUM_SIZE
            ret[host_key][add_chunknum] ||= []
            ret[host_key][add_chunknum] << parse_hash_prefix(take_bytes(byte_arr, pointer, hash_len), hash_len)
            pointer += hash_len
          end
        else
          # No prefixes: only the add chunk number follows the count byte.
          add_chunknum = parse_add_chunknum take_bytes(byte_arr, pointer, CHUNKNUM_SIZE)
          ret[host_key][add_chunknum] = []
          pointer += CHUNKNUM_SIZE
        end
      end

      ret
    end

    # Hex string (8 characters) of a 4-byte host key.
    def parse_host_key(char_arr)
      char_arr.join('').unpack('H8').first
    end

    # Unsigned value of a single count byte.
    def parse_count_number(char)
      char.unpack('C').first
    end

    # Hex string of a hash prefix that is hash_len bytes long.
    def parse_hash_prefix(char_arr, hash_len)
      char_arr.join('').unpack("H#{hash_len*2}").first
    end

    #
    # Returns (list names are Symbols, chunk numbers are Integers)
    # { :'goog-malware-shavar' (list_name)
    # => {
    # add_chunk_num1 => [full_hash0, full_hash1, ...]
    # add_chunk_num2 => [full_hash0]
    # }
    # }
    #
    # BODY = ([MAC LF] HASHENTRY+) | (REKEY LF) EOF
    # HASHENTRY = LISTNAME ":" ADDCHUNK ":" HASHDATALEN LF HASHDATA
    # ADDCHUNK = DIGIT+ # Add chunk number
    # HASHDATALEN = DIGIT+ # Length of HASHDATA
    # HASHDATA = <HASHDATALEN number of unsigned bytes> # Full length hashes in binary
    # MAC = (LOALPHA | DIGIT)+
    #
    # Ignore rekey response for now: whatever was collected so far is returned
    # as soon as a rekey line is seen.
    #
    # @raise [Google::Error::ParserError] when an entry is shorter than its
    #   header announces
    #
    def parse_full_hash_entries(str)
      full_list = {}
      scanner = StringScanner.new(str)

      while (head = scanner.scan_until(FULL_HASH_HEAD))
        m = FULL_HASH_HEAD.match head
        return full_list if m[:rekey]

        list_name = m[:list].to_s.to_sym
        chunk_num = m[:chunk_num].to_i
        chunk_len = m[:chunk_len].to_i

        my_list = (full_list[list_name] ||= {})
        my_list[chunk_num] ||= []

        parse_full_hash_data read_chunk(scanner, chunk_len), my_list[chunk_num]
      end

      full_list
    end

    # Appends the hex form of every FULL_HASH_SIZE-byte slice of byte_arr to
    # full_hash_arr.
    def parse_full_hash_data(byte_arr, full_hash_arr)
      byte_arr.each_slice(FULL_HASH_SIZE) do |slice|
        full_hash_arr << slice.join('').unpack("H#{FULL_HASH_SIZE*2}").first
      end
    end

    private

    # Reads exactly length bytes from the scanner as an array of single-byte
    # strings. Fails loudly on a truncated payload; otherwise the nils returned
    # at end of input would be decoded into shortened hex values.
    def read_chunk(scanner, length)
      bytes = []
      length.times do
        byte = scanner.get_byte
        raise Google::Error::ParserError if byte.nil?
        bytes << byte
      end
      bytes
    end

    # Returns byte_arr[offset, length], raising when fewer than length
    # elements are available.
    def take_bytes(byte_arr, offset, length)
      bytes = byte_arr[offset, length]
      raise Google::Error::ParserError if bytes.nil? || bytes.size < length
      bytes
    end

    # Big-endian unsigned 32-bit add chunk number.
    def parse_add_chunknum(char_arr)
      char_arr.join('').unpack('L>').first
    end

  end
end