google-safe-browsing-plugin 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +20 -0
- data/README.md +88 -0
- data/lib/faraday/response/safe_browsing_update_parser.rb +119 -0
- data/lib/google/safe_browsing_client.rb +211 -0
- data/lib/google/safe_browsing_parser.rb +214 -0
- data/lib/google/safe_browsing_update_helper.rb +171 -0
- data/lib/google/sha_util.rb +22 -0
- data/lib/google/url_canonicalizer.rb +36 -0
- data/lib/google/url_scramble.rb +54 -0
- data/lib/google_safe_browsing_plugin.rb +29 -0
- data/lib/rails/generators/google/config/config_generator.rb +16 -0
- data/lib/rails/generators/google/config/templates/google_safe_browsing.yml +16 -0
- data/lib/rails/generators/google/helper/helper_generator.rb +16 -0
- data/lib/rails/generators/google/helper/templates/safe_browsing_helper.rb +168 -0
- data/lib/rails/generators/google/install_generator.rb +20 -0
- data/lib/rails/generators/google/model/model_generator.rb +47 -0
- data/lib/rails/generators/google/model/templates/create_google_functions.rb +18 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_full_hash_requests.rb +22 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_full_hashes.rb +20 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_list.rb +15 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_redirect_urls.rb +26 -0
- data/lib/rails/generators/google/model/templates/create_google_safe_browsing_shavar.rb +27 -0
- data/lib/rails/generators/google/model/templates/google.rb +2 -0
- data/lib/rails/generators/google/model/templates/google/error.rb +11 -0
- data/lib/rails/generators/google/model/templates/google/function.rb +6 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_full_hash.rb +7 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_full_hash_request.rb +19 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_list.rb +41 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_redirect_url.rb +36 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_shavar.rb +38 -0
- data/lib/rails/generators/google/model/templates/google/safe_browsing_update.rb +77 -0
- data/lib/rails/generators/google/rspec/rspec_generator.rb +28 -0
- data/lib/rails/generators/google/rspec/templates/bin_sample_1.data +0 -0
- data/lib/rails/generators/google/rspec/templates/bin_sample_2.data +0 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_parse_spec.rb +58 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_0.data +0 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_1.data +0 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_2.data +3 -0
- data/lib/rails/generators/google/rspec/templates/full_hash_response_3.data +3 -0
- data/lib/rails/generators/google/rspec/templates/shavar_encode_data_parse_spec.rb +56 -0
- data/lib/rails/generators/google/rspec/templates/shavar_list_info_parse_spec.rb +48 -0
- data/lib/safe_browsing_task.rb +5 -0
- data/lib/tasks/google.rake +122 -0
- metadata +222 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
module Google
|
2
|
+
module SafeBrowsingUpdateHelper
|
3
|
+
|
4
|
+
# Applies the bookkeeping parts of a parsed update response: schedules the
# next poll, reacts to a reset request and deletes the chunks named in the
# response's add-delete / sub-delete lists.
#
# update_obj - parsed update response; must respond to next, reset,
#              has_lists?, lists, get_ad_chunk_ids and get_sd_chunk_ids.
def update_local_shavar_info update_obj
  gsb = safe_browsing_service
  update_next_update_time(gsb, update_obj.next) if update_obj.next

  if update_obj.reset
    app_config = Rails.configuration
    if app_config.respond_to?(:google_safe_browsing_upon_reset)
      # The host application decides what a reset means.
      app_config.google_safe_browsing_upon_reset.call
    else
      Rails.logger.warn "I got a reset from Google... Don't know what to do."
    end
  end

  return unless update_obj.has_lists?

  update_obj.lists.keys.each do |name|
    unless SafeBrowsingList.valid_list?(name)
      Rails.logger.info "Got invalid list name [#{name}]"
      next
    end

    list = SafeBrowsingList.find_by_name(name.to_s)
    update_add_sub_chunks(list, update_obj.get_ad_chunk_ids(name), SafeBrowsingShavar::CHUNK_TYPE_ADD)
    update_add_sub_chunks(list, update_obj.get_sd_chunk_ids(name), SafeBrowsingShavar::CHUNK_TYPE_SUB)
  end
end
|
27
|
+
|
28
|
+
# Records each redirect url for the given list unless a row for that
# url/list pair already exists.
#
# urls     - enumerable of redirect url strings
# list_obj - list record; only its id is used
def save_redirect_urls urls, list_obj
  urls.each do |url|
    already_stored = SafeBrowsingRedirectUrl.for_url_and_list_id(url, list_obj.id).first
    next unless already_stored.nil?

    SafeBrowsingRedirectUrl.create(url: url, google_safe_browsing_list_id: list_obj.id)
  end
end
|
36
|
+
|
37
|
+
# Updates the stored redirect url row for the given url/list pair.
#
# url        - redirect url string
# list_obj   - list record; only its id is used
# attributes - attribute hash handed to update_attributes
#
# Returns the result of update_attributes, or false when no row exists for
# the pair (instead of failing with NoMethodError on nil).
def update_redirect_urls url, list_obj, attributes
  obj = SafeBrowsingRedirectUrl.for_url_and_list_id(url, list_obj.id).first
  return false if obj.nil?

  obj.update_attributes(attributes)
end
|
41
|
+
|
42
|
+
# Deletes locally stored chunks of one type for a list.
#
# list_obj      - list record; only its id is used
# del_chunk_ids - enumerable whose members are either a single chunk number
#                 or a Range of chunk numbers
# chunk_type    - chunk type value stored in the chunk_type column
#
# The chunk id is passed to `where` unchanged: Active Record accepts both a
# scalar and a Range as a hash condition value, so the Range case needs no
# separate loop (the old loop iterated an undefined local and raised
# NameError as soon as a Range was encountered).
def update_add_sub_chunks list_obj, del_chunk_ids, chunk_type
  del_chunk_ids.each do |chunk_id|
    SafeBrowsingShavar.where(
      google_safe_browsing_list_id: list_obj.id,
      chunk_num: chunk_id,
      chunk_type: chunk_type
    ).destroy_all
  end
end
|
54
|
+
|
55
|
+
# Stores the add chunks first and then the sub chunks of one list.
#
# adds, subs  - parsed chunk collections handed on unchanged
# shavar_list - list record the chunks belong to
def update_shavar_chunk adds, subs, shavar_list
  update_add_shavar_chunk(adds, shavar_list)
  update_sub_shavar_chunk(subs, shavar_list)
end
|
59
|
+
|
60
|
+
# Persists add chunks: one row per (chunk, host key, prefix), created only
# when no identical row exists yet.
#
# adds        - enumerable of hashes with :chunk_num and :chunk_data, where
#               :chunk_data maps a host key to a list of prefixes
# shavar_list - list record; only its id is used
def update_add_shavar_chunk adds, shavar_list
  adds.each do |add|
    chunk_num = add[:chunk_num]

    add[:chunk_data].each do |host_key, prefixes|
      # A host key without prefixes is stored once, with a nil prefix.
      stored_prefixes = prefixes.empty? ? [nil] : prefixes

      stored_prefixes.each do |prefix|
        SafeBrowsingShavar.where(
          google_safe_browsing_list_id: shavar_list.id,
          chunk_type: SafeBrowsingShavar::CHUNK_TYPE_ADD,
          chunk_num: chunk_num, host_key: host_key, prefix: prefix
        ).first_or_create
      end
    end
  end
end
|
82
|
+
|
83
|
+
# Persists sub chunks: one row per (chunk, host key, add chunk number,
# prefix), created only when no identical row exists yet.
#
# subs        - enumerable of hashes with :chunk_num and :chunk_data, where
#               :chunk_data maps a host key to a hash of
#               add chunk number => list of prefixes
# shavar_list - list record; only its id is used
def update_sub_shavar_chunk subs, shavar_list
  subs.each do |sub|
    chunk_num = sub[:chunk_num]

    sub[:chunk_data].each do |host_key, chunk_num_hash_prefix|
      chunk_num_hash_prefix.each do |add_chunk_num, prefixes|
        # No prefixes means the entry is stored once, with a nil prefix.
        stored_prefixes = prefixes.empty? ? [nil] : prefixes

        stored_prefixes.each do |prefix|
          SafeBrowsingShavar.where(
            google_safe_browsing_list_id: shavar_list.id,
            chunk_type: SafeBrowsingShavar::CHUNK_TYPE_SUB,
            chunk_num: chunk_num, host_key: host_key,
            add_chunk_num: add_chunk_num, prefix: prefix
          ).first_or_create
        end
      end
    end
  end
end
|
109
|
+
|
110
|
+
#
|
111
|
+
# chunk_id_arr: [113121, 113122, 113123, 113132], in increasing order
|
112
|
+
# return: "113121-113123,113132"
|
113
|
+
#
|
114
|
+
# Collapses runs of consecutive chunk ids into "first-last" ranges and joins
# everything with commas. The input array is only read, never modified, so
# callers can keep using it afterwards.
#
# chunk_id_arr - Array of Integers in increasing order
#
# Returns a String; "" for an empty array.
def gen_chunk_nums_string chunk_id_arr
  first, *rest = chunk_id_arr
  return "" if first.nil?

  ranges_and_integers = []
  last = first

  rest.each do |int|
    if int == last + 1
      # Still inside the current consecutive run.
      last = int
    else
      ranges_and_integers << (first == last ? first : (first..last))
      first = last = int
    end
  end

  # Flush the run that was still open when the input ended.
  ranges_and_integers << (first == last ? first : (first..last))

  range_and_int_arr_to_string(ranges_and_integers)
end
|
145
|
+
|
146
|
+
|
147
|
+
# Looks up the Function row named Function::GoogleSafeBrowsing.
# Returns the first match, or nil when there is none.
def safe_browsing_service
  service_name = Function::GoogleSafeBrowsing
  Function.where(name: service_name).first
end
|
150
|
+
|
151
|
+
# Stores the time of the next allowed update on the given record.
#
# link_function - record responding to update_attributes
# ts            - delay added to the current time
def update_next_update_time link_function, ts
  next_run = Time.now + ts
  link_function.update_attributes(next_updated_at: next_run)
end
|
154
|
+
|
155
|
+
|
156
|
+
protected
|
157
|
+
|
158
|
+
# Renders a mixed list of Ranges and Integers as a comma-separated string:
# a Range becomes "first-last", an Integer its decimal form. Members of any
# other type are left out.
def range_and_int_arr_to_string arr
  rendered = arr.map do |member|
    case member
    when Range then member.first.to_s + "-" + member.last.to_s
    when Integer then member.to_s
    end
  end

  rendered.compact.join(",")
end
|
169
|
+
|
170
|
+
end
|
171
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'openssl'

module Google
  # SHA-256 helpers used to turn host keys and url expressions into hashes
  # and hash prefixes.
  module ShaUtil
    extend self

    # Kept so existing references to the constant keep working. It is no
    # longer used by the methods below: a single digest object is mutable
    # state, and sharing it between concurrent callers can corrupt results.
    SHA256 = OpenSSL::Digest::SHA256.new

    # Hex-encoded SHA-256 of str.
    #
    # str    - String to hash
    # prefix - optional number of leading hex characters to return
    #
    # Returns the full 64-character hex digest, or its first `prefix`
    # characters when prefix is given.
    def sha256_hex str, prefix = nil
      hex = sha256_digest(str).unpack("H64").first
      # Plain String slicing; does not rely on ActiveSupport's String#first.
      prefix ? hex[0, prefix] : hex
    end

    # Raw (binary) SHA-256 digest of str.
    def sha256_digest str
      # The class-level helper builds a fresh digest per call.
      OpenSSL::Digest::SHA256.digest(str)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
|
2
|
+
require 'uri'

module Google
  # Normalises a url before it is split into lookup expressions.
  module UrlCanonicalizer
    extend self

    ANCHOR_REGEX = /(?:#(?:[-\w~!$+|.,*:=]|%[A-Fa-f\d]{2})*)?\b/
    URL_REGEX = /(?<protocol>(?:ht|f)tp(?:s?)\:\/\/|~\/|\/)?(?<user_pwd>\w+:\w+@)?(?<host>((?<sub>[-\w]+\.)+(?<top>com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum|travel|[a-z]{2}))|(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))(?<port>:[\d]{1,5})?(?<dir>(?:(?:\/(?:[-\w~!$+|.=]|%[A-Fa-f\d]{2}|#!)+)+|\/)+|\?|#)?(?<query>(?:\?(?:[-\w~!$+|.,*:]|%[A-Fa-f\d{2}])+=?(?:[-\w~!$+|.,*:=\/]|%[A-Fa-f\d]{2})*)(?:&(?:[-\w~!$+|.,*:]|%[A-Fa-f\d{2}])+=?(?:[-\w~!$+|.,*:=\/]|%[A-Fa-f\d]{2})*)*)*\b/

    # Parser object providing percent escape/unescape. URI.escape and
    # URI.unescape were removed from the URI module in Ruby 3.0; the parser
    # instance methods remain available.
    PERCENT_CODEC = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER

    # Canonicalises input_url: strips whitespace and the fragment, resolves
    # repeated percent-escaping, lower-cases, defaults the scheme to http://
    # and tidies leading/trailing host dots and duplicate or "/./" path
    # separators.
    #
    # Returns the rebuilt url String, or the cleaned-up input when it does
    # not match URL_REGEX.
    def apply input_url
      url = input_url.to_s.encode("ASCII-8BIT", :invalid => :replace, :undef => :replace, :replace => '?')
      url = url.gsub(/\s/, '')
      url = url.gsub(ANCHOR_REGEX, '')
      url = unescape(url)

      m = URL_REGEX.match(url.downcase)
      return url unless m

      protocol, host, port, dir, query = m[:protocol], m[:host], m[:port], m[:dir], m[:query]
      protocol = 'http://' if protocol.nil? || protocol == '/'
      host = host.sub(/\A\.*/, '').sub(/\.\z/, '') if host
      dir = dir.sub(/\A\/*/, '').gsub(/\/+/, '/').gsub(/\/\.\//, '/') if dir

      # nil parts (no port / dir / query) contribute nothing to the join.
      [protocol, host, port, '/', dir, query].join
    end

    # Percent-unescapes url until the result stops changing, then escapes
    # it exactly once.
    def unescape url
      loop do
        decoded = PERCENT_CODEC.unescape(url)
        break if decoded == url

        url = decoded
      end

      PERCENT_CODEC.escape(url)
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Google
  # Expands a canonicalized url into the host keys and host/path
  # expressions that are hashed for lookup.
  module UrlScramble
    extend self

    # url is canonicalized url
    #
    # Returns [host_keys, urls]:
    #   host_keys - candidate hosts that contain one or two dots, each with a
    #               trailing "/" (for an IP address: the address itself)
    #   urls      - every candidate host combined with every candidate path
    # Returns [[], []] when url does not match URL_REGEX.
    #
    # NOTE(review): intermediate path prefixes are emitted without a trailing
    # slash ("/1", not "/1/"); confirm this against the lookup expression
    # rules of the Safe Browsing protocol version in use.
    def gen url
      m = Google::UrlCanonicalizer::URL_REGEX.match(url)
      return [[], []] if m.nil?

      host, dir, query = m[:host], m[:dir], m[:query]
      return [[], []] if host.nil?

      hosts = [host]
      if /[^\d.]/ =~ host
        # Host name: consider at most the last six labels, then add every
        # shorter suffix that still has at least two labels.
        labels = host.split('.')
        labels = labels[-6..-1] if labels.size >= 6
        (1..labels.size - 2).each { |start| hosts << labels[start..-1].join('.') }
        host_keys = hosts.select { |h| [1, 2].include?(h.count('.')) }.map { |h| h + '/' }
      else
        # Digits and dots only: an IP address is used as-is.
        host_keys = hosts.map { |h| h + '/' }
      end

      dir = dir.to_s.sub(/\A\//, '') # remove the leading slash
      paths = ["/#{dir}"]
      paths << "/#{dir}#{query}" unless query.to_s.empty?
      paths << '/'

      # Prefixes built from the first four path segments.
      prefix = ''
      dir.split('/').first(4).each do |segment|
        prefix += "/#{segment}"
        paths << prefix
      end
      paths.uniq!

      urls = hosts.flat_map { |h| paths.map { |p| h + p } }

      [host_keys, urls]
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'active_support/all'
|
2
|
+
|
3
|
+
# Namespace of the plugin: table prefix for its models, optional
# per-environment configuration and lazy loading of its components.
module Google
  # Prefix applied to the table names of the models in this namespace.
  def self.table_name_prefix
    'google_'
  end

  if defined?(Rails)
    require 'yaml'

    config_file = File.expand_path('./config/google_safe_browsing.yml', Rails.root)
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    if File.exist?(config_file)
      # The generated config uses YAML aliases (`<<: *default`). Where Psych
      # provides unsafe_load_file, the default load_file rejects aliases, so
      # prefer it for this application-owned file.
      loader = YAML.respond_to?(:unsafe_load_file) ? :unsafe_load_file : :load_file
      CONFIG ||= YAML.public_send(loader, config_file)[Rails.env]
    end
  end

  autoload :SafeBrowsingClient, 'google/safe_browsing_client'
  autoload :SafeBrowsingParser, 'google/safe_browsing_parser'
  autoload :SafeBrowsingUpdateHelper, 'google/safe_browsing_update_helper'
  autoload :ShaUtil, 'google/sha_util'
  autoload :UrlCanonicalizer, 'google/url_canonicalizer'
  autoload :UrlScramble, 'google/url_scramble'
end
|
20
|
+
|
21
|
+
# Registers the update-response parser for lazy loading under
# Faraday::Response.
module Faraday
  class Response
    autoload(:SafeBrowsingUpdateParser, 'faraday/response/safe_browsing_update_parser')
  end
end

# The rake task file is only loaded when Rake itself is present.
require 'safe_browsing_task' if defined?(Rake)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Google
  module Generators
    # Rails generator that installs the plugin's YAML configuration template.
    class ConfigGenerator < Rails::Generators::Base
      desc 'Creates a Google Safe Browsing plugin configuration file config/google_safe_browsing.yml'

      # Templates live in the "templates" directory next to this file.
      source_root File.expand_path('templates', File.dirname(__FILE__))

      # Copies the bundled template to config/google_safe_browsing.yml.
      def copy_config_file
        copy_file('google_safe_browsing.yml', 'config/google_safe_browsing.yml')
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Settings shared by every environment; each environment below merges them
# in through the `default` anchor.
default: &default
  api_key: your_google_api_key
  app_ver: 1.5.2
  p_ver: 2.2
  full_length_hash_expires: 86400 # 1 day
  full_length_hash_backoff_delay: 300 # seconds
  full_length_hash_backoff_delay_max: 7200 # seconds

development:
  <<: *default

test:
  <<: *default

production:
  <<: *default
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Google
  module Generators
    # Rails generator that installs the plugin's helper module template.
    class HelperGenerator < Rails::Generators::Base
      desc 'Creates a Google Safe Browsing plugin helper file app/helpers/google/safe_browsing_helper.rb'

      # Templates live in the "templates" directory next to this file.
      source_root File.expand_path('templates', File.dirname(__FILE__))

      # Copies the bundled helper to app/helpers/google/safe_browsing_helper.rb.
      def copy_helper_file
        copy_file('safe_browsing_helper.rb', 'app/helpers/google/safe_browsing_helper.rb')
      end
    end
  end
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
module Google
|
2
|
+
module SafeBrowsingHelper
|
3
|
+
extend self
|
4
|
+
|
5
|
+
# Records the outcome of a full-hash request for every prefix: updates the
# existing request row for a prefix, or creates one when none exists.
#
# prefixes     - enumerable of hash prefixes
# state        - value stored in the state column
# requested_at - request time; defaults to the time of the call
def save_full_hash_requests prefixes, state, requested_at = Time.now
  prefixes.each do |pre|
    existing = SafeBrowsingFullHashRequest.where(prefix: pre).first

    if existing
      existing.update_attributes(requested_at: requested_at, state: state)
    else
      SafeBrowsingFullHashRequest.create(prefix: pre, requested_at: requested_at, state: state)
    end
  end
end
|
15
|
+
|
16
|
+
|
17
|
+
#
|
18
|
+
# hash_prefix_arr: ['5b3583c0', 'b3e357a6']
|
19
|
+
#
|
20
|
+
# Requests the full-length hashes for the given prefixes, records the
# outcome of the request per prefix and caches every returned hash that
# belongs to a locally known list.
#
# hash_prefix_arr - Array of hash prefixes, e.g. ['5b3583c0', 'b3e357a6']
#
# Returns an Array of the full hashes that were cached or refreshed; an
# empty Array for empty input, a NoContent response or a failed request.
# Only StandardError is treated as a failed request, so signals and exit
# requests raised during the call propagate to the caller.
def get_full_hash_list hash_prefix_arr
  ret_full_hashes = []
  return ret_full_hashes if hash_prefix_arr.empty?

  # Built only once there is something to ask for.
  api = SafeBrowsingClient.new

  begin
    full_hash_objs = api.full_hash(hash_prefix_arr) || {}
    save_full_hash_requests(hash_prefix_arr, SafeBrowsingFullHashRequest::COMPLETED)
  rescue Google::Error::NoContent
    Rails.logger.warn "NoContent Error for hash prefixes [#{hash_prefix_arr.join(', ')}]"
    save_full_hash_requests(hash_prefix_arr, SafeBrowsingFullHashRequest::COMPLETED)
    return ret_full_hashes
  rescue StandardError => e
    Rails.logger.error "Error backtrace #{Array(e.backtrace).join(%Q(\n))}"
    Rails.logger.warn "Error (#{e.inspect}) for hash prefixes [#{hash_prefix_arr.join(', ')}], continue..."
    # The error message is stored as the request state.
    save_full_hash_requests(hash_prefix_arr, e.message)
    return ret_full_hashes
  end

  full_hash_objs.each do |list, chunks|
    # Hashes for lists that are not known locally are ignored; the list is
    # looked up once rather than once per hash.
    list_obj = SafeBrowsingList.list_by_name(list)
    next if list_obj.nil?

    chunks.each do |add_chunk_num, full_hashes|
      full_hashes.each do |full_hash|
        Rails.logger.info "Updating full hash data with #{list}:#{add_chunk_num}:#{full_hash}"
        local = SafeBrowsingFullHash.where(value: full_hash).first

        if local
          # Refresh the timestamp of an already cached hash.
          local.touch
        else
          SafeBrowsingFullHash.create(value: full_hash, add_chunk_num: add_chunk_num,
                                      google_safe_browsing_list_id: list_obj.id)
        end

        ret_full_hashes << full_hash
      end
    end
  end

  ret_full_hashes
end
|
62
|
+
|
63
|
+
# Decides whether a full-hash request may be sent for hash_prefix now.
#
# - never requested before: allowed
# - last request completed: allowed only once it is older than
#   CONFIG['full_length_hash_expires']
# - last request not completed: allowed once the back-off delay
#   (attempts * CONFIG['full_length_hash_backoff_delay'], capped at
#   CONFIG['full_length_hash_backoff_delay_max']) has passed
def can_request_full_hash? hash_prefix
  now = Time.now
  request = Google::SafeBrowsingFullHashRequest.where(prefix: hash_prefix).first
  return true if request.nil?

  if request.state == SafeBrowsingFullHashRequest::COMPLETED
    return true if request.requested_at < now - Google::CONFIG['full_length_hash_expires']

    Rails.logger.warn "Full hash [#{hash_prefix}] requested successfully recently, skip this time."
    return false
  end

  # A missing attempt counter counts as one attempt.
  attempts = request.attempts.nil? ? 1 : request.attempts
  max_delay = Google::CONFIG['full_length_hash_backoff_delay_max']
  delay = attempts * Google::CONFIG['full_length_hash_backoff_delay']
  delay = max_delay if delay > max_delay

  return true unless request.requested_at > now - delay

  Rails.logger.warn "Full hash [#{hash_prefix}] request in backoff mode. Wait time is #{delay} seconds"
  false
end
|
90
|
+
|
91
|
+
# Looks a url up against the local data, asking for full-length hashes
# when the local prefixes match but no fresh cached full hash does.
#
# Returns a Hash mapping the original url to the names of the lists whose
# cached full hashes matched (an empty Array when nothing matched).
def lookup_url url
  canon_url = Google::UrlCanonicalizer.apply(url)
  hosts_set, urls_set = Google::UrlScramble.gen(canon_url)
  full_hits = []

  unless hosts_set.empty?
    host_shas = hosts_set.map { |host| ShaUtil.sha256_hex(host, 8) }
    host_hits = find_host_key_hits(host_shas).map(&:host_key)

    if host_hits.empty?
      Rails.logger.info "No host key prefix found. Return."
      return hits_to_category(url, full_hits)
    end

    url_shas = urls_set.map { |expression| ShaUtil.sha256_hex(expression, 8) }
    prefix_hits = find_prefix_key_hits(host_shas, url_shas).map(&:prefix)
    candidate_prefixes = (host_hits + prefix_hits).uniq

    # Only expressions whose first eight hex characters are candidates
    # can match a full hash.
    full_hash_expressions = urls_set.map { |expression| ShaUtil.sha256_hex(expression) }
                                    .select { |digest| candidate_prefixes.include?(digest.first(8)) }
    full_hits = full_hash_cache_hits(full_hash_expressions)

    if full_hits.empty?
      # Keep only the prefixes that may be requested right now.
      candidate_prefixes = candidate_prefixes.select { |prefix| can_request_full_hash?(prefix) }

      unless candidate_prefixes.empty?
        Rails.logger.info "Asking Google for full length hash #{candidate_prefixes}"
        get_full_hash_list(candidate_prefixes)
        full_hits = full_hash_cache_hits(full_hash_expressions)
      end
    end
  end

  if full_hits.empty?
    Rails.logger.info "Return no hits..."
  else
    Rails.logger.info "Return hits #{full_hits.inspect}..."
  end

  hits_to_category(url, full_hits)
end
|
131
|
+
|
132
|
+
# Builds the lookup result: a Hash with the url as its only key and the
# list names of all hits (in hit order) as its value.
def hits_to_category url, full_hits
  list_names = []
  full_hits.each { |hit| list_names << hit.list.name }
  { url => list_names }
end
|
140
|
+
|
141
|
+
# Returns the add entries matching the given host key prefixes for which
# no sub entry is found.
def find_host_key_hits prefixes
  SafeBrowsingShavar.add_host_keys(prefixes).to_a.select do |add|
    SafeBrowsingShavar.find_subs_for_add(add.chunk_num, add.host_key, add.prefix).empty?
  end
end
|
150
|
+
|
151
|
+
# Returns the add entries matching the given host keys and prefixes for
# which no sub entry is found.
def find_prefix_key_hits host_keys, prefixes
  SafeBrowsingShavar.add_host_prefixes(host_keys, prefixes).to_a.select do |add|
    SafeBrowsingShavar.find_subs_for_add(add.chunk_num, add.host_key, add.prefix).empty?
  end
end
|
160
|
+
|
161
|
+
# Cached full hashes (with their list) whose value is one of full_hashes
# and whose updated_at is newer than now minus
# CONFIG['full_length_hash_expires'].
def full_hash_cache_hits full_hashes
  matching = SafeBrowsingFullHash.includes(:list).where(value: full_hashes)
  matching.where('updated_at > ?', Time.now - Google::CONFIG['full_length_hash_expires'])
end
|
165
|
+
|
166
|
+
|
167
|
+
end
|
168
|
+
end
|