google_safe_browsing 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2012 YOURNAME
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.mkd ADDED
@@ -0,0 +1,101 @@
1
+ # Google Safe Browsing Rails 3 Plugin
2
+
3
+ This gem allows easy Google Safe Browsing integration
4
+ with Rails 3 apps.
5
+
6
+ It includes:
7
+
8
+ * a migration generator for database schema
9
+ * method to update your lists
10
+ * method to lookup a url
11
+ * rake tasks to update hash database
12
+ * Autonomous updating via Resque and Resque Scheduler
13
+
14
+ ----------------------
15
+
16
+ ## Installation
17
+
18
+ Install the gem
19
+
20
+ gem install google_safe_browsing
21
+
22
+ Or add it to your Gemfile
23
+
24
+ #Gemfile
25
+
26
+ ...
27
+
28
+ gem 'google_safe_browsing'
29
+
30
+ Then, generate the migration and run it
31
+
32
+ $ rails generate google_safe_browsing:install
33
+ create db/migrate/20120227143535_create_google_safe_browsing_tables.rb
34
+ $ rake db:migrate
35
+
36
+
37
+ Add your Google Safe Browsing API key to config/application.rb.
38
+ You can get a key from the [Google Safe Browsing website](http://code.google.com/apis/safebrowsing/key_signup.html)
39
+
40
+ #config/application.rb
41
+
42
+ ...
43
+
44
+ config.google_safe_browsing.api_key = 'MySuperAwesomeKey5124'
45
+
46
+
47
+ ## Rake Tasks
48
+
49
+ You can run an update manually
50
+
51
+ $ rake google_safe_browsing:update
52
+
53
+ Or, if you have [Resque](https://github.com/defunkt/resque) and
54
+ [Resque Scheduler](https://github.com/bvandenbos/resque-scheduler) set up, you can
55
+ run an update and automatically schedule another update based on the 'next polling
56
+ interval' parameter from the API
57
+
58
+ $ rake google_safe_browsing:update_and_reschedule
59
+
60
+ ## Usage
61
+
62
+ To programmatically run an update in your app
63
+
64
+ GoogleSafeBrowsing::APIv2.update
65
+
66
+ Note: This can take a while, especially when first seeding your database. I wouldn't recommend
67
+ calling this in a controller for a normal page request.
68
+
69
+ To check a url for badness
70
+
71
+ GoogleSafeBrowsing::APIv2.lookup('http://bad.url.address.here.com.edu/forProfit')
72
+
73
+ The url string parameter does not have to be in any specific format or canonical form; the Google
74
+ Safe Browsing gem will handle all of that for you. Please report any errors from a weirdly formatted
75
+ url though. I most likely have missed some cases.
76
+
77
+ The `lookup` method returns a string ( either 'malware' or 'phishing' ) for the name of the black list
78
+ which the url appears on, or `nil` if the url is not on Google's list.
79
+
80
+ ----------------
81
+
82
+ ### More information
83
+
84
+ [Google Safe Browsing API Reference](http://code.google.com/apis/safebrowsing/)
85
+
86
+ ----------------
87
+
88
+ ### Inspiration
89
+
90
+ The interface of this gem is based upon these two gems, which are
91
+ based on Safe Browsing v1 API:
92
+
93
+ https://github.com/koke/malware_api
94
+ and
95
+ https://github.com/codelux/malware_api
96
+
97
+ ------------------
98
+
99
+ Thank you for using my gem! Please report any bugs or issues. Contributions are also always welcome!
100
+
101
+ -- Chris Marshall
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env rake
2
+ begin
3
+ require 'bundler/setup'
4
+ rescue LoadError
5
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
6
+ end
7
+ begin
8
+ require 'rdoc/task'
9
+ rescue LoadError
10
+ require 'rdoc/rdoc'
11
+ require 'rake/rdoctask'
12
+ RDoc::Task = Rake::RDocTask
13
+ end
14
+
15
+ RDoc::Task.new(:rdoc) do |rdoc|
16
+ rdoc.rdoc_dir = 'rdoc'
17
+ rdoc.title = 'GoogleSafeBrowsing'
18
+ rdoc.options << '--line-numbers'
19
+ rdoc.rdoc_files.include('README.rdoc')
20
+ rdoc.rdoc_files.include('lib/**/*.rb')
21
+ end
22
+
23
+
24
+
25
+ Bundler::GemHelper.install_tasks
26
+
@@ -0,0 +1,25 @@
1
+ require 'rails/generators'
2
+ require 'rails/generators/migration'
3
+
4
module GoogleSafeBrowsing
  # Rails generator that copies the migration template creating the
  # gsb_* tables into the host application's db/migrate directory.
  class InstallGenerator < Rails::Generators::Base
    include Rails::Generators::Migration

    # The only generator step below creates the migration, so the
    # description no longer advertises an initializer file.
    desc "Creates Migrations for Shavar Hashes and Full Hashes."

    # Directory holding the generator templates (memoized).
    def self.source_root
      @source_root ||= File.join(File.dirname(__FILE__), 'templates')
    end

    # Returns the version prefix for the next migration file.
    #
    # path - the migrations directory handed in by Rails::Generators::Migration.
    #
    # With timestamped migrations this is the current UTC time; otherwise it
    # is the next sequential, zero-padded number found in +path+.
    def self.next_migration_number(path)
      if ActiveRecord::Base.timestamped_migrations
        Time.now.utc.strftime("%Y%m%d%H%M%S")
      else
        # Was `current_migration_number(dirname)`: `dirname` is not defined
        # in this scope, so the sequential branch always raised NameError.
        "%.3d" % (current_migration_number(path) + 1)
      end
    end

    # Generator step: renders the table-creation migration.
    def create_migration_files
      migration_template 'create_google_safe_browsing_tables.rb',
                         "db/migrate/create_google_safe_browsing_tables"
    end
  end
end
@@ -0,0 +1,37 @@
1
# Migration creating the three tables used by the gem: gsb_full_hashes,
# gsb_add_shavars and gsb_sub_shavars, together with their lookup indexes.
class CreateGoogleSafeBrowsingTables < ActiveRecord::Migration
  def self.up
    create_table :gsb_full_hashes do |t|
      t.string  :full_hash
      t.integer :add_chunk_number
      t.string  :list
    end
    add_index :gsb_full_hashes, :full_hash

    # The add and sub tables share their layout; only the sub table carries
    # the extra add_chunk_number column (between host_key and chunk_number).
    [:gsb_add_shavars, :gsb_sub_shavars].each do |shavar_table|
      create_table shavar_table do |t|
        t.string  :prefix
        t.string  :host_key
        t.integer :add_chunk_number if shavar_table == :gsb_sub_shavars
        t.integer :chunk_number, :null => false
        t.string  :list, :null => false
      end
      add_index shavar_table, :host_key
      add_index shavar_table, [:host_key, :prefix ]
    end
  end

  def self.down
    [:gsb_add_shavars, :gsb_sub_shavars, :gsb_full_hashes].each do |table|
      drop_table table
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'net/http'
2
+ require 'open-uri'
3
+ require 'active_record'
4
+
5
+ require 'google_safe_browsing/google_safe_browsing_railtie' if defined?(Rails)
6
+
7
+ require File.dirname(__FILE__) + '/google_safe_browsing/api_v2'
8
+ require File.dirname(__FILE__) + '/google_safe_browsing/binary_helper'
9
+ require File.dirname(__FILE__) + '/google_safe_browsing/canonicalize'
10
+ require File.dirname(__FILE__) + '/google_safe_browsing/chunk_helper'
11
+ require File.dirname(__FILE__) + '/google_safe_browsing/hash_helper'
12
+ require File.dirname(__FILE__) + '/google_safe_browsing/http_helper'
13
+ require File.dirname(__FILE__) + '/google_safe_browsing/response_helper'
14
+ require File.dirname(__FILE__) + '/google_safe_browsing/top_level_domain'
15
+
16
+ require File.dirname(__FILE__) + '/google_safe_browsing/add_shavar'
17
+ require File.dirname(__FILE__) + '/google_safe_browsing/sub_shavar'
18
+ require File.dirname(__FILE__) + '/google_safe_browsing/full_hash'
19
+
20
+ require File.dirname(__FILE__) + '/google_safe_browsing/rescheduler'
21
+
22
module GoogleSafeBrowsing
  # Maps the list identifiers used by the API to the short names handed
  # back to callers of friendly_list_name.
  FRIENDLY_LIST_NAMES = {
    'goog-malware-shavar'  => 'malware',
    'googpub-phish-shavar' => 'phishing'
  }.freeze

  # Holds the settings of the gem. Every attribute except api_key gets a
  # default in #initialize; api_key stays nil until the application sets it.
  class Config
    attr_accessor :client, :app_ver, :p_ver, :host, :current_lists, :api_key

    def initialize
      # NOTE(review): client/app_ver/p_ver look like request parameters sent
      # to the service - confirm against HttpHelper.
      @client        = 'api'
      @app_ver       = VERSION
      @p_ver         = '2.2'
      @host          = 'http://safebrowsing.clients.google.com/safebrowsing'
      @current_lists = [ 'googpub-phish-shavar', 'goog-malware-shavar' ]
    end
  end

  # Returns the shared Config instance, creating it on first use.
  #
  # Stored in a module-level instance variable rather than a class variable
  # (@@config), which would be shared with anything that includes the module.
  def self.config
    @config ||= Config.new
  end

  # Yields the shared Config so the application can adjust settings.
  # Returns whatever the block returns.
  def self.configure
    yield config
  end

  # Enqueues the Rescheduler job on Resque.
  def self.kick_off
    Resque.enqueue(Rescheduler)
  end

  # Translates an API list identifier into 'malware' or 'phishing'.
  # Returns nil for any other value.
  def self.friendly_list_name(list)
    FRIENDLY_LIST_NAMES[list]
  end
end
@@ -0,0 +1,5 @@
1
module GoogleSafeBrowsing
  # ActiveRecord model stored in the gsb_add_shavars table.
  class AddShavar < ActiveRecord::Base
    # `set_table_name` is deprecated as of Rails 3.2 and gone in Rails 4;
    # the `table_name=` writer is the supported way to set the table.
    self.table_name = 'gsb_add_shavars'
  end
end
@@ -0,0 +1,60 @@
1
module GoogleSafeBrowsing
  # Entry points of the gem: refreshing the local hash tables (update),
  # checking a URL against them (lookup) and waiting between polls (delay).
  class APIv2
    # Longest single sleep, in seconds, between two countdown messages.
    DELAY_TICK_SECONDS = 10

    # Requests the data listing via HttpHelper.get_data, parses it with
    # ResponseHelper.parse_data_response and hands every returned data URL
    # of every list to ResponseHelper.receive_data.
    #
    # Returns the :delay_seconds value of the parsed response.
    def self.update
      data_response = HttpHelper.get_data

      to_do_array = ResponseHelper.parse_data_response(data_response.body)

      to_do_array[:lists].each do |list|
        to_do_array[:data_urls][list].each do |url|
          puts "#{list} - #{url}\n"
          ResponseHelper.receive_data('http://' + url, list)
        end
      end
      to_do_array[:delay_seconds]
    end

    # Checks +url+ against the locally stored hashes.
    #
    # Returns the friendly list name ('malware' / 'phishing') the url is
    # found on, or nil when there is no match.
    def self.lookup(url)
      urls = Canonicalize.urls_for_lookup(url)

      hashes = HashHelper.urls_to_hashes(urls)
      raw_hash_array = hashes.collect{ |h| h.to_s }

      # A stored full hash answers the question without a network request.
      full = FullHash.where(:full_hash => raw_hash_array).first
      return GoogleSafeBrowsing.friendly_list_name(full.list) if full

      # Prefixes present in the add table but not cancelled by the sub table.
      prefixes = hashes.map{ |h| h.prefix }
      hits  = AddShavar.where(:prefix => prefixes).collect{ |s| [ s.list, s.prefix ] }
      safes = SubShavar.where(:prefix => prefixes).collect{ |s| [ s.list, s.prefix ] }
      reals = hits - safes
      return nil unless reals.any?

      full_hashes = HttpHelper.request_full_hashes(reals.collect{ |r| r[1] })

      # save hashes first
      # cannot return early because all FullHashes need to be saved
      hit_list = nil
      full_hashes.each do |hash|
        FullHash.create!(:list => hash[:list], :add_chunk_number => hash[:add_chunk_num],
                         :full_hash => hash[:full_hash])

        hit_list = hash[:list] if raw_hash_array.include?(hash[:full_hash])
      end
      GoogleSafeBrowsing.friendly_list_name(hit_list)
    end

    # Blocks for +delay_seconds+, printing the remaining whole seconds at
    # most every DELAY_TICK_SECONDS seconds.
    #
    # Each sleep is capped at the time still remaining; the previous fixed
    # sleep(10) overshot the requested delay by up to ten seconds.
    def self.delay(delay_seconds)
      puts "Google told us to wait for #{delay_seconds} seconds"
      puts "We will wait...."
      deadline = Time.now + delay_seconds
      while (remaining = deadline - Time.now) > 0
        puts "#{remaining.to_i}..."
        sleep([remaining, DELAY_TICK_SECONDS].min)
      end
      puts "Thank you for being patient"
    end
  end
end
@@ -0,0 +1,40 @@
1
module GoogleSafeBrowsing
  # Helpers for pulling values out of binary strings with String#unpack
  # and for reading a fixed number of items from an iterator.
  class BinaryHelper

    # Reads +count+ items from +iter+ and returns them as a lowercase hex
    # string (two hex digits per byte).
    def self.read_bytes_as_hex(iter, count)
      read_bytes_from(iter, count).unpack("H#{count.to_i * 2}")[0]
    end

    # Hex representation of the first four bytes of +string+.
    def self.four_as_hex(string)
      string.unpack('H8')[0]
    end

    # Appends the next +count+ values of +iter+ (anything responding to
    # #next) to a fresh string and returns it. Whatever #next raises when
    # the iterator runs dry is passed on to the caller.
    def self.read_bytes_from(iter, count)
      ret = ''.dup
      count.to_i.times { ret << iter.next }
      ret
    end

    # Hex representation of the first four bytes of +bin+.
    def self.unpack_host_key(bin)
      bin.unpack('H8')[0]
    end

    # Decodes the first byte of +bin+ as an unsigned 8-bit integer.
    #
    # The previous 'U' directive decoded a UTF-8 code point, which agrees
    # with 'C' only below 0x80 and raises for a lone byte >= 0x80.
    def self.unpack_count(bin)
      bin.unpack('C')[0]
    end

    # Decodes the first four bytes of +bin+ as a big-endian unsigned
    # 32-bit integer.
    def self.unpack_add_chunk_num(bin)
      bin.unpack('N')[0]
    end

    # Packs a hex string into the bytes it spells out.
    #
    # Array() accepts both a String and an Array holding one, replacing
    # String#to_a, which no longer exists on Ruby 1.9 and later.
    def self.hex_to_bin(hex)
      Array(hex).pack('H*')
    end

  end
end
@@ -0,0 +1,181 @@
1
+ require 'uri'
2
+ require 'ip'
3
+ require File.dirname(__FILE__) + '/top_level_domain.rb'
4
+
5
module GoogleSafeBrowsing
  # Normalizes URLs into one canonical spelling (url) and expands a URL into
  # the host/path combinations that are looked up (urls_for_lookup).
  #
  # All helpers are class methods and hold no state between calls. Strings
  # are processed as raw bytes so that percent-escapes decoding to non-UTF-8
  # bytes do not raise; the canonical URL itself is always plain ASCII.
  class Canonicalize

    PROTOCOL_DELIMITER = '://'
    DEFAULT_PROTOCOL = 'http'

    # What may precede PROTOCOL_DELIMITER for it to count as a scheme.
    SCHEME = /\A[a-zA-Z][a-zA-Z0-9+.\-]*\z/
    # One percent-escaped byte.
    ESCAPED_OCTET = /%[0-9a-fA-F]{2}/
    # Everything outside this set is percent-escaped by strict_escape. It is
    # the default "unsafe" set of URI.escape, which Ruby 3.0 removed.
    UNSAFE_CHARACTERS = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]/

    # Returns the canonical form of +raw_url+ as a new string; the argument
    # itself is left untouched (it may be frozen).
    #
    # Steps: drop tabs/CR/LF and surrounding whitespace, cut the fragment,
    # unescape until stable, normalize host and path, prepend the scheme
    # (DEFAULT_PROTOCOL when the url has none) and escape the result.
    def self.url(raw_url)
      cann = to_binary(raw_url).gsub(/[\t\r\n]/, '')
      cann = cann.gsub(/\A\s+|\s+\Z/, '')

      cann = remove_fragment(cann)

      # repeatedly unescape until no more escaping
      cann = recursively_unescape(cann)

      # The scheme is kept in a local: remembering it on the class made a
      # scheme-less URL inherit the scheme of whichever URL came before it.
      protocol, cann = split_protocol(cann)
      protocol = DEFAULT_PROTOCOL if protocol.nil? || protocol.empty?

      splits = split_host_path(cann)
      cann = fix_host( splits[:host] ) + '/' + fix_path( splits[:path] )

      strict_escape(protocol + PROTOCOL_DELIMITER + cann)
    end

    # Returns the unique strings to hash for +lookup_url+: every host
    # variant combined with every path variant, followed by the bare hosts.
    #
    # Host variants: the full host plus the suffixes built from (at most)
    # the last five components returned by TopLevelDomain.split_from_host.
    # Path variants: see lookup_paths.
    def self.urls_for_lookup(lookup_url)
      lookup_url = remove_protocol(url(lookup_url))

      splits = split_host_path(lookup_url)

      host_strings = [splits[:host]]
      host = TopLevelDomain.split_from_host(splits[:host]).last(5)
      ( host.length - 1 ).times do
        host_strings << host.join('.')
        host.shift
      end
      host_strings.uniq!

      path_strings = lookup_paths(splits[:path])

      ( cart_prod(host_strings, path_strings) + host_strings ).uniq
    end

    # NOTE: the helpers below were declared after a bare `private`, which
    # does not apply to `def self.` methods; they have always been callable
    # from outside and stay that way.

    # Path variants for a canonical path (given without its leading slash,
    # optionally followed by '?query'): the path with its query, the path
    # without it, '/', and up to three leading directories, each with a
    # trailing slash.
    #
    # The previous implementation ran `gsub!` inside `map!`; `gsub!` returns
    # nil when nothing is replaced, so `compact!` threw away every variant
    # that did not contain a doubled slash - in practice all but '/'.
    def self.lookup_paths(path_and_query)
      path = path_and_query.split('?', 2).first || ''
      segments = path.split('/')
      # Without a trailing slash the last segment is a file, not a directory.
      directories = path[-1, 1] == '/' ? segments : segments[0...-1]

      paths = [ '/' + path_and_query, '/' + path, '/' ]
      prefix = '/'
      directories.first(3).each do |directory|
        prefix = prefix + directory + '/'
        paths << prefix
      end
      paths.uniq
    end

    # Every element of +a_one+ concatenated with every element of +a_two+,
    # grouped by the elements of +a_one+.
    def self.cart_prod(a_one, a_two)
      a_one.inject([]) do |result, i|
        result + a_two.map { |j| "#{i}#{j}" }
      end
    end

    # Splits at the first '/' into { :host => ..., :path => ... }; the slash
    # itself belongs to neither part. Without a slash the path is ''.
    def self.split_host_path(cann)
      split_point = cann.index('/')
      return { :host => cann, :path => '' } unless split_point

      # [0, n] instead of [0..n-1]: the latter yields the whole string
      # when the slash is the very first character.
      { :host => cann[0, split_point], :path => cann[(split_point + 1)..-1] }
    end

    # Returns +string+ up to (not including) its first '#'.
    def self.remove_fragment(string)
      fragment_start = string.index('#')
      fragment_start ? string[0, fragment_start] : string
    end

    # Decodes %XX escapes over and over until the string stops changing.
    # Returns a new, binary-encoded string.
    def self.recursively_unescape(url)
      current = to_binary(url)
      loop do
        unescaped = current.gsub(ESCAPED_OCTET) { |octet| [octet[1, 2]].pack('H2') }
        return unescaped if unescaped == current
        current = unescaped
      end
    end

    # Normalizes a host: no leading/trailing dots, runs of dots collapsed,
    # lowercased. A host made up of digits only (and larger than 256) is
    # converted from its integer form through IP::V4. Returns a new string.
    def self.fix_host(host)
      host = host.gsub(/\A\.+|\.+\Z/, '').gsub(/\.+/, '.').downcase

      # Require digits only: `to_i` alone also fired for names that merely
      # start with digits, e.g. "360.cn".to_i == 360.
      host = IP::V4.new(host.to_i).to_s if host =~ /\A\d+\z/ && host.to_i > 256

      host
    end

    # Normalizes a path (query string, if any, is carried over verbatim):
    # empty and '.' segments are dropped, '..' removes the preceding
    # segment, a trailing slash is kept. The result has no leading slash.
    def self.fix_path(path)
      query_start = path.index('?')
      if query_start
        params = path[query_start..-1]
        # [0, n] instead of [0..n-1]: for a path starting with '?' the old
        # slice returned the whole string and the query ended up doubled.
        path = path[0, query_start]
      end

      # Decided after the query is cut off, so a slash inside or in front
      # of the query is attributed correctly.
      preserve_trailing_slash = ( path[-1, 1] == '/' )

      new_path_array = []
      path.split('/').each do |segment|
        case segment
        when '', '.' then next
        when '..'    then new_path_array.pop
        else              new_path_array << segment
        end
      end

      path = new_path_array.join('/')
      # An empty path already ends at the slash that follows the host.
      path += '/' if preserve_trailing_slash && !path.empty?
      path += params if params

      path
    end

    # Percent-escapes every byte matching UNSAFE_CHARACTERS using uppercase
    # hex, then turns '%5E' back into '^'. Returns a new ASCII string.
    def self.strict_escape(url)
      escaped = to_binary(url).gsub(UNSAFE_CHARACTERS) do |unsafe|
        unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
      end

      # unescape carat, may need other optionally escapeable chars
      escaped = escaped.gsub('%5E','^')

      escaped.respond_to?(:force_encoding) ? escaped.force_encoding('US-ASCII') : escaped
    end

    # Returns [scheme, rest]. The scheme is nil when +cann+ has no
    # PROTOCOL_DELIMITER or when the text in front of it is not a valid
    # scheme (e.g. the "://" belongs to a URL inside the query string).
    def self.split_protocol(cann)
      delimiting_index = cann.index(PROTOCOL_DELIMITER)
      return [ nil, cann ] unless delimiting_index

      protocol = cann[0, delimiting_index]
      return [ nil, cann ] unless protocol =~ SCHEME

      [ protocol, cann[(delimiting_index + PROTOCOL_DELIMITER.length)..-1] ]
    end

    # Returns +cann+ without its leading "scheme://", if it has one.
    def self.remove_protocol(cann)
      split_protocol(cann).last
    end

    # Unfrozen copy of +string+, tagged as binary where encodings exist.
    def self.to_binary(string)
      copy = string.dup
      copy.respond_to?(:force_encoding) ? copy.force_encoding('BINARY') : copy
    end
  end
end