google_safe_browsing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2012 YOURNAME
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.mkd ADDED
@@ -0,0 +1,101 @@
1
+ # Google Safe Browsing Rails 3 Plugin
2
+
3
+ This gem allows easy Google Safe Browsing integration
4
+ with Rails 3 apps.
5
+
6
+ It includes:
7
+
8
+ * a migration generator for database schema
9
+ * method to update your lists
10
+ * method to lookup a url
11
+ * rake tasks to update hash database
12
+ * Autonomous updating via Resque and Resque Scheduler
13
+
14
+ ----------------------
15
+
16
+ ## Installation
17
+
18
+ Install the gem
19
+
20
+ gem install google_safe_browsing
21
+
22
+ Or add it to your Gemfile
23
+
24
+ #Gemfile
25
+
26
+ ...
27
+
28
+ gem 'google_safe_browsing'
29
+
30
+ Then, generate the migration and run it
31
+
32
+ $ rails generate google_safe_browsing:install
33
+ create db/migrate/20120227143535_create_google_safe_browsing_tables.rb
34
+ $ rake db:migrate
35
+
36
+
37
+ Add your Google Safe Browsing API key to config/application.rb
38
+ You can get a key from the [Google Safe Browsing website](http://code.google.com/apis/safebrowsing/key_signup.html)
39
+
40
+ #config/application.rb
41
+
42
+ ...
43
+
44
+ config.google_safe_browsing.api_key = 'MySuperAwesomeKey5124'
45
+
46
+
47
+ ## Rake Tasks
48
+
49
+ You can run an update manually
50
+
51
+ $ rake google_safe_browsing:update
52
+
53
+ Or, if you have [Resque](https://github.com/defunkt/resque) and
54
+ [Resque Scheduler](https://github.com/bvandenbos/resque-scheduler) set up, you can
55
+ run an update and automatically schedule another update based on the 'next polling
56
+ interval' parameter from the API
57
+
58
+ $ rake google_safe_browsing:update_and_reschedule
59
+
60
+ ## Usage
61
+
62
+ To programmatically run an update in your app
63
+
64
+ GoogleSafeBrowsing::APIv2.update
65
+
66
+ Note: This can take a while, especially when first seeding your database. I wouldn't recommend
67
+ calling this in a controller for a normal page request.
68
+
69
+ To check a url for badness
70
+
71
+ GoogleSafeBrowsing::APIv2.lookup('http://bad.url.address.here.com.edu/forProfit')
72
+
73
+ The url string parameter does not have to be in any specific format or canonicalization; the Google
74
+ Safe Browsing gem will handle all of that for you. Please report any errors from a weirdly formatted
75
+ url though. I most likely have missed some cases.
76
+
77
+ The `lookup` method returns a string ( either 'malware' or 'phishing' ) for the name of the black list
78
+ which the url appears on, or `nil` if the url is not on Google's list.
79
+
80
+ ----------------
81
+
82
+ ### More information
83
+
84
+ [Google Safe Browsing API Reference](http://code.google.com/apis/safebrowsing/)
85
+
86
+ ----------------
87
+
88
+ ### Inspiration
89
+
90
+ The interface of this gem is based upon these two gems, which are
91
+ based on Safe Browsing v1 API:
92
+
93
+ https://github.com/koke/malware_api
94
+ and
95
+ https://github.com/codelux/malware_api
96
+
97
+ ------------------
98
+
99
+ Thank you for using my gem! Please report any bugs or issues. Contributions are also always welcome!
100
+
101
+ -- Chris Marshall
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env rake
2
+ begin
3
+ require 'bundler/setup'
4
+ rescue LoadError
5
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
6
+ end
7
+ begin
8
+ require 'rdoc/task'
9
+ rescue LoadError
10
+ require 'rdoc/rdoc'
11
+ require 'rake/rdoctask'
12
+ RDoc::Task = Rake::RDocTask
13
+ end
14
+
15
+ RDoc::Task.new(:rdoc) do |rdoc|
16
+ rdoc.rdoc_dir = 'rdoc'
17
+ rdoc.title = 'GoogleSafeBrowsing'
18
+ rdoc.options << '--line-numbers'
19
+ rdoc.rdoc_files.include('README.rdoc')
20
+ rdoc.rdoc_files.include('lib/**/*.rb')
21
+ end
22
+
23
+
24
+
25
+ Bundler::GemHelper.install_tasks
26
+
@@ -0,0 +1,25 @@
1
+ require 'rails/generators'
2
+ require 'rails/generators/migration'
3
+
4
module GoogleSafeBrowsing
  # Rails generator (invoked as `rails generate google_safe_browsing:install`)
  # that copies the migration creating the gem's tables into the host app.
  class InstallGenerator < Rails::Generators::Base
    include Rails::Generators::Migration
    desc "Creates Migrations for Shavar Hashes and Full Hashes. Creates initializer file for API Key."

    # Directory holding the generator templates (the `templates` folder next
    # to this file). Memoized on the class.
    def self.source_root
      @source_root ||= File.join(File.dirname(__FILE__), 'templates')
    end

    # Returns the version prefix for the generated migration file.
    #
    # path - the migrations directory handed in by Rails::Generators::Migration.
    #
    # With timestamped migrations a UTC timestamp string is returned; otherwise
    # the next sequential number, zero-padded to three digits.
    def self.next_migration_number(path)
      if ActiveRecord::Base.timestamped_migrations
        Time.now.utc.strftime("%Y%m%d%H%M%S")
      else
        # Use the `path` argument: the previous code referenced an undefined
        # local `dirname`, raising NameError on this branch.
        "%.3d" % (current_migration_number(path) + 1)
      end
    end

    # Generator step: renders the migration template into db/migrate.
    def create_migration_files
      migration_template 'create_google_safe_browsing_tables.rb', "db/migrate/create_google_safe_browsing_tables"
    end
  end
end
@@ -0,0 +1,37 @@
1
# Migration that creates (and drops) the three tables used by the gem:
# gsb_full_hashes, gsb_add_shavars and gsb_sub_shavars.
class CreateGoogleSafeBrowsingTables < ActiveRecord::Migration
  # Creates the tables and their lookup indexes.
  def self.up
    create_table(:gsb_full_hashes) do |table|
      table.string(:full_hash)
      table.integer(:add_chunk_number)
      table.string(:list)
    end
    add_index(:gsb_full_hashes, :full_hash)

    create_table(:gsb_add_shavars) do |table|
      table.string(:prefix)
      table.string(:host_key)
      table.integer(:chunk_number, :null => false)
      table.string(:list, :null => false)
    end
    # Same two indexes as on the sub table: host key alone, then host key + prefix.
    [:host_key, [:host_key, :prefix]].each do |columns|
      add_index(:gsb_add_shavars, columns)
    end

    create_table(:gsb_sub_shavars) do |table|
      table.string(:prefix)
      table.string(:host_key)
      table.integer(:add_chunk_number)
      table.integer(:chunk_number, :null => false)
      table.string(:list, :null => false)
    end
    [:host_key, [:host_key, :prefix]].each do |columns|
      add_index(:gsb_sub_shavars, columns)
    end
  end

  # Drops every table created by +up+.
  def self.down
    [:gsb_add_shavars, :gsb_sub_shavars, :gsb_full_hashes].each do |table_name|
      drop_table(table_name)
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'net/http'
2
+ require 'open-uri'
3
+ require 'active_record'
4
+
5
+ require 'google_safe_browsing/google_safe_browsing_railtie' if defined?(Rails)
6
+
7
+ require File.dirname(__FILE__) + '/google_safe_browsing/api_v2'
8
+ require File.dirname(__FILE__) + '/google_safe_browsing/binary_helper'
9
+ require File.dirname(__FILE__) + '/google_safe_browsing/canonicalize'
10
+ require File.dirname(__FILE__) + '/google_safe_browsing/chunk_helper'
11
+ require File.dirname(__FILE__) + '/google_safe_browsing/hash_helper'
12
+ require File.dirname(__FILE__) + '/google_safe_browsing/http_helper'
13
+ require File.dirname(__FILE__) + '/google_safe_browsing/response_helper'
14
+ require File.dirname(__FILE__) + '/google_safe_browsing/top_level_domain'
15
+
16
+ require File.dirname(__FILE__) + '/google_safe_browsing/add_shavar'
17
+ require File.dirname(__FILE__) + '/google_safe_browsing/sub_shavar'
18
+ require File.dirname(__FILE__) + '/google_safe_browsing/full_hash'
19
+
20
+ require File.dirname(__FILE__) + '/google_safe_browsing/rescheduler'
21
+
22
module GoogleSafeBrowsing
  # Settings object for the gem. Every attribute except +api_key+ is given a
  # default in the constructor; +api_key+ starts out unset.
  class Config
    attr_accessor :client, :app_ver, :p_ver, :host, :current_lists, :api_key

    def initialize
      @client        = 'api'
      @app_ver       = VERSION
      @p_ver         = '2.2'
      @host          = 'http://safebrowsing.clients.google.com/safebrowsing'
      @current_lists = [ 'googpub-phish-shavar', 'goog-malware-shavar' ]
    end
  end

  # Returns the shared Config instance, building it on first access.
  def self.config
    @@config ||= Config.new
  end

  # Yields the shared Config instance to the given block.
  def self.configure
    yield config
  end

  # Enqueues the Rescheduler job on Resque.
  def self.kick_off
    Resque.enqueue(Rescheduler)
  end

  # Maps a list identifier to its short name ('malware' or 'phishing').
  # Returns nil for any other value.
  def self.friendly_list_name(list)
    case list
    when 'goog-malware-shavar'  then 'malware'
    when 'googpub-phish-shavar' then 'phishing'
    end
  end
end
@@ -0,0 +1,5 @@
1
module GoogleSafeBrowsing
  # ActiveRecord model backed by the gsb_add_shavars table.
  class AddShavar < ActiveRecord::Base
    # `set_table_name` was deprecated in Rails 3.2 and removed in Rails 4.0;
    # the `table_name=` writer is available across Rails 3 and later.
    self.table_name = 'gsb_add_shavars'
  end
end
@@ -0,0 +1,60 @@
1
module GoogleSafeBrowsing
  # Entry points for refreshing the local hash data and checking a URL.
  class APIv2
    # Fetches the data response, then downloads every data URL listed for each
    # list in it. Progress is printed to stdout.
    #
    # Returns the :delay_seconds value from the parsed response.
    def self.update
      response = HttpHelper.get_data
      plan = ResponseHelper.parse_data_response(response.body)

      plan[:lists].each do |list|
        plan[:data_urls][list].each do |url|
          puts "#{list} - #{url}\n"
          ResponseHelper.receive_data('http://' + url, list)
        end
      end

      plan[:delay_seconds]
    end

    # Checks +url+ against the stored hashes.
    #
    # Returns the friendly list name ('malware' or 'phishing') when the url is
    # listed, otherwise nil.
    def self.lookup(url)
      lookup_urls = Canonicalize.urls_for_lookup(url)
      hashes = HashHelper.urls_to_hashes(lookup_urls)
      raw_hash_array = hashes.map { |h| h.to_s }

      # A stored full hash answers the question without further work.
      known = FullHash.where(:full_hash => raw_hash_array).first
      return GoogleSafeBrowsing.friendly_list_name(known.list) if known

      prefixes = hashes.map { |h| h.prefix }
      hits  = AddShavar.where(:prefix => prefixes).map { |row| [ row.list, row.prefix ] }
      safes = SubShavar.where(:prefix => prefixes).map { |row| [ row.list, row.prefix ] }

      # Add entries that are not cancelled by a matching sub entry.
      reals = hits - safes
      return nil if reals.empty?

      full_hashes = HttpHelper.request_full_hashes(reals.map { |pair| pair[1] })

      # Every returned full hash is stored, so the loop must run to the end
      # rather than stopping at the first match.
      hit_list = nil
      full_hashes.each do |hash|
        FullHash.create!(:list => hash[:list],
                         :add_chunk_number => hash[:add_chunk_num],
                         :full_hash => hash[:full_hash])

        hit_list = hash[:list] if raw_hash_array.include?(hash[:full_hash])
      end

      GoogleSafeBrowsing.friendly_list_name(hit_list)
    end

    # Blocks until +delay_seconds+ have elapsed, printing the remaining time
    # and sleeping in 10 second steps.
    def self.delay(delay_seconds)
      puts "Google told us to wait for #{delay_seconds} seconds"
      puts "We will wait...."

      started_at = Time.now
      deadline = started_at + delay_seconds
      until Time.now >= deadline
        puts "#{(delay_seconds - (Time.now - started_at)).to_i}..."
        sleep(10)
      end

      puts "Thank you for being patient"
    end
  end
end
@@ -0,0 +1,40 @@
1
module GoogleSafeBrowsing
  # Helpers for reading raw binary data and converting it to and from hex.
  class BinaryHelper

    # Reads +count+ items from +iter+ and returns them as a lowercase hex string.
    def self.read_bytes_as_hex(iter, count)
      read_bytes_from(iter, count).unpack("H#{count * 2}")[0]
    end

    # Returns the first four bytes of +string+ as an 8-character hex string.
    def self.four_as_hex(string)
      string.unpack('H8')[0]
    end

    # Pulls +count+ items from +iter+ (anything responding to #next) and
    # concatenates them into a new string.
    #
    # No rescue is installed: if the iterator raises when it runs out
    # (StopIteration for an Enumerator), that error reaches the caller.
    def self.read_bytes_from(iter, count)
      ret = ''.dup
      count.to_i.times { ret << iter.next }
      ret
    end

    # Returns the first four bytes of +bin+ as an 8-character hex string.
    def self.unpack_host_key(bin)
      bin.unpack('H8')[0]
    end

    # Decodes the first byte of +bin+ as an unsigned 8-bit integer.
    #
    # The previous 'U' directive decoded a UTF-8 character instead. That gives
    # the same result for 0..127 but raises or mis-decodes for bytes >= 0x80.
    # NOTE(review): assumes the count field is a single byte - confirm against
    # the caller / chunk format.
    def self.unpack_count(bin)
      bin.unpack('C')[0]
    end

    # Decodes the first four bytes of +bin+ as a big-endian unsigned 32-bit integer.
    def self.unpack_add_chunk_num(bin)
      bin.unpack('N')[0]
    end

    # Converts a hex string to the bytes it encodes.
    #
    # Array() replaces String#to_a, which no longer exists since Ruby 1.9;
    # an Array argument is still used as-is.
    def self.hex_to_bin(hex)
      Array(hex).pack('H*')
    end

  end
end
@@ -0,0 +1,181 @@
1
+ require 'uri'
2
+ require 'ip'
3
+ require File.dirname(__FILE__) + '/top_level_domain.rb'
4
+
5
module GoogleSafeBrowsing
  # Normalises URLs into a canonical form and expands a URL into the
  # host/path combinations used for lookups.
  class Canonicalize

    PROTOCOL_DELIMITER = '://'
    DEFAULT_PROTOCOL = 'http'

    # Matches a scheme followed by PROTOCOL_DELIMITER at the very start of a
    # string, so that a '://' appearing later (for example inside a query
    # string) is not mistaken for the protocol separator.
    LEADING_PROTOCOL = /\A([a-z][a-z0-9+.\-]*)#{Regexp.escape(PROTOCOL_DELIMITER)}/i

    # Object providing #escape / #unescape. URI.escape and URI.unescape were
    # removed in Ruby 3.0; URI::RFC2396_Parser implements the same behaviour.
    # Older Rubies without that class fall back to the URI module itself.
    ESCAPER = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI

    # Returns the canonical form of +raw_url+ as a new string.
    #
    # Steps, in order: drop tabs/CR/LF, trim surrounding whitespace, drop the
    # fragment, unescape until stable, normalise host and path, prepend the
    # protocol (DEFAULT_PROTOCOL when the input has none) and escape.
    #
    # +raw_url+ itself is not modified, and no state is kept between calls, so
    # the protocol of one URL cannot leak into the next one.
    def self.url(raw_url)
      cann = raw_url.gsub(/[\t\r\n]/, '')
      cann = cann.gsub(/\A\s+|\s+\z/, '')

      cann = remove_fragment(cann)

      # repeatedly unescape until no more escaping
      cann = recursively_unescape(cann)

      protocol, cann = split_protocol(cann)

      splits = split_host_path(cann)
      cann = fix_host(splits[:host]) + '/' + fix_path(splits[:path])

      strict_escape((protocol || DEFAULT_PROTOCOL) + PROTOCOL_DELIMITER + cann)
    end

    # Returns the unique host/path expressions to look up for +lookup_url+:
    # every host variant joined with every path variant, followed by the bare
    # host variants.
    def self.urls_for_lookup(lookup_url)
      lookup_url = remove_protocol(url(lookup_url))
      splits = split_host_path(lookup_url)

      # The exact host, plus progressively shorter suffixes built from at most
      # the last five components returned by TopLevelDomain.split_from_host.
      host_strings = [splits[:host]]
      host = TopLevelDomain.split_from_host(splits[:host]).last(5)
      ( host.length - 1 ).times do
        host_strings << host.join('.')
        host.shift
      end
      host_strings.uniq!

      # Path portion before the first '?' (nil when the path is empty).
      path = splits[:path].split('?')[0]

      path_strings = [ splits[:path], '/' ]
      if path
        path_strings << path
        # Prefixes built from at most the first three path segments.
        paths_to_append = path.split('/').first(3)
        paths_to_append.length.times do
          path_strings << paths_to_append.join('/')
          paths_to_append.pop
        end
      end
      # Wrap every entry in slashes and collapse the resulting slash runs.
      path_strings = path_strings.map { |p| ('/' + p + '/').gsub(/\/+/, '/') }.uniq

      ( cart_prod(host_strings, path_strings) + host_strings ).uniq
    end

    # The helpers below are support methods for the two entry points above.
    # They remain publicly callable: a bare `private` does not apply to
    # methods defined with `def self.`.

    # Returns every "#{i}#{j}" for i in +a_one+ and j in +a_two+, ordered by
    # +a_one+ first.
    def self.cart_prod(a_one, a_two)
      a_one.product(a_two).map { |i, j| "#{i}#{j}" }
    end

    # Splits +cann+ at its first '/' into { :host => ..., :path => ... }.
    # The slash itself is dropped; without a slash the path is ''.
    def self.split_host_path(cann)
      split_point = cann.index('/')
      return { :host => cann, :path => '' } unless split_point

      # [0, n] instead of [0..n-1]: with n == 0 the range form would return
      # the whole string rather than an empty host.
      { :host => cann[0, split_point], :path => cann[(split_point + 1)..-1] }
    end

    # Returns +string+ without its first '#' and everything after it.
    def self.remove_fragment(string)
      hash_at = string.index('#')
      hash_at ? string[0, hash_at] : string
    end

    # Percent-decodes +url+ repeatedly until decoding changes nothing.
    def self.recursively_unescape(url)
      loop do
        unescaped = ESCAPER.unescape(url)
        return unescaped if unescaped == url
        url = unescaped
      end
    end

    # Normalises a host name: strips leading/trailing dots, collapses runs of
    # dots and lowercases. Returns a new string; +host+ is not modified.
    #
    # A host made up only of digits with a value above 256 is handed to
    # IP::V4 (presumably yielding dotted-quad notation - provided by the `ip`
    # gem, not visible here). Requiring digits only keeps names that merely
    # start with a number, such as '360.cn', from being rewritten.
    def self.fix_host(host)
      host = host.gsub(/\A\.+|\.+\Z/, '').gsub(/\.+/, '.').downcase

      host = IP::V4.new(host.to_i).to_s if host =~ /\A\d+\z/ && host.to_i > 256

      host
    end

    # Normalises the path part of a URL (everything after the host's slash),
    # leaving a query string starting at the first '?' untouched.
    #
    # Empty and '.' segments are dropped, '..' removes the preceding segment,
    # and a trailing slash is kept when at least one segment remains.
    def self.fix_path(path)
      #remove leading slash
      path = path[1..-1] if path[0..0] == '/'

      params = nil
      first_ques = path.index('?')
      if first_ques
        params = path[first_ques..-1]
        # [0, n] rather than [0..n-1]: a path that starts with '?' must leave
        # an empty path, not a second copy of the query string.
        path = path[0, first_ques]
      end

      # Checked after the query is split off so a '/' that ends the query
      # does not count as a trailing slash of the path.
      preserve_trailing_slash = ( path[-1..-1] == '/' )

      segments = []
      path.split('/').each do |segment|
        next if segment.empty? || segment == '.'

        if segment == '..'
          segments.pop
        else
          segments << segment
        end
      end

      path = segments.join('/')
      path += '/' if preserve_trailing_slash && !segments.empty?
      path += params if params

      path
    end

    # Percent-escapes +url+, leaving '^' unescaped.
    def self.strict_escape(url)
      # unescape carat, may need other optionally escapeable chars
      ESCAPER.escape(url).gsub('%5E', '^')
    end

    # Returns +cann+ without a leading "<scheme>://" prefix, if it has one.
    def self.remove_protocol(cann)
      split_protocol(cann).last
    end

    # Splits a leading "<scheme>://" off +cann+.
    # Returns [scheme, rest], or [nil, cann] when there is no leading scheme.
    def self.split_protocol(cann)
      match = LEADING_PROTOCOL.match(cann)
      return [nil, cann] unless match

      [match[1], match.post_match]
    end
  end
end