google_safe_browsing 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.mkd CHANGED
@@ -1,15 +1,15 @@
1
- # Google Safe Browsing Rails 3 Plugin
1
+ # Google Safe Browsing Gem
2
2
 
3
- This gem allows easy Google Safe Browsing integration
4
- with Rails 3 apps.
3
+ This gem allows easy Google Safe Browsing APIv2 usage with optional integration
4
+ into Rails 3 apps.
5
5
 
6
6
  It includes:
7
7
 
8
8
  * a migration generator for database schema
9
- * method to update your lists
9
+ * method to update hash database
10
10
  * method to lookup a url
11
11
  * rake tasks to update hash database
12
- * Autonomous updating via Resque and Resque Scheduler
12
+ * Autonomous updating via Resque and Resque Scheduler (optional)
13
13
 
14
14
  ----------------------
15
15
 
@@ -96,6 +96,4 @@ https://github.com/codelux/malware_api
96
96
 
97
97
  ------------------
98
98
 
99
- Thank you for using my gem! Please report any bugs or issues. Contributions are also always welcome!
100
-
101
- -- Chris Marshall
99
+ Thank you for using this gem! Please report any bugs or issues.
@@ -20,6 +20,8 @@ require File.dirname(__FILE__) + '/google_safe_browsing/full_hash'
20
20
  require File.dirname(__FILE__) + '/google_safe_browsing/rescheduler'
21
21
 
22
22
  module GoogleSafeBrowsing
23
+
24
+ # Handles the configuration values for the module
23
25
  class Config
24
26
  attr_accessor :client, :app_ver, :p_ver, :host, :current_lists, :api_key
25
27
 
@@ -32,19 +34,26 @@ module GoogleSafeBrowsing
32
34
  end
33
35
  end
34
36
 
37
+ # Returns or initializes the Module configuration
35
38
  def self.config
36
39
  @@config ||= Config.new
37
40
  end
38
41
 
42
+ # Allows for setting config values via a block
39
43
  def self.configure
40
44
  yield self.config
41
45
  end
42
46
 
47
+ # Adds the Rescheduler job to Resque
43
48
  def self.kick_off
44
49
  Resque.enqueue(Rescheduler)
45
50
  end
46
51
 
47
52
 
53
+ # Converts the official Google list name into the name to return
54
+ #
55
+ # @param (String) list the 'official' list name
56
+ # @return (String) the friendly list name
48
57
  def self.friendly_list_name(list)
49
58
  case list
50
59
  when 'goog-malware-shavar'
@@ -1,5 +1,9 @@
1
1
  module GoogleSafeBrowsing
2
+ # Main Interface for Module
2
3
  class APIv2
4
+ # Completes an update
5
+ #
6
+ # @return (Integer) the number of seconds before this method should be called again
3
7
  def self.update
4
8
  data_response = HttpHelper.get_data
5
9
 
@@ -14,8 +18,13 @@ module GoogleSafeBrowsing
14
18
  to_do_array[:delay_seconds]
15
19
  end
16
20
 
21
+ # Performs a lookup of the given url
22
+ #
23
+ # @param (String) url a url string to be looked up
24
+ # @return (String, nil) the friendly list name if found, `nil` if not found, or an empty string when no lookup urls can be generated for `url`
17
25
  def self.lookup(url)
18
26
  urls = Canonicalize.urls_for_lookup(url)
27
+ return '' if urls.empty?
19
28
 
20
29
  hashes = HashHelper.urls_to_hashes(urls)
21
30
  raw_hash_array = hashes.collect{ |h| h.to_s }
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
46
55
  nil
47
56
  end
48
57
 
58
+ # Can be used to force a delay into a script running updates
59
+ #
60
+ # @param (Integer) delay_seconds the number of seconds to delay, should be the return value of {update}
49
61
  def self.delay(delay_seconds)
50
62
  puts "Google told us to wait for #{delay_seconds} seconds"
51
63
  puts "We will wait...."
@@ -1,15 +1,29 @@
1
1
  module GoogleSafeBrowsing
2
+ # Helper methods for working with binary encoded data from Forwarding URLs
2
3
  class BinaryHelper
3
4
 
5
+ # Reads `count` bytes from byte iterator `iter` and returns the hex string representation
6
+ #
7
+ # @param [ByteIterator] iter byte iterator already at correct position
8
+ # @param [Integer] count number of bytes to read
9
+ # @return [String] hexadecimal string
4
10
  def self.read_bytes_as_hex(iter, count)
5
11
  read_bytes_from(iter, count).unpack("H#{count * 2}")[0]
6
12
  end
7
13
 
14
+ # Returns the first four bytes of `string` as hexadecimal
15
+ #
16
+ # @param [String] string to unpack the first four bytes as hex
17
+ # @return (see read_bytes_as_hex)
8
18
  def self.four_as_hex(string)
9
19
  string.unpack('H8')[0]
10
20
  end
11
21
 
12
22
 
23
+ # Read `count` bytes from `iter` without unpacking the result
24
+ #
25
+ # @param (see read_bytes_as_hex)
26
+ # @return (String) not unpacked string from `iter`
13
27
  def self.read_bytes_from(iter, count)
14
28
  ret = ''
15
29
  count.to_i.times { ret << iter.next }
@@ -19,19 +33,34 @@ module GoogleSafeBrowsing
19
33
  # return nil
20
34
  end
21
35
 
36
+ # Returns the first four bytes of `bin` as hexadecimal; for host key
37
+ # @param (String) bin string to unpack
38
+ # @return (String) unpacked string
22
39
  def self.unpack_host_key(bin)
23
40
  bin.unpack('H8')[0]
24
41
  end
25
42
 
43
+ # Unpack string as an unsigned integer; for count
44
+ #
45
+ # @param (see unpack_host_key)
46
+ # @return (see unpack_host_key)
26
47
  def self.unpack_count(bin)
27
48
  # this may not be correct
28
49
  bin.unpack('U')[0]
29
50
  end
30
51
 
52
+ # Unpack string as a 32-bit unsigned integer in network (big-endian) byte order
53
+ #
54
+ # @param (see unpack_count)
55
+ # @return (see unpack_count)
31
56
  def self.unpack_add_chunk_num(bin)
32
57
  bin.unpack('N')[0]
33
58
  end
34
59
 
60
+ # Pack a Hex String into binary
61
+ #
62
+ # @param (String) hex string to encode
63
+ # @return (String) encoded string
35
64
  def self.hex_to_bin(hex)
36
65
  hex.to_a.pack('H*')
37
66
  end
@@ -3,11 +3,16 @@ require 'ip'
3
3
  require File.dirname(__FILE__) + '/top_level_domain.rb'
4
4
 
5
5
  module GoogleSafeBrowsing
6
+ # Helpers to Canonicalize urls and generate url permutations for lookups
6
7
  class Canonicalize
7
8
 
8
9
  PROTOCOL_DELIMITER = '://'
9
10
  DEFAULT_PROTOCOL = 'http'
10
11
 
12
+ # Base Canonicalizer method
13
+ #
14
+ # @param (String) raw_url uncanonicalized url string
15
+ # @return (String) canonicalized url string
11
16
  def self.url(raw_url)
12
17
  #puts raw_url
13
18
  #remove tabs, carriage returns and line feeds
@@ -37,6 +42,10 @@ module GoogleSafeBrowsing
37
42
  strict_escape(cann)
38
43
  end
39
44
 
45
+ # Generate the url permutations for lookup
46
+ #
47
+ # @param (String) lookup_url uncanonicalized url string
48
+ # @return (Array) array of canonicalized url permutation strings
40
49
  def self.urls_for_lookup(lookup_url)
41
50
  lookup_url = url(lookup_url)
42
51
 
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
46
55
 
47
56
  host_string = strip_username_password_and_port_from_host(splits[:host])
48
57
 
58
+ #return empty array unless host_string has at least one period
59
+ return [] unless host_string.include?('.')
60
+
49
61
  host_strings = [host_string]
50
62
  host = TopLevelDomain.split_from_host(host_string).last(5)
51
63
  ( host.length - 1 ).times do
@@ -61,45 +73,54 @@ module GoogleSafeBrowsing
61
73
 
62
74
  private
63
75
 
64
- def self.generate_path_strings(raw_path)
65
- return [ '/', '' ] if raw_path == ''
66
-
67
- path_split = raw_path.split('?')
68
- path = path_split[0]
69
- params = path_split[1]
76
+ # Generates the path permutations from the raw path string
77
+ #
78
+ # @param (String) raw_path path split from the full url string
79
+ # @return (Array) array of path permutation strings
80
+ def self.generate_path_strings(raw_path)
81
+ return [ '/', '' ] if raw_path == ''
70
82
 
83
+ path_split = raw_path.split('?')
84
+ path = path_split[0]
85
+ params = path_split[1]
71
86
 
72
- path_components = path.split('/').first(3)
73
- path_strings = [ '/' ]
74
- path_components.length.times do
75
- path_strings << '/' + path_components.join('/')
76
- path_components.pop
77
- end
78
87
 
79
- path_strings.map! do |p|
80
- unless p.index('.')
81
- p + '/'
82
- else
83
- p
88
+ path_components = path.split('/').first(3)
89
+ path_strings = [ '/' ]
90
+ path_components.length.times do
91
+ path_strings << '/' + path_components.join('/')
92
+ path_components.pop
84
93
  end
85
- end
86
- path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
87
- path_strings.compact!
88
- path_strings.uniq!
89
94
 
90
- if params
91
- path_strings | path_strings.map do |p|
92
- if p[-1..-1] == '/'
93
- p
95
+ path_strings.map! do |p|
96
+ unless p.index('.')
97
+ p + '/'
94
98
  else
95
- "#{p}?#{params}"
99
+ p
96
100
  end
97
101
  end
98
- else
99
- return path_strings
102
+ path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
103
+ path_strings.compact!
104
+ path_strings.uniq!
105
+
106
+ if params
107
+ path_strings | path_strings.map do |p|
108
+ if p[-1..-1] == '/'
109
+ p
110
+ else
111
+ "#{p}?#{params}"
112
+ end
113
+ end
114
+ else
115
+ return path_strings
116
+ end
100
117
  end
101
- end
102
118
 
119
+ # Returns the cartesian product of two arrays by concatenation of the string representation of the elements
120
+ #
121
+ # @param (Array) a_one array of strings
122
+ # @param (Array) a_two array of strings
123
+ # @return (Array) cartesian product of arrays with elements concatenated
103
124
  def self.cart_prod(a_one, a_two)
104
125
  result = []
105
126
  a_one.each do |i|
@@ -110,6 +131,10 @@ module GoogleSafeBrowsing
110
131
  result
111
132
  end
112
133
 
134
+ # Takes the canonicalized url and splits the host and the path apart
135
+ #
136
+ # @param (String) cann canonicalized url string
137
+ # @return (Hash) !{ :host => host_part, :path => path_part }
113
138
  def self.split_host_path(cann)
114
139
  ret= { :host => cann, :path => '' }
115
140
  split_point = cann.index('/')
@@ -121,11 +146,19 @@ module GoogleSafeBrowsing
121
146
  ret
122
147
  end
123
148
 
149
+ # Strips the fragment portion of the url string (the last '#' and everything after)
150
+ #
151
+ # @param (String) string url
152
+ # @return (String) parameter with the fragment removed
124
153
  def self.remove_fragment(string)
125
154
  string = string[0..string.index('#')-1] if string.index('#')
126
155
  string
127
156
  end
128
157
 
158
+ # Continues to unescape the url until unescaping has no effect
159
+ #
160
+ # @param (String) url url string
161
+ # @return (String) fully unescaped url string
129
162
  def self.recursively_unescape(url)
130
163
  compare_url = url.clone
131
164
  url = URI.unescape(url)
@@ -136,6 +169,10 @@ module GoogleSafeBrowsing
136
169
  url
137
170
  end
138
171
 
172
+ # Apply initial fixes to host string
173
+ #
174
+ # @param (String) host host string
175
+ # @return (String) standardized host string
139
176
  def self.fix_host(host)
140
177
  #puts "In Host: #{host}"
141
178
  # remove leading and trailing dots, multiple dots to one
@@ -149,6 +186,10 @@ module GoogleSafeBrowsing
149
186
  host
150
187
  end
151
188
 
189
+ # Apply initial fixes to path string
190
+ #
191
+ # @param (String) path path string
192
+ # @return (String) standardized path string
152
193
  def self.fix_path(path)
153
194
  #puts "In Path: #{path}"
154
195
 
@@ -179,6 +220,10 @@ module GoogleSafeBrowsing
179
220
  path
180
221
  end
181
222
 
223
+ # Escape the url, but do not escape certain characters; such as the caret
224
+ #
225
+ # @param (String) url url string
226
+ # @return (String) escaped url string
182
227
  def self.strict_escape(url)
183
228
  url = URI.escape url
184
229
 
@@ -188,6 +233,10 @@ module GoogleSafeBrowsing
188
233
  url
189
234
  end
190
235
 
236
+ # Strip the leading protocol from the url string
237
+ #
238
+ # @param (String) cann url string
239
+ # @return (String) url string without the protocol
191
240
  def self.remove_protocol(cann)
192
241
  if cann.index(PROTOCOL_DELIMITER)
193
242
  delimiting_index = cann.index(PROTOCOL_DELIMITER)
@@ -198,11 +247,19 @@ module GoogleSafeBrowsing
198
247
  cann
199
248
  end
200
249
 
250
+ # Strip the user name, password and port number from the url
251
+ #
252
+ # @param (String) host_string host portion of the url
253
+ # @return (String) host portion of the url without the username, password and port
201
254
  def self.strip_username_password_and_port_from_host(host_string)
202
255
  host_string = remove_port(host_string)
203
256
  remove_username_and_password(host_string)
204
257
  end
205
258
 
259
+ # Strip port number from host string
260
+ #
261
+ # @param (see strip_username_password_and_port_from_host)
262
+ # @return (String) host part without the port number
206
263
  def self.remove_port(host_string)
207
264
  port_sep = host_string.rindex(':')
208
265
  if port_sep
@@ -212,6 +269,10 @@ module GoogleSafeBrowsing
212
269
  end
213
270
  end
214
271
 
272
+ # Strip user name and password from host part of url
273
+ #
274
+ # @param (see remove_port)
275
+ # @return (String) host part of url without user name or password
215
276
  def self.remove_username_and_password(host_string)
216
277
  un_sep = host_string.index('@')
217
278
  if un_sep
@@ -20,6 +20,9 @@ module GoogleSafeBrowsing
20
20
  tld
21
21
  end
22
22
 
23
+ # return array of host components (www, example, com from www.example.com)
24
+ # taking top level domains into account
25
+ # e.g. 'sub.domain.example.co.uk' => [ 'sub', 'domain', 'example', 'co.uk' ]
23
26
  def self.split_from_host(host)
24
27
  components = host.split('.')
25
28
 
@@ -1,3 +1,3 @@
1
1
  module GoogleSafeBrowsing
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_safe_browsing
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 0
10
- version: 0.3.0
9
+ - 1
10
+ version: 0.3.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Chris Marshall
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-05 00:00:00 Z
18
+ date: 2012-04-09 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  prerelease: false
@@ -136,7 +136,7 @@ files:
136
136
  - MIT-LICENSE
137
137
  - Rakefile
138
138
  - README.mkd
139
- homepage: https://github.com/chrismar035/google_safe_browsing
139
+ homepage: https://github.com/mobiledefense/mobiledefense_google_safe_browsing
140
140
  licenses: []
141
141
 
142
142
  post_install_message: