google_safe_browsing 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.mkd CHANGED
@@ -1,15 +1,15 @@
1
- # Google Safe Browsing Rails 3 Plugin
1
+ # Google Safe Browsing Gem
2
2
 
3
- This gem allows easy Google Safe Browsing integration
4
- with Rails 3 apps.
3
+ This gem allows easy Google Safe Browsing APIv2 usage with optional integration
4
+ into Rails 3 apps.
5
5
 
6
6
  It includes:
7
7
 
8
8
  * a migration generator for database schema
9
- * method to update your lists
9
+ * method to update hash database
10
10
  * method to lookup a url
11
11
  * rake tasks to update hash database
12
- * Autonomous updating via Resque and Resque Scheduler
12
+ * Autonomous updating via Resque and Resque Scheduler (optional)
13
13
 
14
14
  ----------------------
15
15
 
@@ -96,6 +96,4 @@ https://github.com/codelux/malware_api
96
96
 
97
97
  ------------------
98
98
 
99
- Thank you for using my gem! Please report any bugs or issues. Contributions are also always welcome!
100
-
101
- -- Chris Marshall
99
+ Thank you for using this gem! Please report any bugs or issues.
@@ -20,6 +20,8 @@ require File.dirname(__FILE__) + '/google_safe_browsing/full_hash'
20
20
  require File.dirname(__FILE__) + '/google_safe_browsing/rescheduler'
21
21
 
22
22
  module GoogleSafeBrowsing
23
+
24
+ # Handles the configuration values for the module
23
25
  class Config
24
26
  attr_accessor :client, :app_ver, :p_ver, :host, :current_lists, :api_key
25
27
 
@@ -32,19 +34,26 @@ module GoogleSafeBrowsing
32
34
  end
33
35
  end
34
36
 
37
+ # Returns or initializes the Module configuration
35
38
  def self.config
36
39
  @@config ||= Config.new
37
40
  end
38
41
 
42
+ # Allows for setting config values via a block
39
43
  def self.configure
40
44
  yield self.config
41
45
  end
42
46
 
47
+ # Adds the Rescheduler job to Resque
43
48
  def self.kick_off
44
49
  Resque.enqueue(Rescheduler)
45
50
  end
46
51
 
47
52
 
53
+ # Converts the official Google list name into the name to return
54
+ #
55
+ # @param (String) list the 'official' list name
56
+ # @return (String) the friendly list name
48
57
  def self.friendly_list_name(list)
49
58
  case list
50
59
  when 'goog-malware-shavar'
@@ -1,5 +1,9 @@
1
1
  module GoogleSafeBrowsing
2
+ # Main Interface for Module
2
3
  class APIv2
4
+ # Completes an update
5
+ #
6
+ # @return (Integer) the number of seconds before this method should be called again
3
7
  def self.update
4
8
  data_response = HttpHelper.get_data
5
9
 
@@ -14,8 +18,13 @@ module GoogleSafeBrowsing
14
18
  to_do_array[:delay_seconds]
15
19
  end
16
20
 
21
+ # Performs a lookup of the given url
22
+ #
23
+ # @param (String) url a url string to be looked up
24
+ # @return (String, nil) the friendly list name if found, `nil` if not found, or an empty string when no lookup urls can be generated from `url`
17
25
  def self.lookup(url)
18
26
  urls = Canonicalize.urls_for_lookup(url)
27
+ return '' if urls.empty?
19
28
 
20
29
  hashes = HashHelper.urls_to_hashes(urls)
21
30
  raw_hash_array = hashes.collect{ |h| h.to_s }
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
46
55
  nil
47
56
  end
48
57
 
58
+ # Can be used to force a delay into a script running updates
59
+ #
60
+ # @param (Integer) delay_seconds the number of seconds to delay, should be the return value of {update}
49
61
  def self.delay(delay_seconds)
50
62
  puts "Google told us to wait for #{delay_seconds} seconds"
51
63
  puts "We will wait...."
@@ -1,15 +1,29 @@
1
1
  module GoogleSafeBrowsing
2
+ # Helper methods for working with binary encoded data from Forwarding URLs
2
3
  class BinaryHelper
3
4
 
5
+ # Reads `count` bytes from byte iterator `iter` and returns the hex string representation
6
+ #
7
+ # @param [ByteIterator] iter byte iterator already at correct position
8
+ # @param [Integer] count number of bytes to read
9
+ # @return [String] hexadecimal string
4
10
  def self.read_bytes_as_hex(iter, count)
5
11
  read_bytes_from(iter, count).unpack("H#{count * 2}")[0]
6
12
  end
7
13
 
14
+ # Returns the first four bytes of `string` as hexadecimal
15
+ #
16
+ # @param [String] string to unpack the first four bytes as hex
17
+ # @return (see read_bytes_as_hex)
8
18
  def self.four_as_hex(string)
9
19
  string.unpack('H8')[0]
10
20
  end
11
21
 
12
22
 
23
+ # Read `count` bytes from `iter` without unpacking the result
24
+ #
25
+ # @param (see read_bytes_as_hex)
26
+ # @return (String) not unpacked string from `iter`
13
27
  def self.read_bytes_from(iter, count)
14
28
  ret = ''
15
29
  count.to_i.times { ret << iter.next }
@@ -19,19 +33,34 @@ module GoogleSafeBrowsing
19
33
  # return nil
20
34
  end
21
35
 
36
+ # Returns the first four bytes of `bin` as hexadecimal; for host key
37
+ # @param (String) bin string to unpack
38
+ # @return (String) unpacked string
22
39
  def self.unpack_host_key(bin)
23
40
  bin.unpack('H8')[0]
24
41
  end
25
42
 
43
+ # Unpack string as an unsigned integer; for count
44
+ #
45
+ # @param (see unpack_host_key)
46
+ # @return (Integer) unpacked integer value
26
47
  def self.unpack_count(bin)
27
48
  # this may not be correct
28
49
  bin.unpack('U')[0]
29
50
  end
30
51
 
52
+ # Unpack string as a 32-bit unsigned integer in network (big-endian) byte order
53
+ #
54
+ # @param (see unpack_count)
55
+ # @return (Integer) unpacked integer value
31
56
  def self.unpack_add_chunk_num(bin)
32
57
  bin.unpack('N')[0]
33
58
  end
34
59
 
60
+ # Pack a Hex String into binary
61
+ #
62
+ # @param (String) hex string to encode
63
+ # @return (String) encoded string
35
64
  def self.hex_to_bin(hex)
36
65
  hex.to_a.pack('H*')
37
66
  end
@@ -3,11 +3,16 @@ require 'ip'
3
3
  require File.dirname(__FILE__) + '/top_level_domain.rb'
4
4
 
5
5
  module GoogleSafeBrowsing
6
+ # Helpers to Canonicalize urls and generate url permutations for lookups
6
7
  class Canonicalize
7
8
 
8
9
  PROTOCOL_DELIMITER = '://'
9
10
  DEFAULT_PROTOCOL = 'http'
10
11
 
12
+ # Base Canonicalizer method
13
+ #
14
+ # @param (String) raw_url uncanonicalized url string
15
+ # @return (String) canonicalized url string
11
16
  def self.url(raw_url)
12
17
  #puts raw_url
13
18
  #remove tabs, carriage returns and line feeds
@@ -37,6 +42,10 @@ module GoogleSafeBrowsing
37
42
  strict_escape(cann)
38
43
  end
39
44
 
45
+ # Generate the url permutations for lookup
46
+ #
47
+ # @param (String) lookup_url uncanonicalized url string
48
+ # @return (Array) array of canonicalized url permutation strings
40
49
  def self.urls_for_lookup(lookup_url)
41
50
  lookup_url = url(lookup_url)
42
51
 
@@ -46,6 +55,9 @@ module GoogleSafeBrowsing
46
55
 
47
56
  host_string = strip_username_password_and_port_from_host(splits[:host])
48
57
 
58
+ # return an empty array unless host_string contains at least one period
59
+ return [] unless host_string.include?('.')
60
+
49
61
  host_strings = [host_string]
50
62
  host = TopLevelDomain.split_from_host(host_string).last(5)
51
63
  ( host.length - 1 ).times do
@@ -61,45 +73,54 @@ module GoogleSafeBrowsing
61
73
 
62
74
  private
63
75
 
64
- def self.generate_path_strings(raw_path)
65
- return [ '/', '' ] if raw_path == ''
66
-
67
- path_split = raw_path.split('?')
68
- path = path_split[0]
69
- params = path_split[1]
76
+ # Generates the path permutations from the raw path string
77
+ #
78
+ # @param (String) raw_path path split from the full url string
79
+ # @return (Array) array of path permutation strings
80
+ def self.generate_path_strings(raw_path)
81
+ return [ '/', '' ] if raw_path == ''
70
82
 
83
+ path_split = raw_path.split('?')
84
+ path = path_split[0]
85
+ params = path_split[1]
71
86
 
72
- path_components = path.split('/').first(3)
73
- path_strings = [ '/' ]
74
- path_components.length.times do
75
- path_strings << '/' + path_components.join('/')
76
- path_components.pop
77
- end
78
87
 
79
- path_strings.map! do |p|
80
- unless p.index('.')
81
- p + '/'
82
- else
83
- p
88
+ path_components = path.split('/').first(3)
89
+ path_strings = [ '/' ]
90
+ path_components.length.times do
91
+ path_strings << '/' + path_components.join('/')
92
+ path_components.pop
84
93
  end
85
- end
86
- path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
87
- path_strings.compact!
88
- path_strings.uniq!
89
94
 
90
- if params
91
- path_strings | path_strings.map do |p|
92
- if p[-1..-1] == '/'
93
- p
95
+ path_strings.map! do |p|
96
+ unless p.index('.')
97
+ p + '/'
94
98
  else
95
- "#{p}?#{params}"
99
+ p
96
100
  end
97
101
  end
98
- else
99
- return path_strings
102
+ path_strings.map!{ |p| p.to_s.gsub!(/\/+/, '/') }
103
+ path_strings.compact!
104
+ path_strings.uniq!
105
+
106
+ if params
107
+ path_strings | path_strings.map do |p|
108
+ if p[-1..-1] == '/'
109
+ p
110
+ else
111
+ "#{p}?#{params}"
112
+ end
113
+ end
114
+ else
115
+ return path_strings
116
+ end
100
117
  end
101
- end
102
118
 
119
+ # Returns the cartesian product of two arrays by concatenation of the string representation of the elements
120
+ #
121
+ # @param (Array) a_one array of strings
122
+ # @param (Array) a_two array of strings
123
+ # @return (Array) cartesian product of arrays with elements concatenated
103
124
  def self.cart_prod(a_one, a_two)
104
125
  result = []
105
126
  a_one.each do |i|
@@ -110,6 +131,10 @@ module GoogleSafeBrowsing
110
131
  result
111
132
  end
112
133
 
134
+ # Takes the canonicalized url and splits the host and the path apart
135
+ #
136
+ # @param (String) cann canonicalized url string
137
+ # @return (Hash) !{ :host => host_part, :path => path_part }
113
138
  def self.split_host_path(cann)
114
139
  ret= { :host => cann, :path => '' }
115
140
  split_point = cann.index('/')
@@ -121,11 +146,19 @@ module GoogleSafeBrowsing
121
146
  ret
122
147
  end
123
148
 
149
+ # Strips the fragment portion of the url string (the last '#' and everything after)
150
+ #
151
+ # @param (String) string url
152
+ # @return (String) parameter with the fragment removed
124
153
  def self.remove_fragment(string)
125
154
  string = string[0..string.index('#')-1] if string.index('#')
126
155
  string
127
156
  end
128
157
 
158
+ # Continues to unescape the url until unescaping has no effect
159
+ #
160
+ # @param (String) url url string
161
+ # @return (String) fully unescaped url string
129
162
  def self.recursively_unescape(url)
130
163
  compare_url = url.clone
131
164
  url = URI.unescape(url)
@@ -136,6 +169,10 @@ module GoogleSafeBrowsing
136
169
  url
137
170
  end
138
171
 
172
+ # Apply initial fixes to host string
173
+ #
174
+ # @param (String) host host string
175
+ # @return (String) standardized host string
139
176
  def self.fix_host(host)
140
177
  #puts "In Host: #{host}"
141
178
  # remove leading and trailing dots, multiple dots to one
@@ -149,6 +186,10 @@ module GoogleSafeBrowsing
149
186
  host
150
187
  end
151
188
 
189
+ # Apply initial fixes to path string
190
+ #
191
+ # @param (String) path path string
192
+ # @return (String) standardized path string
152
193
  def self.fix_path(path)
153
194
  #puts "In Path: #{path}"
154
195
 
@@ -179,6 +220,10 @@ module GoogleSafeBrowsing
179
220
  path
180
221
  end
181
222
 
223
+ # Escape the url, but do not escape certain characters, such as the caret
224
+ #
225
+ # @param (String) url url string
226
+ # @return (String) escaped url string
182
227
  def self.strict_escape(url)
183
228
  url = URI.escape url
184
229
 
@@ -188,6 +233,10 @@ module GoogleSafeBrowsing
188
233
  url
189
234
  end
190
235
 
236
+ # Strip the leading protocol from the url string
237
+ #
238
+ # @param (String) cann url string
239
+ # @return (String) url string without the protocol
191
240
  def self.remove_protocol(cann)
192
241
  if cann.index(PROTOCOL_DELIMITER)
193
242
  delimiting_index = cann.index(PROTOCOL_DELIMITER)
@@ -198,11 +247,19 @@ module GoogleSafeBrowsing
198
247
  cann
199
248
  end
200
249
 
250
+ # Strip the user name, password and port number from the url
251
+ #
252
+ # @param (String) host_string host portion of the url
253
+ # @return (String) host portion of the url without the username, password and port
201
254
  def self.strip_username_password_and_port_from_host(host_string)
202
255
  host_string = remove_port(host_string)
203
256
  remove_username_and_password(host_string)
204
257
  end
205
258
 
259
+ # Strip port number from host string
260
+ #
261
+ # @param (see strip_username_password_and_port_from_host)
262
+ # @return (String) host part without the port number
206
263
  def self.remove_port(host_string)
207
264
  port_sep = host_string.rindex(':')
208
265
  if port_sep
@@ -212,6 +269,10 @@ module GoogleSafeBrowsing
212
269
  end
213
270
  end
214
271
 
272
+ # Strip user name and password from host part of url
273
+ #
274
+ # @param (see remove_port)
275
+ # @return (String) host part of url without user name or password
215
276
  def self.remove_username_and_password(host_string)
216
277
  un_sep = host_string.index('@')
217
278
  if un_sep
@@ -20,6 +20,9 @@ module GoogleSafeBrowsing
20
20
  tld
21
21
  end
22
22
 
23
+ # return array of host components (www, example, com from www.example.com)
24
+ # taking top level domains into account
25
+ # e.g. 'sub.domain.example.co.uk' => [ 'sub', 'domain', 'example', 'co.uk' ]
23
26
  def self.split_from_host(host)
24
27
  components = host.split('.')
25
28
 
@@ -1,3 +1,3 @@
1
1
  module GoogleSafeBrowsing
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_safe_browsing
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 0
10
- version: 0.3.0
9
+ - 1
10
+ version: 0.3.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Chris Marshall
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-05 00:00:00 Z
18
+ date: 2012-04-09 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  prerelease: false
@@ -136,7 +136,7 @@ files:
136
136
  - MIT-LICENSE
137
137
  - Rakefile
138
138
  - README.mkd
139
- homepage: https://github.com/chrismar035/google_safe_browsing
139
+ homepage: https://github.com/mobiledefense/mobiledefense_google_safe_browsing
140
140
  licenses: []
141
141
 
142
142
  post_install_message: