defender 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,423 @@
1
+ module Defender
2
+ ##
3
+ # A document contains content to be analyzed by Defensio, or that has been
4
+ # analyzed.
5
+ #
6
+ # Most of the Defensio API revolves around documents, including the detection
7
+ # of unwanted content.
8
+ class Document
9
+ ##
10
+ # Whether the document should be published by your Web site or not. For
11
+ # example, spam and malicious content are not allowed.
12
+ #
13
+ # This is the only attribute that can be updated after the initial saving.
14
+ # Use this for retraining purposes.
15
+ #
16
+ # @return [Boolean]
17
+ attr_accessor :allow
18
+ alias :allow? :allow
19
+
20
+ ##
21
+ # The type of content in the document.
22
+ #
23
+ # @return [String] The possible values are innocent, spam and malicious.
24
+ attr_reader :classification
25
+
26
+ ##
27
+ # Whether the document matches profanity or other words defined by the
28
+ # user. For example, this is useful to detect obscene comments posted
29
+ # to your Web site. When true, you can obtain a filtered version of the
30
+ # document by calling {#filter!}.
31
+ #
32
+ # @return [Boolean]
33
+ attr_reader :profane
34
+ alias :profane? :profane
35
+
36
+ ##
37
+ # A unique identifier for the document. You need this value to perform new
38
+ # requests on the same document. Signatures should be kept private and never
39
+ # be shared with your users.
40
+ #
41
+ # @return [String]
42
+ attr_reader :signature
43
+
44
+ ##
45
+ # A numeric value indicating how strongly the document resembles spam. For
46
+ # example, a document containing many links to pharmaceutical sites is
47
+ # likely to have a very high spaminess value. This value should only be used
48
+ # for sorting, and should never be used to determine if a document should be
49
+ # allowed or not. Spaminess should be kept private and never be shared with
50
+ # your users.
51
+ #
52
+ # @return [Float<0..1>] A float value between 0 and 1, with 1 being
53
+ # extremely spammy. For example, 0.89 (89%).
54
+ attr_reader :spaminess
55
+
56
+ ##
57
+ # The string containing the body of the document. This field is required.
58
+ #
59
+ # @return [String]
60
+ attr_accessor :content
61
+
62
+ ##
63
+ # The platform which the document is submitted on.
64
+ #
65
+ # One word, lower case. Spaces should be converted to underscores.
66
+ #
67
+ # *Examples:*
68
+ # wordpress, pixelpost, drupal, phpbb, movable_type
69
+ #
70
+ # The default is 'ruby'.
71
+ #
72
+ # @return [String]
73
+ attr_accessor :platform
74
+
75
+ ##
76
+ # Identifies the type of content to be analyzed.
77
+ #
78
+ # Use *test* only for testing purposes.
79
+ #
80
+ # When *type* is set to *test*, Defensio (not Defender) parses content for
81
+ # classification and spaminess. For example, if you want the API to return
82
+ # *malicious* as the classification and a spaminess of *0.99*, insert the
83
+ # following in content:
84
+ # [malicious,0.99]
85
+ #
86
+ # There are three possible classifications:
87
+ #
88
+ # * innocent
89
+ # * spam
90
+ # * malicious
91
+ #
92
+ # Spaminess should be a decimal value between 0 and 1 (see
93
+ # {#spaminess})
94
+ #
95
+ # *IMPORTANT*
96
+ #
97
+ # Do *NOT* leave type set to *test* in production. This could represent a
98
+ # significant security breach.
99
+ attr_accessor :type
100
+
101
+ ##
102
+ # The email address of the author of the document.
103
+ #
104
+ # @return [String]
105
+ attr_accessor :author_email
106
+
107
+ ##
108
+ # The IP address of the author of the document.
109
+ #
110
+ # For example, this could be the IP address of the person posting a comment
111
+ # on a blog.
112
+ #
113
+ # @return [String]
114
+ attr_accessor :author_ip
115
+
116
+ ##
117
+ # Whether or not the user posting the document is logged in to your Web
118
+ # site, either through your own authentication mechanism or through OpenID.
119
+ #
120
+ # @see Document#author_openid
121
+ # @see Document#author_trusted
122
+ # @return [Boolean]
123
+ attr_accessor :author_logged_in
124
+
125
+ ##
126
+ # The name of the author of the document.
127
+ #
128
+ # @return [String]
129
+ attr_accessor :author_name
130
+
131
+ ##
132
+ # The OpenID URL of the logged-on user. Must be used in conjunction with
133
+ # {Document#author_logged_in} = true.
134
+ #
135
+ # OpenID authentication must be taken care of by your application. Only send
136
+ # this parameter if you have successfully authenticated the user with
137
+ # OpenID.
138
+ #
139
+ # @return [String]
140
+ attr_accessor :author_openid
141
+
142
+ ##
143
+ # Whether or not the user is an administrator, moderator or editor of your
144
+ # Web site. Pass `true` only if you can guarantee that the user has been
145
+ # authenticated, has a role of responsibility, and can be trusted as a good
146
+ # Web citizen.
147
+ #
148
+ # @return [Boolean]
149
+ attr_accessor :author_trusted
150
+
151
+ ##
152
+ # The URL of the person posting the document.
153
+ #
154
+ # @return [String]
155
+ attr_accessor :author_url
156
+
157
+ ##
158
+ # Whether or not the Web browser used to post the document (i.e., the
159
+ # comment) has cookies enabled. If no such detection has been made, leave
160
+ # this value empty.
161
+ #
162
+ # @return [Boolean]
163
+ attr_accessor :browser_cookies
164
+
165
+ ##
166
+ # Whether or not the Web browser used to post the document (i.e., the
167
+ # comment) has JavaScript enabled. If no such detection has been made, leave
168
+ # this value empty.
169
+ #
170
+ # @return [Boolean]
171
+ attr_accessor :browser_javascript
172
+
173
+ ##
174
+ # The URL of the document being posted.
175
+ #
176
+ # *Examples*
177
+ #
178
+ # For a comment on a blog, the permalink URL might be:
179
+ #
180
+ # 'http://yourdomain.com/article#comment-51'
181
+ #
182
+ # For an article, it might be:
183
+ #
184
+ # 'http://yourdomain.com/article'
185
+ #
186
+ # @return [String]
187
+ attr_accessor :document_permalink
188
+
189
+ ##
190
+ # Contains the HTTP headers sent with the request. You can send a few values
191
+ # or all values. Because this information helps Defensio determine if a
192
+ # document is innocent or not, the more headers you send, the better.
193
+ #
194
+ # @see #referrer
195
+ # @return [Hash{String => String}, Array<String>] You can pass a hash with
196
+ # key => values, or an array where each entry has the format `"HEADER:
197
+ # value"`
198
+ attr_accessor :http_headers
199
+
200
+ ##
201
+ # The date the parent document was posted. For example, on a blog, this
202
+ # would be the date the article related to the comment (document) was
203
+ # posted.
204
+ #
205
+ # If you are using threaded comments, send the date the article was posted,
206
+ # *not* the date the parent comment was posted.
207
+ #
208
+ # @return [Time, Date, DateTime, "yyyy-mm-dd"] If a Time or DateTime is passed, only the
209
+ # date part will be saved.
210
+ attr_accessor :parent_document_date
211
+
212
+ ##
213
+ # The URL of the parent document. For example, on a blog, this would be the
214
+ # URL of the article on which the comment (document) was posted.
215
+ #
216
+ # @see #document_permalink
217
+ # @return [String]
218
+ attr_accessor :parent_document_permalink
219
+
220
+ ##
221
+ # Provide the value of the HTTP_REFERER (note the spelling) in this field.
222
+ #
223
+ # @see #http_headers
224
+ # @return [String]
225
+ attr_accessor :referrer
226
+
227
+ ##
228
+ # Provide the title of the document being sent. For example, this might be
229
+ # the title of a blog article.
230
+ #
231
+ # Do not send this information if no title has been provided.
232
+ attr_accessor :title
233
+
234
+ ##
235
+ # Is the document still pending?
236
+ #
237
+ # @return [Boolean]
238
+ attr_reader :pending
239
+ alias :pending? :pending
240
+
241
+ ##
242
+ # Marks the document as pending by setting @pending to true. Called by
243
+ # {find} and {#refresh!} when Defensio reports a pending status.
244
+ #
245
+ # @private
246
+ def pending!; @pending = true; end
247
+
248
+ ##
249
+ # Retrieves a document from the Defensio server. This can be called up to
250
+ # 30 days after the initial posting of the document to Defensio.
251
+ #
252
+ # @param [String] signature The signature of the document to retrieve.
253
+ # @raise [StandardError] if the status is neither success nor pending.
254
+ # @return [Document] The document; {#pending?} is true for a pending status.
255
+ def self.find(signature)
256
+ document = new()
257
+ response = Defender.get("/#{Defender.api_key}/documents/#{signature}.json")['defensio-result']
258
+ if response['status'] == 'success' || response['status'] == 'pending'
259
+ document.set_attributes(response) # also disables the setters of the submitted fields
260
+ document.pending! if response['status'] == 'pending'
261
+ else
262
+ raise StandardError, response['message'] # the message comes from the response
263
+ end
264
+ document
265
+ end
266
+
267
+ ##
268
+ # Creates a new, empty document. No attributes are set here.
269
+ def initialize()
270
+ end
271
+
##
# Re-retrieves the document from the Defensio server and updates this
# instance with the result.
#
# This can be called up to 30 days after the initial posting of the document
# to Defensio.
#
# @raise [StandardError] if the response status is neither success nor
#   pending; the message is taken from the response.
# @return [true] The document was updated.
# @return [false] The document was not updated (still pending).
def refresh!
  response = Defender.get("/#{Defender.api_key}/documents/#{signature}.json")['defensio-result']

  case response['status']
  when 'success'
    # Update this instance directly; this method has no local `document`.
    set_attributes(response)
    # The analysis has finished, so the document must stop reporting pending.
    @pending = false
    true
  when 'pending'
    pending!
    false
  else
    raise StandardError, response['message']
  end
end
292
+
##
# Builds the hash of parameters to be sent to Defensio. Attribute names are
# converted to hyphenated keys, values are converted to strings, and
# attributes that are not set (nil) are left out.
#
# No validation is done here; missing required fields simply do not appear
# in the result.
#
# @return [Hash{String => String}]
def attributes_hash
  options = {
    'client' => "Defender | #{Defender::VERSION} | Henrik Hodne | henrik.hodne@binaryhex.com",
    'platform' => platform || 'ruby',
    'content' => content,
    'type' => type
  }

  [
    :author_email, :author_ip, :author_logged_in, :author_name, :author_openid,
    :author_trusted, :author_url, :browser_cookies, :browser_javascript,
    :document_permalink, :referrer, :title, :parent_document_permalink
  ].each do |name|
    options[name.to_s.tr('_', '-')] = send(name)
  end

  headers = http_headers
  unless headers.nil?
    # Array() turns a Hash into [key, value] pairs, keeps an Array as it is
    # and wraps a single String, so all three forms end up as header lines.
    options['http-headers'] = Array(headers).map do |header|
      header.respond_to?(:join) ? header.join(': ') : header
    end.join("\n")
  end

  # Time/Date/DateTime values are reduced to their date part; anything else
  # (such as a "yyyy-mm-dd" string) is passed through unchanged.
  date = parent_document_date
  options['parent-document-date'] = date.respond_to?(:strftime) ? date.strftime('%Y-%m-%d') : date

  formatted_options = {}
  options.each do |key, value|
    formatted_options[key] = value.to_s unless value.nil?
  end
  formatted_options
end
333
+
##
# Post the document to Defensio to be analyzed for spam and malicious
# content. If the document already has a signature, only the `allow` value
# is sent to Defensio as an update instead.
#
# @param [Boolean] async Whether or not the document analysis should be done
#   asynchronously. With asynchronous document analysis you will obtain
#   better accuracy. Do not poll the servers more than once every 30 seconds
#   for each document. To avoid polling, set the callback URL with
#   {Defender.async_callback}. You can get the information from the server
#   using the {#refresh!} method or calling {Document.find} with the
#   signature.
#
# @see #pending?
#
# @raise [ArgumentError] if a required field (content or type) is not set.
# @return [Boolean] Whether the record was saved or not.
def save(async = false)
  sig = signature
  response =
    if sig # The document is submitted to Defensio
      Defender.put("/#{Defender.api_key}/documents/#{sig}.json",
                   :allow => allow?)['defensio-result']
    else
      # Build the parameters once and reuse them for validation and posting.
      hsh = attributes_hash
      raise ArgumentError, 'The content field is required' if hsh['content'].nil?
      raise ArgumentError, 'The type field is required' if hsh['type'].nil?

      if async
        hsh['async'] = 'true'
        callback = Defender.async_callback
        hsh['async-callback'] = callback if callback
      end
      Defender.post("/#{Defender.api_key}/documents.json", hsh)['defensio-result']
    end

  case response['status']
  when 'success'
    set_attributes(response)
    @pending = false # a finished analysis is no longer pending
    true
  when 'pending'
    set_attributes(response) # Some fields are blank
    @pending = true
    true
  else
    false
  end
end
380
+
381
+ def set_attributes(attributes) # Copies the result fields of a Defensio response hash onto this document.
382
+ [:classification, :signature, :spaminess, :allow].each do |symbol|
383
+ self.instance_variable_set(:"@#{symbol}", attributes[symbol.to_s]) # the response is read with string keys
384
+ end
385
+ @profane = attributes['profanity-match'] # this key differs from the attribute name
386
+ undefine_setters # the setters of the submitted fields are disabled from here on
387
+ end
388
+
389
+ ##
390
+ # Filters the provided fields using Defensio's default dictionary and the one
391
+ # previously configured by the user, storing each filtered value on the document.
392
+ #
393
+ # @param [Array<Symbol>] *args The fields to filter (like `:content`, `:author_name`).
394
+ # @raise [StandardError] with the response's message if the status is not success.
395
+ def filter!(*args)
396
+ filter = {}
397
+ args.each {|arg| filter[arg] = __send__(arg) } # read the current value of every requested field
398
+ response = Defender.post("/#{Defender.api_key}/profanity-filter.json", filter)['defensio-result']
399
+ if response['status'] == 'success'
400
+ response['filtered'].each do |key, value|
401
+ self.instance_variable_set(:"@#{key}", value) # written directly to the ivar, bypassing the setters
402
+ end
403
+ else
404
+ raise StandardError, response['message']
405
+ end
406
+ end
407
+
408
+ private
409
+
# Disables the setters of the submitted fields on this instance only. Each
# setter is overridden on the singleton class with a method that forwards
# to method_missing, so calling it raises NoMethodError while other
# instances keep their setters. Calling this more than once is harmless.
def undefine_setters
  singleton = class << self; self; end

  [
    :content=, :platform=, :type=, :author_email=, :author_ip=,
    :author_logged_in=, :author_name=, :author_openid=,
    :author_trusted=, :author_url=, :browser_cookies=,
    :browser_javascript=, :document_permalink=, :http_headers=,
    :parent_document_date=, :referrer=, :title=
  ].each do |setter|
    # define_method is private on older Rubies, hence the send.
    singleton.send(:define_method, setter) do |*args|
      method_missing(setter, *args)
    end
  end
end
421
+ end
422
+ end
423
+ end
@@ -0,0 +1,176 @@
1
+ module Defender
2
+ class Statistics
3
+ class Extended
4
+ ##
5
+ # The starting date.
6
+ #
7
+ # @return [String] Is in the format YYYY-MM-DD.
8
+ attr_reader :from
9
+
10
+ ##
11
+ # The ending date.
12
+ #
13
+ # @return [String] Is in the form YYYY-MM-DD.
14
+ attr_reader :to
15
+
16
+ ##
17
+ # Provides a set of URLs that chart the data provided in the data array.
18
+ #
19
+ # The Hash returned will have the keys `:accuracy`, `:unwanted` and
20
+ # `:legitimate`, which all refer to the same fields in the {#data} hash.
21
+ #
22
+ # @return [Hash{Symbol => String}]
23
+ attr_reader :chart_urls
24
+
25
+ ##
26
+ # The set of dates within the retrieved period.
27
+ #
28
+ # The keys are the date in YYYY-MM-DD format.
29
+ #
30
+ # Each date has the following keys:
31
+ #
32
+ # * `:false_negatives` - The number of false negatives for the specified
33
+ # date.
34
+ # * `:false_positives` - The number of false positives for the specified
35
+ # date.
36
+ # * `:legitimate` - The number of legitimate documents processed on the
37
+ # specified date.
38
+ # * `:accuracy` - How accurate Defensio has recently been for the current
39
+ # user on the specified date. This is returned as a Float between 0
40
+ # and 1. For example, 0.9525 means 95.25% accurate.
41
+ # * `:unwanted` - The number of unwanted documents processed on the
42
+ # specified date.
43
+ #
44
+ # @return [Hash{String => Hash{Symbol => Object}}]
45
+ attr_reader :data
46
+
47
+ ##
48
+ # Retrieves extended statistics from Defensio for the given date range.
49
+ # @raise [StandardError] with the response's message if the status is not success.
50
+ # @param [#strftime, #to_s] from The starting date.
51
+ # @param [#strftime, #to_s] to The ending date.
52
+ def initialize(from, to)
53
+ @from = from.respond_to?(:strftime) ? from.strftime('%Y-%m-%d') : from.to_s
54
+ @to = to.respond_to?(:strftime) ? to.strftime('%Y-%m-%d') : to.to_s
55
+
56
+ response = Defender.get("/#{Defender.api_key}/extended-stats.json", :from => @from, :to => @to)['defensio-result']
57
+ if response['status'] == 'success'
58
+ @chart_urls = {
59
+ :accuracy => response['chart-urls']['recent-accuracy'],
60
+ :unwanted => response['chart-urls']['total-unwanted'],
61
+ :legitimate => response['chart-urls']['total-legitimate']
62
+ }
63
+
64
+ @data = {}
65
+ response['data'].each do |data| # one entry per date, stored under its 'date' value
66
+ @data[data['date']] = {
67
+ :false_negatives => data['false-negatives'],
68
+ :false_positives => data['false-positives'],
69
+ :legitimate => data['legitimate'],
70
+ :accuracy => data['recent-accuracy'],
71
+ :unwanted => data['unwanted']
72
+ }
73
+ end
74
+ else
75
+ raise StandardError, response['message']
76
+ end
77
+ end
78
+ end
79
+
80
+ ##
81
+ # The version of the Defensio API being used. Should be the same as
82
+ # {Defender::API_VERSION}.
83
+ #
84
+ # @return [String]
85
+ attr_reader :api_version
86
+
87
+ ##
88
+ # The number of documents that have been allowed but that should have been
89
+ # blocked.
90
+ #
91
+ # @return [Fixnum]
92
+ attr_reader :false_negatives
93
+
94
+ ##
95
+ # The number of documents that have been blocked but that should have been
96
+ # allowed.
97
+ #
98
+ # @return [Fixnum]
99
+ attr_reader :false_positives
100
+
101
+ ##
102
+ # Whether Defensio is learning from the documents you post.
103
+ #
104
+ # @return [Boolean]
105
+ attr_reader :learning
106
+
107
+ ##
108
+ # A message explaining why Defensio is in learning mode.
109
+ #
110
+ # @return [String]
111
+ attr_reader :learning_status
112
+
113
+ ##
114
+ # The total number of legitimate documents analyzed.
115
+ #
116
+ # @return [Fixnum]
117
+ attr_reader :legitimate_total
118
+
119
+ ##
120
+ # How accurate Defensio has recently been for this user.
121
+ #
122
+ # This returns a floating point value between 0 and 1. For example, 0.9525
123
+ # means 95.25% accurate.
124
+ #
125
+ # @return [Float<0..1>]
126
+ attr_reader :recent_accuracy
127
+
128
+ ##
129
+ # The number of documents containing malicious content.
130
+ #
131
+ # @return [Fixnum]
132
+ attr_reader :unwanted_malicious
133
+
134
+ ##
135
+ # The number of spam documents analyzed.
136
+ #
137
+ # @return [Fixnum]
138
+ attr_reader :unwanted_spam
139
+
140
+ ##
141
+ # The total number of unwanted documents.
142
+ #
143
+ # @return [Fixnum]
144
+ attr_reader :unwanted_total
145
+
146
+ ##
147
+ # Initializes the object and immediately retrieves the basic statistics.
148
+ #
149
+ # @raise [StandardError] if the statistics response does not report a
150
+ #   success status.
151
+ def initialize
152
+ retrieve_basic_stats
153
+ end
154
+
155
+ private
156
+
157
+ def retrieve_basic_stats # Fetches basic-stats.json and copies its fields into the reader attributes.
158
+ response = Defender.get("/#{Defender.api_key}/basic-stats.json")['defensio-result']
159
+
160
+ if response['status'] == 'success'
161
+ @api_version = response["api-version"]
162
+ @false_negatives = response["false-negatives"]
163
+ @false_positives = response["false-positives"]
164
+ @learning = response["learning"]
165
+ @learning_status = response["learning-status"]
166
+ @legitimate_total = response["legitimate"]["total"] # nested under the "legitimate" key
167
+ @recent_accuracy = response["recent-accuracy"]
168
+ @unwanted_malicious = response["unwanted"]["malicious"] # the "unwanted" entry holds three counters
169
+ @unwanted_spam = response["unwanted"]["spam"]
170
+ @unwanted_total = response["unwanted"]["total"]
171
+ else
172
+ raise StandardError, response['message'] # any other status is raised with the response's message
173
+ end
174
+ end
175
+ end
176
+ end