defender 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
module Defender
  ##
  # A document contains content to be analyzed by Defensio, or that has been
  # analyzed.
  #
  # Most of the Defensio API revolves around documents, including the detection
  # of unwanted content.
  class Document
    ##
    # Whether the document should be published by your Web site or not. For
    # example, spam and malicious content are not allowed.
    #
    # This is the only attribute that can be updated after the initial saving.
    # Use this for retraining purposes.
    #
    # @return [Boolean]
    attr_accessor :allow
    alias :allow? :allow

    ##
    # The type of content in the document.
    #
    # @return [String] The possible values are innocent, spam and malicious.
    attr_reader :classification

    ##
    # Whether the document matches profanity or other words defined by the
    # user. For example, this is useful to detect obscene comments posted
    # to your Web site. When true, you can obtain a filtered version of the
    # document by calling {#filter!}.
    #
    # @return [Boolean]
    attr_reader :profane
    alias :profane? :profane

    ##
    # A unique identifier for the document. You need this value to perform new
    # requests on the same document. Signatures should be kept private and never
    # be shared with your users.
    #
    # @return [String]
    attr_reader :signature

    ##
    # A numeric value indicating how strongly the document resembles spam. For
    # example, a document containing many links to pharmaceutical sites is
    # likely to have a very high spaminess value. This value should only be used
    # for sorting, and should never be used to determine if a document should be
    # allowed or not. Spaminess should be kept private and never be shared with
    # your users.
    #
    # @return [Float<0..1>] A float value between 0 and 1, with 1 being
    #   extremely spammy. For example, 0.89 (89%).
    attr_reader :spaminess

    ##
    # The string containing the body of the document. This field is required.
    #
    # @return [String]
    attr_accessor :content

    ##
    # The platform which the document is submitted on.
    #
    # One word, lower case. Spaces should be converted to underscores.
    #
    # *Examples:*
    #   wordpress, pixelpost, drupal, phpbb, movable_type
    #
    # The default is 'ruby'.
    #
    # @return [String]
    attr_accessor :platform

    ##
    # Identifies the type of content to be analyzed. This field is required.
    #
    # Use *test* only for testing purposes.
    #
    # When *type* is set to *test*, Defensio (not Defender) parses content for
    # classification and spaminess. For example, if you want the API to return
    # *malicious* as the classification and a spaminess of *0.99*, insert the
    # following in content:
    #   [malicious,0.99]
    #
    # There are three possible classifications:
    #
    # * innocent
    # * spam
    # * malicious
    #
    # Spaminess should be a decimal value between 0 and 1 (see
    # {#spaminess})
    #
    # *IMPORTANT*
    #
    # Do *NOT* leave type set to *test* in production. This could represent a
    # significant security breach.
    #
    # @return [String]
    attr_accessor :type

    ##
    # The email address of the author of the document.
    #
    # @return [String]
    attr_accessor :author_email

    ##
    # The IP address of the author of the document.
    #
    # For example, this could be the IP address of the person posting a comment
    # on a blog.
    #
    # @return [String]
    attr_accessor :author_ip

    ##
    # Whether or not the user posting the document is logged in to your Web
    # site, either through your own authentication mechanism or through OpenID.
    #
    # @see Document#author_openid
    # @see Document#author_trusted
    # @return [Boolean]
    attr_accessor :author_logged_in

    ##
    # The name of the author of the document.
    #
    # @return [String]
    attr_accessor :author_name

    ##
    # The OpenID URL of the logged-on user. Must be used in conjunction with
    # {Document#author_logged_in} = true.
    #
    # OpenID authentication must be taken care of by your application. Only send
    # this parameter if you have successfully authenticated the user with
    # OpenID.
    #
    # @return [String]
    attr_accessor :author_openid

    ##
    # Whether or not the user is an administrator, moderator or editor of your
    # Web site. Pass `true` only if you can guarantee that the user has been
    # authenticated, has a role of responsibility, and can be trusted as a good
    # Web citizen.
    #
    # @return [Boolean]
    attr_accessor :author_trusted

    ##
    # The URL of the person posting the document.
    #
    # @return [String]
    attr_accessor :author_url

    ##
    # Whether or not the Web browser used to post the document (i.e., the
    # comment) has cookies enabled. If no such detection has been made, leave
    # this value empty.
    #
    # @return [Boolean]
    attr_accessor :browser_cookies

    ##
    # Whether or not the Web browser used to post the document (i.e., the
    # comment) has JavaScript enabled. If no such detection has been made, leave
    # this value empty.
    #
    # @return [Boolean]
    attr_accessor :browser_javascript

    ##
    # The URL of the document being posted.
    #
    # *Examples*
    #
    # For a comment on a blog, the permalink URL might be:
    #
    #   'http://yourdomain.com/article#comment-51'
    #
    # For an article, it might be:
    #
    #   'http://yourdomain.com/article'
    #
    # @return [String]
    attr_accessor :document_permalink

    ##
    # Contains the HTTP headers sent with the request. You can send a few values
    # or all values. Because this information helps Defensio determine if a
    # document is innocent or not, the more headers you send, the better.
    #
    # @see #referrer
    # @return [Hash{String => String}, Array<String>] You can pass a hash with
    #   key => values, or an array where each entry has the format `"HEADER:
    #   value"`
    attr_accessor :http_headers

    ##
    # The date the parent document was posted. For example, on a blog, this
    # would be the date the article related to the comment (document) was
    # posted.
    #
    # If you are using threaded comments, send the date the article was posted,
    # *not* the date the parent comment was posted.
    #
    # @return [Time, Date, DateTime, "yyyy-mm-dd"] If a Time or DateTime is passed, only the
    #   date part will be saved.
    attr_accessor :parent_document_date

    ##
    # The URL of the parent document. For example, on a blog, this would be the
    # URL of the article on which the comment (document) was posted.
    #
    # @see #document_permalink
    # @return [String]
    attr_accessor :parent_document_permalink

    ##
    # Provide the value of the HTTP_REFERER (note the spelling) in this field.
    #
    # @see #http_headers
    # @return [String]
    attr_accessor :referrer

    ##
    # Provide the title of the document being sent. For example, this might be
    # the title of a blog article.
    #
    # Do not send this information if no title has been provided.
    #
    # @return [String]
    attr_accessor :title

    ##
    # Is the document still pending?
    #
    # @return [Boolean]
    attr_reader :pending
    alias :pending? :pending

    ##
    # Set the pending attribute to true. Only to be used by {find} and similar
    # methods.
    #
    # @private
    def pending!
      @pending = true
    end

    ##
    # Retrieves a document from the Defensio server.
    #
    # This can be called up to 30 days after the initial posting of a document
    # to Defensio.
    #
    # @param [String] signature The signature of the document to retrieve.
    # @raise [StandardError] if the server reports a status other than
    #   `success` or `pending`; the message is the one sent by the server.
    # @return [Document]
    def self.find(signature)
      document = new
      response = Defender.get("/#{Defender.api_key}/documents/#{signature}.json")['defensio-result']
      status = response['status']
      unless status == 'success' || status == 'pending'
        raise StandardError, response['message']
      end

      document.set_attributes(response)
      document.pending! if status == 'pending'
      document
    end

    ##
    # Create a new document.
    def initialize
    end

    ##
    # Re-retrieves the document from the Defensio server
    #
    # This can be called up to 30 days after the initial posting of the document
    # to Defensio
    #
    # @raise [StandardError] if the server reports a status other than
    #   `success` or `pending`; the message is the one sent by the server.
    # @return [true] The document was updated.
    # @return [false] The document was not updated (still pending).
    def refresh!
      response = Defender.get("/#{Defender.api_key}/documents/#{signature}.json")['defensio-result']
      case response['status']
      when 'success'
        # Update this very instance (there is no separate `document` object
        # here) and clear the pending flag now that the analysis is complete.
        set_attributes(response)
        @pending = false
        true
      when 'pending'
        pending!
        false
      else
        raise StandardError, response['message']
      end
    end

    ##
    # Creates an attributes hash to be sent to Defensio. Attribute names are
    # converted to the hyphenated names used on the wire, every value is
    # converted to a String, and attributes that are `nil` are left out.
    #
    # @return [Hash{String => String}]
    def attributes_hash
      options = {
        'client' => "Defender | #{Defender::VERSION} | Henrik Hodne | henrik.hodne@binaryhex.com",
        'platform' => platform || "ruby",
        'content' => content,
        'type' => type
      }

      # Plain attributes only differ from their wire names by the separator.
      [
        :author_email, :author_ip, :author_logged_in, :author_name, :author_openid,
        :author_trusted, :author_url, :browser_cookies, :browser_javascript,
        :document_permalink, :referrer, :title, :parent_document_permalink
      ].each do |name|
        options[name.to_s.tr("_", "-")] = send(name)
      end

      # Headers may be a Hash (pairs are joined as "Name: value") or an Array
      # of ready-made "Name: value" strings; either way one header per line.
      headers = http_headers
      unless headers.nil?
        lines = headers.to_a.map do |entry|
          entry.respond_to?(:join) ? entry.join(": ") : entry
        end
        options['http-headers'] = lines.join("\n")
      end

      # Date-like objects are reduced to their date part; strings pass through.
      parent_date = parent_document_date
      parent_date = parent_date.strftime("%Y-%m-%d") if parent_date.respond_to?(:strftime)
      options['parent-document-date'] = parent_date

      formatted_options = {}
      options.each do |key, value|
        formatted_options[key] = value.to_s unless value.nil?
      end
      formatted_options
    end

    ##
    # Post the document to Defensio to be analyzed for spam and malicious
    # content.
    #
    # If the document already has a signature it has been submitted before, and
    # only the {#allow} value is sent (retraining).
    #
    # @param [Boolean] async Whether or not the document analysis should be done
    #   asynchronously. With asynchronous document analysis you will obtain
    #   better accuracy. Do not poll the servers more than once every 30 seconds
    #   for each document. To avoid polling, set the callback URL with
    #   {Defender.async_callback}. You can get the information from the server
    #   using the {#refresh!} method or calling {Document.find} with the
    #   signature.
    #
    # @see #pending?
    #
    # @raise [ArgumentError] if a required field is not set.
    # @return [Boolean] Whether the record was saved or not.
    def save(async=false)
      sig = signature
      if sig # The document is submitted to Defensio
        response = Defender.put("/#{Defender.api_key}/documents/#{sig}.json",
                                :allow => allow?)['defensio-result']
      else
        # Build the payload once and validate that same hash.
        hsh = attributes_hash
        raise ArgumentError, 'The content field is required' if hsh['content'].nil?
        raise ArgumentError, 'The type field is required' if hsh['type'].nil?

        if async
          hsh['async'] = 'true'
          callback = Defender.async_callback
          hsh['async-callback'] = callback if callback
        end
        response = Defender.post("/#{Defender.api_key}/documents.json", hsh)['defensio-result']
      end

      case response['status']
      when 'success'
        set_attributes(response)
        true
      when 'pending'
        set_attributes(response) # Some fields are blank
        pending!
        true
      else
        false
      end
    end

    ##
    # Copies the analysis results from a Defensio response into this document
    # and then removes the setters of the submitted fields from this instance.
    #
    # @private
    # @param [Hash{String => Object}] attributes The `defensio-result` part of
    #   a server response.
    def set_attributes(attributes)
      [:classification, :signature, :spaminess, :allow].each do |name|
        instance_variable_set(:"@#{name}", attributes[name.to_s])
      end
      @profane = attributes['profanity-match']
      undefine_setters
    end

    ##
    # Filters the provided fields. The filtering is based on a default
    # dictionary and one previously configured by the user.
    #
    # @param [Array<Symbol>] *args The fields to filter (like `:content`,
    #   `:author_name`, etc.)
    # @raise [StandardError] if the server does not report success; the message
    #   is the one sent by the server.
    def filter!(*args)
      filter = {}
      args.each { |field| filter[field] = __send__(field) }
      response = Defender.post("/#{Defender.api_key}/profanity-filter.json", filter)['defensio-result']
      raise StandardError, response['message'] unless response['status'] == 'success'

      # Written straight to the instance variables because the setters may
      # already have been removed by a previous save.
      response['filtered'].each do |key, value|
        instance_variable_set(:"@#{key}", value)
      end
    end

    private

    ##
    # Removes the setters of every submitted field from this instance only
    # (other documents keep theirs), so that calling one raises NoMethodError.
    # `allow=` is kept. Safe to call repeatedly: setters that are already gone
    # are skipped.
    def undefine_setters
      metaclass = class << self; self; end
      [
        :content=, :platform=, :type=, :author_email=, :author_ip=,
        :author_logged_in=, :author_name=, :author_openid=,
        :author_trusted=, :author_url=, :browser_cookies=,
        :browser_javascript=, :document_permalink=, :http_headers=,
        :parent_document_date=, :referrer=, :title=
      ].each do |setter|
        # undef_method raises NameError for a method that is already
        # undefined, hence the respond_to? guard.
        metaclass.send(:undef_method, setter) if respond_to?(setter)
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
module Defender
  ##
  # Basic statistics for the current Defensio user, retrieved from the server
  # when the object is created. Statistics for a date range are available
  # through {Statistics::Extended}.
  class Statistics
    ##
    # Per-day statistics for a date range, retrieved from the server when the
    # object is created.
    class Extended
      ##
      # The starting date.
      #
      # @return [String] Is in the format YYYY-MM-DD.
      attr_reader :from

      ##
      # The ending date.
      #
      # @return [String] Is in the format YYYY-MM-DD.
      attr_reader :to

      ##
      # Provides a set of URLs that chart the data provided in the data array.
      #
      # The Hash returned will have the keys `:accuracy`, `:unwanted` and
      # `:legitimate`, which all refer to the same fields in the {#data} hash.
      #
      # @return [Hash{Symbol => String}]
      attr_reader :chart_urls

      ##
      # The set of dates within the retrieved period.
      #
      # The keys are the date in YYYY-MM-DD format.
      #
      # Each date has the following keys:
      #
      # * `:false_negatives` - The number of false negatives for the specified
      #   date.
      # * `:false_positives` - The number of false positives for the specified
      #   date.
      # * `:legitimate` - The number of legitimate documents processed on the
      #   specified date.
      # * `:accuracy` - How accurate Defensio has recently been for the current
      #   user on the specified date. This is returned as a Float between 0
      #   and 1. For example, 0.9525 means 95.25% accurate.
      # * `:unwanted` - The number of unwanted documents processed on the
      #   specified date.
      #
      # @return [Hash{String => Hash{Symbol => Object}}]
      attr_reader :data

      ##
      # Retrieves extended statistics from a given date to another one.
      #
      # @param [#strftime, #to_s] from The starting date.
      # @param [#strftime, #to_s] to The ending date.
      # @raise [StandardError] if the server does not report success; the
      #   message is the one sent by the server.
      def initialize(from, to)
        @from = from.respond_to?(:strftime) ? from.strftime('%Y-%m-%d') : from.to_s
        @to = to.respond_to?(:strftime) ? to.strftime('%Y-%m-%d') : to.to_s

        response = Defender.get("/#{Defender.api_key}/extended-stats.json", :from => @from, :to => @to)['defensio-result']
        raise StandardError, response['message'] unless response['status'] == 'success'

        charts = response['chart-urls']
        @chart_urls = {
          :accuracy => charts['recent-accuracy'],
          :unwanted => charts['total-unwanted'],
          :legitimate => charts['total-legitimate']
        }

        # Re-key the per-day entries by their date.
        @data = {}
        response['data'].each do |day|
          @data[day['date']] = {
            :false_negatives => day['false-negatives'],
            :false_positives => day['false-positives'],
            :legitimate => day['legitimate'],
            :accuracy => day['recent-accuracy'],
            :unwanted => day['unwanted']
          }
        end
      end
    end

    ##
    # The version of the Defensio API being used. Should be the same as
    # {Defender::API_VERSION}.
    #
    # @return [String]
    attr_reader :api_version

    ##
    # The number of documents that have been allowed but that should have been
    # blocked.
    #
    # @return [Integer]
    attr_reader :false_negatives

    ##
    # The number of documents that have been blocked but that should have been
    # allowed.
    #
    # @return [Integer]
    attr_reader :false_positives

    ##
    # Whether Defensio is learning from the documents you post.
    #
    # @return [Boolean]
    attr_reader :learning
    alias :learning? :learning

    ##
    # A message explaining why Defensio is in learning mode.
    #
    # @return [String]
    attr_reader :learning_status

    ##
    # The total number of legitimate documents analyzed.
    #
    # @return [Integer]
    attr_reader :legitimate_total

    ##
    # How accurate Defensio has recently been for this user.
    #
    # This returns a floating point value between 0 and 1. For example, 0.9525
    # means 95.25% accurate.
    #
    # @return [Float<0..1>]
    attr_reader :recent_accuracy

    ##
    # The number of documents containing malicious content.
    #
    # @return [Integer]
    attr_reader :unwanted_malicious

    ##
    # The number of spam documents analyzed.
    #
    # @return [Integer]
    attr_reader :unwanted_spam

    ##
    # The total number of unwanted documents.
    #
    # @return [Integer]
    attr_reader :unwanted_total

    ##
    # Initialize the object and retrieve basic statistics.
    #
    # @raise [StandardError] if any of the calls to the server during retrieving
    #   of statistics fail.
    def initialize
      retrieve_basic_stats
    end

    private

    ##
    # Fetches the basic statistics from the server and stores them in the
    # instance variables behind the public readers.
    #
    # @raise [StandardError] if the server does not report success; the
    #   message is the one sent by the server.
    def retrieve_basic_stats
      response = Defender.get("/#{Defender.api_key}/basic-stats.json")['defensio-result']
      raise StandardError, response['message'] unless response['status'] == 'success'

      unwanted = response["unwanted"]

      @api_version = response["api-version"]
      @false_negatives = response["false-negatives"]
      @false_positives = response["false-positives"]
      @learning = response["learning"]
      @learning_status = response["learning-status"]
      @legitimate_total = response["legitimate"]["total"]
      @recent_accuracy = response["recent-accuracy"]
      @unwanted_malicious = unwanted["malicious"]
      @unwanted_spam = unwanted["spam"]
      @unwanted_total = unwanted["total"]
    end
  end
end