splam 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +2 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +24 -0
- data/MIT-LICENSE +20 -0
- data/README +53 -0
- data/Rakefile +14 -0
- data/gem-public_cert.pem +20 -0
- data/lib/splam.rb +124 -0
- data/lib/splam/rule.rb +73 -0
- data/lib/splam/rules.rb +2 -0
- data/lib/splam/rules/arms_race.rb +24 -0
- data/lib/splam/rules/bad_words.rb +46 -0
- data/lib/splam/rules/bbcode.rb +12 -0
- data/lib/splam/rules/chinese.rb +26 -0
- data/lib/splam/rules/fuzz.rb +20 -0
- data/lib/splam/rules/good_words.rb +19 -0
- data/lib/splam/rules/href.rb +55 -0
- data/lib/splam/rules/html.rb +12 -0
- data/lib/splam/rules/line_length.rb +26 -0
- data/lib/splam/rules/punctuation.rb +14 -0
- data/lib/splam/rules/russian.rb +15 -0
- data/lib/splam/rules/word_length.rb +32 -0
- data/splam.gemspec +12 -0
- data/test/fixtures/comment/ham/api-1.txt +1 -0
- data/test/fixtures/comment/ham/api-2.txt +7 -0
- data/test/fixtures/comment/ham/api-3.txt +3 -0
- data/test/fixtures/comment/ham/api-4.txt +1 -0
- data/test/fixtures/comment/ham/api-5.txt +7 -0
- data/test/fixtures/comment/ham/api.txt +5 -0
- data/test/fixtures/comment/ham/api_bug.txt +16 -0
- data/test/fixtures/comment/ham/backtrace.txt +79 -0
- data/test/fixtures/comment/ham/epic.txt +35 -0
- data/test/fixtures/comment/ham/epic_warehouse.txt +92 -0
- data/test/fixtures/comment/ham/extra_fields.txt +25 -0
- data/test/fixtures/comment/ham/feedlinks.txt +13 -0
- data/test/fixtures/comment/ham/github.txt +5 -0
- data/test/fixtures/comment/ham/hub.txt +10 -0
- data/test/fixtures/comment/ham/mario.txt +19 -0
- data/test/fixtures/comment/ham/mylyn.txt +10 -0
- data/test/fixtures/comment/ham/omg_thanks_again_finally_warehouse.txt +30 -0
- data/test/fixtures/comment/ham/omg_thanks_again_warehouse.txt +17 -0
- data/test/fixtures/comment/ham/problem.txt +7 -0
- data/test/fixtures/comment/ham/sample_html.txt +3 -0
- data/test/fixtures/comment/ham/short_reply.txt +3 -0
- data/test/fixtures/comment/ham/tags.txt +11 -0
- data/test/fixtures/comment/ham/thanks_warehouse.txt +15 -0
- data/test/fixtures/comment/ham/thx.txt +5 -0
- data/test/fixtures/comment/spam/125_spam-12420.txt +6 -0
- data/test/fixtures/comment/spam/40_pharmacia.txt +1 -0
- data/test/fixtures/comment/spam/amazon.txt +51 -0
- data/test/fixtures/comment/spam/bluebichen.txt +1 -0
- data/test/fixtures/comment/spam/boobz.txt +3 -0
- data/test/fixtures/comment/spam/buffy.txt +1 -0
- data/test/fixtures/comment/spam/chinese.txt +19 -0
- data/test/fixtures/comment/spam/comment_bbc.txt +1 -0
- data/test/fixtures/comment/spam/comment_cnn.txt +1 -0
- data/test/fixtures/comment/spam/comment_randi.txt +1 -0
- data/test/fixtures/comment/spam/comment_wordy.txt +1 -0
- data/test/fixtures/comment/spam/consent.txt +1 -0
- data/test/fixtures/comment/spam/december.txt +1 -0
- data/test/fixtures/comment/spam/digital_rights.txt +1 -0
- data/test/fixtures/comment/spam/dyed_wool.txt +1 -0
- data/test/fixtures/comment/spam/hairbrush_sex.txt +119 -0
- data/test/fixtures/comment/spam/handbag.txt +5 -0
- data/test/fixtures/comment/spam/inqius.txt +5 -0
- data/test/fixtures/comment/spam/kidneys.txt +1 -0
- data/test/fixtures/comment/spam/madonna.txt +3 -0
- data/test/fixtures/comment/spam/make_plans.txt +3 -0
- data/test/fixtures/comment/spam/oem.txt +130 -0
- data/test/fixtures/comment/spam/oem2.txt +130 -0
- data/test/fixtures/comment/spam/oem_intl.txt +131 -0
- data/test/fixtures/comment/spam/omg_sex.txt +26 -0
- data/test/fixtures/comment/spam/ottersex.txt +1 -0
- data/test/fixtures/comment/spam/pdwkb.txt +1 -0
- data/test/fixtures/comment/spam/pr0n.txt +320 -0
- data/test/fixtures/comment/spam/property.txt +448 -0
- data/test/fixtures/comment/spam/pyromancy.txt +1 -0
- data/test/fixtures/comment/spam/rapid.txt +10 -0
- data/test/fixtures/comment/spam/russki.txt +5 -0
- data/test/fixtures/comment/spam/russki2.txt +2 -0
- data/test/fixtures/comment/spam/shipping.txt +3 -0
- data/test/fixtures/comment/spam/short_n_sweet.txt +1 -0
- data/test/fixtures/comment/spam/spam-13232.txt +15 -0
- data/test/fixtures/comment/spam/spam-13518.txt +3 -0
- data/test/fixtures/comment/spam/spam-13519.txt +3 -0
- data/test/fixtures/comment/spam/spam-13520.txt +3 -0
- data/test/fixtures/comment/spam/spam-13521.txt +3 -0
- data/test/fixtures/comment/spam/spam-13982.txt +10 -0
- data/test/fixtures/comment/spam/spam-14178.txt +1 -0
- data/test/fixtures/comment/spam/spam-14447.txt +4 -0
- data/test/fixtures/comment/spam/spam-14718.txt +4 -0
- data/test/fixtures/comment/spam/spam0113081.txt +1 -0
- data/test/fixtures/comment/spam/tk.txt +4 -0
- data/test/fixtures/comment/spam/troubles.txt +2 -0
- data/test/fixtures/comment/spam/url_only_idiot.txt +1 -0
- data/test/fixtures/comment/spam/webcam.txt +3 -0
- data/test/splam_rule_test.rb +20 -0
- data/test/splam_test.rb +102 -0
- data/test/test_helper.rb +8 -0
- metadata +183 -0
- metadata.gz.sig +2 -0
data.tar.gz.sig
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
splam (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
activesupport (3.2.12)
|
10
|
+
i18n (~> 0.6)
|
11
|
+
multi_json (~> 1.0)
|
12
|
+
bump (0.3.9)
|
13
|
+
i18n (0.6.1)
|
14
|
+
multi_json (1.6.0)
|
15
|
+
rake (10.0.3)
|
16
|
+
|
17
|
+
PLATFORMS
|
18
|
+
ruby
|
19
|
+
|
20
|
+
DEPENDENCIES
|
21
|
+
activesupport
|
22
|
+
bump
|
23
|
+
rake
|
24
|
+
splam!
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 ENTP
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
Splam
|
2
|
+
=====
|
3
|
+
|
4
|
+
Splam is a simple spam scoring plugin. It contains a set of rules that are run on a field
|
5
|
+
to help you determine the likelihood of that field being spam. It doesn't do anything
|
6
|
+
other than give a field a score. It's up to you to act on that score.
|
7
|
+
|
8
|
+
Check out the tests for instructions on how to use: you'll want to integrate this into
|
9
|
+
your application's workflow.
|
10
|
+
|
11
|
+
It's heavily biased towards the spam I've been seeing in the past two or three hours.
|
12
|
+
This includes lots of crap with
|
13
|
+
- bbcode [url=
|
14
|
+
- lots of links (http://)
|
15
|
+
- russian text
|
16
|
+
- links to russian or chinese websites
|
17
|
+
|
18
|
+
You can write your own plugins for Splam: simply subclass Splam::Rule. Splam is clever enough
|
19
|
+
to iterate over all Rule's subclasses and run the 'run' method on the field to be checked.
|
20
|
+
The other way to do this would be to define Rule.add_rule do ... end but I think the class
|
21
|
+
form is easier for rubyists to understand and modify.
|
22
|
+
|
23
|
+
Splam aggregates the scores from all the rules. From the brief testing I've done, anything over
|
24
|
+
about 40 is likely to be spam. Real spam will blow out of the scoring stratosphere with over 1,000.
|
25
|
+
|
26
|
+
Recommended serving directions:
|
27
|
+
|
28
|
+
class Comment
|
29
|
+
include Splam
|
30
|
+
|
31
|
+
splammable :body
|
32
|
+
end
|
33
|
+
|
34
|
+
comment = Comment.new :body => "This is spam!!!1"
|
35
|
+
comment.splam? # => false
|
36
|
+
comment.splam_score # => 2
|
37
|
+
comment.splam_reasons # => []
|
38
|
+
|
39
|
+
Add this to a model, check the score, and determine (based on other factors such as logged-in
|
40
|
+
user, time spent on the page, validity of request headers, length of user's membership on the
|
41
|
+
site) whether to ban the post or not.
|
42
|
+
|
43
|
+
We recommend showing the post to the user (spambox them in) but hide it from everyone else.
|
44
|
+
|
45
|
+
TODO
|
46
|
+
|
47
|
+
- Integrate bayesian or other clever algorithm, so that scores aren't hardcoded.
|
48
|
+
- Switch to using a percentage (0.994) rather than a score (250)
|
49
|
+
- Write more plugins!
|
50
|
+
- Test against a larger Ham corpus
|
51
|
+
- Fix that nasty autoloading code in splam.rb
|
52
|
+
|
53
|
+
Copyright (c) 2008 ENTP, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Rake entry point for the splam gem: loads the task libraries and defines
# the test task, which is also the default task.
require 'bundler/setup'
require 'bundler/gem_tasks'
require 'bump/tasks'
require 'rake/testtask'

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the splam gem.'
Rake::TestTask.new(:test) do |test_task|
  # Put lib/ on the load path and pick up every *_test.rb file under test/.
  test_task.libs.push('lib')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = true
end
|
data/gem-public_cert.pem
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
2
|
+
MIIDMjCCAhqgAwIBAgIBADANBgkqhkiG9w0BAQUFADA/MRAwDgYDVQQDDAdtaWNo
|
3
|
+
YWVsMRcwFQYKCZImiZPyLGQBGRYHZ3Jvc3NlcjESMBAGCgmSJomT8ixkARkWAml0
|
4
|
+
MB4XDTEzMDIwMzE4MTMxMVoXDTE0MDIwMzE4MTMxMVowPzEQMA4GA1UEAwwHbWlj
|
5
|
+
aGFlbDEXMBUGCgmSJomT8ixkARkWB2dyb3NzZXIxEjAQBgoJkiaJk/IsZAEZFgJp
|
6
|
+
dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMorXo/hgbUq97+kII9H
|
7
|
+
MsQcLdC/7wQ1ZP2OshVHPkeP0qH8MBHGg6eYisOX2ubNagF9YTCZWnhrdKrwpLOO
|
8
|
+
cPLaZbjUjljJ3cQR3B8Yn1veV5IhG86QseTBjymzJWsLpqJ1UZGpfB9tXcsFtuxO
|
9
|
+
6vHvcIHdzvc/OUkICttLbH+1qb6rsHUceqh+JrH4GrsJ5H4hAfIdyS2XMK7YRKbh
|
10
|
+
h+IBu6dFWJJByzFsYmV1PDXln3UBmgAt65cmCu4qPfThioCGDzbSJrGDGLmw/pFX
|
11
|
+
FPpVCm1zgYSb1v6Qnf3cgXa2f2wYGm17+zAVyIDpwryFru9yF/jJxE38z/DRsd9R
|
12
|
+
/88CAwEAAaM5MDcwCQYDVR0TBAIwADAdBgNVHQ4EFgQUsiNnXHtKeMYYcr4yJVmQ
|
13
|
+
WONL+IwwCwYDVR0PBAQDAgSwMA0GCSqGSIb3DQEBBQUAA4IBAQAlyN7kKo/NQCQ0
|
14
|
+
AOzZLZ3WAePvStkCFIJ53tsv5Kyo4pMAllv+BgPzzBt7qi605mFSL6zBd9uLou+W
|
15
|
+
Co3s48p1dy7CjjAfVQdmVNHF3MwXtfC2OEyvSQPi4xKR8iba8wa3xp9LVo1PuLpw
|
16
|
+
/6DsrChWw74HfsJN6qJOK684hJeT8lBYAUfiC3wD0owoPSg+XtyAAddisR+KV5Y1
|
17
|
+
NmVHuLtQcNTZy+gRht3ahJRMuC6QyLmkTsf+6MaenwAMkAgHdswGsJztOnNnBa3F
|
18
|
+
y0kCSWmK6D+x/SbfS6r7Ke07MRqziJdB9GuE1+0cIRuFh8EQ+LN6HXCKM5pon/GU
|
19
|
+
ycwMXfl0
|
20
|
+
-----END CERTIFICATE-----
|
data/lib/splam.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
# Splam
|
2
|
+
#require File.dirname(__FILE__) + "/splam/rule"
|
3
|
+
#require File.dirname(__FILE__) + "/splam/rules"
|
4
|
+
#require File.dirname(__FILE__) + "/splam/rules/russian"
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'activesupport'
|
8
|
+
require 'active_support/inflector'
|
9
|
+
|
10
|
+
module Splam
  # A configured set of spam rules for one attribute of a record.
  #
  # Struct members:
  #   body       - name of the record method whose value is scored
  #   rules      - after construction, a Hash of rule class => weight
  #   threshold  - score at or above which #splam? answers true
  #   conditions - optional callable; when present, the including record only
  #                runs the suite if it returns a truthy value
  class Suite < Struct.new(:body, :rules, :threshold, :conditions)
    # Should be a Rack::Request, in case you want to inspect user agents and whatnot.
    # Nothing in this file reads it yet.
    attr_accessor :request

    attr_reader :score
    attr_reader :reasons

    # Builds the suite, yields it to the optional block so any member can be
    # overridden, then normalizes +rules+ into a Hash of rule class => weight.
    #
    # +rules+ may be an Array or a Hash whose entries are Splam::Rule
    # descendants or splam_key symbols; a missing weight defaults to 1.0.
    #
    # Raises ArgumentError, naming the offending entry, when an entry is
    # neither a rule class nor a registered splam_key.
    def initialize(body, rules, threshold, conditions, &block)
      super(body, rules, threshold, conditions)
      block.call(self) if block
      self.rules = self.rules.inject({}) do |memo, (rule, weight)|
        memo[resolve_rule(rule)] = weight || 1.0
        memo
      end
    end

    # Runs every configured rule against +record+.
    #
    # Returns [score, reasons]: the summed score of all rules and an Array
    # holding one reasons Array per rule (so the result is nested).
    def run(record)
      score, reasons = 0, []
      rules.each do |rule_class, weight|
        weight ||= 1
        worker = rule_class.run(self, record, weight)
        score += worker.score
        reasons << worker.reasons
      end
      [score, reasons]
    end

    # True when +score+ reaches the configured threshold.
    def splam?(score)
      score >= threshold
    end

    private

    # Maps one rules entry to its rule class. Any descendant of Splam::Rule is
    # accepted as-is; anything else is looked up in the splam_key index.
    def resolve_rule(rule)
      return rule if rule.is_a?(Class) && rule < Splam::Rule
      # Keep the original entry untouched so the error can report it.
      Splam::Rule.rules[rule] || raise(ArgumentError, "Invalid rule: #{rule.inspect}")
    end
  end

  # Hook run when Splam is mixed into a class: loads every bundled rule file
  # and adds the class-level DSL (ClassMethods) to the including class.
  def self.included(base)
    # Autoload all files in rules
    # This is bad, mkay
    # Sorted so rules are always loaded (and therefore registered) in the same
    # order, regardless of the order the filesystem returns them in.
    Dir["#{File.dirname(__FILE__)}/splam/rules/*.rb"].sort.each do |f|
      require f
    end
    base.send :extend, ClassMethods
  end

  module ClassMethods
    # The Splam::Suite configured by #splammable, or nil if it was never called.
    def splam_suite; @splam_suite; end

    # Set #body attribute as splammable with default threshold of 100
    #   splammable :body
    #
    # Set #body attribute as splammable with custom threshold
    #   splammable :body, 50
    #
    # Set #body splammable with threshold and a conditions callable; the suite
    # only runs for records where the callable returns a truthy value
    #   splammable :body, 50, lambda { |record| !record.body.empty? }
    #
    # Set any Splam::Suite options
    #   splammable :body do |splam|
    #     splam.threshold = 150
    #     splam.conditions = lambda { |r| r.body.size.zero? }
    #     # Set rules with #splam_key value
    #     splam.rules = [:chinese, :html]
    #     # Set rules with Class instances
    #     splam.rules = [Splam::Rules::Chinese]
    #     # Mix and match, we're all friends here
    #     splam.rules = [Splam::Rules::Chinese, :html]
    #     # Specify optional weights
    #     splam.rules = {Splam::Rules::Chinese => 1.2, :html => 5.0}
    #   end
    #
    def splammable(fieldname, threshold=100, conditions=nil, &block)
      # todo: run only certain rules
      #  e.g. splammable :body, 100, [ :chinese, :html ]
      # todo: define some weighting on the model level
      #  e.g. splammable :body, 50, { :russian => 2.0 }
      @splam_suite = Suite.new(fieldname, Splam::Rule.default_rules, threshold, conditions, &block)
    end
  end

  # Set to true, or to a checkbox-style value such as "1", to bypass the check.
  attr_accessor :skip_splam_check

  # The record's spam score. Computed on first access; 0 when the suite did
  # not run (skipped, conditions not met, or the attribute is nil).
  def splam_score
    @splam_score || run_splam_suite(:score) || 0
  end

  # The reasons collected by the rules. Computed on first access; [] when the
  # suite did not run.
  def splam_reasons
    @splam_reasons || run_splam_suite(:reasons) || []
  end

  # True when the record's score reaches the suite's threshold.
  def splam?
    # run_splam_suite # ask yourself, do you want this to be cached for each record instance or not?
    self.class.splam_suite.splam?(splam_score)
  end

  # Validation helper: adds an error on the scored attribute when the record
  # looks like spam. Relies on the including class providing #errors.
  def validates_as_spam
    errors.add(self.class.splam_suite.body, "looks like spam.") if (!skip_splam_check? && splam?)
  end

  protected

  # Runs the class's suite against this record and stores the results in
  # @splam_score / @splam_reasons.
  #
  # Returns false without running when the conditions callable rejects the
  # record, the check is being skipped, or the scored attribute is nil.
  # Otherwise returns the instance variable named by +attr_suffix+
  # (:score or :reasons), or nil when no suffix is given.
  #
  # Raises RuntimeError when the class never called #splammable.
  def run_splam_suite(attr_suffix = nil)
    splam_suite = self.class.splam_suite || raise("Splam::Suite is not initialized")
    return false if splam_suite.conditions && !splam_suite.conditions.call(self)
    # Use the predicate rather than raw truthiness: an unchecked checkbox
    # submits "0", which is truthy in Ruby but must not disable the check.
    return false if skip_splam_check?
    return false if send(splam_suite.body).nil?
    @splam_score, @splam_reasons = splam_suite.run(self)
    instance_variable_get("@splam_#{attr_suffix}") if attr_suffix
  end

  # Whether the spam check should be bypassed for this record.
  def skip_splam_check?
    case skip_splam_check
    when true then true
    when nil, false then false
    else
      # This enables us to use a checkbox ("1" / "0")
      skip_splam_check.to_i > 0
    end
  end
end
|
data/lib/splam/rule.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# Base class for spam rules. Each subclass is registered automatically (see
# .inherited) and contributes points to a record's score from its #run method.
class Splam::Rule
  # The registry lives on Splam::Rule itself. Initialize it up front so both
  # readers return usable collections even before any rule has been loaded.
  @rules ||= {}
  @default_rules ||= []

  class << self
    # Global set of rules for all splammable classes. By default it is an array of all Splam::Rule subclasses.
    # It can be set to a subset of all rules, or even a hash with specified weights.
    #   self.default_rules = [:bad_words, :bbcode]
    #   self.default_rules = {:bad_words => 0.5, :bbcode => 7}
    #
    attr_accessor :default_rules

    # Index linking all splam_keys to the rule classes. This is populated automatically.
    attr_reader :rules

    # Symbol identifying this rule. Defaults to the underscored, demodulized
    # class name and is registered in Splam::Rule.rules on first use.
    def splam_key
      @splam_key || (self.splam_key = name.demodulize.underscore.to_sym)
    end

    # Re-registers this rule under +value+, dropping its previous key.
    def splam_key=(value)
      Splam::Rule.rules.delete(@splam_key) if @splam_key
      Splam::Rule.rules[value] = self
      @splam_key = value
      value
    end

    # Instantiates the rule with +args+ (see #initialize), runs it and returns
    # the instance so the caller can read its score and reasons.
    def run(*args)
      rule = new(*args)
      rule.run
      rule
    end
  end

  # suite  - the suite being run; its #body names the record method to score
  # record - the object under test
  # weight - multiplier applied to every point this rule adds
  def initialize(suite, record, weight = 1.0)
    @suite, @weight, @score, @reasons, @body = suite, weight, 0, [], record.send(suite.body)
  end

  # The rule's splam_key; used as the prefix of each reason.
  def name
    self.class.splam_key
  end

  # Registers every new rule class in the default rule list and in the
  # splam_key index. Always goes through Splam::Rule so that subclasses of
  # subclasses land in the same shared registry.
  def self.inherited(_subclass)
    Splam::Rule.default_rules ||= []
    Splam::Rule.default_rules << _subclass
    _subclass.splam_key
    super
  end

  attr_reader :suite, :body, :weight
  attr_accessor :reasons, :score

  # Overload this method to run your rule. Call #add_score to modify the suite's splam score.
  #
  #   def run
  #     add_score -5,  'water'
  #     add_score 5,   'PBR'
  #     add_score 10,  'black butte'
  #     add_score 30,  'red wine'
  #     add_score 95,  'everclear'
  #   end
  #
  def run
  end

  # Adds +points+, scaled by the rule's weight, to the score and records
  # +reason+. A call with zero points is ignored entirely.
  #
  # Returns the updated score, or nil when +points+ is zero.
  def add_score(points, reason)
    @score ||= 0
    return if points == 0
    @reasons << "#{name}: [#{points}#{" * #{weight}" if weight != 1}] #{reason}"
    @score += points * effective_weight
  end

  private

  # The multiplier actually applied to points. Whole-number weights are used
  # as Integers so integer scores stay exact; fractional weights (e.g. 0.5 or
  # 1.2) are applied as given instead of being truncated to 0 or 1.
  def effective_weight
    whole = weight.to_i
    (weight.nil? || weight == whole) ? whole : weight
  end
end
|
data/lib/splam/rules.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Penalizes text that mentions a banned site name.
class Splam::Rules::ArmsRace < Splam::Rule
  class << self
    # Base number of points per banned-site hit.
    attr_accessor :bad_word_score
  end

  self.bad_word_score = 40

  # This is where you put banned domain names or otherwise.
  #
  # For each banned name found in the lowercased body, the base score is
  # raised to the power of the number of hits. Extra points are then added
  # for every occurrence inside link text and inside an opening <a ...> tag.
  def run
    ["inquisitr"].each do |site|
      hits = @body.downcase.scan(site).size
      next if hits.zero?

      add_score((self.class.bad_word_score ** hits), "stupid site: '#{site}'")

      # Text between <a ...> and </a>
      @body.scan(/<a[^>]+>(.*?)<\/a>/).each do |captures|
        link_text = captures[0]
        add_score self.class.bad_word_score * 4 * link_text.scan(site).size, "nasty word inside a link: #{site}"
      end

      # Attributes of the opening <a ...> tag
      @body.scan(/<a(.*?)>/).each do |captures|
        tag_attributes = captures[0]
        add_score self.class.bad_word_score * 4 * tag_attributes.scan(site).size, "nasty word inside a URL: #{site}"
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Scores text by the bad and suspicious words it contains, with extra points
# when those words appear inside links.
class Splam::Rules::BadWords < Splam::Rule
  class << self
    # Base points for a bad word / for a suspicious word.
    attr_accessor :bad_word_score, :suspicious_word_score
  end

  self.bad_word_score = 10
  self.suspicious_word_score = 4

  # Scans the body for each word (plain Strings or Regexps):
  #   * bad words: base score raised to the number of hits in the lowercased
  #     body, plus bonuses for hits inside link text, inside a URL that starts
  #     a line, and inside an opening <a ...> tag;
  #   * suspicious words: base score times the number of hits, plus the same
  #     again for hits inside link text.
  def run
    bad_words = %w( sex sexy porn gay erotica viagra erotismo porno porn lesbian amateur )
    # "tit" must be a Regexp: inside %w() the sequence \b is a literal
    # backslash followed by "b", not a word boundary, so it could never match.
    bad_words << /tit\b/
    bad_words |= %w( gratis erotismo porno torrent bittorrent adulto )
    bad_words |= %w( cialis viagra payday loan jihad )
    bad_words |= %w( webcam free-web-host rapidshare muslim)
    # NOTE(review): the "?" in these two patterns looks like an accented letter
    # that was lost in an encoding conversion; as written it makes the
    # preceding letter optional. Confirm against the author's original.
    bad_words << /pel?cula/ << /pornogr?fica/ << "portal porno" # srsly, spamming in spanish?

    suspicious_words = %w( free buy galleries dating gallery hard hardcore video homemade celebrity ) << "credit card" << "my friend" << "friend sent me"
    # NOTE(review): "??????" is probably a non-ASCII word that was mangled the
    # same way; as written it matches six literal question marks.
    suspicious_words |= %w( adult pharmacy overnight shipping free hot movie nylon arab ?????? xxx) << "sent me a link"
    suspicious_words << "forums/member.php?u=" << "chat room" << "free chat" << "yahoo chat" << "page.php"

    bad_words.each do |word|
      results = @body.downcase.scan(word)
      next if results.empty?

      add_score((self.class.bad_word_score ** results.size), "nasty word: '#{word}'")
      # Add more points if the bad word is INSIDE a link
      @body.scan(/<a[^>]+>(.*?)<\/a>/).each do |match|
        add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a link: #{word}"
      end
      # ... or inside a bare URL at the start of a line
      @body.scan(/\nhttp:\/\/(.*?#{word})/).each do |match|
        add_score self.class.bad_word_score ** 4 * match[0].scan(word).size, "nasty word inside a straight-up link: #{word}"
      end
      # ... or inside the opening <a ...> tag itself
      @body.scan(/<a(.*?)>/).each do |match|
        add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a URL: #{word}"
      end
    end

    suspicious_words.each do |word|
      results = @body.downcase.scan(word)
      next if results.empty?

      add_score((self.class.suspicious_word_score * results.size), "suspicious word: #{word}")
      # Add more points if the suspicious word is INSIDE a link
      @body.scan(/<a[^>]+>(.*?)<\/a>/).each do |match|
        add_score((self.class.suspicious_word_score * match[0].scan(word).size), "suspicious word inside a link: #{word}")
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Penalizes BBCode markup and forum post links in the body.
class Splam::Rules::Bbcode < Splam::Rule

  # Adds points for every occurrence of each pattern below. "[url=http" also
  # matches "[url=", so such a tag is counted by both entries.
  def run
    # add_score 10 * @body.scan("\r\n").size, "Poorly formed POST (\\r\\n)"
    [
      [10, "showpost.php?p=", "Linking to a shitty forum"],
      [40, "[url=",           "URL"],             # no URLS for you!!
      [40, "[URL=",           "URL"],             # no URLS for you!!
      [40, "[url=http",       "Shitty URL/html"], # another 40 points for shitty bbcode html
      [40, "[URL=http",       "Shitty URL/html"], # another 40 points for shitty bbcode html
      [10, /\[[bai]/,         "b/a/i tag"]
    ].each do |points, pattern, reason|
      add_score points * @body.scan(pattern).size, reason
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Penalizes bodies containing specific multi-byte characters, written here as
# octal byte escapes.
class Splam::Rules::Chinese < Splam::Rule
  class << self
    # Points added per banned character found.
    attr_accessor :base_score
  end
  self.base_score = 3

  # Adds base_score points for every occurrence of each banned character.
  def run
    banned_words = [ # various chinese characters
      "\350\263\207",
      "\351\207\221",
      "\357\274\222", # number 2 in weird unicode
      "\357\274\224", # number 4
      "\357\274\225", # number 5
      "\357\274\231", # number 9
      "\357\274\215", # hyphen
      # Escaped octal sequences appearing as literal text, e.g. \357\274\222.
      # The second backslash used to be missing, which turned "\2" into a
      # backreference to a group this pattern does not have.
      /\\357\\2\d\d\\\d{3}/,
      # "\357", # ugh, these don't work .. because they're only part of a character.
      # "\351",
      "\35" # NOTE(review): a single control byte (octal 35) - confirm this is intended
    ]
    banned_words.each do |word|
      hits = (self.class.base_score * @body.scan(word).size) # base_score points for every banned word
      add_score hits, "Banned character: #{word}"
    end
  end
end
|