splam 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +2 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +24 -0
- data/MIT-LICENSE +20 -0
- data/README +53 -0
- data/Rakefile +14 -0
- data/gem-public_cert.pem +20 -0
- data/lib/splam.rb +124 -0
- data/lib/splam/rule.rb +73 -0
- data/lib/splam/rules.rb +2 -0
- data/lib/splam/rules/arms_race.rb +24 -0
- data/lib/splam/rules/bad_words.rb +46 -0
- data/lib/splam/rules/bbcode.rb +12 -0
- data/lib/splam/rules/chinese.rb +26 -0
- data/lib/splam/rules/fuzz.rb +20 -0
- data/lib/splam/rules/good_words.rb +19 -0
- data/lib/splam/rules/href.rb +55 -0
- data/lib/splam/rules/html.rb +12 -0
- data/lib/splam/rules/line_length.rb +26 -0
- data/lib/splam/rules/punctuation.rb +14 -0
- data/lib/splam/rules/russian.rb +15 -0
- data/lib/splam/rules/word_length.rb +32 -0
- data/splam.gemspec +12 -0
- data/test/fixtures/comment/ham/api-1.txt +1 -0
- data/test/fixtures/comment/ham/api-2.txt +7 -0
- data/test/fixtures/comment/ham/api-3.txt +3 -0
- data/test/fixtures/comment/ham/api-4.txt +1 -0
- data/test/fixtures/comment/ham/api-5.txt +7 -0
- data/test/fixtures/comment/ham/api.txt +5 -0
- data/test/fixtures/comment/ham/api_bug.txt +16 -0
- data/test/fixtures/comment/ham/backtrace.txt +79 -0
- data/test/fixtures/comment/ham/epic.txt +35 -0
- data/test/fixtures/comment/ham/epic_warehouse.txt +92 -0
- data/test/fixtures/comment/ham/extra_fields.txt +25 -0
- data/test/fixtures/comment/ham/feedlinks.txt +13 -0
- data/test/fixtures/comment/ham/github.txt +5 -0
- data/test/fixtures/comment/ham/hub.txt +10 -0
- data/test/fixtures/comment/ham/mario.txt +19 -0
- data/test/fixtures/comment/ham/mylyn.txt +10 -0
- data/test/fixtures/comment/ham/omg_thanks_again_finally_warehouse.txt +30 -0
- data/test/fixtures/comment/ham/omg_thanks_again_warehouse.txt +17 -0
- data/test/fixtures/comment/ham/problem.txt +7 -0
- data/test/fixtures/comment/ham/sample_html.txt +3 -0
- data/test/fixtures/comment/ham/short_reply.txt +3 -0
- data/test/fixtures/comment/ham/tags.txt +11 -0
- data/test/fixtures/comment/ham/thanks_warehouse.txt +15 -0
- data/test/fixtures/comment/ham/thx.txt +5 -0
- data/test/fixtures/comment/spam/125_spam-12420.txt +6 -0
- data/test/fixtures/comment/spam/40_pharmacia.txt +1 -0
- data/test/fixtures/comment/spam/amazon.txt +51 -0
- data/test/fixtures/comment/spam/bluebichen.txt +1 -0
- data/test/fixtures/comment/spam/boobz.txt +3 -0
- data/test/fixtures/comment/spam/buffy.txt +1 -0
- data/test/fixtures/comment/spam/chinese.txt +19 -0
- data/test/fixtures/comment/spam/comment_bbc.txt +1 -0
- data/test/fixtures/comment/spam/comment_cnn.txt +1 -0
- data/test/fixtures/comment/spam/comment_randi.txt +1 -0
- data/test/fixtures/comment/spam/comment_wordy.txt +1 -0
- data/test/fixtures/comment/spam/consent.txt +1 -0
- data/test/fixtures/comment/spam/december.txt +1 -0
- data/test/fixtures/comment/spam/digital_rights.txt +1 -0
- data/test/fixtures/comment/spam/dyed_wool.txt +1 -0
- data/test/fixtures/comment/spam/hairbrush_sex.txt +119 -0
- data/test/fixtures/comment/spam/handbag.txt +5 -0
- data/test/fixtures/comment/spam/inqius.txt +5 -0
- data/test/fixtures/comment/spam/kidneys.txt +1 -0
- data/test/fixtures/comment/spam/madonna.txt +3 -0
- data/test/fixtures/comment/spam/make_plans.txt +3 -0
- data/test/fixtures/comment/spam/oem.txt +130 -0
- data/test/fixtures/comment/spam/oem2.txt +130 -0
- data/test/fixtures/comment/spam/oem_intl.txt +131 -0
- data/test/fixtures/comment/spam/omg_sex.txt +26 -0
- data/test/fixtures/comment/spam/ottersex.txt +1 -0
- data/test/fixtures/comment/spam/pdwkb.txt +1 -0
- data/test/fixtures/comment/spam/pr0n.txt +320 -0
- data/test/fixtures/comment/spam/property.txt +448 -0
- data/test/fixtures/comment/spam/pyromancy.txt +1 -0
- data/test/fixtures/comment/spam/rapid.txt +10 -0
- data/test/fixtures/comment/spam/russki.txt +5 -0
- data/test/fixtures/comment/spam/russki2.txt +2 -0
- data/test/fixtures/comment/spam/shipping.txt +3 -0
- data/test/fixtures/comment/spam/short_n_sweet.txt +1 -0
- data/test/fixtures/comment/spam/spam-13232.txt +15 -0
- data/test/fixtures/comment/spam/spam-13518.txt +3 -0
- data/test/fixtures/comment/spam/spam-13519.txt +3 -0
- data/test/fixtures/comment/spam/spam-13520.txt +3 -0
- data/test/fixtures/comment/spam/spam-13521.txt +3 -0
- data/test/fixtures/comment/spam/spam-13982.txt +10 -0
- data/test/fixtures/comment/spam/spam-14178.txt +1 -0
- data/test/fixtures/comment/spam/spam-14447.txt +4 -0
- data/test/fixtures/comment/spam/spam-14718.txt +4 -0
- data/test/fixtures/comment/spam/spam0113081.txt +1 -0
- data/test/fixtures/comment/spam/tk.txt +4 -0
- data/test/fixtures/comment/spam/troubles.txt +2 -0
- data/test/fixtures/comment/spam/url_only_idiot.txt +1 -0
- data/test/fixtures/comment/spam/webcam.txt +3 -0
- data/test/splam_rule_test.rb +20 -0
- data/test/splam_test.rb +102 -0
- data/test/test_helper.rb +8 -0
- metadata +183 -0
- metadata.gz.sig +2 -0
data.tar.gz.sig
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
splam (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
activesupport (3.2.12)
|
10
|
+
i18n (~> 0.6)
|
11
|
+
multi_json (~> 1.0)
|
12
|
+
bump (0.3.9)
|
13
|
+
i18n (0.6.1)
|
14
|
+
multi_json (1.6.0)
|
15
|
+
rake (10.0.3)
|
16
|
+
|
17
|
+
PLATFORMS
|
18
|
+
ruby
|
19
|
+
|
20
|
+
DEPENDENCIES
|
21
|
+
activesupport
|
22
|
+
bump
|
23
|
+
rake
|
24
|
+
splam!
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 [name of plugin creator]
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
Splam
|
2
|
+
=====
|
3
|
+
|
4
|
+
Splam is a simple spam scoring plugin. It contains a set of rules that are run on a field
|
5
|
+
to help you determine the likelihood of that field being spam. It doesn't do anything
|
6
|
+
other than give a field a score. It's up to you to act on that score.
|
7
|
+
|
8
|
+
Check out the tests for instructions on how to use: you'll want to integrate this into
|
9
|
+
your application's workflow.
|
10
|
+
|
11
|
+
It's heavily biased towards the spam I've been seeing in the past two or three hours.
|
12
|
+
This includes lots of crap with
|
13
|
+
- bbcode [url=
|
14
|
+
- lots of links (http://)
|
15
|
+
- russian text
|
16
|
+
- links to russian or chinese websites
|
17
|
+
|
18
|
+
You can write your own plugins to Splam: simply subclass Splam::Rule. Splam is clever enough
|
19
|
+
to iterate over all Rule's subclasses and run the 'run' method on the field to be checked.
|
20
|
+
The other way to do this would be to define Rule.add_rule do ... end but I think the class
|
21
|
+
form is easier for rubyists to understand and modify.
|
22
|
+
|
23
|
+
Splam aggregates the scores from all the rules. From the brief testing I've done, anything over
|
24
|
+
about 40 is likely to be spam. Real spam will blow out of the scoring stratosphere with over 1,000.
|
25
|
+
|
26
|
+
Recommended serving directions:
|
27
|
+
|
28
|
+
class Comment
|
29
|
+
include Splam
|
30
|
+
|
31
|
+
splammable :body
|
32
|
+
end
|
33
|
+
|
34
|
+
comment = Comment.new :body => "This is spam!!!1"
|
35
|
+
comment.splam? # => false
|
36
|
+
comment.splam_score # => 2
|
37
|
+
comment.splam_reasons # => []
|
38
|
+
|
39
|
+
Add this to a model, check the score, and determine (based on other factors such as logged-in
|
40
|
+
user, time spent on the page, validity of request headers, length of user's membership on the
|
41
|
+
site) whether to ban the post or not.
|
42
|
+
|
43
|
+
We recommend showing the post to the user (spambox them in) but hiding it from everyone else.
|
44
|
+
|
45
|
+
TODO
|
46
|
+
|
47
|
+
- Integrate bayesian or other clever algorithm, so that scores aren't hardcoded.
|
48
|
+
- Switch to using a percentage (0.994) rather than a score (250)
|
49
|
+
- Write more plugins!
|
50
|
+
- Test against a larger Ham corpus
|
51
|
+
- Fix that nasty autoloading code in splam.rb
|
52
|
+
|
53
|
+
Copyright (c) 2008 ENTP, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
require 'bundler/gem_tasks'
|
3
|
+
require 'bump/tasks'
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
desc 'Default: run unit tests.'
|
7
|
+
task :default => :test
|
8
|
+
|
9
|
+
desc 'Test the splam gem.'
|
10
|
+
Rake::TestTask.new(:test) do |t|
|
11
|
+
t.libs << 'lib'
|
12
|
+
t.pattern = 'test/**/*_test.rb'
|
13
|
+
t.verbose = true
|
14
|
+
end
|
data/gem-public_cert.pem
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
2
|
+
MIIDMjCCAhqgAwIBAgIBADANBgkqhkiG9w0BAQUFADA/MRAwDgYDVQQDDAdtaWNo
|
3
|
+
YWVsMRcwFQYKCZImiZPyLGQBGRYHZ3Jvc3NlcjESMBAGCgmSJomT8ixkARkWAml0
|
4
|
+
MB4XDTEzMDIwMzE4MTMxMVoXDTE0MDIwMzE4MTMxMVowPzEQMA4GA1UEAwwHbWlj
|
5
|
+
aGFlbDEXMBUGCgmSJomT8ixkARkWB2dyb3NzZXIxEjAQBgoJkiaJk/IsZAEZFgJp
|
6
|
+
dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMorXo/hgbUq97+kII9H
|
7
|
+
MsQcLdC/7wQ1ZP2OshVHPkeP0qH8MBHGg6eYisOX2ubNagF9YTCZWnhrdKrwpLOO
|
8
|
+
cPLaZbjUjljJ3cQR3B8Yn1veV5IhG86QseTBjymzJWsLpqJ1UZGpfB9tXcsFtuxO
|
9
|
+
6vHvcIHdzvc/OUkICttLbH+1qb6rsHUceqh+JrH4GrsJ5H4hAfIdyS2XMK7YRKbh
|
10
|
+
h+IBu6dFWJJByzFsYmV1PDXln3UBmgAt65cmCu4qPfThioCGDzbSJrGDGLmw/pFX
|
11
|
+
FPpVCm1zgYSb1v6Qnf3cgXa2f2wYGm17+zAVyIDpwryFru9yF/jJxE38z/DRsd9R
|
12
|
+
/88CAwEAAaM5MDcwCQYDVR0TBAIwADAdBgNVHQ4EFgQUsiNnXHtKeMYYcr4yJVmQ
|
13
|
+
WONL+IwwCwYDVR0PBAQDAgSwMA0GCSqGSIb3DQEBBQUAA4IBAQAlyN7kKo/NQCQ0
|
14
|
+
AOzZLZ3WAePvStkCFIJ53tsv5Kyo4pMAllv+BgPzzBt7qi605mFSL6zBd9uLou+W
|
15
|
+
Co3s48p1dy7CjjAfVQdmVNHF3MwXtfC2OEyvSQPi4xKR8iba8wa3xp9LVo1PuLpw
|
16
|
+
/6DsrChWw74HfsJN6qJOK684hJeT8lBYAUfiC3wD0owoPSg+XtyAAddisR+KV5Y1
|
17
|
+
NmVHuLtQcNTZy+gRht3ahJRMuC6QyLmkTsf+6MaenwAMkAgHdswGsJztOnNnBa3F
|
18
|
+
y0kCSWmK6D+x/SbfS6r7Ke07MRqziJdB9GuE1+0cIRuFh8EQ+LN6HXCKM5pon/GU
|
19
|
+
ycwMXfl0
|
20
|
+
-----END CERTIFICATE-----
|
data/lib/splam.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
# Splam
|
2
|
+
#require File.dirname(__FILE__) + "/splam/rule"
|
3
|
+
#require File.dirname(__FILE__) + "/splam/rules"
|
4
|
+
#require File.dirname(__FILE__) + "/splam/rules/russian"
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'activesupport'
|
8
|
+
require 'active_support/inflector'
|
9
|
+
|
10
|
+
module Splam
|
11
|
+
class Suite < Struct.new(:body, :rules, :threshold, :conditions)
|
12
|
+
# Should be a Rack::Request, in case you want to inspect user agents and whatnot
|
13
|
+
# unimplemented, cry about it fanboy!
|
14
|
+
attr_accessor :request
|
15
|
+
|
16
|
+
attr_reader :score
|
17
|
+
attr_reader :reasons
|
18
|
+
|
19
|
+
def initialize(body, rules, threshold, conditions, &block)
|
20
|
+
super(body, rules, threshold, conditions)
|
21
|
+
block.call(self) if block
|
22
|
+
self.rules = self.rules.inject({}) do |memo, (rule, weight)|
|
23
|
+
if (rule.is_a?(Class) && rule.superclass == Splam::Rule) || rule = Splam::Rule.rules[rule]
|
24
|
+
memo[rule] = weight || 1.0
|
25
|
+
else
|
26
|
+
raise ArgumentError, "Invalid rule: #{rule.inspect}"
|
27
|
+
end
|
28
|
+
memo
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def run(record)
|
33
|
+
score, reasons = 0, []
|
34
|
+
rules.each do |rule_class, weight|
|
35
|
+
weight ||= 1
|
36
|
+
worker = rule_class.run(self, record, weight)
|
37
|
+
score += worker.score
|
38
|
+
reasons << worker.reasons
|
39
|
+
end
|
40
|
+
[score, reasons]
|
41
|
+
end
|
42
|
+
|
43
|
+
def splam?(score)
|
44
|
+
score >= threshold
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.included(base)
|
49
|
+
# Autoload all files in rules
|
50
|
+
# This is bad, mkay
|
51
|
+
Dir["#{File.dirname(__FILE__)}/splam/rules/*.rb"].each do |f|
|
52
|
+
require f
|
53
|
+
end
|
54
|
+
base.send :extend, ClassMethods
|
55
|
+
end
|
56
|
+
|
57
|
+
module ClassMethods
|
58
|
+
def splam_suite; @splam_suite; end
|
59
|
+
# Set #body attribute as splammable with default threshold of 100
|
60
|
+
# splammable :body
|
61
|
+
#
|
62
|
+
# Set #body attribute as splammable with custom threshold
|
63
|
+
# splammable :body, 50
|
64
|
+
#
|
65
|
+
# Set #body splammable with threshold and a conditions block?
|
66
|
+
# splammable :body, 50, lambda { |record| record.skip_splam_check }
|
67
|
+
#
|
68
|
+
# Set any Splam::Suite options
|
69
|
+
# splammable :body do |splam|
|
70
|
+
# splam.threshold = 150
|
71
|
+
# splam.conditions = lambda { |r| r.body.size.zero? }
|
72
|
+
# # Set rules with #splam_key value
|
73
|
+
# splam.rules = [:chinese, :html]
|
74
|
+
# # Set rules with Class instances
|
75
|
+
# splam.rules = [Splam::Rules::Chinese]
|
76
|
+
# # Mix and match, we're all friends here
|
77
|
+
# splam.rules = [Splam::Rules::Chinese, :html]
|
78
|
+
# # Specify optional weights
|
79
|
+
# splam.rules = {Splam::Rules::Chinese => 1.2, :html => 5.0}
|
80
|
+
#
|
81
|
+
def splammable(fieldname, threshold=100, conditions=nil, &block)
|
82
|
+
# todo: run only certain rules
|
83
|
+
# e.g. splammable :body, 100, [ :chinese, :html ]
|
84
|
+
# todo: define some weighting on the model level
|
85
|
+
# e.g. splammable :body, 50, { :russian => 2.0 }
|
86
|
+
@splam_suite = Suite.new(fieldname, Splam::Rule.default_rules, threshold, conditions, &block)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
attr_accessor :skip_splam_check
|
91
|
+
attr_reader :splam_score, :splam_reasons
|
92
|
+
|
93
|
+
def splam_score
|
94
|
+
@splam_score || run_splam_suite(:score) || 0
|
95
|
+
end
|
96
|
+
|
97
|
+
def splam_reasons
|
98
|
+
@splam_reasons || run_splam_suite(:reasons) || []
|
99
|
+
end
|
100
|
+
|
101
|
+
def splam?
|
102
|
+
# run_splam_suite # ask yourself, do you want this to be cached for each record instance or not?
|
103
|
+
self.class.splam_suite.splam?(splam_score)
|
104
|
+
end
|
105
|
+
|
106
|
+
def validates_as_spam
|
107
|
+
errors.add(self.class.splam_suite.body, "looks like spam.") if (!skip_splam_check? && splam?)
|
108
|
+
end
|
109
|
+
|
110
|
+
protected
|
111
|
+
def run_splam_suite(attr_suffix = nil)
|
112
|
+
splam_suite = self.class.splam_suite || raise("Splam::Suite is not initialized")
|
113
|
+
return false if (splam_suite.conditions && !splam_suite.conditions.call(self)) ||
|
114
|
+
skip_splam_check ||
|
115
|
+
send(splam_suite.body).nil?
|
116
|
+
@splam_score, @splam_reasons = splam_suite.run(self)
|
117
|
+
instance_variable_get("@splam_#{attr_suffix}") if attr_suffix
|
118
|
+
end
|
119
|
+
|
120
|
+
# Whether the spam check should be bypassed for this record.
#
# Accepts checkbox-style values ("1"/"0", 1/0, nil), which are coerced with
# #to_i, as well as plain booleans. true/false do not respond to #to_i, so
# they are returned as-is instead of raising NoMethodError.
#
# Returns true or false.
def skip_splam_check?
  flag = skip_splam_check
  return flag if flag == true || flag == false

  # This enables us to use a checkbox
  flag.to_i > 0
end
|
124
|
+
end
|
data/lib/splam/rule.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
class Splam::Rule
|
2
|
+
class << self
|
3
|
+
attr_writer :splam_key
|
4
|
+
|
5
|
+
# Global set of rules for all splammable classes. By default it is an array of all Splam::Rule subclasses.
|
6
|
+
# It can be set to a subset of all rules, or even a hash with specified weights.
|
7
|
+
# self.default_rules = [:bad_words, :bbcode]
|
8
|
+
# self.default_rules = {:bad_words => 0.5, :bbcode => 7}
|
9
|
+
#
|
10
|
+
attr_accessor :default_rules
|
11
|
+
|
12
|
+
# Index linking all splam_keys to the rule classes. This is populated automatically.
|
13
|
+
attr_reader :rules
|
14
|
+
|
15
|
+
def splam_key
|
16
|
+
@splam_key || (self.splam_key = name.demodulize.underscore.to_sym)
|
17
|
+
end
|
18
|
+
|
19
|
+
def splam_key=(value)
|
20
|
+
Splam::Rule.rules.delete(@splam_key) if @splam_key
|
21
|
+
Splam::Rule.rules[value] = self
|
22
|
+
@splam_key = value
|
23
|
+
value
|
24
|
+
end
|
25
|
+
|
26
|
+
def run(*args)
|
27
|
+
rule = new(*args)
|
28
|
+
rule.run
|
29
|
+
rule
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def initialize(suite, record, weight = 1.0)
|
34
|
+
@suite, @weight, @score, @reasons, @body = suite, weight, 0, [], record.send(suite.body)
|
35
|
+
end
|
36
|
+
|
37
|
+
def name
|
38
|
+
self.class.splam_key
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.inherited(_subclass)
|
42
|
+
@rules ||= {}
|
43
|
+
@default_rules ||= []
|
44
|
+
@default_rules << _subclass
|
45
|
+
_subclass.splam_key
|
46
|
+
super
|
47
|
+
end
|
48
|
+
|
49
|
+
attr_reader :suite, :body, :weight
|
50
|
+
attr_accessor :reasons, :score
|
51
|
+
|
52
|
+
# Override this method to run your rule. Call #add_score to modify the suite's splam score.
|
53
|
+
#
|
54
|
+
# def run
|
55
|
+
# add_score -5, 'water'
|
56
|
+
# add_score 5, 'PBR'
|
57
|
+
# add_score 10, 'black butte'
|
58
|
+
# add_score 30, 'red wine'
|
59
|
+
# add_score 95, 'everclear'
|
60
|
+
# end
|
61
|
+
#
|
62
|
+
def run
|
63
|
+
end
|
64
|
+
|
65
|
+
# Add +points+, scaled by this rule's weight, to the rule's running score and
# log a human-readable reason for it.
#
# points - Numeric raw score. A value of 0 is ignored: nothing is logged and
#          the score is left untouched.
# reason - String describing why the points were awarded.
#
# Returns the updated score, or nil when points is 0.
def add_score(points, reason)
  @score ||= 0
  return if points == 0

  @reasons << "#{name}: [#{points}#{" * #{weight}" if weight != 1}] #{reason}"
  # Scale first, then round. Truncating the weight itself (weight.to_i) made
  # 0.5 wipe the score out and 1.2 have no effect at all. Going through a
  # Rational keeps very large integer scores exact and the result an Integer.
  @score += (points * weight.to_r).round
end
|
73
|
+
end
|
data/lib/splam/rules.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
class Splam::Rules::ArmsRace < Splam::Rule
|
2
|
+
class << self
|
3
|
+
attr_accessor :bad_word_score
|
4
|
+
end
|
5
|
+
|
6
|
+
self.bad_word_score = 40
|
7
|
+
|
8
|
+
# This is where you put banned domain names or otherwise
|
9
|
+
def run
|
10
|
+
shitty_sites = ["inquisitr"]
|
11
|
+
shitty_sites.each do |word|
|
12
|
+
results = @body.downcase.scan(word)
|
13
|
+
if results && results.size > 0
|
14
|
+
add_score((self.class.bad_word_score ** results.size), "stupid site: '#{word}'")
|
15
|
+
@body.scan(/<a[^>]+>(.*?)<\/a>/).each do |match|
|
16
|
+
add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a link: #{word}"
|
17
|
+
end
|
18
|
+
@body.scan(/<a(.*?)>/).each do |match|
|
19
|
+
add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a URL: #{word}"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
class Splam::Rules::BadWords < Splam::Rule
|
2
|
+
class << self
|
3
|
+
attr_accessor :bad_word_score, :suspicious_word_score
|
4
|
+
end
|
5
|
+
|
6
|
+
self.bad_word_score = 10
|
7
|
+
self.suspicious_word_score = 4
|
8
|
+
|
9
|
+
def run
|
10
|
+
bad_words = %w( sex sexy porn gay erotica viagra erotismo porno porn lesbian amateur tit\b)
|
11
|
+
bad_words |= %w( gratis erotismo porno torrent bittorrent adulto )
|
12
|
+
bad_words |= %w( cialis viagra payday loan jihad )
|
13
|
+
bad_words |= %w( webcam free-web-host rapidshare muslim)
|
14
|
+
bad_words << /pel?cula/ << /pornogr?fica/ << "portal porno" # srsly, spamming in spanish?
|
15
|
+
|
16
|
+
suspicious_words = %w( free buy galleries dating gallery hard hardcore video homemade celebrity ) << "credit card" << "my friend" << "friend sent me"
|
17
|
+
suspicious_words |= %w( adult pharmacy overnight shipping free hot movie nylon arab ?????? xxx) << "sent me a link"
|
18
|
+
suspicious_words << "forums/member.php?u=" << "chat room" << "free chat" << "yahoo chat" << "page.php"
|
19
|
+
bad_words.each do |word|
|
20
|
+
results = @body.downcase.scan(word)
|
21
|
+
if results && results.size > 0
|
22
|
+
add_score((self.class.bad_word_score ** results.size), "nasty word: '#{word}'")
|
23
|
+
# Add more points if the bad word is INSIDE a link
|
24
|
+
@body.scan(/<a[^>]+>(.*?)<\/a>/).each do |match|
|
25
|
+
add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a link: #{word}"
|
26
|
+
end
|
27
|
+
@body.scan(/\nhttp:\/\/(.*?#{word})/).each do |match|
|
28
|
+
add_score self.class.bad_word_score ** 4 * match[0].scan(word).size, "nasty word inside a straight-up link: #{word}"
|
29
|
+
end
|
30
|
+
@body.scan(/<a(.*?)>/).each do |match|
|
31
|
+
add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a URL: #{word}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
suspicious_words.each do |word|
|
36
|
+
results = @body.downcase.scan(word)
|
37
|
+
if results && results.size > 0
|
38
|
+
add_score (self.class.suspicious_word_score * results.size), "suspicious word: #{word}"
|
39
|
+
# Add more points if the bad word is INSIDE a link
|
40
|
+
@body.scan(/<a[^>]+>(.*?)<\/a>/).each do |match|
|
41
|
+
add_score((self.class.suspicious_word_score * match[0].scan(word).size), "suspicious word inside a link: #{word}")
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class Splam::Rules::Bbcode < Splam::Rule
|
2
|
+
|
3
|
+
def run
|
4
|
+
add_score 10 * @body.scan("showpost.php?p=").size, "Linking to a shitty forum"
|
5
|
+
# add_score 10 * @body.scan("\r\n").size, "Poorly formed POST (\\r\\n)"
|
6
|
+
add_score 40 * @body.scan("[url=").size, "URL" # no URLS for you!!
|
7
|
+
add_score 40 * @body.scan("[URL=").size, "URL" # no URLS for you!!
|
8
|
+
add_score 40 * @body.scan("[url=http").size, "Shitty URL/html" # another 10 points for shitty bbcode html
|
9
|
+
add_score 40 * @body.scan("[URL=http").size, "Shitty URL/html" # another 10 points for shitty bbcode html
|
10
|
+
add_score 10 * @body.scan(/\[[bai]/).size, "b/a/i tag"
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class Splam::Rules::Chinese < Splam::Rule
|
2
|
+
class << self
|
3
|
+
attr_accessor :base_score
|
4
|
+
end
|
5
|
+
self.base_score = 3
|
6
|
+
|
7
|
+
def run
|
8
|
+
banned_words =[ # various chinese characters
|
9
|
+
"\350\263\207",
|
10
|
+
"\351\207\221",
|
11
|
+
"\357\274\222", # number 2 in weird unicode
|
12
|
+
"\357\274\224", # number 4
|
13
|
+
"\357\274\225", # number 5
|
14
|
+
"\357\274\231", # number 9
|
15
|
+
"\357\274\215", # hyphen
|
16
|
+
/\\357\2\d\d\\\d{3}/,
|
17
|
+
# "\357", # ugh, these don't work .. because they're only part of a character.
|
18
|
+
# "\351",
|
19
|
+
"\35"
|
20
|
+
]
|
21
|
+
banned_words.each do |word|
|
22
|
+
hits = (self.class.base_score * @body.scan(word).size) # 1 point for every banned word
|
23
|
+
add_score hits, "Banned character: #{word}"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|