splam 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. data.tar.gz.sig +2 -0
  2. data/Gemfile +6 -0
  3. data/Gemfile.lock +24 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README +53 -0
  6. data/Rakefile +14 -0
  7. data/gem-public_cert.pem +20 -0
  8. data/lib/splam.rb +124 -0
  9. data/lib/splam/rule.rb +73 -0
  10. data/lib/splam/rules.rb +2 -0
  11. data/lib/splam/rules/arms_race.rb +24 -0
  12. data/lib/splam/rules/bad_words.rb +46 -0
  13. data/lib/splam/rules/bbcode.rb +12 -0
  14. data/lib/splam/rules/chinese.rb +26 -0
  15. data/lib/splam/rules/fuzz.rb +20 -0
  16. data/lib/splam/rules/good_words.rb +19 -0
  17. data/lib/splam/rules/href.rb +55 -0
  18. data/lib/splam/rules/html.rb +12 -0
  19. data/lib/splam/rules/line_length.rb +26 -0
  20. data/lib/splam/rules/punctuation.rb +14 -0
  21. data/lib/splam/rules/russian.rb +15 -0
  22. data/lib/splam/rules/word_length.rb +32 -0
  23. data/splam.gemspec +12 -0
  24. data/test/fixtures/comment/ham/api-1.txt +1 -0
  25. data/test/fixtures/comment/ham/api-2.txt +7 -0
  26. data/test/fixtures/comment/ham/api-3.txt +3 -0
  27. data/test/fixtures/comment/ham/api-4.txt +1 -0
  28. data/test/fixtures/comment/ham/api-5.txt +7 -0
  29. data/test/fixtures/comment/ham/api.txt +5 -0
  30. data/test/fixtures/comment/ham/api_bug.txt +16 -0
  31. data/test/fixtures/comment/ham/backtrace.txt +79 -0
  32. data/test/fixtures/comment/ham/epic.txt +35 -0
  33. data/test/fixtures/comment/ham/epic_warehouse.txt +92 -0
  34. data/test/fixtures/comment/ham/extra_fields.txt +25 -0
  35. data/test/fixtures/comment/ham/feedlinks.txt +13 -0
  36. data/test/fixtures/comment/ham/github.txt +5 -0
  37. data/test/fixtures/comment/ham/hub.txt +10 -0
  38. data/test/fixtures/comment/ham/mario.txt +19 -0
  39. data/test/fixtures/comment/ham/mylyn.txt +10 -0
  40. data/test/fixtures/comment/ham/omg_thanks_again_finally_warehouse.txt +30 -0
  41. data/test/fixtures/comment/ham/omg_thanks_again_warehouse.txt +17 -0
  42. data/test/fixtures/comment/ham/problem.txt +7 -0
  43. data/test/fixtures/comment/ham/sample_html.txt +3 -0
  44. data/test/fixtures/comment/ham/short_reply.txt +3 -0
  45. data/test/fixtures/comment/ham/tags.txt +11 -0
  46. data/test/fixtures/comment/ham/thanks_warehouse.txt +15 -0
  47. data/test/fixtures/comment/ham/thx.txt +5 -0
  48. data/test/fixtures/comment/spam/125_spam-12420.txt +6 -0
  49. data/test/fixtures/comment/spam/40_pharmacia.txt +1 -0
  50. data/test/fixtures/comment/spam/amazon.txt +51 -0
  51. data/test/fixtures/comment/spam/bluebichen.txt +1 -0
  52. data/test/fixtures/comment/spam/boobz.txt +3 -0
  53. data/test/fixtures/comment/spam/buffy.txt +1 -0
  54. data/test/fixtures/comment/spam/chinese.txt +19 -0
  55. data/test/fixtures/comment/spam/comment_bbc.txt +1 -0
  56. data/test/fixtures/comment/spam/comment_cnn.txt +1 -0
  57. data/test/fixtures/comment/spam/comment_randi.txt +1 -0
  58. data/test/fixtures/comment/spam/comment_wordy.txt +1 -0
  59. data/test/fixtures/comment/spam/consent.txt +1 -0
  60. data/test/fixtures/comment/spam/december.txt +1 -0
  61. data/test/fixtures/comment/spam/digital_rights.txt +1 -0
  62. data/test/fixtures/comment/spam/dyed_wool.txt +1 -0
  63. data/test/fixtures/comment/spam/hairbrush_sex.txt +119 -0
  64. data/test/fixtures/comment/spam/handbag.txt +5 -0
  65. data/test/fixtures/comment/spam/inqius.txt +5 -0
  66. data/test/fixtures/comment/spam/kidneys.txt +1 -0
  67. data/test/fixtures/comment/spam/madonna.txt +3 -0
  68. data/test/fixtures/comment/spam/make_plans.txt +3 -0
  69. data/test/fixtures/comment/spam/oem.txt +130 -0
  70. data/test/fixtures/comment/spam/oem2.txt +130 -0
  71. data/test/fixtures/comment/spam/oem_intl.txt +131 -0
  72. data/test/fixtures/comment/spam/omg_sex.txt +26 -0
  73. data/test/fixtures/comment/spam/ottersex.txt +1 -0
  74. data/test/fixtures/comment/spam/pdwkb.txt +1 -0
  75. data/test/fixtures/comment/spam/pr0n.txt +320 -0
  76. data/test/fixtures/comment/spam/property.txt +448 -0
  77. data/test/fixtures/comment/spam/pyromancy.txt +1 -0
  78. data/test/fixtures/comment/spam/rapid.txt +10 -0
  79. data/test/fixtures/comment/spam/russki.txt +5 -0
  80. data/test/fixtures/comment/spam/russki2.txt +2 -0
  81. data/test/fixtures/comment/spam/shipping.txt +3 -0
  82. data/test/fixtures/comment/spam/short_n_sweet.txt +1 -0
  83. data/test/fixtures/comment/spam/spam-13232.txt +15 -0
  84. data/test/fixtures/comment/spam/spam-13518.txt +3 -0
  85. data/test/fixtures/comment/spam/spam-13519.txt +3 -0
  86. data/test/fixtures/comment/spam/spam-13520.txt +3 -0
  87. data/test/fixtures/comment/spam/spam-13521.txt +3 -0
  88. data/test/fixtures/comment/spam/spam-13982.txt +10 -0
  89. data/test/fixtures/comment/spam/spam-14178.txt +1 -0
  90. data/test/fixtures/comment/spam/spam-14447.txt +4 -0
  91. data/test/fixtures/comment/spam/spam-14718.txt +4 -0
  92. data/test/fixtures/comment/spam/spam0113081.txt +1 -0
  93. data/test/fixtures/comment/spam/tk.txt +4 -0
  94. data/test/fixtures/comment/spam/troubles.txt +2 -0
  95. data/test/fixtures/comment/spam/url_only_idiot.txt +1 -0
  96. data/test/fixtures/comment/spam/webcam.txt +3 -0
  97. data/test/splam_rule_test.rb +20 -0
  98. data/test/splam_test.rb +102 -0
  99. data/test/test_helper.rb +8 -0
  100. metadata +183 -0
  101. metadata.gz.sig +2 -0
@@ -0,0 +1,2 @@
1
+ ;��'��b��3���L���ݯ����36Uw%p�!~�4R.b>^ƶ`�� #��]�dV�,)m>��qGu�Z$Z��񦚼D�� Q���Е�SK��_����~N��w5xM/��%m��ϳrP�\����UQ���׊e��S1]���-��:���;p'��jc�<L9�~�8꿘Q��#Ι��z�.�%���h�~b��B�q`uҝ��M������j� i�Vj 
2
+ �N�$��X\�ɺ�
data/Gemfile ADDED
@@ -0,0 +1,6 @@
# Gemfile for working on the splam gem.
#
# The symbolic `:rubygems` source is deprecated by Bundler and resolves to a
# plain-HTTP endpoint, so the HTTPS URL is spelled out explicitly.
source 'https://rubygems.org'

# Pull in the dependencies declared by the gemspec in this directory.
gemspec

# Additional gems used alongside the gemspec's own dependencies.
gem 'bump'
gem 'rake'
gem 'activesupport'
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ splam (0.1.0)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ activesupport (3.2.12)
10
+ i18n (~> 0.6)
11
+ multi_json (~> 1.0)
12
+ bump (0.3.9)
13
+ i18n (0.6.1)
14
+ multi_json (1.6.0)
15
+ rake (10.0.3)
16
+
17
+ PLATFORMS
18
+ ruby
19
+
20
+ DEPENDENCIES
21
+ activesupport
22
+ bump
23
+ rake
24
+ splam!
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 ENTP
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,53 @@
1
+ Splam
2
+ =====
3
+
4
+ Splam is a simple spam scoring plugin. It contains a set of rules that are run on a field
5
+ to help you determine the likelihood of that field being spam. It doesn't do anything
6
+ other than give a field a score. It's up to you to act on that score.
7
+
8
+ Check out the tests for instructions on how to use it: you'll want to integrate this into
9
+ your application's workflow.
10
+
11
+ It's heavily biased towards the spam I've been seeing in the past two or three hours.
12
+ This includes lots of crap with
13
+ - bbcode [url=
14
+ - lots of links (http://)
15
+ - russian text
16
+ - links to russian or chinese websites
17
+
18
+ You can write your own plugins to Splam: simply subclass Splam::Rule. Splam is clever enough
19
+ to iterate over all Rule's subclasses and run the 'run' method on the field to be checked.
20
+ The other way to do this would be to define Rule.add_rule do ... end but I think the class
21
+ form is easier for rubyists to understand and modify.
22
+
23
+ Splam aggregates the scores from all the rules. From the brief testing I've done, anything over
24
+ about 40 is likely to be spam. Real spam will blow out of the scoring stratosphere with over 1,000.
25
+
26
+ Recommended serving directions:
27
+
28
+ class Comment
29
+ include Splam
30
+
31
+ splammable :body
32
+ end
33
+
34
+ comment = Comment.new :body => "This is spam!!!1"
35
+ comment.splam? # => false
36
+ comment.splam_score # => 2
37
+ comment.splam_reasons # => []
38
+
39
+ Add this to a model, check the score, and determine (based on other factors such as logged-in
40
+ user, time spent on the page, validity of request headers, length of user's membership on the
41
+ site) whether to ban the post or not.
42
+
43
+ We recommend showing the post to the user (spambox them in) but hiding it from everyone else.
44
+
45
+ TODO
46
+
47
+ - Integrate bayesian or other clever algorithm, so that scores aren't hardcoded.
48
+ - Switch to using a percentage (0.994) rather than a score (250)
49
+ - Write more plugins!
50
+ - Test against a larger Ham corpus
51
+ - Fix that nasty autoloading code in splam.rb
52
+
53
+ Copyright (c) 2008 ENTP, released under the MIT license
@@ -0,0 +1,14 @@
1
+ require 'bundler/setup'
2
+ require 'bundler/gem_tasks'
3
+ require 'bump/tasks'
4
+ require 'rake/testtask'
5
+
# Running `rake` with no task name runs the unit tests.
desc 'Default: run unit tests.'
task :default => :test

# `rake test` runs every file matching test/**/*_test.rb with lib/ added to
# the load path.
desc 'Test the splam gem.'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = true
end
@@ -0,0 +1,20 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIDMjCCAhqgAwIBAgIBADANBgkqhkiG9w0BAQUFADA/MRAwDgYDVQQDDAdtaWNo
3
+ YWVsMRcwFQYKCZImiZPyLGQBGRYHZ3Jvc3NlcjESMBAGCgmSJomT8ixkARkWAml0
4
+ MB4XDTEzMDIwMzE4MTMxMVoXDTE0MDIwMzE4MTMxMVowPzEQMA4GA1UEAwwHbWlj
5
+ aGFlbDEXMBUGCgmSJomT8ixkARkWB2dyb3NzZXIxEjAQBgoJkiaJk/IsZAEZFgJp
6
+ dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMorXo/hgbUq97+kII9H
7
+ MsQcLdC/7wQ1ZP2OshVHPkeP0qH8MBHGg6eYisOX2ubNagF9YTCZWnhrdKrwpLOO
8
+ cPLaZbjUjljJ3cQR3B8Yn1veV5IhG86QseTBjymzJWsLpqJ1UZGpfB9tXcsFtuxO
9
+ 6vHvcIHdzvc/OUkICttLbH+1qb6rsHUceqh+JrH4GrsJ5H4hAfIdyS2XMK7YRKbh
10
+ h+IBu6dFWJJByzFsYmV1PDXln3UBmgAt65cmCu4qPfThioCGDzbSJrGDGLmw/pFX
11
+ FPpVCm1zgYSb1v6Qnf3cgXa2f2wYGm17+zAVyIDpwryFru9yF/jJxE38z/DRsd9R
12
+ /88CAwEAAaM5MDcwCQYDVR0TBAIwADAdBgNVHQ4EFgQUsiNnXHtKeMYYcr4yJVmQ
13
+ WONL+IwwCwYDVR0PBAQDAgSwMA0GCSqGSIb3DQEBBQUAA4IBAQAlyN7kKo/NQCQ0
14
+ AOzZLZ3WAePvStkCFIJ53tsv5Kyo4pMAllv+BgPzzBt7qi605mFSL6zBd9uLou+W
15
+ Co3s48p1dy7CjjAfVQdmVNHF3MwXtfC2OEyvSQPi4xKR8iba8wa3xp9LVo1PuLpw
16
+ /6DsrChWw74HfsJN6qJOK684hJeT8lBYAUfiC3wD0owoPSg+XtyAAddisR+KV5Y1
17
+ NmVHuLtQcNTZy+gRht3ahJRMuC6QyLmkTsf+6MaenwAMkAgHdswGsJztOnNnBa3F
18
+ y0kCSWmK6D+x/SbfS6r7Ke07MRqziJdB9GuE1+0cIRuFh8EQ+LN6HXCKM5pon/GU
19
+ ycwMXfl0
20
+ -----END CERTIFICATE-----
@@ -0,0 +1,124 @@
1
+ # Splam
2
+ #require File.dirname(__FILE__) + "/splam/rule"
3
+ #require File.dirname(__FILE__) + "/splam/rules"
4
+ #require File.dirname(__FILE__) + "/splam/rules/russian"
5
+
6
+ require 'rubygems'
7
+ gem 'activesupport'
8
+ require 'active_support/inflector'
9
+
# Mixin that adds spam scoring to a class.
#
#   class Comment
#     include Splam
#     splammable :body
#   end
#
# Including the module loads every file in splam/rules and extends the host
# class with ClassMethods (see .splammable). Instances gain #splam_score,
# #splam_reasons, #splam? and #validates_as_spam.
module Splam
  # The rules, threshold and conditions configured for one splammable attribute.
  #
  # body       - name of the attribute (method) whose value is scored
  # rules      - Array of rule classes and/or splam keys, or a Hash mapping
  #              them to weights; #initialize normalizes this into a Hash of
  #              rule class => weight
  # threshold  - score at or above which #splam? reports spam
  # conditions - optional callable receiving the record; the suite is skipped
  #              when it returns a falsy value
  class Suite < Struct.new(:body, :rules, :threshold, :conditions)
    # Should be a Rack::Request, in case you want to inspect user agents and
    # whatnot. Nothing in this class reads it yet.
    attr_accessor :request

    attr_reader :score
    attr_reader :reasons

    # Builds the suite, yields it to the optional block so any member can be
    # overridden, then resolves every configured rule to its class.
    #
    # Raises ArgumentError when an entry is neither a Splam::Rule subclass nor
    # a registered splam key.
    def initialize(body, rules, threshold, conditions, &block)
      super(body, rules, threshold, conditions)
      block.call(self) if block
      self.rules = self.rules.inject({}) do |memo, (rule, weight)|
        rule_class =
          if rule.is_a?(Class) && rule < Splam::Rule
            # `<` also accepts indirect subclasses of Splam::Rule.
            rule
          else
            # The index is nil until the first rule class has been defined.
            (Splam::Rule.rules || {})[rule]
          end
        # Report the entry that was actually configured, not the failed lookup.
        raise ArgumentError, "Invalid rule: #{rule.inspect}" unless rule_class
        memo[rule_class] = weight || 1.0
        memo
      end
    end

    # Runs every rule against +record+.
    #
    # Returns [score, reasons]: the summed score of all rules, and an Array
    # holding one reasons Array per rule, in rule order.
    def run(record)
      score, reasons = 0, []
      rules.each do |rule_class, weight|
        weight ||= 1
        worker = rule_class.run(self, record, weight)
        score += worker.score
        reasons << worker.reasons
      end
      [score, reasons]
    end

    # True when +score+ reaches the configured threshold.
    def splam?(score)
      score >= threshold
    end
  end

  def self.included(base)
    # Autoload all files in rules
    # This is bad, mkay
    Dir["#{File.dirname(__FILE__)}/splam/rules/*.rb"].each do |f|
      require f
    end
    base.send :extend, ClassMethods
  end

  module ClassMethods
    # The Splam::Suite configured by .splammable, or nil if it was never called.
    def splam_suite; @splam_suite; end

    # Set #body attribute as splammable with default threshold of 100
    #   splammable :body
    #
    # Set #body attribute as splammable with custom threshold
    #   splammable :body, 50
    #
    # Set #body splammable with threshold and a conditions block
    # (the suite only runs when the block returns a truthy value)
    #   splammable :body, 50, lambda { |record| record.skip_splam_check }
    #
    # Set any Splam::Suite options
    #   splammable :body do |splam|
    #     splam.threshold  = 150
    #     splam.conditions = lambda { |r| r.body.size.zero? }
    #     # Set rules with #splam_key value
    #     splam.rules = [:chinese, :html]
    #     # Set rules with Class instances
    #     splam.rules = [Splam::Rules::Chinese]
    #     # Mix and match, we're all friends here
    #     splam.rules = [Splam::Rules::Chinese, :html]
    #     # Specify optional weights
    #     splam.rules = {Splam::Rules::Chinese => 1.2, :html => 5.0}
    #   end
    #
    def splammable(fieldname, threshold=100, conditions=nil, &block)
      # todo: run only certain rules
      #   e.g. splammable :body, 100, [ :chinese, :html ]
      # todo: define some weighting on the model level
      #   e.g. splammable :body, 50, { :russian => 2.0 }
      @splam_suite = Suite.new(fieldname, Splam::Rule.default_rules, threshold, conditions, &block)
    end
  end

  # Set to true (or a positive number / numeric string such as "1" from a
  # checkbox) to bypass scoring for this record.
  attr_accessor :skip_splam_check

  # Cached score of the last suite run; runs the suite on first use.
  # Returns 0 when the suite was skipped.
  def splam_score
    @splam_score || run_splam_suite(:score) || 0
  end

  # Cached reasons of the last suite run (one Array per rule); runs the suite
  # on first use. Returns [] when the suite was skipped.
  def splam_reasons
    @splam_reasons || run_splam_suite(:reasons) || []
  end

  # True when the record's score reaches the suite's threshold.
  def splam?
    # run_splam_suite # ask yourself, do you want this to be cached for each record instance or not?
    self.class.splam_suite.splam?(splam_score)
  end

  # Validation hook: adds an error on the splammable attribute when the record
  # scores as spam. Expects the host object to respond to #errors.
  def validates_as_spam
    errors.add(self.class.splam_suite.body, "looks like spam.") if (!skip_splam_check? && splam?)
  end

protected
  # Runs the suite and caches score and reasons on the record.
  #
  # Returns false without running when the conditions callable rejects the
  # record, the check is skipped, or the attribute is nil. Otherwise returns
  # the cached value named by +attr_suffix+ (:score or :reasons), or nil when
  # no suffix is given.
  #
  # Raises RuntimeError when the class never called .splammable.
  def run_splam_suite(attr_suffix = nil)
    splam_suite = self.class.splam_suite || raise("Splam::Suite is not initialized")
    # Use the predicate rather than raw truthiness so an unchecked checkbox
    # value of "0" does not silently disable the check.
    return false if (splam_suite.conditions && !splam_suite.conditions.call(self)) ||
                    skip_splam_check? ||
                    send(splam_suite.body).nil?
    @splam_score, @splam_reasons = splam_suite.run(self)
    instance_variable_get("@splam_#{attr_suffix}") if attr_suffix
  end

  # Interprets #skip_splam_check so it can be driven by a checkbox ("0"/"1")
  # as well as by a plain boolean.
  def skip_splam_check?
    value = skip_splam_check
    case value
    when true       then true
    when false, nil then false
    else
      # Values without #to_i keep their plain truthiness.
      value.respond_to?(:to_i) ? value.to_i > 0 : true
    end
  end
end
@@ -0,0 +1,73 @@
# Defined with nested module/class keywords so this file can be loaded on its
# own, without Splam having been defined first.
module Splam
  # Base class for spam-scoring rules.
  #
  # Subclass it and override #run, calling #add_score for every finding.
  # Defining a subclass appends it to the parent's default_rules and indexes
  # it in Splam::Rule.rules under its splam key.
  class Rule
    class << self
      # Global set of rules for all splammable classes. By default it is an array of all Splam::Rule subclasses.
      # It can be set to a subset of all rules, or even a hash with specified weights.
      #   self.default_rules = [:bad_words, :bbcode]
      #   self.default_rules = {:bad_words => 0.5, :bbcode => 7}
      #
      attr_accessor :default_rules

      # Index linking all splam_keys to the rule classes. This is populated automatically.
      attr_reader :rules

      # Symbol identifying this rule; derived from the class name on first use
      # (Splam::Rules::BadWords => :bad_words). Relies on ActiveSupport's
      # String#demodulize and String#underscore.
      def splam_key
        @splam_key || (self.splam_key = name.demodulize.underscore.to_sym)
      end

      # Re-registers the rule under +value+, dropping its previous index entry.
      def splam_key=(value)
        Splam::Rule.rules.delete(@splam_key) if @splam_key
        Splam::Rule.rules[value] = self
        @splam_key = value
        value
      end

      # Instantiates the rule with +args+ (suite, record, weight), runs it and
      # returns the instance so the caller can read #score and #reasons.
      def run(*args)
        rule = new(*args)
        rule.run
        rule
      end
    end

    # suite  - the suite running this rule; suite.body names the attribute to read
    # record - the object being checked
    # weight - multiplier applied to every score this rule adds
    def initialize(suite, record, weight = 1.0)
      @suite, @weight, @score, @reasons, @body = suite, weight, 0, [], record.send(suite.body)
    end

    # The rule's splam key; used as the prefix of every reason string.
    def name
      self.class.splam_key
    end

    # Registers each new subclass as a default rule and indexes its splam key.
    def self.inherited(_subclass)
      @rules ||= {}
      @default_rules ||= []
      @default_rules << _subclass
      _subclass.splam_key
      super
    end

    attr_reader :suite, :body, :weight
    attr_accessor :reasons, :score

    # Overload this method to run your rule. Call #add_score to modify the suite's splam score.
    #
    #   def run
    #     add_score -5,  'water'
    #     add_score 5,   'PBR'
    #     add_score 10,  'black butte'
    #     add_score 30,  'red wine'
    #     add_score 95,  'everclear'
    #   end
    #
    def run
    end

    # Adds +points+ (scaled by the rule's weight) to the score and records
    # +reason+. Zero points are ignored entirely.
    #
    # Returns the new score, or nil when +points+ is zero.
    def add_score(points, reason)
      @score ||= 0
      if points != 0
        @reasons << "#{name}: [#{points}#{" * #{weight}" if weight != 1}] #{reason}"
        @score += weighted(points)
      end
    end

    private

    # Applies the weight to +points+.
    #
    # Whole-number weights multiply exactly as before. Fractional weights are
    # applied with Rational arithmetic (so very large integer scores neither
    # lose precision nor overflow to Infinity) and integer points are rounded
    # back to an Integer. The weight used to be truncated with #to_i, which
    # turned 0.5 into 0 and 1.2 into 1.
    def weighted(points)
      scale = weight.to_r
      return points * weight.to_i if scale.denominator == 1
      scaled = points * scale
      scaled.is_a?(Rational) ? scaled.round : scaled
    end
  end
end
@@ -0,0 +1,2 @@
# Namespace for the rule classes shipped with Splam.
#
# Written with nested `module` keywords so the file can be loaded before (or
# without) the file that defines Splam; the compact `module Splam::Rules`
# form raises NameError when Splam does not exist yet.
module Splam
  module Rules
  end
end
@@ -0,0 +1,24 @@
# Penalizes mentions of specific banned sites.
class Splam::Rules::ArmsRace < Splam::Rule
  class << self
    # Base penalty for a banned site; see #run for how it is applied.
    attr_accessor :bad_word_score
  end

  self.bad_word_score = 40

  # This is where you put banned domain names or otherwise.
  #
  # For each banned word found anywhere in the body:
  # * adds bad_word_score raised to the power of the number of occurrences;
  # * adds bad_word_score * 4 per occurrence inside the text of an <a> element;
  # * adds bad_word_score * 4 per occurrence inside an <a ...> tag itself.
  #
  # All matching is done on the downcased body. The link checks used to scan
  # the original-case text, so a mixed-case mention inside a link earned no
  # link penalty.
  def run
    shitty_sites = ["inquisitr"]
    text = @body.downcase
    link_texts = text.scan(/<a[^>]+>(.*?)<\/a>/)
    link_tags = text.scan(/<a(.*?)>/)

    shitty_sites.each do |word|
      hits = text.scan(word).size
      next if hits.zero?

      add_score((self.class.bad_word_score ** hits), "stupid site: '#{word}'")
      link_texts.each do |match|
        add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a link: #{word}"
      end
      link_tags.each do |match|
        add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a URL: #{word}"
      end
    end
  end
end
@@ -0,0 +1,46 @@
# Penalizes spammy vocabulary, with extra weight when it appears inside links.
class Splam::Rules::BadWords < Splam::Rule
  class << self
    # Base penalties for "bad" and "suspicious" words; see #run.
    attr_accessor :bad_word_score, :suspicious_word_score
  end

  self.bad_word_score = 10
  self.suspicious_word_score = 4

  # Words (Strings) and patterns (Regexps) that strongly indicate spam.
  #
  # `tit` is a Regexp with a trailing word boundary: the original `tit\b`
  # lived inside a %w() list, where it is the literal text "tit" + backslash
  # + "b" and so could never match.
  #
  # NOTE(review): the `?` in /pel?cula/ and /pornogr?fica/ looks like an
  # accented letter lost in an encoding conversion — confirm against the
  # original file before changing them.
  BAD_WORDS = [
    "sex", "sexy", "porn", "gay", "erotica", "viagra", "erotismo", "porno",
    "lesbian", "amateur", /tit\b/,
    "gratis", "torrent", "bittorrent", "adulto",
    "cialis", "payday", "loan", "jihad",
    "webcam", "free-web-host", "rapidshare", "muslim",
    /pel?cula/, /pornogr?fica/, "portal porno" # srsly, spamming in spanish?
  ].freeze

  # Words and phrases that are only mildly suspicious.
  #
  # NOTE(review): "??????" is presumably a non-ASCII word that was mangled by
  # an encoding conversion; as written it matches six literal question marks.
  SUSPICIOUS_WORDS = [
    "free", "buy", "galleries", "dating", "gallery", "hard", "hardcore",
    "video", "homemade", "celebrity", "credit card", "my friend", "friend sent me",
    "adult", "pharmacy", "overnight", "shipping", "hot", "movie", "nylon",
    "arab", "??????", "xxx", "sent me a link",
    "forums/member.php?u=", "chat room", "free chat", "yahoo chat", "page.php"
  ].freeze

  # Scores the body against both word lists.
  #
  # Bad words: bad_word_score raised to the number of occurrences, plus
  # bad_word_score * 4 per occurrence inside <a> text or inside an <a ...>
  # tag, plus bad_word_score ** 4 per occurrence in a bare "http://" link
  # that starts a line.
  #
  # Suspicious words: suspicious_word_score per occurrence, and the same
  # again per occurrence inside <a> text.
  #
  # All matching is done on the downcased body. The link checks used to scan
  # the original-case text, so upper-case words inside links escaped the link
  # penalties.
  def run
    text = @body.downcase
    link_texts = text.scan(/<a[^>]+>(.*?)<\/a>/)
    link_tags = text.scan(/<a(.*?)>/)

    BAD_WORDS.each do |word|
      hits = text.scan(word).size
      next if hits.zero?

      add_score((self.class.bad_word_score ** hits), "nasty word: '#{word}'")
      # Add more points if the bad word is INSIDE a link
      link_texts.each do |match|
        add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a link: #{word}"
      end
      text.scan(/\nhttp:\/\/(.*?#{word})/).each do |match|
        add_score self.class.bad_word_score ** 4 * match[0].scan(word).size, "nasty word inside a straight-up link: #{word}"
      end
      link_tags.each do |match|
        add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a URL: #{word}"
      end
    end

    SUSPICIOUS_WORDS.each do |word|
      hits = text.scan(word).size
      next if hits.zero?

      add_score((self.class.suspicious_word_score * hits), "suspicious word: #{word}")
      # Add more points if the suspicious word is INSIDE a link
      link_texts.each do |match|
        add_score((self.class.suspicious_word_score * match[0].scan(word).size), "suspicious word inside a link: #{word}")
      end
    end
  end
end
@@ -0,0 +1,12 @@
# Penalizes forum-style BBCode markup and links to forum posts.
class Splam::Rules::Bbcode < Splam::Rule

  # Each table row is [points per occurrence, pattern, reason]. Rows are
  # applied in order. A "[url=http" link matches both its own row and the
  # plain "[url=" row, so it is penalized twice.
  def run
    penalties = [
      [10, "showpost.php?p=", "Linking to a shitty forum"],
      # [10, "\r\n", "Poorly formed POST (\\r\\n)"],
      [40, "[url=", "URL"],                 # no URLS for you!!
      [40, "[URL=", "URL"],                 # no URLS for you!!
      [40, "[url=http", "Shitty URL/html"], # extra points for bbcode links to http URLs
      [40, "[URL=http", "Shitty URL/html"], # extra points for bbcode links to http URLs
      [10, /\[[bai]/, "b/a/i tag"]
    ]

    penalties.each do |points, pattern, reason|
      add_score points * @body.scan(pattern).size, reason
    end
  end
end
@@ -0,0 +1,26 @@
# Penalizes specific multi-byte characters that show up in Chinese spam.
class Splam::Rules::Chinese < Splam::Rule
  class << self
    # Points added per occurrence of a banned character.
    attr_accessor :base_score
  end
  self.base_score = 3

  # Adds base_score points for every occurrence of each banned character or
  # pattern in the body.
  def run
    banned_words = [ # various chinese characters
      "\350\263\207",
      "\351\207\221",
      "\357\274\222", # number 2 in weird unicode
      "\357\274\224", # number 4
      "\357\274\225", # number 5
      "\357\274\231", # number 9
      "\357\274\215", # hyphen
      # Escaped-octal text such as a literal \357\274\222. The second backslash
      # used to be unescaped, which made `\2` a backreference to a capture
      # group that does not exist (rejected by Ruby as an invalid backref).
      /\\357\\2\d\d\\\d{3}/,
      # "\357", # ugh, these don't work .. because they're only part of a character.
      # "\351",
      "\35"
    ]
    banned_words.each do |word|
      hits = (self.class.base_score * @body.scan(word).size) # base_score points for every banned word
      add_score hits, "Banned character: #{word}"
    end
  end
end