splam 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. data.tar.gz.sig +2 -0
  2. data/Gemfile +6 -0
  3. data/Gemfile.lock +24 -0
  4. data/MIT-LICENSE +20 -0
  5. data/README +53 -0
  6. data/Rakefile +14 -0
  7. data/gem-public_cert.pem +20 -0
  8. data/lib/splam.rb +124 -0
  9. data/lib/splam/rule.rb +73 -0
  10. data/lib/splam/rules.rb +2 -0
  11. data/lib/splam/rules/arms_race.rb +24 -0
  12. data/lib/splam/rules/bad_words.rb +46 -0
  13. data/lib/splam/rules/bbcode.rb +12 -0
  14. data/lib/splam/rules/chinese.rb +26 -0
  15. data/lib/splam/rules/fuzz.rb +20 -0
  16. data/lib/splam/rules/good_words.rb +19 -0
  17. data/lib/splam/rules/href.rb +55 -0
  18. data/lib/splam/rules/html.rb +12 -0
  19. data/lib/splam/rules/line_length.rb +26 -0
  20. data/lib/splam/rules/punctuation.rb +14 -0
  21. data/lib/splam/rules/russian.rb +15 -0
  22. data/lib/splam/rules/word_length.rb +32 -0
  23. data/splam.gemspec +12 -0
  24. data/test/fixtures/comment/ham/api-1.txt +1 -0
  25. data/test/fixtures/comment/ham/api-2.txt +7 -0
  26. data/test/fixtures/comment/ham/api-3.txt +3 -0
  27. data/test/fixtures/comment/ham/api-4.txt +1 -0
  28. data/test/fixtures/comment/ham/api-5.txt +7 -0
  29. data/test/fixtures/comment/ham/api.txt +5 -0
  30. data/test/fixtures/comment/ham/api_bug.txt +16 -0
  31. data/test/fixtures/comment/ham/backtrace.txt +79 -0
  32. data/test/fixtures/comment/ham/epic.txt +35 -0
  33. data/test/fixtures/comment/ham/epic_warehouse.txt +92 -0
  34. data/test/fixtures/comment/ham/extra_fields.txt +25 -0
  35. data/test/fixtures/comment/ham/feedlinks.txt +13 -0
  36. data/test/fixtures/comment/ham/github.txt +5 -0
  37. data/test/fixtures/comment/ham/hub.txt +10 -0
  38. data/test/fixtures/comment/ham/mario.txt +19 -0
  39. data/test/fixtures/comment/ham/mylyn.txt +10 -0
  40. data/test/fixtures/comment/ham/omg_thanks_again_finally_warehouse.txt +30 -0
  41. data/test/fixtures/comment/ham/omg_thanks_again_warehouse.txt +17 -0
  42. data/test/fixtures/comment/ham/problem.txt +7 -0
  43. data/test/fixtures/comment/ham/sample_html.txt +3 -0
  44. data/test/fixtures/comment/ham/short_reply.txt +3 -0
  45. data/test/fixtures/comment/ham/tags.txt +11 -0
  46. data/test/fixtures/comment/ham/thanks_warehouse.txt +15 -0
  47. data/test/fixtures/comment/ham/thx.txt +5 -0
  48. data/test/fixtures/comment/spam/125_spam-12420.txt +6 -0
  49. data/test/fixtures/comment/spam/40_pharmacia.txt +1 -0
  50. data/test/fixtures/comment/spam/amazon.txt +51 -0
  51. data/test/fixtures/comment/spam/bluebichen.txt +1 -0
  52. data/test/fixtures/comment/spam/boobz.txt +3 -0
  53. data/test/fixtures/comment/spam/buffy.txt +1 -0
  54. data/test/fixtures/comment/spam/chinese.txt +19 -0
  55. data/test/fixtures/comment/spam/comment_bbc.txt +1 -0
  56. data/test/fixtures/comment/spam/comment_cnn.txt +1 -0
  57. data/test/fixtures/comment/spam/comment_randi.txt +1 -0
  58. data/test/fixtures/comment/spam/comment_wordy.txt +1 -0
  59. data/test/fixtures/comment/spam/consent.txt +1 -0
  60. data/test/fixtures/comment/spam/december.txt +1 -0
  61. data/test/fixtures/comment/spam/digital_rights.txt +1 -0
  62. data/test/fixtures/comment/spam/dyed_wool.txt +1 -0
  63. data/test/fixtures/comment/spam/hairbrush_sex.txt +119 -0
  64. data/test/fixtures/comment/spam/handbag.txt +5 -0
  65. data/test/fixtures/comment/spam/inqius.txt +5 -0
  66. data/test/fixtures/comment/spam/kidneys.txt +1 -0
  67. data/test/fixtures/comment/spam/madonna.txt +3 -0
  68. data/test/fixtures/comment/spam/make_plans.txt +3 -0
  69. data/test/fixtures/comment/spam/oem.txt +130 -0
  70. data/test/fixtures/comment/spam/oem2.txt +130 -0
  71. data/test/fixtures/comment/spam/oem_intl.txt +131 -0
  72. data/test/fixtures/comment/spam/omg_sex.txt +26 -0
  73. data/test/fixtures/comment/spam/ottersex.txt +1 -0
  74. data/test/fixtures/comment/spam/pdwkb.txt +1 -0
  75. data/test/fixtures/comment/spam/pr0n.txt +320 -0
  76. data/test/fixtures/comment/spam/property.txt +448 -0
  77. data/test/fixtures/comment/spam/pyromancy.txt +1 -0
  78. data/test/fixtures/comment/spam/rapid.txt +10 -0
  79. data/test/fixtures/comment/spam/russki.txt +5 -0
  80. data/test/fixtures/comment/spam/russki2.txt +2 -0
  81. data/test/fixtures/comment/spam/shipping.txt +3 -0
  82. data/test/fixtures/comment/spam/short_n_sweet.txt +1 -0
  83. data/test/fixtures/comment/spam/spam-13232.txt +15 -0
  84. data/test/fixtures/comment/spam/spam-13518.txt +3 -0
  85. data/test/fixtures/comment/spam/spam-13519.txt +3 -0
  86. data/test/fixtures/comment/spam/spam-13520.txt +3 -0
  87. data/test/fixtures/comment/spam/spam-13521.txt +3 -0
  88. data/test/fixtures/comment/spam/spam-13982.txt +10 -0
  89. data/test/fixtures/comment/spam/spam-14178.txt +1 -0
  90. data/test/fixtures/comment/spam/spam-14447.txt +4 -0
  91. data/test/fixtures/comment/spam/spam-14718.txt +4 -0
  92. data/test/fixtures/comment/spam/spam0113081.txt +1 -0
  93. data/test/fixtures/comment/spam/tk.txt +4 -0
  94. data/test/fixtures/comment/spam/troubles.txt +2 -0
  95. data/test/fixtures/comment/spam/url_only_idiot.txt +1 -0
  96. data/test/fixtures/comment/spam/webcam.txt +3 -0
  97. data/test/splam_rule_test.rb +20 -0
  98. data/test/splam_test.rb +102 -0
  99. data/test/test_helper.rb +8 -0
  100. metadata +183 -0
  101. metadata.gz.sig +2 -0
@@ -0,0 +1,2 @@
1
+ ;��'��b��3���L���ݯ����36Uw%p�!~�4R.b>^ƶ`�� #��]�dV�,)m>��qGu�Z$Z��񦚼D�� Q���Е�SK��_����~N��w5xM/��%m��ϳrP�\����UQ���׊e��S1]���-��:���;p'��jc�<L9�~�8꿘Q��#Ι��z�.�%���h�~b��B�q`uҝ��M������j� i�Vj 
2
+ �N�$��X\�ɺ�
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Fetch gems over HTTPS. The legacy `:rubygems` shorthand resolved to the
# plain-http index and is deprecated by Bundler.
source 'https://rubygems.org'

# Pull in whatever the gem's own .gemspec declares.
gemspec

# bump and rake are required by the Rakefile; activesupport's inflector is
# required by lib/splam.rb.
gem 'bump'
gem 'rake'
gem 'activesupport'
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ splam (0.1.0)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ activesupport (3.2.12)
10
+ i18n (~> 0.6)
11
+ multi_json (~> 1.0)
12
+ bump (0.3.9)
13
+ i18n (0.6.1)
14
+ multi_json (1.6.0)
15
+ rake (10.0.3)
16
+
17
+ PLATFORMS
18
+ ruby
19
+
20
+ DEPENDENCIES
21
+ activesupport
22
+ bump
23
+ rake
24
+ splam!
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 [name of plugin creator]
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,53 @@
1
+ Splam
2
+ =====
3
+
4
+ Splam is a simple spam scoring plugin. It contains a set of rules that are run on a field
5
+ to help you determine the likelihood of that field being spam. It doesn't do anything
6
+ other than give a field a score. It's up to you to act on that score.
7
+
8
+ Check out the tests for instructions on how to use: you'll want to integrate this into
9
+ your application's workflow.
10
+
11
+ It's heavily biased towards the spam I've been seeing in the past two or three hours.
12
+ This includes lots of crap with
13
+ - bbcode [url=
14
+ - lots of links (http://)
15
+ - russian text
16
+ - links to russian or chinese websites
17
+
18
+ You can write your own plugins to Splam: simply subclass Splam::Rule. Splam is clever enough
19
+ to iterate over all Rule's subclasses and run the 'run' method on the field to be checked.
20
+ The other way to do this would be to define Rule.add_rule do ... end but I think the class
21
+ form is easier for rubyists to understand and modify.
22
+
23
+ Splam aggregates the scores from all the rules. From the brief testing I've done, anything over
24
+ about 40 is likely to be spam. Real spam will blow out of the scoring stratosphere with over 1,000.
25
+
26
+ Recommended serving directions:
27
+
28
+ class Comment
29
+ include Splam
30
+
31
+ splammable :body
32
+ end
33
+
34
+ comment = Comment.new :body => "This is spam!!!1"
35
+ comment.splam? # => false
36
+ comment.splam_score # => 2
37
+ comment.splam_reasons # => []
38
+
39
+ Add this to a model, check the score, and determine (based on other factors such as logged-in
40
+ user, time spent on the page, validity of request headers, length of user's membership on the
41
+ site) whether to ban the post or not.
42
+
43
+ We recommend showing the post to the user (spambox them in) but hiding it from everyone else.
44
+
45
+ TODO
46
+
47
+ - Integrate bayesian or other clever algorithm, so that scores aren't hardcoded.
48
+ - Switch to using a percentage (0.994) rather than a score (250)
49
+ - Write more plugins!
50
+ - Test against a larger Ham corpus
51
+ - Fix that nasty autoloading code in splam.rb
52
+
53
+ Copyright (c) 2008 ENTP, released under the MIT license
@@ -0,0 +1,14 @@
1
+ require 'bundler/setup'
2
+ require 'bundler/gem_tasks'
3
+ require 'bump/tasks'
4
+ require 'rake/testtask'
5
+
6
# Running plain `rake` executes the unit tests.
desc 'Default: run unit tests.'
task default: :test

# Test task: puts lib/ on the load path and runs every *_test.rb under test/.
desc 'Test the splam gem.'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs << 'lib'
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = true
end
@@ -0,0 +1,20 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIDMjCCAhqgAwIBAgIBADANBgkqhkiG9w0BAQUFADA/MRAwDgYDVQQDDAdtaWNo
3
+ YWVsMRcwFQYKCZImiZPyLGQBGRYHZ3Jvc3NlcjESMBAGCgmSJomT8ixkARkWAml0
4
+ MB4XDTEzMDIwMzE4MTMxMVoXDTE0MDIwMzE4MTMxMVowPzEQMA4GA1UEAwwHbWlj
5
+ aGFlbDEXMBUGCgmSJomT8ixkARkWB2dyb3NzZXIxEjAQBgoJkiaJk/IsZAEZFgJp
6
+ dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMorXo/hgbUq97+kII9H
7
+ MsQcLdC/7wQ1ZP2OshVHPkeP0qH8MBHGg6eYisOX2ubNagF9YTCZWnhrdKrwpLOO
8
+ cPLaZbjUjljJ3cQR3B8Yn1veV5IhG86QseTBjymzJWsLpqJ1UZGpfB9tXcsFtuxO
9
+ 6vHvcIHdzvc/OUkICttLbH+1qb6rsHUceqh+JrH4GrsJ5H4hAfIdyS2XMK7YRKbh
10
+ h+IBu6dFWJJByzFsYmV1PDXln3UBmgAt65cmCu4qPfThioCGDzbSJrGDGLmw/pFX
11
+ FPpVCm1zgYSb1v6Qnf3cgXa2f2wYGm17+zAVyIDpwryFru9yF/jJxE38z/DRsd9R
12
+ /88CAwEAAaM5MDcwCQYDVR0TBAIwADAdBgNVHQ4EFgQUsiNnXHtKeMYYcr4yJVmQ
13
+ WONL+IwwCwYDVR0PBAQDAgSwMA0GCSqGSIb3DQEBBQUAA4IBAQAlyN7kKo/NQCQ0
14
+ AOzZLZ3WAePvStkCFIJ53tsv5Kyo4pMAllv+BgPzzBt7qi605mFSL6zBd9uLou+W
15
+ Co3s48p1dy7CjjAfVQdmVNHF3MwXtfC2OEyvSQPi4xKR8iba8wa3xp9LVo1PuLpw
16
+ /6DsrChWw74HfsJN6qJOK684hJeT8lBYAUfiC3wD0owoPSg+XtyAAddisR+KV5Y1
17
+ NmVHuLtQcNTZy+gRht3ahJRMuC6QyLmkTsf+6MaenwAMkAgHdswGsJztOnNnBa3F
18
+ y0kCSWmK6D+x/SbfS6r7Ke07MRqziJdB9GuE1+0cIRuFh8EQ+LN6HXCKM5pon/GU
19
+ ycwMXfl0
20
+ -----END CERTIFICATE-----
@@ -0,0 +1,124 @@
1
+ # Splam
2
+ #require File.dirname(__FILE__) + "/splam/rule"
3
+ #require File.dirname(__FILE__) + "/splam/rules"
4
+ #require File.dirname(__FILE__) + "/splam/rules/russian"
5
+
6
+ require 'rubygems'
7
+ gem 'activesupport'
8
+ require 'active_support/inflector'
9
+
10
# Splam scores one attribute of a model for spam-likeness.
#
# Including this module in a class loads every rule file under splam/rules/,
# adds the +splammable+ class macro, and gives each instance #splam_score,
# #splam_reasons, #splam? and #validates_as_spam.
module Splam
  # Everything needed to score one attribute.
  #
  # Struct members:
  # body::       name of the attribute (method) whose value is scored
  # rules::      Array or Hash of rules; normalized to { RuleClass => weight }
  # threshold::  scores at or above this value count as spam
  # conditions:: optional callable taking the record; the suite only runs when
  #              it returns a truthy value
  class Suite < Struct.new(:body, :rules, :threshold, :conditions)
    # Intended to hold a Rack::Request so rules could inspect user agents and
    # the like. Nothing in this file reads or assigns it yet.
    attr_accessor :request

    # Readers kept for interface compatibility; Suite itself never assigns
    # these instance variables, so they read nil. #run returns the values.
    attr_reader :score
    attr_reader :reasons

    # Builds the suite, yields it to the optional block (so the block can
    # override rules/threshold/conditions), then normalizes +rules+.
    #
    # Each rule may be a Splam::Rule subclass or a splam_key Symbol registered
    # in Splam::Rule.rules. A missing weight defaults to 1.0.
    #
    # Raises ArgumentError naming the offending entry when a rule is neither.
    def initialize(body, rules, threshold, conditions, &block)
      super(body, rules, threshold, conditions)
      block.call(self) if block
      self.rules = self.rules.each_with_object({}) do |(rule, weight), normalized|
        # Keep the original +rule+ intact so the error message can show it.
        rule_class = rule_class?(rule) ? rule : Splam::Rule.rules[rule]
        raise ArgumentError, "Invalid rule: #{rule.inspect}" unless rule_class
        normalized[rule_class] = weight || 1.0
      end
    end

    # Runs every configured rule against +record+.
    #
    # Returns [total_score, reasons] where +reasons+ holds one entry per rule,
    # each entry being that rule's own reasons array.
    def run(record)
      total = 0
      collected = []
      rules.each do |rule_class, weight|
        worker = rule_class.run(self, record, weight || 1)
        total += worker.score
        collected << worker.reasons
      end
      [total, collected]
    end

    # True when +score+ reaches the configured threshold.
    def splam?(score)
      score >= threshold
    end

    private

    # True when +rule+ is a class descending from Splam::Rule.
    def rule_class?(rule)
      rule.is_a?(Class) && rule < Splam::Rule ? true : false
    end
  end

  # Hook run on +include Splam+: requires every file in splam/rules/ (sorted,
  # so the load order is the same on every platform) and adds the class macro.
  # The README's TODO list already marks this autoloading as something to
  # replace.
  def self.included(base)
    Dir["#{File.dirname(__FILE__)}/splam/rules/*.rb"].sort.each do |rule_file|
      require rule_file
    end
    base.extend(ClassMethods)
  end

  module ClassMethods
    # The Splam::Suite configured by #splammable (nil until it is called).
    def splam_suite
      @splam_suite
    end

    # Set #body attribute as splammable with default threshold of 100
    #   splammable :body
    #
    # Set #body attribute as splammable with custom threshold
    #   splammable :body, 50
    #
    # Set #body splammable with a threshold and a conditions callable.
    # The check only runs when the callable returns a truthy value:
    #   splammable :body, 50, lambda { |record| record.published? }
    #
    # Set any Splam::Suite options
    #   splammable :body do |splam|
    #     splam.threshold  = 150
    #     splam.conditions = lambda { |r| !r.body.empty? }
    #     # Set rules with #splam_key value
    #     splam.rules = [:chinese, :html]
    #     # Set rules with rule classes
    #     splam.rules = [Splam::Rules::Chinese]
    #     # Mix and match, we're all friends here
    #     splam.rules = [Splam::Rules::Chinese, :html]
    #     # Specify optional weights
    #     splam.rules = {Splam::Rules::Chinese => 1.2, :html => 5.0}
    #   end
    #
    def splammable(fieldname, threshold = 100, conditions = nil, &block)
      # todo: run only certain rules
      #   e.g. splammable :body, 100, [ :chinese, :html ]
      # todo: define some weighting on the model level
      #   e.g. splammable :body, 50, { :russian => 2.0 }
      @splam_suite = Suite.new(fieldname, Splam::Rule.default_rules, threshold, conditions, &block)
    end
  end

  # Set to a truthy flag (true, "1", 1, ...) to bypass the spam check.
  attr_accessor :skip_splam_check

  # Cached score for this record; runs the suite on first use. Returns 0 when
  # the suite was skipped.
  def splam_score
    @splam_score || run_splam_suite(:score) || 0
  end

  # Cached reasons for this record; runs the suite on first use. Returns []
  # when the suite was skipped.
  def splam_reasons
    @splam_reasons || run_splam_suite(:reasons) || []
  end

  # True when the (cached) score reaches the suite's threshold.
  def splam?
    # run_splam_suite # ask yourself, do you want this to be cached for each record instance or not?
    self.class.splam_suite.splam?(splam_score)
  end

  # Validation helper: adds an error on the scored attribute when the record
  # looks like spam. Expects the including class to provide #errors.
  def validates_as_spam
    errors.add(self.class.splam_suite.body, "looks like spam.") if !skip_splam_check? && splam?
  end

  protected

  # Runs the suite and caches @splam_score / @splam_reasons.
  #
  # Returns false without running when the conditions callable returns a
  # falsy value, when the skip flag is set, or when the scored attribute is
  # nil. Otherwise returns the cached value named by +attr_suffix+
  # (:score or :reasons), or nil when no suffix is given.
  def run_splam_suite(attr_suffix = nil)
    splam_suite = self.class.splam_suite || raise("Splam::Suite is not initialized")
    return false if splam_suite.conditions && !splam_suite.conditions.call(self)
    # Use the predicate so an unchecked checkbox ("0") does not disable checks.
    return false if skip_splam_check?
    return false if send(splam_suite.body).nil?

    @splam_score, @splam_reasons = splam_suite.run(self)
    instance_variable_get("@splam_#{attr_suffix}") if attr_suffix
  end

  # Interprets #skip_splam_check as a flag. This enables us to use a checkbox:
  # "1"/1 mean skip, "0"/0/""/nil mean do not skip. Plain booleans are taken
  # as-is, and any other object without #to_i counts as "skip".
  def skip_splam_check?
    flag = skip_splam_check
    case flag
    when true then true
    when false, nil then false
    else flag.respond_to?(:to_i) ? flag.to_i > 0 : true
    end
  end
end
@@ -0,0 +1,73 @@
1
module Splam
  # Base class for every spam rule. A rule instance inspects one attribute
  # value (#body) and accumulates a #score plus human-readable #reasons.
  #
  # Subclasses register themselves on definition: they are appended to the
  # parent's default_rules and indexed in Splam::Rule.rules by their splam_key.
  class Rule
    class << self
      # Global set of rules for all splammable classes. By default it is an array of all Splam::Rule subclasses.
      # It can be set to a subset of all rules, or even a hash with specified weights.
      #   self.default_rules = [:bad_words, :bbcode]
      #   self.default_rules = {:bad_words => 0.5, :bbcode => 7}
      #
      attr_accessor :default_rules

      # Index linking all splam_keys to the rule classes. This is populated automatically.
      attr_reader :rules

      # Symbol identifying this rule class. Defaults to the underscored,
      # demodulized class name (String#demodulize / #underscore come from
      # ActiveSupport's inflector, which lib/splam.rb requires).
      def splam_key
        @splam_key || (self.splam_key = name.demodulize.underscore.to_sym)
      end

      # Re-registers this class in Splam::Rule.rules under +value+, dropping
      # any key it was previously registered under.
      def splam_key=(value)
        Splam::Rule.rules.delete(@splam_key) if @splam_key
        Splam::Rule.rules[value] = self
        @splam_key = value
      end

      # Instantiates the rule with +args+ (see #initialize), runs it, and
      # returns the instance so the caller can read #score and #reasons.
      def run(*args)
        new(*args).tap(&:run)
      end

      # Subclass hook: lazily creates the registries on the receiver, records
      # the new subclass as a default rule, and forces its splam_key so it is
      # indexed in Splam::Rule.rules.
      def inherited(subclass)
        @rules ||= {}
        @default_rules ||= []
        @default_rules << subclass
        subclass.splam_key
        super
      end
    end

    attr_reader :suite, :body, :weight
    attr_accessor :reasons, :score

    # suite::  object responding to #body with the attribute name to score
    # record:: object the attribute is read from (via #send)
    # weight:: multiplier applied to every score added by this rule
    def initialize(suite, record, weight = 1.0)
      @suite = suite
      @weight = weight
      @score = 0
      @reasons = []
      @body = record.send(suite.body)
    end

    # The rule's splam_key; used as the prefix of every reason string.
    def name
      self.class.splam_key
    end

    # Overload this method to run your rule. Call #add_score to modify the suite's splam score.
    #
    #   def run
    #     add_score -5,  'water'
    #     add_score 5,   'PBR'
    #     add_score 10,  'black butte'
    #     add_score 30,  'red wine'
    #     add_score 95,  'everclear'
    #   end
    #
    def run
    end

    # Adds +points+ scaled by the rule's weight to #score and records
    # +reason+. A +points+ of 0 is ignored entirely (no reason is recorded).
    #
    # Returns the new score, or nil when +points+ is 0.
    def add_score(points, reason)
      @score ||= 0
      return if points == 0

      @reasons << "#{name}: [#{points}#{" * #{weight}" if weight != 1}] #{reason}"
      @score += apply_weight(points)
    end

    private

    # Scales +points+ by #weight. Whole-number (or non-numeric) weights go
    # through #to_i so Integer scores stay exact Integers; fractional weights
    # such as 0.5 or 1.2 are applied as-is instead of being truncated.
    def apply_weight(points)
      whole = weight.to_i
      return points * whole unless weight.is_a?(Numeric) && weight != whole

      points * weight
    end
  end
end
@@ -0,0 +1,2 @@
1
# Namespace for the bundled rule classes. Written in nested form so this file
# can be loaded even when Splam itself has not been defined yet.
module Splam
  module Rules
  end
end
@@ -0,0 +1,24 @@
1
# Penalizes text that mentions banned site names.
class Splam::Rules::ArmsRace < Splam::Rule
  class << self
    # Base score per banned site; raised to the power of the hit count.
    attr_accessor :bad_word_score
  end

  self.bad_word_score = 40

  # This is where you put banned domain names or otherwise.
  #
  # For each banned name found in the lowercased body, adds
  # bad_word_score ** occurrences, then extra points for every occurrence
  # inside link text and inside an opening <a ...> tag. The link scans run
  # against the body as-is, not lowercased.
  def run
    ["inquisitr"].each do |site|
      occurrences = @body.downcase.scan(site)
      next if occurrences.empty?

      base = self.class.bad_word_score
      add_score(base ** occurrences.size, "stupid site: '#{site}'")

      @body.scan(/<a[^>]+>(.*?)<\/a>/).each do |(link_text)|
        add_score base * 4 * link_text.scan(site).size, "nasty word inside a link: #{site}"
      end

      @body.scan(/<a(.*?)>/).each do |(tag_attributes)|
        add_score base * 4 * tag_attributes.scan(site).size, "nasty word inside a URL: #{site}"
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
# Penalizes text containing known spam vocabulary. "Bad" words score
# exponentially with their hit count; "suspicious" words score linearly.
class Splam::Rules::BadWords < Splam::Rule
  class << self
    attr_accessor :bad_word_score, :suspicious_word_score
  end

  self.bad_word_score = 10
  self.suspicious_word_score = 4

  # Scores the bad words first, then the suspicious ones.
  def run
    score_bad_words
    score_suspicious_words
  end

  private

  # Strings and regexps treated as outright bad, duplicates removed.
  #
  # NOTE(review): "tit\b" is a plain String here (backslash + "b"), so it only
  # matches that literal text — presumably a regexp was intended. The "?" in
  # the two regexps may be an accented character lost in transit; verify
  # against the upstream source.
  def bad_words
    plain = [
      %w( sex sexy porn gay erotica viagra erotismo porno porn lesbian amateur tit\b),
      %w( gratis erotismo porno torrent bittorrent adulto ),
      %w( cialis viagra payday loan jihad ),
      %w( webcam free-web-host rapidshare muslim)
    ].reduce(:|)
    plain + [/pel?cula/, /pornogr?fica/, "portal porno"] # srsly, spamming in spanish?
  end

  # Words and phrases that are merely suspicious, duplicates removed.
  #
  # NOTE(review): the "??????" entry looks like mis-encoded text; confirm.
  def suspicious_words
    first = %w( free buy galleries dating gallery hard hardcore video homemade celebrity ) +
            ["credit card", "my friend", "friend sent me"]
    second = %w( adult pharmacy overnight shipping free hot movie nylon arab ?????? xxx) +
             ["sent me a link"]
    (first | second) +
      ["forums/member.php?u=", "chat room", "free chat", "yahoo chat", "page.php"]
  end

  # For every bad word present in the lowercased body: adds
  # bad_word_score ** hits, then extra points for occurrences inside link
  # text, inside a bare "http://" URL at the start of a line, and inside an
  # opening <a ...> tag. Those three scans use the body as-is.
  def score_bad_words
    base = self.class.bad_word_score
    bad_words.each do |word|
      hits = @body.downcase.scan(word)
      next if hits.empty?

      add_score(base ** hits.size, "nasty word: '#{word}'")

      # Add more points if the bad word is INSIDE a link
      @body.scan(/<a[^>]+>(.*?)<\/a>/).each do |(link_text)|
        add_score base * 4 * link_text.scan(word).size, "nasty word inside a link: #{word}"
      end

      @body.scan(/\nhttp:\/\/(.*?#{word})/).each do |(bare_url)|
        add_score base ** 4 * bare_url.scan(word).size, "nasty word inside a straight-up link: #{word}"
      end

      @body.scan(/<a(.*?)>/).each do |(tag_attributes)|
        add_score base * 4 * tag_attributes.scan(word).size, "nasty word inside a URL: #{word}"
      end
    end
  end

  # For every suspicious word present in the lowercased body: adds
  # suspicious_word_score per hit, plus the same again for each occurrence
  # inside link text.
  def score_suspicious_words
    unit = self.class.suspicious_word_score
    suspicious_words.each do |word|
      hits = @body.downcase.scan(word)
      next if hits.empty?

      add_score(unit * hits.size, "suspicious word: #{word}")

      # Add more points if the suspicious word is INSIDE a link
      @body.scan(/<a[^>]+>(.*?)<\/a>/).each do |(link_text)|
        add_score(unit * link_text.scan(word).size, "suspicious word inside a link: #{word}")
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
# Penalizes BBCode markup and forum-post links.
class Splam::Rules::Bbcode < Splam::Rule
  # Adds points per occurrence of each pattern, in the order listed.
  # "[url=http" also matches "[url=", so such tags are counted by both rows
  # (likewise for the upper-case variants).
  def run
    # A check for "\r\n" (poorly formed POST) used to live here; it is disabled.
    [
      [10, "showpost.php?p=", "Linking to a shitty forum"],
      [40, "[url=",           "URL"],             # no URLS for you!!
      [40, "[URL=",           "URL"],             # no URLS for you!!
      [40, "[url=http",       "Shitty URL/html"], # extra points for bbcode html
      [40, "[URL=http",       "Shitty URL/html"], # extra points for bbcode html
      [10, /\[[bai]/,         "b/a/i tag"]
    ].each do |points, pattern, reason|
      add_score points * @body.scan(pattern).size, reason
    end
  end
end
@@ -0,0 +1,26 @@
1
# Penalizes text containing specific CJK / fullwidth characters.
class Splam::Rules::Chinese < Splam::Rule
  class << self
    # Points added per occurrence of a banned character.
    attr_accessor :base_score
  end
  self.base_score = 3

  # Adds base_score points for every occurrence of each banned pattern.
  def run
    banned_words = [ # byte sequences written as octal escapes (UTF-8)
      "\350\263\207", # U+8CC7
      "\351\207\221", # U+91D1
      "\357\274\222", # fullwidth digit 2
      "\357\274\224", # fullwidth digit 4
      "\357\274\225", # fullwidth digit 5
      "\357\274\231", # fullwidth digit 9
      "\357\274\215", # fullwidth hyphen
      # Literal text: backslash, "357", backslash, "2", two digits, backslash,
      # three digits — presumably fullwidth characters that reached us already
      # escaped. The second backslash must itself be escaped; a bare \2 is a
      # backreference to a group that does not exist and is rejected by the
      # regexp engine.
      /\\357\\2\d\d\\\d{3}/,
      # "\357", # ugh, these don't work .. because they're only part of a character.
      # "\351",
      "\35" # control character 0x1D
    ]
    banned_words.each do |word|
      points = self.class.base_score * @body.scan(word).size
      add_score points, "Banned character: #{word}"
    end
  end
end