filters_spam 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/filters_spam.rb +138 -0
  2. data/readme.md +44 -0
  3. metadata +70 -0
@@ -0,0 +1,138 @@
1
# Just call filters_spam in your model with any of the options you want.
#
# Adds heuristic spam scoring to the calling model class. It registers the
# +ham+ and +spam+ scopes, a before_validation hook (on create) that runs
# +calculate_spam_score+, a +spam_words+ class accessor, and the instance
# methods +ham?+, +ham!+ and +spam!+.
#
# Options (all optional):
#   :message_field    - attribute holding the message body (default :message)
#   :email_field      - attribute holding the submitter's email (default :email)
#   :author_field     - attribute holding the submitter's name (default :author)
#   :other_fields     - extra attributes checked for spam words and consonant
#                       runs (default [])
#   :extra_spam_words - additional words merged into the built-in spam word
#                       list; each word is used as a regular expression source
#                       (default [])
#
# The protected score_for_* helpers each return an integer; their sum is
# computed by +calculate_spam_score+, which sets the record's +spam+
# attribute to true when the total is below zero and always returns true.
#
# The calling class is expected to provide +scope+, +before_validation+,
# +cattr_accessor+, +find+, +count+, +update_attribute+, +logger+ and a
# +spam?+ predicate (presumably an ActiveRecord model, per the readme).
#
# NOTE: the field names are interpolated into evaluated source code, so they
# must come from trusted application code, never from user input.
def filters_spam(options = {})
  options = {
    :message_field => :message,
    :email_field => :email,
    :author_field => :author,
    :other_fields => [],
    :extra_spam_words => []
  }.update(options)

  # Tolerate nil (or a single value) for the list-valued options.
  other_fields = Array(options[:other_fields])
  extra_spam_words = Array(options[:extra_spam_words])

  # Attribute lists interpolated into the generated methods below.
  word_fields = [options[:message_field], options[:author_field]] | other_fields
  run_fields = [options[:author_field], options[:message_field], options[:email_field]] | other_fields

  self.module_eval do
    scope :ham, lambda {{:conditions => {:spam => false}, :order => 'created_at DESC'}}
    scope :spam, lambda {{:conditions => {:spam => true}, :order => 'created_at DESC'}}
    before_validation(:on => :create) { calculate_spam_score }

    cattr_accessor :spam_words
    self.spam_words = %w{
      -online 4u 4-u acne adipex advicer baccarrat blackjack bllogspot booker buy byob carisoprodol
      casino chatroom cialis coolhu credit-card-debt cwas cyclen cyclobenzaprine orgy
      day-trading debt-consolidation discreetordering duty-free dutyfree equityloans fioricet
      freenet free\s*shipping gambling- hair-loss homefinance holdem incest jrcreations leethal levitra macinstruct
      mortgagequotes nemogs online-gambling ottawavalleyag ownsthis paxil penis pharmacy phentermine
      poker poze pussy ringtones roulette shemale shoes -site slot-machine thorcarlson
      tramadol trim-spa ultram valeofglamorganconservatives viagra vioxx xanax zolus
    } | extra_spam_words
  end

  # String form of module_eval: the configured field names are baked into the
  # method bodies. Escaped sequences (double backslashes and the escaped hash
  # sign) are resolved when the generated source is evaluated.
  self.module_eval %{
    def ham?
      !spam?
    end

    def ham!
      self.update_attribute(:spam, false)
    end

    def spam!
      self.update_attribute(:spam, true)
    end

    protected

    # Up to two links earn +2; more than two cost one point per link.
    def score_for_message_links
      link_count = self.#{options[:message_field]}.to_s.scan(/http:/).size
      link_count > 2 ? -link_count : 2
    end

    # Messages longer than 20 characters without links earn +2, otherwise -1.
    def score_for_message_length
      text = self.#{options[:message_field]}.to_s
      if text.length > 20 && text.scan(/http:/).size.zero?
        2
      else
        -1
      end
    end

    # +1 for every stored record with the same email that is ham, -1 for spam.
    def score_for_previous_submissions
      current_score = 0

      self.class.find(:all, :conditions => {:#{options[:email_field]} => #{options[:email_field]}}).each do |i|
        current_score += i.spam? ? -1 : 1
      end

      current_score
    end

    # -1 for every spam word found in the message, author or any other field.
    def score_for_spam_words
      current_score = 0
      values = [#{word_fields.join(', ')}].map { |value| value.to_s }

      spam_words.each do |word|
        regex = /\#{word}/i
        current_score -= 1 if values.any? { |value| value =~ regex }
      end

      current_score
    end

    # -1 for every link in the message that matches the suspect URL pattern.
    def score_for_suspect_url
      regex = /http:\\/\\/\\S*(\\.html|\\.info|\\?|&|free)/i
      #{options[:message_field]}.to_s.scan(regex).size * -1
    end

    # -1 for every link in the message using a .de, .pl or .cn domain part.
    def score_for_suspect_tld
      regex = /http:\\/\\/\\S*\\.(de|pl|cn)/i
      #{options[:message_field]}.to_s.scan(regex).size * -1
    end

    def score_for_lame_message_start
      #{options[:message_field]}.to_s.strip =~ /^(interesting|sorry|nice|cool)/i ? -10 : 0
    end

    # -2 for every link placed in the author field.
    def score_for_author_link
      #{options[:author_field]}.to_s.scan(/http:/).size * -2
    end

    # -1 for every stored record that has exactly the same message.
    def score_for_same_message
      self.class.count(:conditions => {:#{options[:message_field]} => #{options[:message_field]}}) * -1
    end

    # Each run of five or more consonants costs its length minus four,
    # accumulated over every run in every checked field.
    def score_for_consonant_runs
      current_score = 0

      [#{run_fields.join(', ')}].each do |field|
        field.to_s.scan(/[bcdfghjklmnpqrstvwxz]{5,}/).each do |run|
          current_score -= (run.size - 4)
        end
      end

      current_score
    end

    def calculate_spam_score
      score = 0
      score += score_for_message_links
      score += score_for_message_length
      score += score_for_previous_submissions
      score += score_for_spam_words
      score += score_for_suspect_url
      score += score_for_suspect_tld
      score += score_for_lame_message_start
      score += score_for_author_link
      score += score_for_same_message
      score += score_for_consonant_runs
      self.spam = (score < 0)

      logger.info("spam score was \#{score}")

      # Always return true so the before_validation callback never halts.
      true
    end
  }
end
@@ -0,0 +1,44 @@
1
+ # FiltersSpam
2
+
3
+ This is a small Ruby on Rails plugin, installable as a gem via your ``Gemfile``,
4
+ that models can attach to in order to gain spam filtering functionality.
5
+
6
+ ## Rails Quickstart
7
+
8
+ # Add to Gemfile
9
+ gem 'filters_spam', '~> 0.1'
10
+
11
+ bundle install
12
+
13
+ ## Usage
14
+
15
+ Once you have the plugin installed, you can use it by calling the function in your model like so:
16
+
17
+ filters_spam
18
+
19
+ If you want to change the default fields that are used by ``filters_spam``
20
+ then you can pass them in to the method as options.
21
+
22
+ All of the possible options are outlined below with the default values for each:
23
+
24
+ filters_spam({
25
+ :message_field => :message,
26
+ :email_field => :email,
27
+ :author_field => :author,
28
+ :other_fields => [],
29
+ :extra_spam_words => %w()
30
+ })
31
+
32
+ So, say you wanted to mark 'ruby' and 'rails' as spam words you simply pass them
33
+ in using the ``:extra_spam_words`` option:
34
+
35
+ filters_spam({
36
+ :extra_spam_words => %w(ruby rails)
37
+ })
38
+
39
+ Enjoy a life with less spam.
40
+
41
+ ## Credits
42
+
43
+ This code was inspired by Russell Norris's [acts_as_snook plugin](http://github.com/rsl/acts_as_snook)
44
+ and ideas presented by [Jonathan Snook](http://snook.ca/archives/other/effective_blog_comment_spam_blocker)
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: filters_spam
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Philip Arndt
13
+ - David Jones
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-08-27 00:00:00 +12:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: This is a small Ruby on Rails plugin that can be installed as a gem in your Gemfile that allows models to attach to it to provide spam filtering functionality.
23
+ email: info@resolvedigital.co.nz
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - readme.md
32
+ - lib/filters_spam.rb
33
+ has_rdoc: true
34
+ homepage: http://www.resolvedigital.co.nz
35
+ licenses: []
36
+
37
+ post_install_message:
38
+ rdoc_options: []
39
+
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ hash: 57
48
+ segments:
49
+ - 1
50
+ - 8
51
+ - 7
52
+ version: 1.8.7
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ requirements:
63
+ - none
64
+ rubyforge_project:
65
+ rubygems_version: 1.3.7
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Attach to your model to have this filter out the spam using scoring techniques.
69
+ test_files: []
70
+