despamilator 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +44 -0
- data/PostInstall.txt +1 -0
- data/README.rdoc +107 -0
- data/Rakefile +33 -0
- data/despamilator.gemspec +42 -0
- data/lib/despamilator/filter/html_tags.rb +116 -0
- data/lib/despamilator/filter/naughty_q.rb +17 -0
- data/lib/despamilator/filter/numbers_and_words.rb +33 -0
- data/lib/despamilator/filter/script_tag.rb +13 -0
- data/lib/despamilator/filter.rb +52 -0
- data/lib/despamilator/filter_base.rb +37 -0
- data/lib/despamilator.rb +19 -0
- data/pkg/despamilator-0.1/History.txt +4 -0
- data/pkg/despamilator-0.1/Manifest.txt +21 -0
- data/pkg/despamilator-0.1/PostInstall.txt +1 -0
- data/pkg/despamilator-0.1/README.rdoc +107 -0
- data/pkg/despamilator-0.1/Rakefile +33 -0
- data/pkg/despamilator-0.1/despamilator.gemspec +42 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +116 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +17 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +33 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +13 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter.rb +52 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +37 -0
- data/pkg/despamilator-0.1/lib/despamilator.rb +19 -0
- data/pkg/despamilator-0.1/spec/despamilator_spec.rb +15 -0
- data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +144 -0
- data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +39 -0
- data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +59 -0
- data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +32 -0
- data/pkg/despamilator-0.1/spec/spec.opts +1 -0
- data/pkg/despamilator-0.1/spec/spec_helper.rb +10 -0
- data/pkg/despamilator-0.1/tasks/rspec.rake +21 -0
- data/pkg/despamilator-0.1.gem +0 -0
- data/pkg/despamilator-0.1.tgz +0 -0
- data/spec/despamilator_spec.rb +15 -0
- data/spec/filters/html_tags_spec.rb +144 -0
- data/spec/filters/naughty_q_spec.rb +39 -0
- data/spec/filters/numbers_and_words_spec.rb +59 -0
- data/spec/filters/script_tag_spec.rb +32 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +10 -0
- data/tasks/rspec.rake +21 -0
- metadata +155 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
PostInstall.txt
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
despamilator.gemspec
|
7
|
+
lib/despamilator.rb
|
8
|
+
lib/despamilator/filter.rb
|
9
|
+
lib/despamilator/filter/html_tags.rb
|
10
|
+
lib/despamilator/filter/naughty_q.rb
|
11
|
+
lib/despamilator/filter/numbers_and_words.rb
|
12
|
+
lib/despamilator/filter/script_tag.rb
|
13
|
+
lib/despamilator/filter_base.rb
|
14
|
+
pkg/despamilator-0.1.gem
|
15
|
+
pkg/despamilator-0.1.tgz
|
16
|
+
pkg/despamilator-0.1/History.txt
|
17
|
+
pkg/despamilator-0.1/Manifest.txt
|
18
|
+
pkg/despamilator-0.1/PostInstall.txt
|
19
|
+
pkg/despamilator-0.1/README.rdoc
|
20
|
+
pkg/despamilator-0.1/Rakefile
|
21
|
+
pkg/despamilator-0.1/despamilator.gemspec
|
22
|
+
pkg/despamilator-0.1/lib/despamilator.rb
|
23
|
+
pkg/despamilator-0.1/lib/despamilator/filter.rb
|
24
|
+
pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
|
25
|
+
pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
|
26
|
+
pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
|
27
|
+
pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
|
28
|
+
pkg/despamilator-0.1/lib/despamilator/filter_base.rb
|
29
|
+
pkg/despamilator-0.1/spec/despamilator_spec.rb
|
30
|
+
pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
|
31
|
+
pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
|
32
|
+
pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
|
33
|
+
pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
|
34
|
+
pkg/despamilator-0.1/spec/spec.opts
|
35
|
+
pkg/despamilator-0.1/spec/spec_helper.rb
|
36
|
+
pkg/despamilator-0.1/tasks/rspec.rake
|
37
|
+
spec/despamilator_spec.rb
|
38
|
+
spec/filters/html_tags_spec.rb
|
39
|
+
spec/filters/naughty_q_spec.rb
|
40
|
+
spec/filters/numbers_and_words_spec.rb
|
41
|
+
spec/filters/script_tag_spec.rb
|
42
|
+
spec/spec.opts
|
43
|
+
spec/spec_helper.rb
|
44
|
+
tasks/rspec.rake
|
data/PostInstall.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamliator
|
data/README.rdoc
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
= Despamilator
|
2
|
+
|
3
|
+
* http://github.com/moowahaha/despamliator
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
8
|
+
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
|
9
|
+
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
10
|
+
|
11
|
+
== FEATURES/PROBLEMS:
|
12
|
+
|
13
|
+
* rspec will be run twice. How annoying?!
|
14
|
+
|
15
|
+
== SYNOPSIS:
|
16
|
+
|
17
|
+
# using Despamilator
|
18
|
+
dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
19
|
+
|
20
|
+
dspam.score #=> the total score for this string (1 is considered high)
|
21
|
+
dspam.matched_by #=> array of matching filters
|
22
|
+
first_match = dspam.matched_by.first #=> first matching filter
|
23
|
+
first_match.name #=> some string with the name of the filter
|
24
|
+
first_match.description #=> some string to describe
|
25
|
+
first_match.score #=> the individual score assigned by this filter
|
26
|
+
|
27
|
+
# adding a new filter! example: detecting the letter "a"
|
28
|
+
# put the following code in lib/despamilator/filter/detect_letter_a.rb
|
29
|
+
def name
|
30
|
+
'Detecting the letter A'
|
31
|
+
end
|
32
|
+
|
33
|
+
def description
|
34
|
+
'Detects the letter "a" in a string for no reason other than a demo'
|
35
|
+
end
|
36
|
+
|
37
|
+
def parse
|
38
|
+
if self.text.downcase.match(/a/)
|
39
|
+
# add 0.1 to the score of the text
|
40
|
+
self.append_score = 0.1
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
== FILTERING:
|
45
|
+
|
46
|
+
As stated, this is a heuristic scanner so it's up to the user to decide the thresholds of the scanner. I usually
|
47
|
+
say "it's spam" if the score reaches 1.
|
48
|
+
|
49
|
+
The score will be added to incrementally by each matching filter. So if there is some HTML in there, it will be added
|
50
|
+
to the score. If there is also a script tag of some sort, that will add more.
|
51
|
+
|
52
|
+
Each filter decides how much of a score it assigns. For example, detecting a number next to a letter
|
53
|
+
(the numbers_and_words filter) is only a mild hint compared with a script tag (detected by the script_tag filter).
|
54
|
+
|
55
|
+
=== NEW FILTERS:
|
56
|
+
|
57
|
+
I absolutely welcome new filters and experiments. New filters should be put in the 'lib/despamilator/filter/' directory.
|
58
|
+
The core filtering code will detect and use what is in there so you only need to drop the code in.
|
59
|
+
Filters should be simple, no classes etc wrapped around them and should try to perform one simple task.
|
60
|
+
They should always supply the following methods:
|
61
|
+
|
62
|
+
* name #=> the name of your filter.
|
63
|
+
* description #=> what your filter will look for.
|
64
|
+
* parse #=> the method that will be called when parsing.
|
65
|
+
|
66
|
+
Alongside the above, the following methods are made available to each filter:
|
67
|
+
|
68
|
+
* text #=> a copy of the text your parser will parse
|
69
|
+
* append_score= #=> method to append a score to the text if there are matches in your parser.
|
70
|
+
* matched? #=> whether or not any filter has so far detected something suspect
|
71
|
+
* score #=> the current score assigned to the text
|
72
|
+
|
73
|
+
spec tests are an absolute must!
|
74
|
+
|
75
|
+
|
76
|
+
== REQUIREMENTS:
|
77
|
+
|
78
|
+
* hoe
|
79
|
+
* rspec
|
80
|
+
|
81
|
+
|
82
|
+
== INSTALL:
|
83
|
+
|
84
|
+
$ sudo gem install despamilator
|
85
|
+
|
86
|
+
== LICENSE:
|
87
|
+
|
88
|
+
Copyright (c) 2010 Stephen Hardisty
|
89
|
+
|
90
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
91
|
+
a copy of this software and associated documentation files (the
|
92
|
+
'Software'), to deal in the Software without restriction, including
|
93
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
94
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
95
|
+
permit persons to whom the Software is furnished to do so, subject to
|
96
|
+
the following conditions:
|
97
|
+
|
98
|
+
The above copyright notice and this permission notice shall be
|
99
|
+
included in all copies or substantial portions of the Software.
|
100
|
+
|
101
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
102
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
103
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
104
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
105
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
106
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
107
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Rake entry point for the despamilator gem: configures Hoe (with the newgem
# plugin), loads the project's own rake tasks and wires up the default tasks.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/despamilator'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'despamilator' do
  self.developer 'Stephen Hardisty', 'moowahaha@hotmail.com'
  # Use the contents of PostInstall.txt; assigning the bare file name makes
  # the file name itself the message shown after installation.
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  self.rubyforge_name = self.name # TODO this is default value
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
task :test => [:spec]
task :default => [:test]
task :install => [:install_gem]

# Regenerates Manifest.txt (by patching it with check_manifest's output) and
# rewrites the <directory name>.gemspec file from `rake debug_gem`.
task :cultivate do
  system "touch Manifest.txt; rake check_manifest | grep -v \"(in \" | patch"
  system "rake debug_gem | grep -v \"(in \" > `basename \\`pwd\\``.gemspec"
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

# Gem specification for despamilator 0.2.
Gem::Specification.new do |s|
  s.name = %q{despamilator}
  s.version = "0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Stephen Hardisty"]
  s.date = %q{2010-03-28}
  s.description = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.}
  s.email = ["moowahaha@hotmail.com"]
  # Only the current sources are listed: the pkg/ tree is build output of the
  # previous release (including a binary .gem) and must not be shipped again.
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "PostInstall.txt"]
  s.files = [
    "History.txt",
    "Manifest.txt",
    "PostInstall.txt",
    "README.rdoc",
    "Rakefile",
    "despamilator.gemspec",
    "lib/despamilator.rb",
    "lib/despamilator/filter.rb",
    "lib/despamilator/filter/html_tags.rb",
    "lib/despamilator/filter/naughty_q.rb",
    "lib/despamilator/filter/numbers_and_words.rb",
    "lib/despamilator/filter/script_tag.rb",
    "lib/despamilator/filter_base.rb",
    "spec/despamilator_spec.rb",
    "spec/filters/html_tags_spec.rb",
    "spec/filters/naughty_q_spec.rb",
    "spec/filters/numbers_and_words_spec.rb",
    "spec/filters/script_tag_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "tasks/rspec.rake"
  ]
  s.homepage = %q{http://github.com/moowahaha/despamliator}
  # The message text itself (the content of PostInstall.txt), not the file name.
  s.post_install_message = %q{For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamliator}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded so the spec still evaluates on a RubyGems without this setter.
  s.rubyforge_project = %q{despamilator} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms AND CAPTCHAS being intrusive}

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::VERSION replaces the Gem::RubyGemsVersion alias, so the check does
    # not depend on that older constant being defined.
    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<rubyforge>, [">= 2.0.3"])
      s.add_development_dependency(%q<gemcutter>, [">= 0.5.0"])
      s.add_development_dependency(%q<hoe>, [">= 2.5.0"])
    else
      s.add_dependency(%q<rubyforge>, [">= 2.0.3"])
      s.add_dependency(%q<gemcutter>, [">= 0.5.0"])
      s.add_dependency(%q<hoe>, [">= 2.5.0"])
    end
  else
    s.add_dependency(%q<rubyforge>, [">= 2.0.3"])
    s.add_dependency(%q<gemcutter>, [">= 0.5.0"])
    s.add_dependency(%q<hoe>, [">= 2.5.0"])
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# Adds 0.3 to the score once for every entry of html_tags that appears in the
# lower-cased text, either after a "<" (optionally separated by whitespace) or
# directly before a "/" (optionally separated by whitespace).
def parse
  html = self.text.downcase

  html_tags.each do |tag|
    tag_pattern = Regexp.escape(tag)

    # \s already matches newlines, so a single opening and a single closing
    # pattern cover every case the separate \n* variants would.
    if html =~ /<\s*#{tag_pattern}\W/ || html =~ /\W#{tag_pattern}\s*\//
      self.append_score = 0.3
    end
  end
end
|
10
|
+
|
11
|
+
# Short label for this filter.
def name
  "Detects HTML tags in text"
end
|
14
|
+
|
15
|
+
# One-line explanation of what this filter looks for.
def description
  "Searches for various HTML tags"
end
|
18
|
+
|
19
|
+
# Tag names (plus the comment and doctype openers) that parse searches for.
# Returns a new array on every call.
def html_tags
  # make sure these are lowercase, in order to save processing
  %w[
    !-- !doctype
    a abbr acronym address applet area
    b base basefont bdo big blockquote body br button
    caption center cite code col colgroup
    dd del dfn dir div dl dt
    em
    fieldset font form frame frameset
    h1 h2 h3 h4 h5 h6 head hr html
    i iframe img input ins isindex
    kbd
    label legend li link
    map menu meta
    noframes noscript
    object ol optgroup option
    p param pre
    q
    s samp select small span strike strong style sub sup
    table tbody td textarea tfoot th thead title tr tt
    u ul
    var
    xmp
  ]
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Short label for this filter.
def name
  "Naughty Q"
end
|
4
|
+
|
5
|
+
# One-line explanation of what this filter looks for.
def description
  "Detects possible misuse of the letter Q (English language)"
end
|
8
|
+
|
9
|
+
# Adds 0.2 to the score for every "q" in the lower-cased text whose following
# character is not "u", "a" or whitespace.
#
# A "q" that is the last character of the text, or that is directly followed
# by a newline, is never examined because /q./ needs a following character
# and "." does not match a newline.
# NOTE(review): a "q" followed by punctuation (e.g. "q.") is scored as well -
# confirm whether that is intended.
def parse
  # String#scan always returns an Array, so no nil check is needed.
  self.text.downcase.scan(/q./).each do |pair|
    next if pair == 'qu' || pair == 'qa' || pair =~ /q\s/

    self.append_score = 0.2
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Adds 0.1 to the score for every run of digits that is glued to a word
# character (e.g. "v1agra", "4u") once ordinary numbers have been removed.
def parse
  string = self.text.downcase

  # strip out "good numbers": h1-h6, decimals / thousands, plain integers and
  # ordinals. Decimals go before plain integers, otherwise both halves are
  # already gone and the decimal pattern can never match.
  string.gsub!(/h[1-6]/, '')
  string.gsub!(/(?:^|\b)\d+(?:,|\.)\d+(?:$|\b)/, '')
  string.gsub!(/(?:^|\b)\d+(?:$|\b)/, '')
  string.gsub!(/(?:^|\b)\d+(?:st|nd|rd|th)(?:$|\b)/, '')

  # Non-capturing groups keep String#scan returning the matched text itself
  # instead of arrays of captures.
  [
    /\w\d+/,
    /\d+\w/,
    /\d+(?:$|\b)/
  ].each do |regexp|
    string.scan(regexp).each do |found|
      # drop the first occurrence so the later patterns do not count it again
      string.sub!(found, '')
      self.append_score = 0.1
    end
  end
end
|
26
|
+
|
27
|
+
# Short label for this filter.
def name
  "Numbers next to words"
end
|
30
|
+
|
31
|
+
# One-line explanation of what this filter looks for.
def description
  "Detects unusual number/word combinations"
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
class Despamilator
  # Runs every filter script found in the "filter" directory next to this file
  # against a piece of text, collecting the filters that matched and the sum of
  # their scores.
  class Filter
    # matches - the filter instances that reported a match
    # score   - the sum of the scores of the matching filters
    attr_accessor :matches, :score

    # text - the object to scan; it is converted with #to_s and every filter
    #        receives its own copy of the resulting string.
    def initialize(text)
      # filter name => anonymous class wrapping the generated filter class,
      # shared by all instances so each script is evaluated only once
      @@loaded ||= {}
      @filters = []
      @matches = []
      @score = 0
      load_filters text
      run_filters
    end

    private

    # Instantiates one filter object per script in the filter directory.
    def load_filters(text)
      Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").each do |filter_file|
        filter_name = classify_filename filter_file
        filter = (@@loaded[filter_name] ||= build_filter(filter_name, filter_file))

        @filters.push(filter.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
      end
    end

    # Wraps the bare method definitions of a filter script in a FilterBase
    # subclass named filter_name, defined inside a fresh anonymous class, and
    # returns that anonymous class.
    def build_filter(filter_name, filter_file)
      # File.read opens the script read-only and closes it again
      filter_code = File.read(filter_file)
      wrapper = Class.new
      wrapper.class_eval(
        "require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
      )
      wrapper
    end

    # Parses every loaded filter and records the ones that matched.
    def run_filters
      @filters.each do |filter|
        filter.parse

        if filter.matched?
          @matches.push(filter)
          @score += filter.score
        end
      end
    end

    # Turns a script path such as "filter/html_tags.rb" into a class name
    # such as "HtmlTags".
    def classify_filename(filename)
      File.basename(filename).gsub(/\.rb$/, '').split('_').map { |part| part.capitalize }.join
    end
  end
end
|