despamilator 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +44 -0
- data/PostInstall.txt +1 -0
- data/README.rdoc +107 -0
- data/Rakefile +33 -0
- data/despamilator.gemspec +42 -0
- data/lib/despamilator/filter/html_tags.rb +116 -0
- data/lib/despamilator/filter/naughty_q.rb +17 -0
- data/lib/despamilator/filter/numbers_and_words.rb +33 -0
- data/lib/despamilator/filter/script_tag.rb +13 -0
- data/lib/despamilator/filter.rb +52 -0
- data/lib/despamilator/filter_base.rb +37 -0
- data/lib/despamilator.rb +19 -0
- data/pkg/despamilator-0.1/History.txt +4 -0
- data/pkg/despamilator-0.1/Manifest.txt +21 -0
- data/pkg/despamilator-0.1/PostInstall.txt +1 -0
- data/pkg/despamilator-0.1/README.rdoc +107 -0
- data/pkg/despamilator-0.1/Rakefile +33 -0
- data/pkg/despamilator-0.1/despamilator.gemspec +42 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +116 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +17 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +33 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +13 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter.rb +52 -0
- data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +37 -0
- data/pkg/despamilator-0.1/lib/despamilator.rb +19 -0
- data/pkg/despamilator-0.1/spec/despamilator_spec.rb +15 -0
- data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +144 -0
- data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +39 -0
- data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +59 -0
- data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +32 -0
- data/pkg/despamilator-0.1/spec/spec.opts +1 -0
- data/pkg/despamilator-0.1/spec/spec_helper.rb +10 -0
- data/pkg/despamilator-0.1/tasks/rspec.rake +21 -0
- data/pkg/despamilator-0.1.gem +0 -0
- data/pkg/despamilator-0.1.tgz +0 -0
- data/spec/despamilator_spec.rb +15 -0
- data/spec/filters/html_tags_spec.rb +144 -0
- data/spec/filters/naughty_q_spec.rb +39 -0
- data/spec/filters/numbers_and_words_spec.rb +59 -0
- data/spec/filters/script_tag_spec.rb +32 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +10 -0
- data/tasks/rspec.rake +21 -0
- metadata +155 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
class Despamilator
  # Discovers the filter definitions stored in the "filter" directory next to
  # this file, instantiates each of them for the supplied text, runs them and
  # accumulates the outcome.
  #
  # After construction:
  # * +matches+ holds every filter instance whose +matched?+ returned true
  # * +score+ is the sum of the scores of those matching filters
  class Filter
    attr_accessor :matches, :score

    # text:: the value to inspect; it is converted with +to_s+ and every
    #        filter receives its own duplicate of the resulting string.
    def initialize text
      # Cache of compiled filters shared by all instances, keyed by class name.
      @@loaded ||= {}
      @filters ||= []
      @matches ||= []
      @score ||= 0
      load_filters text
      run_filters
    end

    private

    # Instantiates one filter object per "filter/*.rb" file. The compiled
    # filter is looked up in (and stored into) the @@loaded cache so that a
    # file is read and evaluated only once per process.
    def load_filters text
      # Sorted so that the filter order does not depend on the platform's
      # directory listing order.
      Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").sort.each do |filter_file|
        filter_name = classify_filename filter_file
        filter = (@@loaded[filter_name] ||= compile_filter(filter_name, filter_file))

        @filters.push(filter.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
      end
    end

    # Reads a filter file and evaluates its contents as the body of a class
    # named +filter_name+ that inherits from Despamilator::FilterBase. The
    # class is defined inside a fresh anonymous container class, which is
    # returned; fetch the filter class from it with +const_get+.
    #
    # NOTE(review): this evaluates file contents as code; it is only applied
    # to files found beside this source file.
    def compile_filter filter_name, filter_file
      # File.read opens read-only and closes the handle when done.
      filter_code = File.read(filter_file)
      container = Class.new
      container.class_eval(
        "require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
      )
      container
    end

    # Runs every loaded filter; filters that report a match are recorded in
    # +matches+ and their score is added to the running total.
    def run_filters
      @filters.each do |filter|
        filter.parse

        if filter.matched?
          @matches.push(filter)
          @score += filter.score
        end
      end
    end

    # Converts a filter file name into a class name by dropping the ".rb"
    # extension and capitalizing each underscore-separated part, e.g.
    # "html_tags.rb" becomes "HtmlTags".
    def classify_filename filename
      File.basename(filename, '.rb').split('_').map { |part| part.capitalize }.join
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class Despamilator
  # Base class for individual filters. A concrete filter is expected to
  # override +name+, +description+ and +parse+; the versions defined here
  # raise an error that names the offending filter file.
  #
  # Accessors:
  # * +text+     - the text handed to the filter
  # * +score+    - accumulated score, starts at 0
  # * +filename+ - name of the file the filter was given at construction
  # * +matches+  - number of times +append_score=+ has been called
  class FilterBase
    attr_accessor :text, :score, :filename, :matches

    def initialize text, filename
      @matches = 0
      @filename = filename
      @score = 0
      @text = text
      @matched = false
    end

    # Human readable description of the filter; must be overridden.
    def description
      raise "No description defined in #{filename}"
    end

    # Inspects +text+ and records scores; must be overridden.
    #
    # The argument is optional (and ignored here) so that calling +parse+
    # without arguments on a filter that forgot to override it raises the
    # descriptive error below rather than an ArgumentError.
    def parse blah = nil
      raise "No parser defined in #{filename}"
    end

    # Short name of the filter; must be overridden.
    def name
      raise "No name defined in #{filename}"
    end

    # True once a positive score has been accumulated.
    def matched?
      @score > 0
    end

    protected

    # Registers one match: increments +matches+ and adds +new_score+ to the
    # accumulated score.
    def append_score= new_score
      @matches += 1
      @score += new_score
    end

  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
3
|
+
require 'despamilator/filter'
|
4
|
+
|
5
|
+
# Entry point of the library: wraps a Despamilator::Filter run over a piece
# of text and exposes its results.
class Despamilator
  # Frozen so the shared version string cannot be mutated by accident.
  VERSION = "0.2".freeze

  # text:: the value to examine; it is handed to Despamilator::Filter.new,
  #        which loads and runs the filters immediately.
  def initialize text
    @filters = Despamilator::Filter.new text
  end

  # Total score reported by the filter run.
  def score
    @filters.score
  end

  # The matches reported by the filter run.
  def matched_by
    @filters.matches
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
# Checks the top-level results for a text that is expected to trip no filter.
describe Despamilator do
  before(:each) do
    harmless_text = 'this text is absolutely fine'
    @dspam = Despamilator.new(harmless_text)
  end

  it("should return a zero score for fine text") do
    @dspam.score.should == 0
  end

  it("should return no matching filter for fine text") do
    @dspam.matched_by.should be_empty
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
# Specs for the HtmlTags filter: every listed tag name, in upper and lower
# case and in several opening/closing spellings, is expected to score 0.3.
context "HtmlTags" do
  describe "filtering" do
    tag_names = %w[
      !-- !DOCTYPE a abbr acronym address applet area b base basefont bdo big
      blockquote body br button caption center cite code col colgroup dd del
      dfn dir div dl dt em fieldset font form frame frameset h1 h2 h3 h4 h5 h6
      head hr html i iframe img input ins isindex kbd label legend li link map
      menu meta noframes noscript object ol optgroup option p param pre q s
      samp select small span strike strong style sub sup table tbody td
      textarea tfoot th thead title tr tt u ul var xmp
    ]

    # Each nesting level uses its own block parameter name. Reusing a single
    # name at every level shadows the outer value, and the example bodies run
    # later than the loops that define them, so they must capture a variable
    # that nothing else reassigns.
    tag_names.each do |tag_name|
      [tag_name.upcase, tag_name.downcase].each do |cased_tag|
        [
          "<#{cased_tag}",
          "#{cased_tag}/>",
          "<#{cased_tag}/>",
          "< #{cased_tag} ",
          "#{cased_tag} />",
          "<\n#{cased_tag}\n/>",
          "<\n#{cased_tag} ",
          "#{cased_tag}\n/>",
          "<\r#{cased_tag}\r/>"
        ].each do |markup|
          it "should detect '#{markup}'" do
            dspam = Despamilator.new(markup)
            dspam.score.should == 0.3
          end
        end
      end
    end

    describe 'attributes' do
      before :all do
        # First filter that matched the sample markup.
        @dspam = Despamilator.new('<xmp>').matched_by.first
      end

      it "should have a name" do
        @dspam.name.should == 'Detects HTML tags in text'
      end

      it "should have a description" do
        @dspam.description.should == 'Searches for various HTML tags'
      end

      it "should have a filename" do
        @dspam.filename.should == 'html_tags.rb'
      end
    end

    describe 'bug fixes' do
      it "should detect an h1" do
        Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
# Specs for the NaughtyQ filter.
describe "NaughtyQ" do
  it("should return a score for 1 misplaced q") do
    Despamilator.new('qtu').score.should == 0.2
  end

  describe('attributes') do
    before(:each) do
      # First filter that matched a text with two misplaced q's.
      matched_filters = Despamilator.new('qtqt').matched_by
      @filter = matched_filters.first
    end

    it("should have a filename") { @filter.filename.should == 'naughty_q.rb' }

    it("should have a name") { @filter.name.should == 'Naughty Q' }

    it("should have a description") do
      @filter.description.should == 'Detects possible misuse of the letter Q (English language)'
    end

    it("should have a number of matches") { @filter.matches.should == 2 }

    it("should have a score") { @filter.score.should == 0.4 }
  end

  it("should score more for 3 misplaced q's") do
    # Compared as strings, exactly as the expectation was originally written.
    total = Despamilator.new('qtuqsq').score
    total.to_s.should == 0.4.to_s
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
# Specs for the NumbersAndWords filter.
describe "NumbersAndWords" do
  # Plain numbers and ordinals that are expected to score nothing.
  harmless_numbers = [1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd']
  harmless_numbers.each do |number|
    it("should return a blank for a #{number}") do
      Despamilator.new(number).score.should == 0
    end
  end

  # Texts expected to produce a single match.
  single_hits = ['wanga x5 mool', '4ghk', 'XTHL9']
  single_hits.each do |string|
    it("should detect suspicious number word combos such as #{string}") do
      Despamilator.new(string).score.should == 0.1
    end
  end

  # Texts expected to produce two matches.
  double_hits = ['4wanga x5 mool', '4g6hk', 'XT7HL9', '77th8nd']
  double_hits.each do |string|
    it("should detect multiple suspicious number word combos such as #{string}") do
      Despamilator.new(string).score.should == 0.2
    end
  end

  (1..6).each do |tag_no|
    header_tag = "h#{tag_no}"

    it("should ignore html header tag #{header_tag}") do
      Despamilator.new(header_tag).score.should == 0
    end
  end

  describe('attributes') do
    before(:each) do
      # First filter that matched the sample text.
      matched_filters = Despamilator.new('X5T').matched_by
      @filter = matched_filters.first
    end

    it("should have a filename") { @filter.filename.should == 'numbers_and_words.rb' }

    it("should have a name") { @filter.name.should == 'Numbers next to words' }

    it("should have a description") do
      @filter.description.should == 'Detects unusual number/word combinations'
    end

    it("should have a number of matches") { @filter.matches.should == 1 }

    it("should have a score") { @filter.score.should == 0.1 }
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
# Specs for the ScriptTag filter: each sample, upper- and lower-cased, is
# expected to score 1.
context "ScriptTag" do
  describe "detecting various script tags" do
    samples = ['<script type="whatever">', '<script></script>', '</script>', '<script>', "<script\n>"]

    # The inner block parameter has its own name so that it does not shadow
    # the outer one; the example body runs later and must capture a variable
    # that nothing else reassigns.
    samples.each do |sample|
      [sample.upcase, sample.downcase].each do |script_tag|
        it "should detect '#{script_tag}' of a script tag" do
          dspam = Despamilator.new(script_tag)
          dspam.score.should == 1
        end
      end
    end
  end

  describe 'attributes' do
    before :all do
      # First filter that matched the sample markup.
      @dspam = Despamilator.new('<script>').matched_by.first
    end

    it "should have a name" do
      @dspam.name.should == 'Detects script tags in text'
    end

    it "should have a description" do
      @dspam.description.should == 'Searches for variations for the HTML script tag'
    end

    it "should have a filename" do
      @dspam.filename.should == 'script_tag.rb'
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Rake task definition for running the spec suite.

# Load rspec, retrying through rubygems unless NO_RUBYGEMS is set.
begin
  require 'spec'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'spec'
end

# Without the rspec rake task there is nothing to define: print install
# instructions and stop.
begin
  require 'spec/rake/spectask'
rescue LoadError
  puts <<-EOS
To use rspec for testing you must install rspec gem:
gem install rspec
EOS
  # NOTE(review): exits with status 0 even though rspec is missing; kept
  # as-is because callers may depend on it -- confirm whether that is intended.
  exit(0)
end

# The description matches the file pattern used below.
desc "Run all specs under spec/"
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "spec/spec.opts"]
  t.spec_files = FileList['spec/**/*_spec.rb']
end
|