despamilator 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45):
  1. data/History.txt +4 -0
  2. data/Manifest.txt +44 -0
  3. data/PostInstall.txt +1 -0
  4. data/README.rdoc +107 -0
  5. data/Rakefile +33 -0
  6. data/despamilator.gemspec +42 -0
  7. data/lib/despamilator/filter/html_tags.rb +116 -0
  8. data/lib/despamilator/filter/naughty_q.rb +17 -0
  9. data/lib/despamilator/filter/numbers_and_words.rb +33 -0
  10. data/lib/despamilator/filter/script_tag.rb +13 -0
  11. data/lib/despamilator/filter.rb +52 -0
  12. data/lib/despamilator/filter_base.rb +37 -0
  13. data/lib/despamilator.rb +19 -0
  14. data/pkg/despamilator-0.1/History.txt +4 -0
  15. data/pkg/despamilator-0.1/Manifest.txt +21 -0
  16. data/pkg/despamilator-0.1/PostInstall.txt +1 -0
  17. data/pkg/despamilator-0.1/README.rdoc +107 -0
  18. data/pkg/despamilator-0.1/Rakefile +33 -0
  19. data/pkg/despamilator-0.1/despamilator.gemspec +42 -0
  20. data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +116 -0
  21. data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +17 -0
  22. data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +33 -0
  23. data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +13 -0
  24. data/pkg/despamilator-0.1/lib/despamilator/filter.rb +52 -0
  25. data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +37 -0
  26. data/pkg/despamilator-0.1/lib/despamilator.rb +19 -0
  27. data/pkg/despamilator-0.1/spec/despamilator_spec.rb +15 -0
  28. data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +144 -0
  29. data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +39 -0
  30. data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +59 -0
  31. data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +32 -0
  32. data/pkg/despamilator-0.1/spec/spec.opts +1 -0
  33. data/pkg/despamilator-0.1/spec/spec_helper.rb +10 -0
  34. data/pkg/despamilator-0.1/tasks/rspec.rake +21 -0
  35. data/pkg/despamilator-0.1.gem +0 -0
  36. data/pkg/despamilator-0.1.tgz +0 -0
  37. data/spec/despamilator_spec.rb +15 -0
  38. data/spec/filters/html_tags_spec.rb +144 -0
  39. data/spec/filters/naughty_q_spec.rb +39 -0
  40. data/spec/filters/numbers_and_words_spec.rb +59 -0
  41. data/spec/filters/script_tag_spec.rb +32 -0
  42. data/spec/spec.opts +1 -0
  43. data/spec/spec_helper.rb +10 -0
  44. data/tasks/rspec.rake +21 -0
  45. metadata +155 -0
@@ -0,0 +1,13 @@
# Script-tag filter body.
#
# This fragment has no class header of its own: it relies on +text+ and
# +append_score=+ being supplied by whatever class it is evaluated inside.

# Adds a score of 1 when the text contains an opening or closing script tag.
# The text is lower-cased first, so matching is case-insensitive. The tag name
# must be followed directly by ">" or a whitespace character.
def parse
  self.append_score = 1 if text.downcase.match(%r{</?script[>\s]})
end

# Short human-readable name of this filter.
def name
  'Detects script tags in text'
end

# One-line explanation of what this filter looks for.
def description
  'Searches for variations for the HTML script tag'
end
@@ -0,0 +1,52 @@
class Despamilator
  # Loads every filter script found in the "filter" directory next to this
  # file, runs each one against the supplied text and collects the outcome.
  #
  # Each script is wrapped in a class named after its file (for example
  # "naughty_q.rb" becomes NaughtyQ) that inherits from
  # Despamilator::FilterBase.
  class Filter
    # matches:: the filter instances that reported a hit
    # score::   the sum of the scores of those filters
    attr_accessor :matches, :score

    # Compiled filter containers keyed by class name. Shared by all instances
    # so each filter script is read and evaluated only once per process.
    @@loaded = {}

    # text:: the content to check; it is converted with +to_s+ and duplicated
    #        before being handed to each filter.
    def initialize(text)
      @filters = []
      @matches = []
      @score = 0
      load_filters(text)
      run_filters
    end

    private

    # Instantiates one filter object per script in the filter directory.
    # The file list is sorted so filters always run in the same order.
    def load_filters(text)
      Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").sort.each do |filter_file|
        filter_name = classify_filename(filter_file)
        container = filter_container(filter_name, filter_file)
        @filters.push(container.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
      end
    end

    # Returns the anonymous class that holds the compiled filter class for
    # +filter_file+, building it on first use and caching it afterwards.
    def filter_container(filter_name, filter_file)
      @@loaded[filter_name] ||= begin
        # File.read closes the handle and only needs read permission.
        filter_code = File.read(filter_file)
        container = Class.new
        container.class_eval(
          "require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
        )
        container
      end
    end

    # Runs every loaded filter and records those that matched.
    def run_filters
      @filters.each do |filter|
        filter.parse
        next unless filter.matched?

        @matches.push(filter)
        @score += filter.score
      end
    end

    # Converts a filter path such as "filter/numbers_and_words.rb" into the
    # class name "NumbersAndWords".
    def classify_filename(filename)
      File.basename(filename).gsub(/\.rb$/, '').split('_').map(&:capitalize).join
    end
  end
end
@@ -0,0 +1,37 @@
class Despamilator
  # Base class for individual filters. A concrete filter is expected to
  # override +name+, +description+ and +parse+; the versions here only raise
  # an error naming the offending file.
  class FilterBase
    # text::     the content being inspected
    # score::    accumulated score for this filter
    # filename:: name of the script the filter came from
    # matches::  how many times a score was appended
    attr_accessor :text, :score, :filename, :matches

    def initialize(text, filename)
      @matches = 0
      @filename = filename
      @score = 0
      @text = text
      # NOTE(review): nothing in this class reads @matched; it is kept in case
      # a filter script refers to it - confirm before removing.
      @matched = false
    end

    # Raises RuntimeError unless overridden.
    def description
      raise "No description defined in #{filename}"
    end

    # Raises RuntimeError unless overridden. Any arguments are accepted and
    # ignored so that a zero-argument call reaches the explanatory error
    # instead of failing with ArgumentError.
    def parse(*_ignored)
      raise "No parser defined in #{filename}"
    end

    # Raises RuntimeError unless overridden.
    def name
      raise "No name defined in #{filename}"
    end

    # True once any positive score has been recorded.
    def matched?
      @score > 0
    end

    protected

    # Records one more match and adds +new_score+ to the running total.
    def append_score=(new_score)
      @matches += 1
      @score += new_score
    end
  end
end
@@ -0,0 +1,19 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'despamilator/filter'
4
+
5
# Public entry point: wraps a Despamilator::Filter run over a piece of text.
class Despamilator
  VERSION = "0.2"

  # text:: the content to check; it is passed straight to
  #        Despamilator::Filter.new.
  def initialize(text)
    @filters = Despamilator::Filter.new(text)
  end

  # Total score reported by the filter run.
  def score
    @filters.score
  end

  # The filters that matched, as reported by the filter run.
  def matched_by
    @filters.matches
  end
end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/spec_helper.rb'
2
+
3
# Harmless text must produce a zero score and no matching filters.
describe Despamilator do
  before :each do
    @dspam = Despamilator.new('this text is absolutely fine')
  end

  it "should return a zero score for fine text" do
    @dspam.score.should == 0
  end

  it "should return no matching filter for fine text" do
    @dspam.matched_by.should be_empty
  end
end
@@ -0,0 +1,144 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the HTML tag filter: every listed tag, in upper and lower case and
# in several partial/self-closing spellings, is expected to score 0.3.
context "HtmlTags" do
  describe "filtering" do
    %w[
      !-- !DOCTYPE a abbr acronym address applet area b base basefont bdo big
      blockquote body br button caption center cite code col colgroup dd del
      dfn dir div dl dt em fieldset font form frame frameset h1 h2 h3 h4 h5 h6
      head hr html i iframe img input ins isindex kbd label legend li link map
      menu meta noframes noscript object ol optgroup option p param pre q s
      samp select small span strike strong style sub sup table tbody td
      textarea tfoot th thead title tr tt u ul var xmp
    ].each do |tag_name|
      [tag_name.upcase, tag_name.downcase].each do |cased_tag|
        # Distinct block variable names at each level avoid shadowing the
        # enclosing loop variable.
        [
          "<#{cased_tag}",
          "#{cased_tag}/>",
          "<#{cased_tag}/>",
          "< #{cased_tag} ",
          "#{cased_tag} />",
          "<\n#{cased_tag}\n/>",
          "<\n#{cased_tag} ",
          "#{cased_tag}\n/>",
          "<\r#{cased_tag}\r/>"
        ].each do |fragment|
          it "should detect '#{fragment}'" do
            dspam = Despamilator.new(fragment)
            dspam.score.should == 0.3
          end
        end
      end
    end

    describe 'attributes' do
      before :all do
        @dspam = Despamilator.new('<xmp>').matched_by.first
      end

      it "should have a name" do
        @dspam.name.should == 'Detects HTML tags in text'
      end

      it "should have a description" do
        @dspam.description.should == 'Searches for various HTML tags'
      end

      it "should have a filename" do
        @dspam.filename.should == 'html_tags.rb'
      end
    end

    describe 'bug fixes' do
      it "should detect an h1" do
        Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the "Naughty Q" filter.
describe "NaughtyQ" do
  it "should return a score for 1 misplaced q" do
    dspam = Despamilator.new('qtu')
    dspam.score.should == 0.2
  end

  describe 'attributes' do
    before :each do
      @filter = Despamilator.new('qtqt').matched_by.first
    end

    it "should have a filename" do
      @filter.filename.should == 'naughty_q.rb'
    end

    it "should have a name" do
      @filter.name.should == 'Naughty Q'
    end

    it "should have a description" do
      @filter.description.should == 'Detects possible misuse of the letter Q (English language)'
    end

    it "should have a number of matches" do
      @filter.matches.should == 2
    end

    it "should have a score" do
      @filter.score.should == 0.4
    end
  end

  # NOTE(review): the expected value equals the two-match score asserted
  # above, and the comparison goes through to_s - confirm both are intended.
  it "should score more for 3 misplaced q's" do
    dspam = Despamilator.new('qtuqsq')
    dspam.score.to_s.should == 0.4.to_s
  end
end
@@ -0,0 +1,59 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the numbers-next-to-words filter.
describe "NumbersAndWords" do
  # Plain numbers and ordinals must not be flagged.
  [1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd'].each do |number|
    it "should return a zero score for #{number}" do
      dspam = Despamilator.new(number)
      dspam.score.should == 0
    end
  end

  ['wanga x5 mool', '4ghk', 'XTHL9'].each do |string|
    it "should detect suspicious number word combos such as #{string}" do
      dspam = Despamilator.new(string)
      dspam.score.should == 0.1
    end
  end

  ['4wanga x5 mool', '4g6hk', 'XT7HL9', '77th8nd'].each do |string|
    it "should detect multiple suspicious number word combos such as #{string}" do
      dspam = Despamilator.new(string)
      dspam.score.should == 0.2
    end
  end

  [1, 2, 3, 4, 5, 6].each do |tag_no|
    header_tag = "h#{tag_no}"

    it "should ignore html header tag #{header_tag}" do
      dspam = Despamilator.new(header_tag)
      dspam.score.should == 0
    end
  end

  describe 'attributes' do
    before :each do
      @filter = Despamilator.new('X5T').matched_by.first
    end

    it "should have a filename" do
      @filter.filename.should == 'numbers_and_words.rb'
    end

    it "should have a name" do
      @filter.name.should == 'Numbers next to words'
    end

    it "should have a description" do
      @filter.description.should == 'Detects unusual number/word combinations'
    end

    it "should have a number of matches" do
      @filter.matches.should == 1
    end

    it "should have a score" do
      @filter.score.should == 0.1
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the script tag filter.
context "ScriptTag" do
  describe "detecting various script tags" do
    ['<script type="whatever">', '<script></script>', '</script>', '<script>', "<script\n>"].each do |script_tag|
      # A separate name for the cased variant avoids shadowing script_tag.
      [script_tag.upcase, script_tag.downcase].each do |cased_tag|
        it "should detect '#{cased_tag}' of a script tag" do
          dspam = Despamilator.new(cased_tag)
          dspam.score.should == 1
        end
      end
    end
  end

  describe 'attributes' do
    before :all do
      @dspam = Despamilator.new('<script>').matched_by.first
    end

    it "should have a name" do
      @dspam.name.should == 'Detects script tags in text'
    end

    it "should have a description" do
      @dspam.description.should == 'Searches for variations for the HTML script tag'
    end

    it "should have a filename" do
      @dspam.filename.should == 'script_tag.rb'
    end
  end
end
@@ -0,0 +1 @@
1
+ --colour
@@ -0,0 +1,10 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
5
+ gem 'rspec'
6
+ require 'spec'
7
+ end
8
+
9
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
10
+ require 'despamilator'
@@ -0,0 +1,21 @@
# Rake task that runs the spec suite through the rspec gem's `spec` library.

# Load the spec library, falling back to rubygems when a plain require fails.
begin
  require 'spec'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'spec'
end

# Without the rake integration there is nothing to define: print a hint and
# stop. NOTE(review): this exits with status 0, so callers see success -
# confirm that is intended.
begin
  require 'spec/rake/spectask'
rescue LoadError
  puts <<-EOS
To use rspec for testing you must install rspec gem:
    gem install rspec
EOS
  exit(0)
end

# The glob below picks up every *_spec.rb file under spec/, so the task
# description says so.
desc "Run all specs under spec/"
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "spec/spec.opts"]
  t.spec_files = FileList['spec/**/*_spec.rb']
end