despamilator 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +44 -0
  3. data/PostInstall.txt +1 -0
  4. data/README.rdoc +107 -0
  5. data/Rakefile +33 -0
  6. data/despamilator.gemspec +42 -0
  7. data/lib/despamilator/filter/html_tags.rb +116 -0
  8. data/lib/despamilator/filter/naughty_q.rb +17 -0
  9. data/lib/despamilator/filter/numbers_and_words.rb +33 -0
  10. data/lib/despamilator/filter/script_tag.rb +13 -0
  11. data/lib/despamilator/filter.rb +52 -0
  12. data/lib/despamilator/filter_base.rb +37 -0
  13. data/lib/despamilator.rb +19 -0
  14. data/pkg/despamilator-0.1/History.txt +4 -0
  15. data/pkg/despamilator-0.1/Manifest.txt +21 -0
  16. data/pkg/despamilator-0.1/PostInstall.txt +1 -0
  17. data/pkg/despamilator-0.1/README.rdoc +107 -0
  18. data/pkg/despamilator-0.1/Rakefile +33 -0
  19. data/pkg/despamilator-0.1/despamilator.gemspec +42 -0
  20. data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +116 -0
  21. data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +17 -0
  22. data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +33 -0
  23. data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +13 -0
  24. data/pkg/despamilator-0.1/lib/despamilator/filter.rb +52 -0
  25. data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +37 -0
  26. data/pkg/despamilator-0.1/lib/despamilator.rb +19 -0
  27. data/pkg/despamilator-0.1/spec/despamilator_spec.rb +15 -0
  28. data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +144 -0
  29. data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +39 -0
  30. data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +59 -0
  31. data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +32 -0
  32. data/pkg/despamilator-0.1/spec/spec.opts +1 -0
  33. data/pkg/despamilator-0.1/spec/spec_helper.rb +10 -0
  34. data/pkg/despamilator-0.1/tasks/rspec.rake +21 -0
  35. data/pkg/despamilator-0.1.gem +0 -0
  36. data/pkg/despamilator-0.1.tgz +0 -0
  37. data/spec/despamilator_spec.rb +15 -0
  38. data/spec/filters/html_tags_spec.rb +144 -0
  39. data/spec/filters/naughty_q_spec.rb +39 -0
  40. data/spec/filters/numbers_and_words_spec.rb +59 -0
  41. data/spec/filters/script_tag_spec.rb +32 -0
  42. data/spec/spec.opts +1 -0
  43. data/spec/spec_helper.rb +10 -0
  44. data/tasks/rspec.rake +21 -0
  45. metadata +155 -0
@@ -0,0 +1,13 @@
1
# Scores the text when it contains an opening or closing script tag.
#
# The match is case-insensitive and accepts the tag name followed by ">",
# "/" (the self-closing form) or any whitespace character, which includes
# newlines and carriage returns. A hit adds 1 to this filter's score through
# append_score=.
def parse
  self.append_score = 1 if text =~ %r{</?script(?:>|/|\s)}i
end

# Short human-readable name of this filter.
def name
  'Detects script tags in text'
end

# Longer explanation of what this filter looks for.
def description
  'Searches for variations for the HTML script tag'
end
@@ -0,0 +1,52 @@
1
class Despamilator
  # Loads every filter found in the "filter" directory next to this file,
  # runs each of them against the given text and aggregates the outcome.
  #
  # After construction:
  # * +matches+ holds the filter instances whose matched? returned true
  # * +score+ holds the sum of the scores of those filters
  class Filter
    attr_accessor :matches, :score

    # Cache of compiled filter classes, keyed by class name, so each filter
    # file is read and evaluated only once per process.
    def self.filter_classes
      @filter_classes ||= {}
    end

    # text - the content to inspect; it is converted with to_s and every
    #        filter receives its own copy.
    def initialize(text)
      @filters = []
      @matches = []
      @score = 0
      load_filters text
      run_filters
    end

    private

    # Instantiates one filter per file in the filter directory. The file list
    # is sorted so filters are always loaded in the same order.
    def load_filters(text)
      pattern = File.join(File.dirname(__FILE__), 'filter', '*.rb')

      Dir.glob(pattern).sort.each do |filter_file|
        filter_class = filter_class_for(filter_file)
        @filters.push(filter_class.new(text.to_s.dup, File.basename(filter_file)))
      end
    end

    # Returns the filter class for the given file, compiling it on first use
    # and serving it from the cache afterwards.
    def filter_class_for(filter_file)
      filter_name = classify_filename(filter_file)
      self.class.filter_classes[filter_name] ||= compile_filter(filter_name, filter_file)
    end

    # Wraps the method definitions stored in filter_file in a FilterBase
    # subclass. The subclass is defined inside an anonymous container class so
    # that its constant does not leak into the global namespace.
    def compile_filter(filter_name, filter_file)
      # Loaded lazily, only when a filter actually has to be compiled.
      require 'despamilator/filter_base'

      container = Class.new
      # File.read opens the file read-only and closes it again. The file name
      # and a starting line of 0 make backtraces point at the filter file.
      container.class_eval(
        "class #{filter_name} < Despamilator::FilterBase\n#{File.read(filter_file)}\nend",
        filter_file,
        0
      )
      container.const_get(filter_name)
    end

    # Runs every loaded filter, collecting the ones that matched and adding
    # their scores to the total.
    def run_filters
      @filters.each do |filter|
        filter.parse
        next unless filter.matched?

        @matches.push(filter)
        @score += filter.score
      end
    end

    # Turns a file name such as "numbers_and_words.rb" into the class name
    # "NumbersAndWords".
    def classify_filename(filename)
      File.basename(filename, '.rb').split('_').map { |part| part.capitalize }.join
    end
  end
end
@@ -0,0 +1,37 @@
1
class Despamilator
  # Base class for filters. A concrete filter overrides +parse+, +name+ and
  # +description+; the defaults here raise an error naming the filter file
  # that forgot to define them.
  class FilterBase
    attr_accessor :text, :score, :filename, :matches

    # text     - the content this filter inspects
    # filename - name of the file the filter was loaded from; only used here
    #            to build error messages
    def initialize(text, filename)
      @matches = 0
      @filename = filename
      @score = 0
      @text = text
      # NOTE(review): @matched is never read in this class (matched? looks at
      # the score); kept in case a filter relies on it - confirm before removing.
      @matched = false
    end

    # Raises because the filter did not provide a description.
    def description
      raise "No description defined in #{filename}"
    end

    # Raises because the filter did not provide a parser. Arguments are
    # accepted and ignored so that a call without arguments reaches this
    # error message instead of failing with an ArgumentError.
    def parse(*_ignored)
      raise "No parser defined in #{filename}"
    end

    # Raises because the filter did not provide a name.
    def name
      raise "No name defined in #{filename}"
    end

    # True once any score has been recorded.
    def matched?
      @score > 0
    end

    protected

    # Records one more match and adds new_score to the running score.
    # Written as a setter so filters call it as: self.append_score = 0.1
    def append_score=(new_score)
      @matches += 1
      @score += new_score
    end
  end
end
@@ -0,0 +1,19 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'despamilator/filter'
4
+
5
# Entry point of the library: wraps a Despamilator::Filter run over one piece
# of text and exposes its results.
class Despamilator
  VERSION = "0.2"

  # text - the content to check; handed unchanged to Despamilator::Filter.
  def initialize(text)
    @filters = Filter.new(text)
  end

  # Score reported by the filter run.
  def score
    filter_run.score
  end

  # Matches reported by the filter run.
  def matched_by
    filter_run.matches
  end

  private

  # The Despamilator::Filter instance created for this text.
  def filter_run
    @filters
  end
end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/spec_helper.rb'
2
+
3
# Smoke spec: ordinary text must produce no score and no matching filters.
describe Despamilator do
  before(:each) do
    @clean = Despamilator.new('this text is absolutely fine')
  end

  it "should return a zero score for fine text" do
    @clean.score.should == 0
  end

  it "should return no matching filter for fine text" do
    @clean.matched_by.should be_empty
  end
end
@@ -0,0 +1,144 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the HtmlTags filter. For every listed tag, in upper and lower
# case, each fragment variation below must yield a total score of 0.3.
context "HtmlTags" do
  describe "filtering" do
    html_tags = %w[
      !-- !DOCTYPE a abbr acronym address applet area b base basefont bdo big
      blockquote body br button caption center cite code col colgroup dd del
      dfn dir div dl dt em fieldset font form frame frameset h1 h2 h3 h4 h5 h6
      head hr html i iframe img input ins isindex kbd label legend li link map
      menu meta noframes noscript object ol optgroup option p param pre q s
      samp select small span strike strong style sub sup table tbody td
      textarea tfoot th thead title tr tt u ul var xmp
    ]

    html_tags.each do |tag|
      [tag.upcase, tag.downcase].each do |cased_tag|
        # Opening, closing and self-closing shapes, with and without
        # whitespace, newlines or carriage returns around the tag name.
        [
          "<#{cased_tag}",
          "#{cased_tag}/>",
          "<#{cased_tag}/>",
          "< #{cased_tag} ",
          "#{cased_tag} />",
          "<\n#{cased_tag}\n/>",
          "<\n#{cased_tag} ",
          "#{cased_tag}\n/>",
          "<\r#{cased_tag}\r/>"
        ].each do |fragment|
          it "should detect '#{fragment}'" do
            dspam = Despamilator.new(fragment)
            dspam.score.should == 0.3
          end
        end
      end
    end

    describe 'attributes' do
      before :all do
        # The first (and expected only) filter matching a plain HTML tag.
        @filter = Despamilator.new('<xmp>').matched_by.first
      end

      it "should have a name" do
        @filter.name.should == 'Detects HTML tags in text'
      end

      it "should have a description" do
        @filter.description.should == 'Searches for various HTML tags'
      end

      it "should have a filename" do
        @filter.filename.should == 'html_tags.rb'
      end
    end

    describe 'bug fixes' do
      it "should detect an h1" do
        Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the NaughtyQ filter: scores for misplaced "q" letters and the
# attributes the matching filter exposes.
describe "NaughtyQ" do
  it "should return a score for 1 misplaced q" do
    Despamilator.new('qtu').score.should == 0.2
  end

  describe 'attributes' do
    before(:each) do
      @naughty_q = Despamilator.new('qtqt').matched_by.first
    end

    it "should have a filename" do
      @naughty_q.filename.should == 'naughty_q.rb'
    end

    it "should have a name" do
      @naughty_q.name.should == 'Naughty Q'
    end

    it "should have a description" do
      @naughty_q.description.should == 'Detects possible misuse of the letter Q (English language)'
    end

    it "should have a number of matches" do
      @naughty_q.matches.should == 2
    end

    it "should have a score" do
      @naughty_q.score.should == 0.4
    end
  end

  it "should score more for 3 misplaced q's" do
    total = Despamilator.new('qtuqsq').score
    # Compared through the string form of both floats, as written originally.
    total.to_s.should == 0.4.to_s
  end
end
@@ -0,0 +1,59 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the NumbersAndWords filter: plain numbers, ordinals and HTML
# header tag names must score 0; digits mixed into words must be scored.
describe "NumbersAndWords" do
  harmless_numbers = [1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd']
  single_combos    = ['wanga x5 mool', '4ghk', 'XTHL9']
  multiple_combos  = ['4wanga x5 mool', '4g6hk', 'XT7HL9', '77th8nd']

  harmless_numbers.each do |number|
    it "should return a blank for a #{number}" do
      Despamilator.new(number).score.should == 0
    end
  end

  single_combos.each do |string|
    it "should detect suspicious number word combos such as #{string}" do
      Despamilator.new(string).score.should == 0.1
    end
  end

  multiple_combos.each do |string|
    it "should detect multiple suspicious number word combos such as #{string}" do
      Despamilator.new(string).score.should == 0.2
    end
  end

  (1..6).each do |tag_no|
    header_tag = "h#{tag_no}"

    it "should ignore html header tag #{header_tag}" do
      Despamilator.new(header_tag).score.should == 0
    end
  end

  describe 'attributes' do
    before(:each) do
      @numbers_and_words = Despamilator.new('X5T').matched_by.first
    end

    it "should have a filename" do
      @numbers_and_words.filename.should == 'numbers_and_words.rb'
    end

    it "should have a name" do
      @numbers_and_words.name.should == 'Numbers next to words'
    end

    it "should have a description" do
      @numbers_and_words.description.should == 'Detects unusual number/word combinations'
    end

    it "should have a number of matches" do
      @numbers_and_words.matches.should == 1
    end

    it "should have a score" do
      @numbers_and_words.score.should == 0.1
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
# Specs for the ScriptTag filter: every sample, in upper and lower case, must
# yield a total score of 1, and the matching filter must describe itself.
context "ScriptTag" do
  describe "detecting various script tags" do
    samples = ['<script type="whatever">', '<script></script>', '</script>', '<script>', "<script\n>"]

    samples.each do |sample|
      [sample.upcase, sample.downcase].each do |script_tag|
        it "should detect '#{script_tag}' of a script tag" do
          dspam = Despamilator.new(script_tag)
          dspam.score.should == 1
        end
      end
    end
  end

  describe 'attributes' do
    before :all do
      # The first filter reported as matching a bare script tag.
      @filter = Despamilator.new('<script>').matched_by.first
    end

    it "should have a name" do
      @filter.name.should == 'Detects script tags in text'
    end

    it "should have a description" do
      @filter.description.should == 'Searches for variations for the HTML script tag'
    end

    it "should have a filename" do
      @filter.filename.should == 'script_tag.rb'
    end
  end
end
@@ -0,0 +1 @@
1
+ --colour
@@ -0,0 +1,10 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
5
+ gem 'rspec'
6
+ require 'spec'
7
+ end
8
+
9
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
10
+ require 'despamilator'
@@ -0,0 +1,21 @@
1
# Defines the rake task that runs the RSpec suite.
#
# Both the rspec library and its rake integration are loaded inside a single
# rescue, so the install hint below is printed whenever either of them is
# missing (a failing retry of "require 'spec'" used to escape as a bare
# LoadError before the hint could be shown).
begin
  begin
    require 'spec'
  rescue LoadError
    # Retry through rubygems unless the caller opted out of it.
    require 'rubygems' unless ENV['NO_RUBYGEMS']
    require 'spec'
  end
  require 'spec/rake/spectask'
rescue LoadError
  puts <<-EOS
To use rspec for testing you must install rspec gem:
    gem install rspec
EOS
  exit(0)
end

desc "Run the specs under spec"
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "spec/spec.opts"]
  t.spec_files = FileList['spec/**/*_spec.rb']
end