despamilator 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -1
- data/Manifest.txt +4 -23
- data/README.rdoc +4 -1
- data/despamilator.gemspec +1 -1
- data/lib/despamilator/filter/html_tags.rb +1 -1
- data/lib/despamilator/filter/ip_address_url.rb +11 -0
- data/lib/despamilator/filter/naughty_q.rb +3 -2
- data/lib/despamilator/filter/shouting.rb +21 -0
- data/lib/despamilator/filter.rb +6 -10
- data/lib/despamilator.rb +2 -2
- data/spec/filters/html_tags_spec.rb +12 -9
- data/spec/filters/ip_address_url_spec.rb +29 -0
- data/spec/filters/shouting_spec.rb +49 -0
- metadata +15 -51
- data/pkg/despamilator-0.1/History.txt +0 -4
- data/pkg/despamilator-0.1/Manifest.txt +0 -21
- data/pkg/despamilator-0.1/PostInstall.txt +0 -1
- data/pkg/despamilator-0.1/README.rdoc +0 -107
- data/pkg/despamilator-0.1/Rakefile +0 -33
- data/pkg/despamilator-0.1/despamilator.gemspec +0 -42
- data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +0 -116
- data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +0 -17
- data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +0 -33
- data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +0 -13
- data/pkg/despamilator-0.1/lib/despamilator/filter.rb +0 -52
- data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +0 -37
- data/pkg/despamilator-0.1/lib/despamilator.rb +0 -19
- data/pkg/despamilator-0.1/spec/despamilator_spec.rb +0 -15
- data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +0 -144
- data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +0 -39
- data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +0 -59
- data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +0 -32
- data/pkg/despamilator-0.1/spec/spec.opts +0 -1
- data/pkg/despamilator-0.1/spec/spec_helper.rb +0 -10
- data/pkg/despamilator-0.1/tasks/rspec.rake +0 -21
- data/pkg/despamilator-0.1.gem +0 -0
- data/pkg/despamilator-0.1.tgz +0 -0
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -7,38 +7,19 @@ despamilator.gemspec
|
|
7
7
|
lib/despamilator.rb
|
8
8
|
lib/despamilator/filter.rb
|
9
9
|
lib/despamilator/filter/html_tags.rb
|
10
|
+
lib/despamilator/filter/ip_address_url.rb
|
10
11
|
lib/despamilator/filter/naughty_q.rb
|
11
12
|
lib/despamilator/filter/numbers_and_words.rb
|
12
13
|
lib/despamilator/filter/script_tag.rb
|
14
|
+
lib/despamilator/filter/shouting.rb
|
13
15
|
lib/despamilator/filter_base.rb
|
14
|
-
pkg/despamilator-0.1.gem
|
15
|
-
pkg/despamilator-0.1.tgz
|
16
|
-
pkg/despamilator-0.1/History.txt
|
17
|
-
pkg/despamilator-0.1/Manifest.txt
|
18
|
-
pkg/despamilator-0.1/PostInstall.txt
|
19
|
-
pkg/despamilator-0.1/README.rdoc
|
20
|
-
pkg/despamilator-0.1/Rakefile
|
21
|
-
pkg/despamilator-0.1/despamilator.gemspec
|
22
|
-
pkg/despamilator-0.1/lib/despamilator.rb
|
23
|
-
pkg/despamilator-0.1/lib/despamilator/filter.rb
|
24
|
-
pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
|
25
|
-
pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
|
26
|
-
pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
|
27
|
-
pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
|
28
|
-
pkg/despamilator-0.1/lib/despamilator/filter_base.rb
|
29
|
-
pkg/despamilator-0.1/spec/despamilator_spec.rb
|
30
|
-
pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
|
31
|
-
pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
|
32
|
-
pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
|
33
|
-
pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
|
34
|
-
pkg/despamilator-0.1/spec/spec.opts
|
35
|
-
pkg/despamilator-0.1/spec/spec_helper.rb
|
36
|
-
pkg/despamilator-0.1/tasks/rspec.rake
|
37
16
|
spec/despamilator_spec.rb
|
38
17
|
spec/filters/html_tags_spec.rb
|
18
|
+
spec/filters/ip_address_url_spec.rb
|
39
19
|
spec/filters/naughty_q_spec.rb
|
40
20
|
spec/filters/numbers_and_words_spec.rb
|
41
21
|
spec/filters/script_tag_spec.rb
|
22
|
+
spec/filters/shouting_spec.rb
|
42
23
|
spec/spec.opts
|
43
24
|
spec/spec_helper.rb
|
44
25
|
tasks/rspec.rake
|
data/README.rdoc
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
7
|
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
8
|
-
Spam being submitted in my web forms
|
8
|
+
Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator will apply
|
9
9
|
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
10
10
|
|
11
11
|
== FEATURES/PROBLEMS:
|
@@ -15,6 +15,9 @@ some commonly used heuristics from the world of anti-spam to help you decide whe
|
|
15
15
|
== SYNOPSIS:
|
16
16
|
|
17
17
|
# using Despamilator
|
18
|
+
require 'rubygems'
|
19
|
+
require 'despamilator'
|
20
|
+
|
18
21
|
dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
19
22
|
|
20
23
|
dspam.score #=> the total score for this string (1 is considered high)
|
data/despamilator.gemspec
CHANGED
@@ -7,11 +7,12 @@ def description
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def parse
|
10
|
-
matches = self.text.downcase.scan(/q
|
10
|
+
matches = self.text.downcase.scan(/q(\w|\d)/)
|
11
11
|
|
12
12
|
return unless matches
|
13
13
|
|
14
14
|
matches.each do |match|
|
15
|
-
|
15
|
+
match = match.first
|
16
|
+
self.append_score = 0.2 unless match == 'u' or match == 'a'
|
16
17
|
end
|
17
18
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
def name
|
2
|
+
'Shouting'
|
3
|
+
end
|
4
|
+
|
5
|
+
def description
|
6
|
+
'Detects and scores shouting (all caps)'
|
7
|
+
end
|
8
|
+
|
9
|
+
def parse
|
10
|
+
# strip HTML
|
11
|
+
text = self.text.gsub(/<\/?[^>]*>/, "")
|
12
|
+
|
13
|
+
return if text.length < 20
|
14
|
+
|
15
|
+
uppercased = text.scan(/[A-Z]/).length
|
16
|
+
lowercased = text.scan(/[a-z]/).length
|
17
|
+
|
18
|
+
if uppercased > 0
|
19
|
+
self.append_score = (uppercased.to_f / (uppercased + lowercased)) * 0.5
|
20
|
+
end
|
21
|
+
end
|
data/lib/despamilator/filter.rb
CHANGED
@@ -3,7 +3,6 @@ class Despamilator
|
|
3
3
|
attr_accessor :matches, :score
|
4
4
|
|
5
5
|
def initialize text
|
6
|
-
@@loaded ||= {}
|
7
6
|
@filters ||= []
|
8
7
|
@matches ||= []
|
9
8
|
@score ||= 0
|
@@ -16,15 +15,12 @@ class Despamilator
|
|
16
15
|
def load_filters text
|
17
16
|
Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").each do |filter_file|
|
18
17
|
filter_name = classify_filename filter_file
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
"require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
|
26
|
-
)
|
27
|
-
end
|
18
|
+
|
19
|
+
filter_code = File.open(filter_file, File::RDWR).read
|
20
|
+
filter = Class.new
|
21
|
+
filter.class_eval(
|
22
|
+
"require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
|
23
|
+
)
|
28
24
|
|
29
25
|
@filters.push(filter.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
|
30
26
|
end
|
data/lib/despamilator.rb
CHANGED
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) ||
|
|
3
3
|
require 'despamilator/filter'
|
4
4
|
|
5
5
|
class Despamilator
|
6
|
-
VERSION = "0.
|
6
|
+
VERSION = "0.3"
|
7
7
|
|
8
8
|
def initialize text
|
9
9
|
@filters = Despamilator::Filter.new text
|
@@ -16,4 +16,4 @@ class Despamilator
|
|
16
16
|
def matched_by
|
17
17
|
@filters.matches
|
18
18
|
end
|
19
|
-
end
|
19
|
+
end
|
@@ -99,18 +99,17 @@ context "HtmlTags" do
|
|
99
99
|
].each do |script_tag|
|
100
100
|
[script_tag.upcase, script_tag.downcase].each do |script_tag|
|
101
101
|
[
|
102
|
-
"<#{script_tag}",
|
103
|
-
"#{script_tag}/>",
|
102
|
+
"<#{script_tag}>",
|
104
103
|
"<#{script_tag}/>",
|
105
|
-
"< #{script_tag} ",
|
106
|
-
"
|
104
|
+
"< #{script_tag} >",
|
105
|
+
"<#{script_tag} />",
|
107
106
|
"<\n#{script_tag}\n/>",
|
108
|
-
"<\n#{script_tag} ",
|
109
|
-
"
|
107
|
+
"<\n#{script_tag} >",
|
108
|
+
"<#{script_tag}\n/>",
|
110
109
|
"<\r#{script_tag}\r/>"
|
111
|
-
].each do |
|
112
|
-
it "should detect '#{
|
113
|
-
dspam = Despamilator.new(
|
110
|
+
].each do |tag|
|
111
|
+
it "should detect '#{tag}'" do
|
112
|
+
dspam = Despamilator.new(tag)
|
114
113
|
dspam.score.should == 0.3
|
115
114
|
end
|
116
115
|
end
|
@@ -139,6 +138,10 @@ context "HtmlTags" do
|
|
139
138
|
it "should detect an h1" do
|
140
139
|
Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
|
141
140
|
end
|
141
|
+
|
142
|
+
it "should not detect tags twice" do
|
143
|
+
Despamilator.new('<i>italic</i>').score.should == 0.3
|
144
|
+
end
|
142
145
|
end
|
143
146
|
end
|
144
147
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
describe "IP Address URL" do
|
4
|
+
describe 'attributes' do
|
5
|
+
before :each do
|
6
|
+
@filter = Despamilator.new('http://12.34.56.78/').matched_by.first
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have a filename" do
|
10
|
+
@filter.filename.should == 'ip_address_url.rb'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should have a name" do
|
14
|
+
@filter.name.should == 'IP Address URL'
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should have a description" do
|
18
|
+
@filter.description.should == 'Detects IP address URLs'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should have a number of matches" do
|
22
|
+
@filter.matches.should == 1
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should have a score" do
|
26
|
+
@filter.score.should == 0.5
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
describe "Shouting" do
|
4
|
+
describe 'attributes' do
|
5
|
+
before :each do
|
6
|
+
@filter = Despamilator.new('HELLO THERE!! THIS IS SHOUTING!!').matched_by.first
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have a filename" do
|
10
|
+
@filter.filename.should == 'shouting.rb'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should have a name" do
|
14
|
+
@filter.name.should == 'Shouting'
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should have a description" do
|
18
|
+
@filter.description.should == 'Detects and scores shouting (all caps)'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should have a number of matches" do
|
22
|
+
@filter.matches.should == 1
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should have a score" do
|
26
|
+
@filter.score.should == 0.5
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe "filter" do
|
31
|
+
it "should strip out HTML" do
|
32
|
+
Despamilator.new('<H1>this is a flipping html tag whose contents is very long</h1>').matched_by.select {|a| a.name == 'Shouting'}.should be_empty
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should ignore strings less than 20 characters long" do
|
36
|
+
Despamilator.new('ABCD EFG HIJKLM NOP').matched_by.select {|a| a.name == 'Shouting'}.should be_empty
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should score based on a percentage of uppercase words" do
|
40
|
+
[
|
41
|
+
['this is a lowercased string', 0],
|
42
|
+
['this lil string is 50 PERCENT SHOUTING', 0.25],
|
43
|
+
['THIS LIL STRING IS 100 PERCENT SHOUTING', 0.5]
|
44
|
+
].each do |string, expected_score|
|
45
|
+
Despamilator.new(string).score.should == expected_score
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
metadata
CHANGED
@@ -4,8 +4,8 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
version: "0.
|
7
|
+
- 3
|
8
|
+
version: "0.3"
|
9
9
|
platform: ruby
|
10
10
|
authors:
|
11
11
|
- Stephen Hardisty
|
@@ -13,7 +13,7 @@ autorequire:
|
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
15
|
|
16
|
-
date: 2010-
|
16
|
+
date: 2010-05-11 00:00:00 +10:00
|
17
17
|
default_executable:
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
@@ -26,41 +26,27 @@ dependencies:
|
|
26
26
|
segments:
|
27
27
|
- 2
|
28
28
|
- 0
|
29
|
-
-
|
30
|
-
version: 2.0.
|
29
|
+
- 4
|
30
|
+
version: 2.0.4
|
31
31
|
type: :development
|
32
32
|
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: gemcutter
|
35
|
-
prerelease: false
|
36
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
segments:
|
41
|
-
- 0
|
42
|
-
- 5
|
43
|
-
- 0
|
44
|
-
version: 0.5.0
|
45
|
-
type: :development
|
46
|
-
version_requirements: *id002
|
47
33
|
- !ruby/object:Gem::Dependency
|
48
34
|
name: hoe
|
49
35
|
prerelease: false
|
50
|
-
requirement: &
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
38
|
- - ">="
|
53
39
|
- !ruby/object:Gem::Version
|
54
40
|
segments:
|
55
41
|
- 2
|
56
|
-
-
|
42
|
+
- 6
|
57
43
|
- 0
|
58
|
-
version: 2.
|
44
|
+
version: 2.6.0
|
59
45
|
type: :development
|
60
|
-
version_requirements: *
|
46
|
+
version_requirements: *id002
|
61
47
|
description: |-
|
62
48
|
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
63
|
-
Spam being submitted in my web forms
|
49
|
+
Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator will apply
|
64
50
|
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
65
51
|
email:
|
66
52
|
- moowahaha@hotmail.com
|
@@ -72,9 +58,6 @@ extra_rdoc_files:
|
|
72
58
|
- History.txt
|
73
59
|
- Manifest.txt
|
74
60
|
- PostInstall.txt
|
75
|
-
- pkg/despamilator-0.1/History.txt
|
76
|
-
- pkg/despamilator-0.1/Manifest.txt
|
77
|
-
- pkg/despamilator-0.1/PostInstall.txt
|
78
61
|
files:
|
79
62
|
- History.txt
|
80
63
|
- Manifest.txt
|
@@ -85,38 +68,19 @@ files:
|
|
85
68
|
- lib/despamilator.rb
|
86
69
|
- lib/despamilator/filter.rb
|
87
70
|
- lib/despamilator/filter/html_tags.rb
|
71
|
+
- lib/despamilator/filter/ip_address_url.rb
|
88
72
|
- lib/despamilator/filter/naughty_q.rb
|
89
73
|
- lib/despamilator/filter/numbers_and_words.rb
|
90
74
|
- lib/despamilator/filter/script_tag.rb
|
75
|
+
- lib/despamilator/filter/shouting.rb
|
91
76
|
- lib/despamilator/filter_base.rb
|
92
|
-
- pkg/despamilator-0.1.gem
|
93
|
-
- pkg/despamilator-0.1.tgz
|
94
|
-
- pkg/despamilator-0.1/History.txt
|
95
|
-
- pkg/despamilator-0.1/Manifest.txt
|
96
|
-
- pkg/despamilator-0.1/PostInstall.txt
|
97
|
-
- pkg/despamilator-0.1/README.rdoc
|
98
|
-
- pkg/despamilator-0.1/Rakefile
|
99
|
-
- pkg/despamilator-0.1/despamilator.gemspec
|
100
|
-
- pkg/despamilator-0.1/lib/despamilator.rb
|
101
|
-
- pkg/despamilator-0.1/lib/despamilator/filter.rb
|
102
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
|
103
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
|
104
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
|
105
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
|
106
|
-
- pkg/despamilator-0.1/lib/despamilator/filter_base.rb
|
107
|
-
- pkg/despamilator-0.1/spec/despamilator_spec.rb
|
108
|
-
- pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
|
109
|
-
- pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
|
110
|
-
- pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
|
111
|
-
- pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
|
112
|
-
- pkg/despamilator-0.1/spec/spec.opts
|
113
|
-
- pkg/despamilator-0.1/spec/spec_helper.rb
|
114
|
-
- pkg/despamilator-0.1/tasks/rspec.rake
|
115
77
|
- spec/despamilator_spec.rb
|
116
78
|
- spec/filters/html_tags_spec.rb
|
79
|
+
- spec/filters/ip_address_url_spec.rb
|
117
80
|
- spec/filters/naughty_q_spec.rb
|
118
81
|
- spec/filters/numbers_and_words_spec.rb
|
119
82
|
- spec/filters/script_tag_spec.rb
|
83
|
+
- spec/filters/shouting_spec.rb
|
120
84
|
- spec/spec.opts
|
121
85
|
- spec/spec_helper.rb
|
122
86
|
- tasks/rspec.rake
|
@@ -150,6 +114,6 @@ rubyforge_project: despamilator
|
|
150
114
|
rubygems_version: 1.3.6
|
151
115
|
signing_key:
|
152
116
|
specification_version: 3
|
153
|
-
summary: "Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms
|
117
|
+
summary: "Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms and CAPTCHAS being intrusive"
|
154
118
|
test_files: []
|
155
119
|
|
@@ -1,21 +0,0 @@
|
|
1
|
-
History.txt
|
2
|
-
Manifest.txt
|
3
|
-
PostInstall.txt
|
4
|
-
README.rdoc
|
5
|
-
Rakefile
|
6
|
-
despamilator.gemspec
|
7
|
-
lib/despamilator.rb
|
8
|
-
lib/despamilator/filter.rb
|
9
|
-
lib/despamilator/filter/html_tags.rb
|
10
|
-
lib/despamilator/filter/naughty_q.rb
|
11
|
-
lib/despamilator/filter/numbers_and_words.rb
|
12
|
-
lib/despamilator/filter/script_tag.rb
|
13
|
-
lib/despamilator/filter_base.rb
|
14
|
-
spec/despamilator_spec.rb
|
15
|
-
spec/filters/html_tags_spec.rb
|
16
|
-
spec/filters/naughty_q_spec.rb
|
17
|
-
spec/filters/numbers_and_words_spec.rb
|
18
|
-
spec/filters/script_tag_spec.rb
|
19
|
-
spec/spec.opts
|
20
|
-
spec/spec_helper.rb
|
21
|
-
tasks/rspec.rake
|
@@ -1 +0,0 @@
|
|
1
|
-
For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamliator
|
@@ -1,107 +0,0 @@
|
|
1
|
-
= Despamilator
|
2
|
-
|
3
|
-
* http://github.com/moowahaha/despamliator
|
4
|
-
|
5
|
-
== DESCRIPTION:
|
6
|
-
|
7
|
-
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
8
|
-
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
|
9
|
-
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
10
|
-
|
11
|
-
== FEATURES/PROBLEMS:
|
12
|
-
|
13
|
-
* rspec will be run twice. How annoying?!
|
14
|
-
|
15
|
-
== SYNOPSIS:
|
16
|
-
|
17
|
-
# using Despamilator
|
18
|
-
dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
19
|
-
|
20
|
-
dspam.score #=> the total score for this string (1 is considered high)
|
21
|
-
dspam.matched_by #=> array of matching filters
|
22
|
-
first_match = dspam.matched_by.first #=> first matching filter
|
23
|
-
first_match.name #=> some string with the name of the filter
|
24
|
-
first_match.description #=> some string to describe
|
25
|
-
first_match.score #=> the individual score assigned by this filter
|
26
|
-
|
27
|
-
# adding a new filter! example: detecting the letter "a"
|
28
|
-
# put the following code in lib/despamilator/filter/detect_letter_a.rb
|
29
|
-
def name
|
30
|
-
'Detecting the letter A'
|
31
|
-
end
|
32
|
-
|
33
|
-
def description
|
34
|
-
'Detects the letter "a" in a string for no reason other than a demo'
|
35
|
-
end
|
36
|
-
|
37
|
-
def parse
|
38
|
-
if self.text.downcase.scan(/a/)
|
39
|
-
# add 0.1 to the score of the text
|
40
|
-
self.append_score = 0.1
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
== FILTERING:
|
45
|
-
|
46
|
-
As stated, this is a heuristic scanner so it's up to the user to decide the thresholds of the scanner. I usually
|
47
|
-
say "it's spam" if the score reaches 1.
|
48
|
-
|
49
|
-
The score will be added to incrementally by each matching filter. So if there is some HTML in there, it will be added
|
50
|
-
to the score. If there is also a script tag of some sort, that will add more.
|
51
|
-
|
52
|
-
Each filter decides how much of a score it assigns. For example, detecting a number next to a letter
|
53
|
-
(the numbers_and_words filter) is only a mild hint compared with a script tag (detected by the script_tag filter).
|
54
|
-
|
55
|
-
=== NEW FILTERS:
|
56
|
-
|
57
|
-
I absolutely welcome new filters and experiments. New filters should be put in the 'lib/despamilator/filter/' directory.
|
58
|
-
The core filtering code will detect and use what is in there so you only need to drop the code in.
|
59
|
-
Filters should be simple, no classes etc wrapped around them and should try to perform one simple task.
|
60
|
-
They should always supply the following methods:
|
61
|
-
|
62
|
-
* name #=> the name of your filter.
|
63
|
-
* description #=> what your filter will look for.
|
64
|
-
* parse #=> the method that will be called when parsing.
|
65
|
-
|
66
|
-
Alongside the above, the following methods are made available to each filter:
|
67
|
-
|
68
|
-
* text #=> a copy of the text your parser will parse
|
69
|
-
* append_score= #=> method to append a score to the text if there are matches in your parser.
|
70
|
-
* matched? #=> whether or not any filter has so far detected something suspect
|
71
|
-
* score #=> the current score assigned to the text
|
72
|
-
|
73
|
-
spec tests are an absolute must!
|
74
|
-
|
75
|
-
|
76
|
-
== REQUIREMENTS:
|
77
|
-
|
78
|
-
* hoe
|
79
|
-
* rspec
|
80
|
-
|
81
|
-
|
82
|
-
== INSTALL:
|
83
|
-
|
84
|
-
$ sudo gem install despamilator
|
85
|
-
|
86
|
-
== LICENSE:
|
87
|
-
|
88
|
-
Copyright (c) 2010 Stephen Hardisty
|
89
|
-
|
90
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
91
|
-
a copy of this software and associated documentation files (the
|
92
|
-
'Software'), to deal in the Software without restriction, including
|
93
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
94
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
95
|
-
permit persons to whom the Software is furnished to do so, subject to
|
96
|
-
the following conditions:
|
97
|
-
|
98
|
-
The above copyright notice and this permission notice shall be
|
99
|
-
included in all copies or substantial portions of the Software.
|
100
|
-
|
101
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
102
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
103
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
104
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
105
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
106
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
107
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
gem 'hoe', '>= 2.1.0'
|
3
|
-
require 'hoe'
|
4
|
-
require 'fileutils'
|
5
|
-
require './lib/despamilator'
|
6
|
-
|
7
|
-
Hoe.plugin :newgem
|
8
|
-
# Hoe.plugin :website
|
9
|
-
# Hoe.plugin :cucumberfeatures
|
10
|
-
|
11
|
-
# Generate all the Rake tasks
|
12
|
-
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
13
|
-
$hoe = Hoe.spec 'despamilator' do
|
14
|
-
self.developer 'Stephen Hardisty', 'moowahaha@hotmail.com'
|
15
|
-
self.post_install_message = 'PostInstall.txt'
|
16
|
-
self.rubyforge_name = self.name # TODO this is default value
|
17
|
-
# self.extra_deps = [['activesupport','>= 2.0.2']]
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'newgem/tasks'
|
22
|
-
Dir['tasks/**/*.rake'].each { |t| load t }
|
23
|
-
|
24
|
-
# TODO - want other tests/tasks run by default? Add them to the list
|
25
|
-
# remove_task :default
|
26
|
-
task :test => [:spec]
|
27
|
-
task :default => [:test]
|
28
|
-
task :install => [:install_gem]
|
29
|
-
|
30
|
-
task :cultivate do
|
31
|
-
system "touch Manifest.txt; rake check_manifest | grep -v \"(in \" | patch"
|
32
|
-
system "rake debug_gem | grep -v \"(in \" > `basename \\`pwd\\``.gemspec"
|
33
|
-
end
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do |s|
|
4
|
-
s.name = %q{despamilator}
|
5
|
-
s.version = "0.2"
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
-
s.authors = ["Stephen Hardisty"]
|
9
|
-
s.date = %q{2010-03-28}
|
10
|
-
s.description = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
11
|
-
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
|
12
|
-
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.}
|
13
|
-
s.email = ["moowahaha@hotmail.com"]
|
14
|
-
s.extra_rdoc_files = ["History.txt", "Manifest.txt", "PostInstall.txt", "pkg/despamilator-0.1/History.txt", "pkg/despamilator-0.1/Manifest.txt", "pkg/despamilator-0.1/PostInstall.txt"]
|
15
|
-
s.files = ["History.txt", "Manifest.txt", "PostInstall.txt", "README.rdoc", "Rakefile", "despamilator.gemspec", "lib/despamilator.rb", "lib/despamilator/filter.rb", "lib/despamilator/filter/html_tags.rb", "lib/despamilator/filter/naughty_q.rb", "lib/despamilator/filter/numbers_and_words.rb", "lib/despamilator/filter/script_tag.rb", "lib/despamilator/filter_base.rb", "pkg/despamilator-0.1.gem", "pkg/despamilator-0.1.tgz", "pkg/despamilator-0.1/History.txt", "pkg/despamilator-0.1/Manifest.txt", "pkg/despamilator-0.1/PostInstall.txt", "pkg/despamilator-0.1/README.rdoc", "pkg/despamilator-0.1/Rakefile", "pkg/despamilator-0.1/despamilator.gemspec", "pkg/despamilator-0.1/lib/despamilator.rb", "pkg/despamilator-0.1/lib/despamilator/filter.rb", "pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb", "pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb", "pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb", "pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb", "pkg/despamilator-0.1/lib/despamilator/filter_base.rb", "pkg/despamilator-0.1/spec/despamilator_spec.rb", "pkg/despamilator-0.1/spec/filters/html_tags_spec.rb", "pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb", "pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb", "pkg/despamilator-0.1/spec/filters/script_tag_spec.rb", "pkg/despamilator-0.1/spec/spec.opts", "pkg/despamilator-0.1/spec/spec_helper.rb", "pkg/despamilator-0.1/tasks/rspec.rake", "spec/despamilator_spec.rb", "spec/filters/html_tags_spec.rb", "spec/filters/naughty_q_spec.rb", "spec/filters/numbers_and_words_spec.rb", "spec/filters/script_tag_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "tasks/rspec.rake"]
|
16
|
-
s.homepage = %q{http://github.com/moowahaha/despamliator}
|
17
|
-
s.post_install_message = %q{PostInstall.txt}
|
18
|
-
s.rdoc_options = ["--main", "README.rdoc"]
|
19
|
-
s.require_paths = ["lib"]
|
20
|
-
s.rubyforge_project = %q{despamilator}
|
21
|
-
s.rubygems_version = %q{1.3.6}
|
22
|
-
s.summary = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms AND CAPTCHAS being intrusive}
|
23
|
-
|
24
|
-
if s.respond_to? :specification_version then
|
25
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
26
|
-
s.specification_version = 3
|
27
|
-
|
28
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
29
|
-
s.add_development_dependency(%q<rubyforge>, [">= 2.0.3"])
|
30
|
-
s.add_development_dependency(%q<gemcutter>, [">= 0.5.0"])
|
31
|
-
s.add_development_dependency(%q<hoe>, [">= 2.5.0"])
|
32
|
-
else
|
33
|
-
s.add_dependency(%q<rubyforge>, [">= 2.0.3"])
|
34
|
-
s.add_dependency(%q<gemcutter>, [">= 0.5.0"])
|
35
|
-
s.add_dependency(%q<hoe>, [">= 2.5.0"])
|
36
|
-
end
|
37
|
-
else
|
38
|
-
s.add_dependency(%q<rubyforge>, [">= 2.0.3"])
|
39
|
-
s.add_dependency(%q<gemcutter>, [">= 0.5.0"])
|
40
|
-
s.add_dependency(%q<hoe>, [">= 2.5.0"])
|
41
|
-
end
|
42
|
-
end
|
@@ -1,116 +0,0 @@
|
|
1
|
-
def parse
|
2
|
-
html = self.text.downcase
|
3
|
-
|
4
|
-
html_tags.each do |tag|
|
5
|
-
if html.match(/<\s*#{tag}\W/) || html.match(/<\n*#{tag}\W/) || html.match(/\W#{tag}\s*\//) || html.match(/\W#{tag}\n*\//)
|
6
|
-
self.append_score = 0.3
|
7
|
-
end
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
'Detects HTML tags in text'
|
13
|
-
end
|
14
|
-
|
15
|
-
def description
|
16
|
-
'Searches for various HTML tags'
|
17
|
-
end
|
18
|
-
|
19
|
-
def html_tags
|
20
|
-
# make sure these are lowercase, in order to save processing
|
21
|
-
[
|
22
|
-
'!--',
|
23
|
-
'!doctype',
|
24
|
-
'a',
|
25
|
-
'abbr',
|
26
|
-
'acronym',
|
27
|
-
'address',
|
28
|
-
'applet',
|
29
|
-
'area',
|
30
|
-
'b',
|
31
|
-
'base',
|
32
|
-
'basefont',
|
33
|
-
'bdo',
|
34
|
-
'big',
|
35
|
-
'blockquote',
|
36
|
-
'body',
|
37
|
-
'br',
|
38
|
-
'button',
|
39
|
-
'caption',
|
40
|
-
'center',
|
41
|
-
'cite',
|
42
|
-
'code',
|
43
|
-
'col',
|
44
|
-
'colgroup',
|
45
|
-
'dd',
|
46
|
-
'del',
|
47
|
-
'dfn',
|
48
|
-
'dir',
|
49
|
-
'div',
|
50
|
-
'dl',
|
51
|
-
'dt',
|
52
|
-
'em',
|
53
|
-
'fieldset',
|
54
|
-
'font',
|
55
|
-
'form',
|
56
|
-
'frame',
|
57
|
-
'frameset',
|
58
|
-
'h1',
|
59
|
-
'h2',
|
60
|
-
'h3',
|
61
|
-
'h4',
|
62
|
-
'h5',
|
63
|
-
'h6',
|
64
|
-
'head',
|
65
|
-
'hr',
|
66
|
-
'html',
|
67
|
-
'i',
|
68
|
-
'iframe',
|
69
|
-
'img',
|
70
|
-
'input',
|
71
|
-
'ins',
|
72
|
-
'isindex',
|
73
|
-
'kbd',
|
74
|
-
'label',
|
75
|
-
'legend',
|
76
|
-
'li',
|
77
|
-
'link',
|
78
|
-
'map',
|
79
|
-
'menu',
|
80
|
-
'meta',
|
81
|
-
'noframes',
|
82
|
-
'noscript',
|
83
|
-
'object',
|
84
|
-
'ol',
|
85
|
-
'optgroup',
|
86
|
-
'option',
|
87
|
-
'p',
|
88
|
-
'param',
|
89
|
-
'pre',
|
90
|
-
'q',
|
91
|
-
's',
|
92
|
-
'samp',
|
93
|
-
'select',
|
94
|
-
'small',
|
95
|
-
'span',
|
96
|
-
'strike',
|
97
|
-
'strong',
|
98
|
-
'style',
|
99
|
-
'sub',
|
100
|
-
'sup',
|
101
|
-
'table',
|
102
|
-
'tbody',
|
103
|
-
'td',
|
104
|
-
'textarea',
|
105
|
-
'tfoot',
|
106
|
-
'th',
|
107
|
-
'thead',
|
108
|
-
'title',
|
109
|
-
'tr',
|
110
|
-
'tt',
|
111
|
-
'u',
|
112
|
-
'ul',
|
113
|
-
'var',
|
114
|
-
'xmp'
|
115
|
-
]
|
116
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
def name
|
2
|
-
'Naughty Q'
|
3
|
-
end
|
4
|
-
|
5
|
-
def description
|
6
|
-
'Detects possible misuse of the letter Q (English language)'
|
7
|
-
end
|
8
|
-
|
9
|
-
def parse
|
10
|
-
matches = self.text.downcase.scan(/q./)
|
11
|
-
|
12
|
-
return unless matches
|
13
|
-
|
14
|
-
matches.each do |match|
|
15
|
-
self.append_score = 0.2 if match != 'qu' and match != 'qa' and match !~ /q\s/
|
16
|
-
end
|
17
|
-
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
def parse
|
2
|
-
string = self.text.downcase
|
3
|
-
|
4
|
-
# strip out "good numbers"
|
5
|
-
string.gsub!(/h[1-6]/, '')
|
6
|
-
string.gsub!(/(^|\b)\d+($|\b)/, '')
|
7
|
-
string.gsub!(/(^|\b)\d+(,|\.)\d+($|\b)/, '')
|
8
|
-
string.gsub!(/(^|\b)\d+(st|nd|rd|th)($|\b)/, '')
|
9
|
-
|
10
|
-
[
|
11
|
-
/\w\d+/,
|
12
|
-
/\d+\w/,
|
13
|
-
/\d+($|\b)/
|
14
|
-
].each do |regexp|
|
15
|
-
matches = string.scan(regexp)
|
16
|
-
|
17
|
-
next if matches.empty?
|
18
|
-
|
19
|
-
matches.each do |to_remove|
|
20
|
-
to_remove = to_remove.to_s
|
21
|
-
string.sub!(to_remove, '') unless to_remove.empty?
|
22
|
-
self.append_score = 0.1
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def name
|
28
|
-
'Numbers next to words'
|
29
|
-
end
|
30
|
-
|
31
|
-
def description
|
32
|
-
'Detects unusual number/word combinations'
|
33
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
class Despamilator
|
2
|
-
class Filter
|
3
|
-
attr_accessor :matches, :score
|
4
|
-
|
5
|
-
def initialize text
|
6
|
-
@@loaded ||= {}
|
7
|
-
@filters ||= []
|
8
|
-
@matches ||= []
|
9
|
-
@score ||= 0
|
10
|
-
load_filters text
|
11
|
-
run_filters
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def load_filters text
|
17
|
-
Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").each do |filter_file|
|
18
|
-
filter_name = classify_filename filter_file
|
19
|
-
filter = @@loaded[filter_name]
|
20
|
-
|
21
|
-
unless filter
|
22
|
-
filter_code = File.open(filter_file, File::RDWR).read
|
23
|
-
filter = Class.new
|
24
|
-
filter.class_eval(
|
25
|
-
"require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
|
26
|
-
)
|
27
|
-
end
|
28
|
-
|
29
|
-
@filters.push(filter.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def run_filters
|
34
|
-
@filters.each do |filter|
|
35
|
-
filter.parse
|
36
|
-
|
37
|
-
if filter.matched?
|
38
|
-
@matches.push(filter)
|
39
|
-
@score += filter.score
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def classify_filename filename
|
45
|
-
classname = ''
|
46
|
-
File.basename(filename).gsub(/\.rb$/, '').split('_').each do |filename_part|
|
47
|
-
classname += filename_part.capitalize
|
48
|
-
end
|
49
|
-
classname || filename.capitalize
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
class Despamilator
|
2
|
-
class FilterBase
|
3
|
-
attr_accessor :text, :score, :filename, :matches
|
4
|
-
|
5
|
-
def initialize text, filename
|
6
|
-
@matches = 0
|
7
|
-
@filename = filename
|
8
|
-
@score = 0
|
9
|
-
@text = text
|
10
|
-
@matched = false
|
11
|
-
end
|
12
|
-
|
13
|
-
def description
|
14
|
-
raise "No description defined in #{filename}"
|
15
|
-
end
|
16
|
-
|
17
|
-
def parse blah
|
18
|
-
raise "No parser defined in #{filename}"
|
19
|
-
end
|
20
|
-
|
21
|
-
def name
|
22
|
-
raise "No name defined in #{filename}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def matched?
|
26
|
-
@score > 0
|
27
|
-
end
|
28
|
-
|
29
|
-
protected
|
30
|
-
|
31
|
-
def append_score= new_score
|
32
|
-
@matches += 1
|
33
|
-
@score += new_score
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
-
|
3
|
-
require 'despamilator/filter'
|
4
|
-
|
5
|
-
class Despamilator
|
6
|
-
VERSION = "0.2"
|
7
|
-
|
8
|
-
def initialize text
|
9
|
-
@filters = Despamilator::Filter.new text
|
10
|
-
end
|
11
|
-
|
12
|
-
def score
|
13
|
-
@filters.score
|
14
|
-
end
|
15
|
-
|
16
|
-
def matched_by
|
17
|
-
@filters.matches
|
18
|
-
end
|
19
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
-
|
3
|
-
describe Despamilator do
|
4
|
-
before :each do
|
5
|
-
@dspam = Despamilator.new('this text is absolutely fine')
|
6
|
-
end
|
7
|
-
|
8
|
-
it "should return a zero score for fine text" do
|
9
|
-
@dspam.score.should == 0
|
10
|
-
end
|
11
|
-
|
12
|
-
it "should return no matching filter for fine text" do
|
13
|
-
@dspam.matched_by.should be_empty
|
14
|
-
end
|
15
|
-
end
|
@@ -1,144 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
context "HtmlTags" do
|
4
|
-
describe "filtering" do
|
5
|
-
[
|
6
|
-
'!--',
|
7
|
-
'!DOCTYPE',
|
8
|
-
'a',
|
9
|
-
'abbr',
|
10
|
-
'acronym',
|
11
|
-
'address',
|
12
|
-
'applet',
|
13
|
-
'area',
|
14
|
-
'b',
|
15
|
-
'base',
|
16
|
-
'basefont',
|
17
|
-
'bdo',
|
18
|
-
'big',
|
19
|
-
'blockquote',
|
20
|
-
'body',
|
21
|
-
'br',
|
22
|
-
'button',
|
23
|
-
'caption',
|
24
|
-
'center',
|
25
|
-
'cite',
|
26
|
-
'code',
|
27
|
-
'col',
|
28
|
-
'colgroup',
|
29
|
-
'dd',
|
30
|
-
'del',
|
31
|
-
'dfn',
|
32
|
-
'dir',
|
33
|
-
'div',
|
34
|
-
'dl',
|
35
|
-
'dt',
|
36
|
-
'em',
|
37
|
-
'fieldset',
|
38
|
-
'font',
|
39
|
-
'form',
|
40
|
-
'frame',
|
41
|
-
'frameset',
|
42
|
-
'h1',
|
43
|
-
'h2',
|
44
|
-
'h3',
|
45
|
-
'h4',
|
46
|
-
'h5',
|
47
|
-
'h6',
|
48
|
-
'head',
|
49
|
-
'hr',
|
50
|
-
'html',
|
51
|
-
'i',
|
52
|
-
'iframe',
|
53
|
-
'img',
|
54
|
-
'input',
|
55
|
-
'ins',
|
56
|
-
'isindex',
|
57
|
-
'kbd',
|
58
|
-
'label',
|
59
|
-
'legend',
|
60
|
-
'li',
|
61
|
-
'link',
|
62
|
-
'map',
|
63
|
-
'menu',
|
64
|
-
'meta',
|
65
|
-
'noframes',
|
66
|
-
'noscript',
|
67
|
-
'object',
|
68
|
-
'ol',
|
69
|
-
'optgroup',
|
70
|
-
'option',
|
71
|
-
'p',
|
72
|
-
'param',
|
73
|
-
'pre',
|
74
|
-
'q',
|
75
|
-
's',
|
76
|
-
'samp',
|
77
|
-
'select',
|
78
|
-
'small',
|
79
|
-
'span',
|
80
|
-
'strike',
|
81
|
-
'strong',
|
82
|
-
'style',
|
83
|
-
'sub',
|
84
|
-
'sup',
|
85
|
-
'table',
|
86
|
-
'tbody',
|
87
|
-
'td',
|
88
|
-
'textarea',
|
89
|
-
'tfoot',
|
90
|
-
'th',
|
91
|
-
'thead',
|
92
|
-
'title',
|
93
|
-
'tr',
|
94
|
-
'tt',
|
95
|
-
'u',
|
96
|
-
'ul',
|
97
|
-
'var',
|
98
|
-
'xmp'
|
99
|
-
].each do |script_tag|
|
100
|
-
[script_tag.upcase, script_tag.downcase].each do |script_tag|
|
101
|
-
[
|
102
|
-
"<#{script_tag}",
|
103
|
-
"#{script_tag}/>",
|
104
|
-
"<#{script_tag}/>",
|
105
|
-
"< #{script_tag} ",
|
106
|
-
"#{script_tag} />",
|
107
|
-
"<\n#{script_tag}\n/>",
|
108
|
-
"<\n#{script_tag} ",
|
109
|
-
"#{script_tag}\n/>",
|
110
|
-
"<\r#{script_tag}\r/>"
|
111
|
-
].each do |script_tag|
|
112
|
-
it "should detect '#{script_tag}'" do
|
113
|
-
dspam = Despamilator.new(script_tag)
|
114
|
-
dspam.score.should == 0.3
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
describe 'attributes' do
|
121
|
-
before :all do
|
122
|
-
@dspam = Despamilator.new('<xmp>').matched_by.first
|
123
|
-
end
|
124
|
-
|
125
|
-
it "should have a name" do
|
126
|
-
@dspam.name.should == 'Detects HTML tags in text'
|
127
|
-
end
|
128
|
-
|
129
|
-
it "should have a description" do
|
130
|
-
@dspam.description.should == 'Searches for various HTML tags'
|
131
|
-
end
|
132
|
-
|
133
|
-
it "should have a filename" do
|
134
|
-
@dspam.filename.should == 'html_tags.rb'
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
describe 'bug fixes' do
|
139
|
-
it "should detect an h1" do
|
140
|
-
Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
|
141
|
-
end
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
describe "NaughtyQ" do
|
4
|
-
it "should return a score for 1 misplaced q" do
|
5
|
-
dspam = Despamilator.new('qtu')
|
6
|
-
dspam.score.should == 0.2
|
7
|
-
end
|
8
|
-
|
9
|
-
describe 'attributes' do
|
10
|
-
before :each do
|
11
|
-
@filter = Despamilator.new('qtqt').matched_by.first
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should have a filename" do
|
15
|
-
@filter.filename.should == 'naughty_q.rb'
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should have a name" do
|
19
|
-
@filter.name.should == 'Naughty Q'
|
20
|
-
end
|
21
|
-
|
22
|
-
it "should have a description" do
|
23
|
-
@filter.description.should == 'Detects possible misuse of the letter Q (English language)'
|
24
|
-
end
|
25
|
-
|
26
|
-
it "should have a number of matches" do
|
27
|
-
@filter.matches.should == 2
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should have a score" do
|
31
|
-
@filter.score.should == 0.4
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should score more for 3 misplaced q's" do
|
36
|
-
dspam = Despamilator.new('qtuqsq')
|
37
|
-
dspam.score.to_s.should == 0.4.to_s
|
38
|
-
end
|
39
|
-
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
describe "NumbersAndWords" do
|
4
|
-
[1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd'].each do |number|
|
5
|
-
it "should return a blank for a #{number}" do
|
6
|
-
dspam = Despamilator.new(number)
|
7
|
-
dspam.score.should == 0
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
['wanga x5 mool', '4ghk', 'XTHL9'].each do |string|
|
12
|
-
it "should detect suspicious number word combos such as #{string}" do
|
13
|
-
dspam = Despamilator.new(string)
|
14
|
-
dspam.score.should == 0.1
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
['4wanga x5 mool', '4g6hk', 'XT7HL9', '77th8nd'].each do |string|
|
19
|
-
it "should detect multiple suspicious number word combos such as #{string}" do
|
20
|
-
dspam = Despamilator.new(string)
|
21
|
-
dspam.score.should == 0.2
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
[1, 2, 3, 4, 5, 6].each do |tag_no|
|
26
|
-
header_tag = "h#{tag_no}"
|
27
|
-
|
28
|
-
it "should ignore html header tag #{header_tag}" do
|
29
|
-
dspam = Despamilator.new(header_tag)
|
30
|
-
dspam.score.should == 0
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
describe 'attributes' do
|
35
|
-
before :each do
|
36
|
-
@filter = Despamilator.new('X5T').matched_by.first
|
37
|
-
end
|
38
|
-
|
39
|
-
it "should have a filename" do
|
40
|
-
@filter.filename.should == 'numbers_and_words.rb'
|
41
|
-
end
|
42
|
-
|
43
|
-
it "should have a name" do
|
44
|
-
@filter.name.should == 'Numbers next to words'
|
45
|
-
end
|
46
|
-
|
47
|
-
it "should have a description" do
|
48
|
-
@filter.description.should == 'Detects unusual number/word combinations'
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should have a number of matches" do
|
52
|
-
@filter.matches.should == 1
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should have a score" do
|
56
|
-
@filter.score.should == 0.1
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
@@ -1,32 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
context "ScriptTag" do
|
4
|
-
describe "detecting various script tags" do
|
5
|
-
['<script type="whatever">', '<script></script>', '</script>', '<script>', "<script\n>"].each do |script_tag|
|
6
|
-
[script_tag.upcase, script_tag.downcase].each do |script_tag|
|
7
|
-
it "should detect '#{script_tag}' of a script tag" do
|
8
|
-
dspam = Despamilator.new(script_tag)
|
9
|
-
dspam.score.should == 1
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
describe 'attributes' do
|
16
|
-
before :all do
|
17
|
-
@dspam = Despamilator.new('<script>').matched_by.first
|
18
|
-
end
|
19
|
-
|
20
|
-
it "should have a name" do
|
21
|
-
@dspam.name.should == 'Detects script tags in text'
|
22
|
-
end
|
23
|
-
|
24
|
-
it "should have a description" do
|
25
|
-
@dspam.description.should == 'Searches for variations for the HTML script tag'
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should have a filename" do
|
29
|
-
@dspam.filename.should == 'script_tag.rb'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
@@ -1 +0,0 @@
|
|
1
|
-
--colour
|
@@ -1,21 +0,0 @@
|
|
1
|
-
begin
|
2
|
-
require 'spec'
|
3
|
-
rescue LoadError
|
4
|
-
require 'rubygems' unless ENV['NO_RUBYGEMS']
|
5
|
-
require 'spec'
|
6
|
-
end
|
7
|
-
begin
|
8
|
-
require 'spec/rake/spectask'
|
9
|
-
rescue LoadError
|
10
|
-
puts <<-EOS
|
11
|
-
To use rspec for testing you must install rspec gem:
|
12
|
-
gem install rspec
|
13
|
-
EOS
|
14
|
-
exit(0)
|
15
|
-
end
|
16
|
-
|
17
|
-
desc "Run the specs under spec/models"
|
18
|
-
Spec::Rake::SpecTask.new do |t|
|
19
|
-
t.spec_opts = ['--options', "spec/spec.opts"]
|
20
|
-
t.spec_files = FileList['spec/**/*_spec.rb']
|
21
|
-
end
|
data/pkg/despamilator-0.1.gem
DELETED
Binary file
|
data/pkg/despamilator-0.1.tgz
DELETED
Binary file
|