despamilator 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -1
- data/Manifest.txt +4 -23
- data/README.rdoc +4 -1
- data/despamilator.gemspec +1 -1
- data/lib/despamilator/filter/html_tags.rb +1 -1
- data/lib/despamilator/filter/ip_address_url.rb +11 -0
- data/lib/despamilator/filter/naughty_q.rb +3 -2
- data/lib/despamilator/filter/shouting.rb +21 -0
- data/lib/despamilator/filter.rb +6 -10
- data/lib/despamilator.rb +2 -2
- data/spec/filters/html_tags_spec.rb +12 -9
- data/spec/filters/ip_address_url_spec.rb +29 -0
- data/spec/filters/shouting_spec.rb +49 -0
- metadata +15 -51
- data/pkg/despamilator-0.1/History.txt +0 -4
- data/pkg/despamilator-0.1/Manifest.txt +0 -21
- data/pkg/despamilator-0.1/PostInstall.txt +0 -1
- data/pkg/despamilator-0.1/README.rdoc +0 -107
- data/pkg/despamilator-0.1/Rakefile +0 -33
- data/pkg/despamilator-0.1/despamilator.gemspec +0 -42
- data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +0 -116
- data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +0 -17
- data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +0 -33
- data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +0 -13
- data/pkg/despamilator-0.1/lib/despamilator/filter.rb +0 -52
- data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +0 -37
- data/pkg/despamilator-0.1/lib/despamilator.rb +0 -19
- data/pkg/despamilator-0.1/spec/despamilator_spec.rb +0 -15
- data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +0 -144
- data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +0 -39
- data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +0 -59
- data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +0 -32
- data/pkg/despamilator-0.1/spec/spec.opts +0 -1
- data/pkg/despamilator-0.1/spec/spec_helper.rb +0 -10
- data/pkg/despamilator-0.1/tasks/rspec.rake +0 -21
- data/pkg/despamilator-0.1.gem +0 -0
- data/pkg/despamilator-0.1.tgz +0 -0
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -7,38 +7,19 @@ despamilator.gemspec
|
|
7
7
|
lib/despamilator.rb
|
8
8
|
lib/despamilator/filter.rb
|
9
9
|
lib/despamilator/filter/html_tags.rb
|
10
|
+
lib/despamilator/filter/ip_address_url.rb
|
10
11
|
lib/despamilator/filter/naughty_q.rb
|
11
12
|
lib/despamilator/filter/numbers_and_words.rb
|
12
13
|
lib/despamilator/filter/script_tag.rb
|
14
|
+
lib/despamilator/filter/shouting.rb
|
13
15
|
lib/despamilator/filter_base.rb
|
14
|
-
pkg/despamilator-0.1.gem
|
15
|
-
pkg/despamilator-0.1.tgz
|
16
|
-
pkg/despamilator-0.1/History.txt
|
17
|
-
pkg/despamilator-0.1/Manifest.txt
|
18
|
-
pkg/despamilator-0.1/PostInstall.txt
|
19
|
-
pkg/despamilator-0.1/README.rdoc
|
20
|
-
pkg/despamilator-0.1/Rakefile
|
21
|
-
pkg/despamilator-0.1/despamilator.gemspec
|
22
|
-
pkg/despamilator-0.1/lib/despamilator.rb
|
23
|
-
pkg/despamilator-0.1/lib/despamilator/filter.rb
|
24
|
-
pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
|
25
|
-
pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
|
26
|
-
pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
|
27
|
-
pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
|
28
|
-
pkg/despamilator-0.1/lib/despamilator/filter_base.rb
|
29
|
-
pkg/despamilator-0.1/spec/despamilator_spec.rb
|
30
|
-
pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
|
31
|
-
pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
|
32
|
-
pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
|
33
|
-
pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
|
34
|
-
pkg/despamilator-0.1/spec/spec.opts
|
35
|
-
pkg/despamilator-0.1/spec/spec_helper.rb
|
36
|
-
pkg/despamilator-0.1/tasks/rspec.rake
|
37
16
|
spec/despamilator_spec.rb
|
38
17
|
spec/filters/html_tags_spec.rb
|
18
|
+
spec/filters/ip_address_url_spec.rb
|
39
19
|
spec/filters/naughty_q_spec.rb
|
40
20
|
spec/filters/numbers_and_words_spec.rb
|
41
21
|
spec/filters/script_tag_spec.rb
|
22
|
+
spec/filters/shouting_spec.rb
|
42
23
|
spec/spec.opts
|
43
24
|
spec/spec_helper.rb
|
44
25
|
tasks/rspec.rake
|
data/README.rdoc
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
7
|
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
8
|
-
Spam being submitted in my web forms
|
8
|
+
Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator will apply
|
9
9
|
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
10
10
|
|
11
11
|
== FEATURES/PROBLEMS:
|
@@ -15,6 +15,9 @@ some commonly used heuristics from the world of anti-spam to help you decide whe
|
|
15
15
|
== SYNOPSIS:
|
16
16
|
|
17
17
|
# using Despamilator
|
18
|
+
require 'rubygems'
|
19
|
+
require 'desplamilator'
|
20
|
+
|
18
21
|
dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
19
22
|
|
20
23
|
dspam.score #=> the total score for this string (1 is considered high)
|
data/despamilator.gemspec
CHANGED
@@ -7,11 +7,12 @@ def description
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def parse
|
10
|
-
matches = self.text.downcase.scan(/q
|
10
|
+
matches = self.text.downcase.scan(/q(\w|\d)/)
|
11
11
|
|
12
12
|
return unless matches
|
13
13
|
|
14
14
|
matches.each do |match|
|
15
|
-
|
15
|
+
match = match.first
|
16
|
+
self.append_score = 0.2 unless match == 'u' or match == 'a'
|
16
17
|
end
|
17
18
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
def name
|
2
|
+
'Shouting'
|
3
|
+
end
|
4
|
+
|
5
|
+
def description
|
6
|
+
'Detects and scores shouting (all caps)'
|
7
|
+
end
|
8
|
+
|
9
|
+
def parse
|
10
|
+
# strip HTML
|
11
|
+
text = self.text.gsub(/<\/?[^>]*>/, "")
|
12
|
+
|
13
|
+
return if text.length < 20
|
14
|
+
|
15
|
+
uppercased = text.scan(/[A-Z]/).length
|
16
|
+
lowercased = text.scan(/[a-z]/).length
|
17
|
+
|
18
|
+
if uppercased > 0
|
19
|
+
self.append_score = (uppercased.to_f / (uppercased + lowercased)) * 0.5
|
20
|
+
end
|
21
|
+
end
|
data/lib/despamilator/filter.rb
CHANGED
@@ -3,7 +3,6 @@ class Despamilator
|
|
3
3
|
attr_accessor :matches, :score
|
4
4
|
|
5
5
|
def initialize text
|
6
|
-
@@loaded ||= {}
|
7
6
|
@filters ||= []
|
8
7
|
@matches ||= []
|
9
8
|
@score ||= 0
|
@@ -16,15 +15,12 @@ class Despamilator
|
|
16
15
|
def load_filters text
|
17
16
|
Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").each do |filter_file|
|
18
17
|
filter_name = classify_filename filter_file
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
"require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
|
26
|
-
)
|
27
|
-
end
|
18
|
+
|
19
|
+
filter_code = File.open(filter_file, File::RDWR).read
|
20
|
+
filter = Class.new
|
21
|
+
filter.class_eval(
|
22
|
+
"require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
|
23
|
+
)
|
28
24
|
|
29
25
|
@filters.push(filter.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
|
30
26
|
end
|
data/lib/despamilator.rb
CHANGED
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) ||
|
|
3
3
|
require 'despamilator/filter'
|
4
4
|
|
5
5
|
class Despamilator
|
6
|
-
VERSION = "0.
|
6
|
+
VERSION = "0.3"
|
7
7
|
|
8
8
|
def initialize text
|
9
9
|
@filters = Despamilator::Filter.new text
|
@@ -16,4 +16,4 @@ class Despamilator
|
|
16
16
|
def matched_by
|
17
17
|
@filters.matches
|
18
18
|
end
|
19
|
-
end
|
19
|
+
end
|
@@ -99,18 +99,17 @@ context "HtmlTags" do
|
|
99
99
|
].each do |script_tag|
|
100
100
|
[script_tag.upcase, script_tag.downcase].each do |script_tag|
|
101
101
|
[
|
102
|
-
"<#{script_tag}",
|
103
|
-
"#{script_tag}/>",
|
102
|
+
"<#{script_tag}>",
|
104
103
|
"<#{script_tag}/>",
|
105
|
-
"< #{script_tag} ",
|
106
|
-
"
|
104
|
+
"< #{script_tag} >",
|
105
|
+
"<#{script_tag} />",
|
107
106
|
"<\n#{script_tag}\n/>",
|
108
|
-
"<\n#{script_tag} ",
|
109
|
-
"
|
107
|
+
"<\n#{script_tag} >",
|
108
|
+
"<#{script_tag}\n/>",
|
110
109
|
"<\r#{script_tag}\r/>"
|
111
|
-
].each do |
|
112
|
-
it "should detect '#{
|
113
|
-
dspam = Despamilator.new(
|
110
|
+
].each do |tag|
|
111
|
+
it "should detect '#{tag}'" do
|
112
|
+
dspam = Despamilator.new(tag)
|
114
113
|
dspam.score.should == 0.3
|
115
114
|
end
|
116
115
|
end
|
@@ -139,6 +138,10 @@ context "HtmlTags" do
|
|
139
138
|
it "should detect an h1" do
|
140
139
|
Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
|
141
140
|
end
|
141
|
+
|
142
|
+
it "should not detect tags twice" do
|
143
|
+
Despamilator.new('<i>italic</i>').score.should == 0.3
|
144
|
+
end
|
142
145
|
end
|
143
146
|
end
|
144
147
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
describe "IP Address URL" do
|
4
|
+
describe 'attributes' do
|
5
|
+
before :each do
|
6
|
+
@filter = Despamilator.new('http://12.34.56.78/').matched_by.first
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have a filename" do
|
10
|
+
@filter.filename.should == 'ip_address_url.rb'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should have a name" do
|
14
|
+
@filter.name.should == 'IP Address URL'
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should have a description" do
|
18
|
+
@filter.description.should == 'Detects IP address URLs'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should have a number of matches" do
|
22
|
+
@filter.matches.should == 1
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should have a score" do
|
26
|
+
@filter.score.should == 0.5
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
+
|
3
|
+
describe "Shouting" do
|
4
|
+
describe 'attributes' do
|
5
|
+
before :each do
|
6
|
+
@filter = Despamilator.new('HELLO THERE!! THIS IS SHOUTING!!').matched_by.first
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have a filename" do
|
10
|
+
@filter.filename.should == 'shouting.rb'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should have a name" do
|
14
|
+
@filter.name.should == 'Shouting'
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should have a description" do
|
18
|
+
@filter.description.should == 'Detects and scores shouting (all caps)'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should have a number of matches" do
|
22
|
+
@filter.matches.should == 1
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should have a score" do
|
26
|
+
@filter.score.should == 0.5
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe "filter" do
|
31
|
+
it "should strip out HTML" do
|
32
|
+
Despamilator.new('<H1>this is a flipping html tag whose contents is very long</h1>').matched_by.select {|a| a.name == 'Shouting'}.should be_empty
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should ignore strings less than 20 characters long" do
|
36
|
+
Despamilator.new('ABCD EFG HIJKLM NOP').matched_by.select {|a| a.name == 'Shouting'}.should be_empty
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should score based on a percentage of uppercase words" do
|
40
|
+
[
|
41
|
+
['this is a lowercased string', 0],
|
42
|
+
['this lil string is 50 PERCENT SHOUTING', 0.25],
|
43
|
+
['THIS LIL STRING IS 100 PERCENT SHOUTING', 0.5]
|
44
|
+
].each do |string, expected_score|
|
45
|
+
Despamilator.new(string).score.should == expected_score
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
metadata
CHANGED
@@ -4,8 +4,8 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
version: "0.
|
7
|
+
- 3
|
8
|
+
version: "0.3"
|
9
9
|
platform: ruby
|
10
10
|
authors:
|
11
11
|
- Stephen Hardisty
|
@@ -13,7 +13,7 @@ autorequire:
|
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
15
|
|
16
|
-
date: 2010-
|
16
|
+
date: 2010-05-11 00:00:00 +10:00
|
17
17
|
default_executable:
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
@@ -26,41 +26,27 @@ dependencies:
|
|
26
26
|
segments:
|
27
27
|
- 2
|
28
28
|
- 0
|
29
|
-
-
|
30
|
-
version: 2.0.
|
29
|
+
- 4
|
30
|
+
version: 2.0.4
|
31
31
|
type: :development
|
32
32
|
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: gemcutter
|
35
|
-
prerelease: false
|
36
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
segments:
|
41
|
-
- 0
|
42
|
-
- 5
|
43
|
-
- 0
|
44
|
-
version: 0.5.0
|
45
|
-
type: :development
|
46
|
-
version_requirements: *id002
|
47
33
|
- !ruby/object:Gem::Dependency
|
48
34
|
name: hoe
|
49
35
|
prerelease: false
|
50
|
-
requirement: &
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
38
|
- - ">="
|
53
39
|
- !ruby/object:Gem::Version
|
54
40
|
segments:
|
55
41
|
- 2
|
56
|
-
-
|
42
|
+
- 6
|
57
43
|
- 0
|
58
|
-
version: 2.
|
44
|
+
version: 2.6.0
|
59
45
|
type: :development
|
60
|
-
version_requirements: *
|
46
|
+
version_requirements: *id002
|
61
47
|
description: |-
|
62
48
|
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
63
|
-
Spam being submitted in my web forms
|
49
|
+
Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator will apply
|
64
50
|
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
65
51
|
email:
|
66
52
|
- moowahaha@hotmail.com
|
@@ -72,9 +58,6 @@ extra_rdoc_files:
|
|
72
58
|
- History.txt
|
73
59
|
- Manifest.txt
|
74
60
|
- PostInstall.txt
|
75
|
-
- pkg/despamilator-0.1/History.txt
|
76
|
-
- pkg/despamilator-0.1/Manifest.txt
|
77
|
-
- pkg/despamilator-0.1/PostInstall.txt
|
78
61
|
files:
|
79
62
|
- History.txt
|
80
63
|
- Manifest.txt
|
@@ -85,38 +68,19 @@ files:
|
|
85
68
|
- lib/despamilator.rb
|
86
69
|
- lib/despamilator/filter.rb
|
87
70
|
- lib/despamilator/filter/html_tags.rb
|
71
|
+
- lib/despamilator/filter/ip_address_url.rb
|
88
72
|
- lib/despamilator/filter/naughty_q.rb
|
89
73
|
- lib/despamilator/filter/numbers_and_words.rb
|
90
74
|
- lib/despamilator/filter/script_tag.rb
|
75
|
+
- lib/despamilator/filter/shouting.rb
|
91
76
|
- lib/despamilator/filter_base.rb
|
92
|
-
- pkg/despamilator-0.1.gem
|
93
|
-
- pkg/despamilator-0.1.tgz
|
94
|
-
- pkg/despamilator-0.1/History.txt
|
95
|
-
- pkg/despamilator-0.1/Manifest.txt
|
96
|
-
- pkg/despamilator-0.1/PostInstall.txt
|
97
|
-
- pkg/despamilator-0.1/README.rdoc
|
98
|
-
- pkg/despamilator-0.1/Rakefile
|
99
|
-
- pkg/despamilator-0.1/despamilator.gemspec
|
100
|
-
- pkg/despamilator-0.1/lib/despamilator.rb
|
101
|
-
- pkg/despamilator-0.1/lib/despamilator/filter.rb
|
102
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
|
103
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
|
104
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
|
105
|
-
- pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
|
106
|
-
- pkg/despamilator-0.1/lib/despamilator/filter_base.rb
|
107
|
-
- pkg/despamilator-0.1/spec/despamilator_spec.rb
|
108
|
-
- pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
|
109
|
-
- pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
|
110
|
-
- pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
|
111
|
-
- pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
|
112
|
-
- pkg/despamilator-0.1/spec/spec.opts
|
113
|
-
- pkg/despamilator-0.1/spec/spec_helper.rb
|
114
|
-
- pkg/despamilator-0.1/tasks/rspec.rake
|
115
77
|
- spec/despamilator_spec.rb
|
116
78
|
- spec/filters/html_tags_spec.rb
|
79
|
+
- spec/filters/ip_address_url_spec.rb
|
117
80
|
- spec/filters/naughty_q_spec.rb
|
118
81
|
- spec/filters/numbers_and_words_spec.rb
|
119
82
|
- spec/filters/script_tag_spec.rb
|
83
|
+
- spec/filters/shouting_spec.rb
|
120
84
|
- spec/spec.opts
|
121
85
|
- spec/spec_helper.rb
|
122
86
|
- tasks/rspec.rake
|
@@ -150,6 +114,6 @@ rubyforge_project: despamilator
|
|
150
114
|
rubygems_version: 1.3.6
|
151
115
|
signing_key:
|
152
116
|
specification_version: 3
|
153
|
-
summary: "Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms
|
117
|
+
summary: "Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms and CAPTCHAS being intrusive"
|
154
118
|
test_files: []
|
155
119
|
|
@@ -1,21 +0,0 @@
|
|
1
|
-
History.txt
|
2
|
-
Manifest.txt
|
3
|
-
PostInstall.txt
|
4
|
-
README.rdoc
|
5
|
-
Rakefile
|
6
|
-
despamilator.gemspec
|
7
|
-
lib/despamilator.rb
|
8
|
-
lib/despamilator/filter.rb
|
9
|
-
lib/despamilator/filter/html_tags.rb
|
10
|
-
lib/despamilator/filter/naughty_q.rb
|
11
|
-
lib/despamilator/filter/numbers_and_words.rb
|
12
|
-
lib/despamilator/filter/script_tag.rb
|
13
|
-
lib/despamilator/filter_base.rb
|
14
|
-
spec/despamilator_spec.rb
|
15
|
-
spec/filters/html_tags_spec.rb
|
16
|
-
spec/filters/naughty_q_spec.rb
|
17
|
-
spec/filters/numbers_and_words_spec.rb
|
18
|
-
spec/filters/script_tag_spec.rb
|
19
|
-
spec/spec.opts
|
20
|
-
spec/spec_helper.rb
|
21
|
-
tasks/rspec.rake
|
@@ -1 +0,0 @@
|
|
1
|
-
For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamliator
|
@@ -1,107 +0,0 @@
|
|
1
|
-
= Despamilator
|
2
|
-
|
3
|
-
* http://github.com/moowahaha/despamliator
|
4
|
-
|
5
|
-
== DESCRIPTION:
|
6
|
-
|
7
|
-
Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
8
|
-
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
|
9
|
-
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
|
10
|
-
|
11
|
-
== FEATURES/PROBLEMS:
|
12
|
-
|
13
|
-
* rspec will be run twice. How annoying?!
|
14
|
-
|
15
|
-
== SYNOPSIS:
|
16
|
-
|
17
|
-
# using Despamilator
|
18
|
-
dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
19
|
-
|
20
|
-
dspam.score #=> the total score for this string (1 is considered high)
|
21
|
-
dspam.matched_by #=> array of matching filters
|
22
|
-
first_match = dspam.matched_by.first #=> first matching filter
|
23
|
-
first_match.name #=> some string with the name of the filter
|
24
|
-
first_match.description #=> some string to describe
|
25
|
-
first_match.score #=> the individual score assigned by this filter
|
26
|
-
|
27
|
-
# adding a new filter! example: detecting the letter "a"
|
28
|
-
# put the following code in lib/despamilator/filter/detect_letter_a.rb
|
29
|
-
def name
|
30
|
-
'Detecting the letter A'
|
31
|
-
end
|
32
|
-
|
33
|
-
def description
|
34
|
-
'Detects the letter "a" in a string for no reason other than a demo'
|
35
|
-
end
|
36
|
-
|
37
|
-
def parse
|
38
|
-
if self.text.downcase.scan(/a/)
|
39
|
-
# add 0.1 to the score of the text
|
40
|
-
self.append_score = 0.1
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
== FILTERING:
|
45
|
-
|
46
|
-
As stated, this is a heuristic scanner so its up to the user to decide the thresholds of the scanner. I usually
|
47
|
-
say "it's spam" if the score reaches 1.
|
48
|
-
|
49
|
-
The score will be added to incrementally by each matching filter. So if there is some HTML in there, it will be added
|
50
|
-
to the score. If there is also a script tag of some sort, that will add more.
|
51
|
-
|
52
|
-
Each filter decides how much of a score it assigns. For example, detecting a number next to a letter
|
53
|
-
(the numbers_an_words filter) is only a mild hint compared with a script tag (detected by the script_tag filter).
|
54
|
-
|
55
|
-
=== NEW FILTERS:
|
56
|
-
|
57
|
-
I absolutely welcome new filters and experiments. New filters should be put in the 'lib/despamilator/filter/' directory.
|
58
|
-
The core filtering code will detect and use what is in there so you only need to drop the code in.
|
59
|
-
Filters should be simple, no classes etc wrapped around them and should try to perform one simple task.
|
60
|
-
They should always supply the following methods:
|
61
|
-
|
62
|
-
* name #=> the name of your filter.
|
63
|
-
* description #=> what your filter will look for.
|
64
|
-
* parse #=> the method that will be called when parsing.
|
65
|
-
|
66
|
-
Along side the above, the following methods are made available to each filter:
|
67
|
-
|
68
|
-
* text #=> a copy of the text your parser will parse
|
69
|
-
* append_score= #=> method to append a score to the text if there are matches in your parser.
|
70
|
-
* matched? #=> whether or not any filter has so far detected something suspect
|
71
|
-
* score #=> the current score assigned to the text
|
72
|
-
|
73
|
-
spec tests are an absolute must!
|
74
|
-
|
75
|
-
|
76
|
-
== REQUIREMENTS:
|
77
|
-
|
78
|
-
* hoe
|
79
|
-
* rspec
|
80
|
-
|
81
|
-
|
82
|
-
== INSTALL:
|
83
|
-
|
84
|
-
$ sudo gem install despamilator
|
85
|
-
|
86
|
-
== LICENSE:
|
87
|
-
|
88
|
-
Copyright (c) 2010 Stephen Hardisty
|
89
|
-
|
90
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
91
|
-
a copy of this software and associated documentation files (the
|
92
|
-
'Software'), to deal in the Software without restriction, including
|
93
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
94
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
95
|
-
permit persons to whom the Software is furnished to do so, subject to
|
96
|
-
the following conditions:
|
97
|
-
|
98
|
-
The above copyright notice and this permission notice shall be
|
99
|
-
included in all copies or substantial portions of the Software.
|
100
|
-
|
101
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
102
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
103
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
104
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
105
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
106
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
107
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
gem 'hoe', '>= 2.1.0'
|
3
|
-
require 'hoe'
|
4
|
-
require 'fileutils'
|
5
|
-
require './lib/despamilator'
|
6
|
-
|
7
|
-
Hoe.plugin :newgem
|
8
|
-
# Hoe.plugin :website
|
9
|
-
# Hoe.plugin :cucumberfeatures
|
10
|
-
|
11
|
-
# Generate all the Rake tasks
|
12
|
-
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
13
|
-
$hoe = Hoe.spec 'despamilator' do
|
14
|
-
self.developer 'Stephen Hardisty', 'moowahaha@hotmail.com'
|
15
|
-
self.post_install_message = 'PostInstall.txt'
|
16
|
-
self.rubyforge_name = self.name # TODO this is default value
|
17
|
-
# self.extra_deps = [['activesupport','>= 2.0.2']]
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'newgem/tasks'
|
22
|
-
Dir['tasks/**/*.rake'].each { |t| load t }
|
23
|
-
|
24
|
-
# TODO - want other tests/tasks run by default? Add them to the list
|
25
|
-
# remove_task :default
|
26
|
-
task :test => [:spec]
|
27
|
-
task :default => [:test]
|
28
|
-
task :install => [:install_gem]
|
29
|
-
|
30
|
-
task :cultivate do
|
31
|
-
system "touch Manifest.txt; rake check_manifest | grep -v \"(in \" | patch"
|
32
|
-
system "rake debug_gem | grep -v \"(in \" > `basename \\`pwd\\``.gemspec"
|
33
|
-
end
|
@@ -1,42 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do |s|
|
4
|
-
s.name = %q{despamilator}
|
5
|
-
s.version = "0.2"
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
-
s.authors = ["Stephen Hardisty"]
|
9
|
-
s.date = %q{2010-03-28}
|
10
|
-
s.description = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
|
11
|
-
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
|
12
|
-
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.}
|
13
|
-
s.email = ["moowahaha@hotmail.com"]
|
14
|
-
s.extra_rdoc_files = ["History.txt", "Manifest.txt", "PostInstall.txt", "pkg/despamilator-0.1/History.txt", "pkg/despamilator-0.1/Manifest.txt", "pkg/despamilator-0.1/PostInstall.txt"]
|
15
|
-
s.files = ["History.txt", "Manifest.txt", "PostInstall.txt", "README.rdoc", "Rakefile", "despamilator.gemspec", "lib/despamilator.rb", "lib/despamilator/filter.rb", "lib/despamilator/filter/html_tags.rb", "lib/despamilator/filter/naughty_q.rb", "lib/despamilator/filter/numbers_and_words.rb", "lib/despamilator/filter/script_tag.rb", "lib/despamilator/filter_base.rb", "pkg/despamilator-0.1.gem", "pkg/despamilator-0.1.tgz", "pkg/despamilator-0.1/History.txt", "pkg/despamilator-0.1/Manifest.txt", "pkg/despamilator-0.1/PostInstall.txt", "pkg/despamilator-0.1/README.rdoc", "pkg/despamilator-0.1/Rakefile", "pkg/despamilator-0.1/despamilator.gemspec", "pkg/despamilator-0.1/lib/despamilator.rb", "pkg/despamilator-0.1/lib/despamilator/filter.rb", "pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb", "pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb", "pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb", "pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb", "pkg/despamilator-0.1/lib/despamilator/filter_base.rb", "pkg/despamilator-0.1/spec/despamilator_spec.rb", "pkg/despamilator-0.1/spec/filters/html_tags_spec.rb", "pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb", "pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb", "pkg/despamilator-0.1/spec/filters/script_tag_spec.rb", "pkg/despamilator-0.1/spec/spec.opts", "pkg/despamilator-0.1/spec/spec_helper.rb", "pkg/despamilator-0.1/tasks/rspec.rake", "spec/despamilator_spec.rb", "spec/filters/html_tags_spec.rb", "spec/filters/naughty_q_spec.rb", "spec/filters/numbers_and_words_spec.rb", "spec/filters/script_tag_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "tasks/rspec.rake"]
|
16
|
-
s.homepage = %q{http://github.com/moowahaha/despamliator}
|
17
|
-
s.post_install_message = %q{PostInstall.txt}
|
18
|
-
s.rdoc_options = ["--main", "README.rdoc"]
|
19
|
-
s.require_paths = ["lib"]
|
20
|
-
s.rubyforge_project = %q{despamilator}
|
21
|
-
s.rubygems_version = %q{1.3.6}
|
22
|
-
s.summary = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms AND CAPTCHAS being intrusive}
|
23
|
-
|
24
|
-
if s.respond_to? :specification_version then
|
25
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
26
|
-
s.specification_version = 3
|
27
|
-
|
28
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
29
|
-
s.add_development_dependency(%q<rubyforge>, [">= 2.0.3"])
|
30
|
-
s.add_development_dependency(%q<gemcutter>, [">= 0.5.0"])
|
31
|
-
s.add_development_dependency(%q<hoe>, [">= 2.5.0"])
|
32
|
-
else
|
33
|
-
s.add_dependency(%q<rubyforge>, [">= 2.0.3"])
|
34
|
-
s.add_dependency(%q<gemcutter>, [">= 0.5.0"])
|
35
|
-
s.add_dependency(%q<hoe>, [">= 2.5.0"])
|
36
|
-
end
|
37
|
-
else
|
38
|
-
s.add_dependency(%q<rubyforge>, [">= 2.0.3"])
|
39
|
-
s.add_dependency(%q<gemcutter>, [">= 0.5.0"])
|
40
|
-
s.add_dependency(%q<hoe>, [">= 2.5.0"])
|
41
|
-
end
|
42
|
-
end
|
@@ -1,116 +0,0 @@
|
|
1
|
-
def parse
|
2
|
-
html = self.text.downcase
|
3
|
-
|
4
|
-
html_tags.each do |tag|
|
5
|
-
if html.match(/<\s*#{tag}\W/) || html.match(/<\n*#{tag}\W/) || html.match(/\W#{tag}\s*\//) || html.match(/\W#{tag}\n*\//)
|
6
|
-
self.append_score = 0.3
|
7
|
-
end
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
'Detects HTML tags in text'
|
13
|
-
end
|
14
|
-
|
15
|
-
def description
|
16
|
-
'Searches for various HTML tags'
|
17
|
-
end
|
18
|
-
|
19
|
-
def html_tags
|
20
|
-
# make sure these are lowercase, in order to save processing
|
21
|
-
[
|
22
|
-
'!--',
|
23
|
-
'!doctype',
|
24
|
-
'a',
|
25
|
-
'abbr',
|
26
|
-
'acronym',
|
27
|
-
'address',
|
28
|
-
'applet',
|
29
|
-
'area',
|
30
|
-
'b',
|
31
|
-
'base',
|
32
|
-
'basefont',
|
33
|
-
'bdo',
|
34
|
-
'big',
|
35
|
-
'blockquote',
|
36
|
-
'body',
|
37
|
-
'br',
|
38
|
-
'button',
|
39
|
-
'caption',
|
40
|
-
'center',
|
41
|
-
'cite',
|
42
|
-
'code',
|
43
|
-
'col',
|
44
|
-
'colgroup',
|
45
|
-
'dd',
|
46
|
-
'del',
|
47
|
-
'dfn',
|
48
|
-
'dir',
|
49
|
-
'div',
|
50
|
-
'dl',
|
51
|
-
'dt',
|
52
|
-
'em',
|
53
|
-
'fieldset',
|
54
|
-
'font',
|
55
|
-
'form',
|
56
|
-
'frame',
|
57
|
-
'frameset',
|
58
|
-
'h1',
|
59
|
-
'h2',
|
60
|
-
'h3',
|
61
|
-
'h4',
|
62
|
-
'h5',
|
63
|
-
'h6',
|
64
|
-
'head',
|
65
|
-
'hr',
|
66
|
-
'html',
|
67
|
-
'i',
|
68
|
-
'iframe',
|
69
|
-
'img',
|
70
|
-
'input',
|
71
|
-
'ins',
|
72
|
-
'isindex',
|
73
|
-
'kbd',
|
74
|
-
'label',
|
75
|
-
'legend',
|
76
|
-
'li',
|
77
|
-
'link',
|
78
|
-
'map',
|
79
|
-
'menu',
|
80
|
-
'meta',
|
81
|
-
'noframes',
|
82
|
-
'noscript',
|
83
|
-
'object',
|
84
|
-
'ol',
|
85
|
-
'optgroup',
|
86
|
-
'option',
|
87
|
-
'p',
|
88
|
-
'param',
|
89
|
-
'pre',
|
90
|
-
'q',
|
91
|
-
's',
|
92
|
-
'samp',
|
93
|
-
'select',
|
94
|
-
'small',
|
95
|
-
'span',
|
96
|
-
'strike',
|
97
|
-
'strong',
|
98
|
-
'style',
|
99
|
-
'sub',
|
100
|
-
'sup',
|
101
|
-
'table',
|
102
|
-
'tbody',
|
103
|
-
'td',
|
104
|
-
'textarea',
|
105
|
-
'tfoot',
|
106
|
-
'th',
|
107
|
-
'thead',
|
108
|
-
'title',
|
109
|
-
'tr',
|
110
|
-
'tt',
|
111
|
-
'u',
|
112
|
-
'ul',
|
113
|
-
'var',
|
114
|
-
'xmp'
|
115
|
-
]
|
116
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
def name
|
2
|
-
'Naughty Q'
|
3
|
-
end
|
4
|
-
|
5
|
-
def description
|
6
|
-
'Detects possible misuse of the letter Q (English language)'
|
7
|
-
end
|
8
|
-
|
9
|
-
def parse
|
10
|
-
matches = self.text.downcase.scan(/q./)
|
11
|
-
|
12
|
-
return unless matches
|
13
|
-
|
14
|
-
matches.each do |match|
|
15
|
-
self.append_score = 0.2 if match != 'qu' and match != 'qa' and match !~ /q\s/
|
16
|
-
end
|
17
|
-
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
def parse
|
2
|
-
string = self.text.downcase
|
3
|
-
|
4
|
-
# strip out "good numbers"
|
5
|
-
string.gsub!(/h[1-6]/, '')
|
6
|
-
string.gsub!(/(^|\b)\d+($|\b)/, '')
|
7
|
-
string.gsub!(/(^|\b)\d+(,|\.)\d+($|\b)/, '')
|
8
|
-
string.gsub!(/(^|\b)\d+(st|nd|rd|th)($|\b)/, '')
|
9
|
-
|
10
|
-
[
|
11
|
-
/\w\d+/,
|
12
|
-
/\d+\w/,
|
13
|
-
/\d+($|\b)/
|
14
|
-
].each do |regexp|
|
15
|
-
matches = string.scan(regexp)
|
16
|
-
|
17
|
-
next if matches.empty?
|
18
|
-
|
19
|
-
matches.each do |to_remove|
|
20
|
-
to_remove = to_remove.to_s
|
21
|
-
string.sub!(to_remove, '') unless to_remove.empty?
|
22
|
-
self.append_score = 0.1
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def name
|
28
|
-
'Numbers next to words'
|
29
|
-
end
|
30
|
-
|
31
|
-
def description
|
32
|
-
'Detects unusual number/word combinations'
|
33
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
class Despamilator
  # Discovers the filter snippets stored in the "filter" directory next to
  # this file, wraps each one in a FilterBase subclass, runs them all against
  # the given text and collects the filters that matched plus the sum of
  # their scores.
  class Filter
    # matches - Array of filter instances whose matched? was true.
    # score   - sum of the scores of those filters.
    attr_accessor :matches, :score

    # Compiled filter classes keyed by class name, shared by all instances so
    # each filter file is read and evaluated only once per process.
    @@loaded = {}

    # text - the object to check; every filter receives its own copy of
    #        text.to_s.
    def initialize(text)
      @filters = []
      @matches = []
      @score = 0
      load_filters(text)
      run_filters
    end

    private

    # Instantiates one filter per file in the filter directory. The file list
    # is sorted so filters always run (and appear in matches) in the same
    # order regardless of platform.
    def load_filters(text)
      Dir.glob(File.dirname(__FILE__) + "/filter/*.rb").sort.each do |filter_file|
        filter_class = filter_class_for(filter_file)
        @filters.push(filter_class.new(text.to_s.dup, File.basename(filter_file)))
      end
    end

    # Returns the FilterBase subclass built from filter_file, compiling it on
    # first use and serving it from the shared cache afterwards.
    def filter_class_for(filter_file)
      filter_name = classify_filename(filter_file)

      @@loaded[filter_name] ||= begin
        require 'despamilator/filter_base' unless defined?(Despamilator::FilterBase)

        # File.read opens read-only and closes the handle when done.
        filter_code = File.read(filter_file)

        # The filter class is defined inside an anonymous container so its
        # constant does not leak into the global namespace.
        container = Class.new
        container.class_eval(
          "class #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
        )
        container.const_get(filter_name)
      end
    end

    # Runs every loaded filter and records the ones that matched.
    def run_filters
      @filters.each do |filter|
        filter.parse
        next unless filter.matched?

        @matches.push(filter)
        @score += filter.score
      end
    end

    # Converts a path such as "filter/naughty_q.rb" into the class name
    # "NaughtyQ".
    def classify_filename(filename)
      File.basename(filename).sub(/\.rb$/, '').split('_').map { |part| part.capitalize }.join
    end
  end
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
class Despamilator
  # Common behaviour for a single filter: it holds the text under test, the
  # name of the file the filter came from, a running score and a count of how
  # many times the score was increased. A concrete filter supplies name,
  # description and parse.
  class FilterBase
    attr_accessor :text, :score, :filename, :matches

    # text     - the text this filter inspects.
    # filename - name of the filter's source file, used in error messages.
    def initialize(text, filename)
      @matches = 0
      @filename = filename
      @score = 0
      @text = text
      @matched = false
    end

    # Raises RuntimeError unless the concrete filter overrides it.
    def description
      raise "No description defined in #{filename}"
    end

    # Raises RuntimeError unless the concrete filter overrides it.
    #
    # Accepts (and ignores) any arguments so that calling it with no
    # arguments reports the missing parser instead of failing with an
    # ArgumentError about the argument count.
    def parse(*_ignored)
      raise "No parser defined in #{filename}"
    end

    # Raises RuntimeError unless the concrete filter overrides it.
    def name
      raise "No name defined in #{filename}"
    end

    # True once any score has been added.
    def matched?
      @score > 0
    end

    protected

    # Adds new_score to the running score and counts one more match.
    def append_score=(new_score)
      @matches += 1
      @score += new_score
    end
  end
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
-
|
3
|
-
require 'despamilator/filter'
|
4
|
-
|
5
|
-
# Entry point of the library: hands the text to Despamilator::Filter on
# construction and exposes the resulting score and matching filters.
class Despamilator
  VERSION = "0.2"

  # text - the text to be checked.
  def initialize(text)
    @filters = Despamilator::Filter.new(text)
  end

  # Score reported by the filter run.
  def score; @filters.score; end

  # Matches reported by the filter run.
  def matched_by; @filters.matches; end
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
-
|
3
|
-
# Harmless text must produce no score and no matching filter.
describe Despamilator do
  before(:each) { @dspam = Despamilator.new('this text is absolutely fine') }

  it "should return a zero score for fine text" do
    @dspam.score.should == 0
  end

  it "should return no matching filter for fine text" do
    @dspam.matched_by.should be_empty
  end
end
|
@@ -1,144 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
# Specs for the html_tags filter.
context "HtmlTags" do
  describe "filtering" do
    # Every tag name is tried in upper and lower case, inside each of the
    # opening/closing fragments listed below.
    html_tags = %w[
      !-- !DOCTYPE a abbr acronym address applet area b base basefont bdo big
      blockquote body br button caption center cite code col colgroup dd del
      dfn dir div dl dt em fieldset font form frame frameset h1 h2 h3 h4 h5 h6
      head hr html i iframe img input ins isindex kbd label legend li link map
      menu meta noframes noscript object ol optgroup option p param pre q s
      samp select small span strike strong style sub sup table tbody td
      textarea tfoot th thead title tr tt u ul var xmp
    ]

    html_tags.each do |tag|
      [tag.upcase, tag.downcase].each do |cased_tag|
        [
          "<#{cased_tag}",
          "#{cased_tag}/>",
          "<#{cased_tag}/>",
          "< #{cased_tag} ",
          "#{cased_tag} />",
          "<\n#{cased_tag}\n/>",
          "<\n#{cased_tag} ",
          "#{cased_tag}\n/>",
          "<\r#{cased_tag}\r/>"
        ].each do |fragment|
          it "should detect '#{fragment}'" do
            Despamilator.new(fragment).score.should == 0.3
          end
        end
      end
    end

    describe 'attributes' do
      # The first (and expected only) filter matching a lone tag.
      before(:all) { @dspam = Despamilator.new('<xmp>').matched_by.first }

      it "should have a name" do
        @dspam.name.should == 'Detects HTML tags in text'
      end

      it "should have a description" do
        @dspam.description.should == 'Searches for various HTML tags'
      end

      it "should have a filename" do
        @dspam.filename.should == 'html_tags.rb'
      end
    end

    describe 'bug fixes' do
      it "should detect an h1" do
        Despamilator.new('<h1>TITLE!!</h1>').score.should == 0.3
      end
    end
  end
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
# Specs for the naughty_q filter.
describe "NaughtyQ" do
  it "should return a score for 1 misplaced q" do
    Despamilator.new('qtu').score.should == 0.2
  end

  describe 'attributes' do
    # 'qtqt' contains two misplaced q's.
    before(:each) { @filter = Despamilator.new('qtqt').matched_by.first }

    it "should have a filename" do
      @filter.filename.should == 'naughty_q.rb'
    end

    it "should have a name" do
      @filter.name.should == 'Naughty Q'
    end

    it "should have a description" do
      @filter.description.should == 'Detects possible misuse of the letter Q (English language)'
    end

    it "should have a number of matches" do
      @filter.matches.should == 2
    end

    it "should have a score" do
      @filter.score.should == 0.4
    end
  end

  it "should score more for 3 misplaced q's" do
    # Compared as strings to sidestep floating point representation.
    Despamilator.new('qtuqsq').score.to_s.should == 0.4.to_s
  end
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
# Specs for the numbers_and_words filter.
describe "NumbersAndWords" do
  # Plain numbers, thousands separators and ordinals must not score.
  [1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd'].each do |number|
    it "should return a blank for a #{number}" do
      Despamilator.new(number).score.should == 0
    end
  end

  ['wanga x5 mool', '4ghk', 'XTHL9'].each do |string|
    it "should detect suspicious number word combos such as #{string}" do
      Despamilator.new(string).score.should == 0.1
    end
  end

  ['4wanga x5 mool', '4g6hk', 'XT7HL9', '77th8nd'].each do |string|
    it "should detect multiple suspicious number word combos such as #{string}" do
      Despamilator.new(string).score.should == 0.2
    end
  end

  (1..6).each do |tag_no|
    header_tag = "h#{tag_no}"

    it "should ignore html header tag #{header_tag}" do
      Despamilator.new(header_tag).score.should == 0
    end
  end

  describe 'attributes' do
    before(:each) { @filter = Despamilator.new('X5T').matched_by.first }

    it "should have a filename" do
      @filter.filename.should == 'numbers_and_words.rb'
    end

    it "should have a name" do
      @filter.name.should == 'Numbers next to words'
    end

    it "should have a description" do
      @filter.description.should == 'Detects unusual number/word combinations'
    end

    it "should have a number of matches" do
      @filter.matches.should == 1
    end

    it "should have a score" do
      @filter.score.should == 0.1
    end
  end
end
|
@@ -1,32 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../spec_helper.rb'
|
2
|
-
|
3
|
-
# Specs for the script_tag filter.
context "ScriptTag" do
  describe "detecting various script tags" do
    script_tags = [
      '<script type="whatever">',
      '<script></script>',
      '</script>',
      '<script>',
      "<script\n>"
    ]

    # Each variation is checked in upper and lower case.
    script_tags.each do |variation|
      [variation.upcase, variation.downcase].each do |script_tag|
        it "should detect '#{script_tag}' of a script tag" do
          Despamilator.new(script_tag).score.should == 1
        end
      end
    end
  end

  describe 'attributes' do
    before(:all) { @dspam = Despamilator.new('<script>').matched_by.first }

    it "should have a name" do
      @dspam.name.should == 'Detects script tags in text'
    end

    it "should have a description" do
      @dspam.description.should == 'Searches for variations for the HTML script tag'
    end

    it "should have a filename" do
      @dspam.filename.should == 'script_tag.rb'
    end
  end
end
|
@@ -1 +0,0 @@
|
|
1
|
-
--colour
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# Load rspec, retrying through rubygems (unless NO_RUBYGEMS is set) when the
# plain require fails.
begin
  require 'spec'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'spec'
end

# Without the rspec rake integration there is nothing to define: print a
# hint and leave with exit status 0.
begin
  require 'spec/rake/spectask'
rescue LoadError
  puts <<-EOS
To use rspec for testing you must install rspec gem:
gem install rspec
EOS
  exit(0)
end

desc "Run the specs under spec/models"
Spec::Rake::SpecTask.new do |task|
  task.spec_opts = ['--options', "spec/spec.opts"]
  task.spec_files = FileList['spec/**/*_spec.rb']
end
|
data/pkg/despamilator-0.1.gem
DELETED
Binary file
|
data/pkg/despamilator-0.1.tgz
DELETED
Binary file
|