despamilator 1.0 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -46,3 +46,13 @@
46
46
  * Split out rails plugin (now despamilator-rails).
47
47
 
48
48
  * Added rdoc.
49
+
50
+ === 1.1
51
+
52
+ * Added new filter to detect a cache busting technique.
53
+
54
+ * Significantly increased aggressiveness of URI and HTML filters.
55
+
56
+ * Added a heap of new spam and clean samples.
57
+
58
+ * Added test string ("gtubs") that will score 100. For use in tests.
data/Manifest.txt CHANGED
@@ -11,6 +11,7 @@ despamilator.gemspec
11
11
  lib/despamilator.rb
12
12
  lib/despamilator/filter.rb
13
13
  lib/despamilator/filter/funky_consonant.rb
14
+ lib/despamilator/filter/gtubs_test_filter.rb
14
15
  lib/despamilator/filter/html_tags.rb
15
16
  lib/despamilator/filter/ip_address_url.rb
16
17
  lib/despamilator/filter/long_words.rb
@@ -20,12 +21,14 @@ lib/despamilator/filter/numbers_and_words.rb
20
21
  lib/despamilator/filter/script_tag.rb
21
22
  lib/despamilator/filter/shouting.rb
22
23
  lib/despamilator/filter/square_brackets.rb
24
+ lib/despamilator/filter/trailing_number.rb
23
25
  lib/despamilator/filter/urls.rb
24
26
  lib/despamilator/filter_base.rb
25
27
  scripts/despamilator_score.rb
26
28
  spec/despamilator_spec.rb
27
29
  spec/filter_base_spec.rb
28
30
  spec/filters/funky_consonant_spec.rb
31
+ spec/filters/gtubs_test_filter_spec.rb
29
32
  spec/filters/html_tags_spec.rb
30
33
  spec/filters/ip_address_url_spec.rb
31
34
  spec/filters/long_words_spec.rb
@@ -35,6 +38,7 @@ spec/filters/numbers_and_words_spec.rb
35
38
  spec/filters/script_tag_spec.rb
36
39
  spec/filters/shouting_spec.rb
37
40
  spec/filters/square_brackets_spec.rb
41
+ spec/filters/trailing_number_spec.rb
38
42
  spec/filters/urls_spec.rb
39
43
  spec/helpers/corpus_helper.rb
40
44
  spec/helpers/filter_helper.rb
data/Rakefile CHANGED
@@ -21,8 +21,7 @@ end
21
21
  require 'newgem/tasks'
22
22
  Dir['tasks/**/*.rake'].each { |t| load t }
23
23
 
24
- # TODO - want other tests/tasks run by default? Add them to the list
25
- # remove_task :default
24
+ desc "Run tests"
26
25
  task :test => [:spec]
27
26
  task :default => [:test]
28
27
  task :install => [:install_gem]
data/despamilator.gemspec CHANGED
@@ -2,17 +2,17 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{despamilator}
5
- s.version = "1.0"
5
+ s.version = "1.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Stephen Hardisty"]
9
- s.date = %q{2010-12-11}
9
+ s.date = %q{2011-01-26}
10
10
  s.description = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
11
11
  Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator will apply
12
12
  some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.}
13
13
  s.email = ["moowahaha@hotmail.com"]
14
14
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "PostInstall.txt"]
15
- s.files = [".rspec", ".rvmrc", "Gemfile", "Gemfile.lock", "History.txt", "Manifest.txt", "PostInstall.txt", "README.rdoc", "Rakefile", "despamilator.gemspec", "lib/despamilator.rb", "lib/despamilator/filter.rb", "lib/despamilator/filter/funky_consonant.rb", "lib/despamilator/filter/html_tags.rb", "lib/despamilator/filter/ip_address_url.rb", "lib/despamilator/filter/long_words.rb", "lib/despamilator/filter/naughty_q.rb", "lib/despamilator/filter/naughty_words.rb", "lib/despamilator/filter/numbers_and_words.rb", "lib/despamilator/filter/script_tag.rb", "lib/despamilator/filter/shouting.rb", "lib/despamilator/filter/square_brackets.rb", "lib/despamilator/filter/urls.rb", "lib/despamilator/filter_base.rb", "scripts/despamilator_score.rb", "spec/despamilator_spec.rb", "spec/filter_base_spec.rb", "spec/filters/funky_consonant_spec.rb", "spec/filters/html_tags_spec.rb", "spec/filters/ip_address_url_spec.rb", "spec/filters/long_words_spec.rb", "spec/filters/naughty_q_spec.rb", "spec/filters/naughty_words_spec.rb", "spec/filters/numbers_and_words_spec.rb", "spec/filters/script_tag_spec.rb", "spec/filters/shouting_spec.rb", "spec/filters/square_brackets_spec.rb", "spec/filters/urls_spec.rb", "spec/helpers/corpus_helper.rb", "spec/helpers/filter_helper.rb", "spec/helpers/spec_helper.rb", "tasks/test.rake"]
15
+ s.files = [".rspec", ".rvmrc", "Gemfile", "Gemfile.lock", "History.txt", "Manifest.txt", "PostInstall.txt", "README.rdoc", "Rakefile", "despamilator.gemspec", "lib/despamilator.rb", "lib/despamilator/filter.rb", "lib/despamilator/filter/funky_consonant.rb", "lib/despamilator/filter/gtubs_test_filter.rb", "lib/despamilator/filter/html_tags.rb", "lib/despamilator/filter/ip_address_url.rb", "lib/despamilator/filter/long_words.rb", "lib/despamilator/filter/naughty_q.rb", "lib/despamilator/filter/naughty_words.rb", "lib/despamilator/filter/numbers_and_words.rb", "lib/despamilator/filter/script_tag.rb", "lib/despamilator/filter/shouting.rb", "lib/despamilator/filter/square_brackets.rb", "lib/despamilator/filter/trailing_number.rb", "lib/despamilator/filter/urls.rb", "lib/despamilator/filter_base.rb", "scripts/despamilator_score.rb", "spec/despamilator_spec.rb", "spec/filter_base_spec.rb", "spec/filters/funky_consonant_spec.rb", "spec/filters/gtubs_test_filter_spec.rb", "spec/filters/html_tags_spec.rb", "spec/filters/ip_address_url_spec.rb", "spec/filters/long_words_spec.rb", "spec/filters/naughty_q_spec.rb", "spec/filters/naughty_words_spec.rb", "spec/filters/numbers_and_words_spec.rb", "spec/filters/script_tag_spec.rb", "spec/filters/shouting_spec.rb", "spec/filters/square_brackets_spec.rb", "spec/filters/trailing_number_spec.rb", "spec/filters/urls_spec.rb", "spec/helpers/corpus_helper.rb", "spec/helpers/filter_helper.rb", "spec/helpers/spec_helper.rb", "tasks/test.rake"]
16
16
  s.homepage = %q{http://github.com/moowahaha/despamilator}
17
17
  s.post_install_message = %q{PostInstall.txt}
18
18
  s.rdoc_options = ["--main", "README.rdoc"]
@@ -26,11 +26,14 @@ some commonly used heuristics from the world of anti-spam to help you decide whe
26
26
  s.specification_version = 3
27
27
 
28
28
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
29
+ s.add_development_dependency(%q<rubyforge>, [">= 2.0.4"])
29
30
  s.add_development_dependency(%q<hoe>, [">= 2.7.0"])
30
31
  else
32
+ s.add_dependency(%q<rubyforge>, [">= 2.0.4"])
31
33
  s.add_dependency(%q<hoe>, [">= 2.7.0"])
32
34
  end
33
35
  else
36
+ s.add_dependency(%q<rubyforge>, [">= 2.0.4"])
34
37
  s.add_dependency(%q<hoe>, [">= 2.7.0"])
35
38
  end
36
39
  end
data/lib/despamilator.rb CHANGED
@@ -14,7 +14,7 @@ require 'despamilator/filter'
14
14
  # dspam.matched_by #=> array of matching filters
15
15
 
16
16
  class Despamilator
17
- VERSION = "1.0"
17
+ VERSION = "1.1"
18
18
 
19
19
  # Constructor. Takes the text you wish to parse and score.
20
20
 
@@ -0,0 +1,21 @@
1
+ require 'despamilator/filter_base'
2
+
3
+ module DespamilatorFilter
4
+
5
+ class GtubsTestFilter < Despamilator::FilterBase
6
+
7
+ def name
8
+ 'GTubs Test Filter'
9
+ end
10
+
11
+ def description
12
+ 'Detects the special test string (Despamilator.gtubs_test_string) and assigns a big score.'
13
+ end
14
+
15
+ def parse text
16
+ self.append_score = 100 if text == Despamilator.gtubs_test_string
17
+ end
18
+
19
+ end
20
+
21
+ end
@@ -9,7 +9,7 @@ module DespamilatorFilter
9
9
 
10
10
  html_tags.each do |tag|
11
11
  if text.match(/<\s*#{tag}\W/) || text.match(/<\n*#{tag}\W/) || text.match(/\W#{tag}\s*\//) || text.match(/\W#{tag}\n*\//)
12
- self.append_score = 0.3
12
+ self.append_score = 0.45
13
13
  end
14
14
  end
15
15
  end
@@ -39,6 +39,8 @@ module DespamilatorFilter
39
39
  cock
40
40
  pussy
41
41
  clit
42
+ preteen
43
+ lolita
42
44
  }
43
45
  end
44
46
 
@@ -0,0 +1,21 @@
1
+ require 'despamilator/filter_base'
2
+
3
+ module DespamilatorFilter
4
+
5
+ class TrailingNumber < Despamilator::FilterBase
6
+
7
+ def name
8
+ 'Trailing Number'
9
+ end
10
+
11
+ def description
12
+ 'Detects a trailing cache busting number'
13
+ end
14
+
15
+ def parse text
16
+ self.append_score = 0.1 if text =~ /\b\d+\s*$/
17
+ end
18
+
19
+ end
20
+
21
+ end
@@ -18,7 +18,7 @@ module DespamilatorFilter
18
18
  text.gsub!(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
19
19
 
20
20
  1.upto(text.scan(/http:\/\//).length) do
21
- self.append_score = 0.20
21
+ self.append_score = 0.5
22
22
  end
23
23
  end
24
24
 
@@ -0,0 +1,9 @@
1
+ describe DespamilatorFilter::GtubsTestFilter do
2
+ the_name_should_be 'GTubs Test Filter'
3
+ the_description_should_be 'Detects the special test string (Despamilator.gtubs_test_string) and assigns a big score.'
4
+
5
+ despamilator_should_apply_the_filter_for(Despamilator.gtubs_test_string)
6
+
7
+ a_single_match_of(Despamilator.gtubs_test_string, should_score: 100)
8
+ a_multiple_match_of(Despamilator.gtubs_test_string, should_score: [100, 1.times])
9
+ end
@@ -5,8 +5,8 @@ describe DespamilatorFilter::HtmlTags do
5
5
 
6
6
  despamilator_should_apply_the_filter_for('<xmp>')
7
7
 
8
- a_single_match_of('<xmp>', should_score: 0.3)
9
- a_multiple_match_of('<h1></h1> <h2></h2>', should_score: [0.6, 2.times])
8
+ a_single_match_of('<xmp>', should_score: 0.45)
9
+ a_multiple_match_of('<h1></h1> <h2></h2>', should_score: [0.9, 2.times])
10
10
 
11
11
  [
12
12
  '!--',
@@ -117,7 +117,7 @@ describe DespamilatorFilter::HtmlTags do
117
117
  it "should detect '#{tag}'" do
118
118
  dspam = DespamilatorFilter::HtmlTags.new
119
119
  dspam.parse(tag)
120
- dspam.score.should == 0.3
120
+ dspam.score.should == 0.45
121
121
  end
122
122
 
123
123
  end
@@ -0,0 +1,10 @@
1
+ describe DespamilatorFilter::TrailingNumber do
2
+
3
+ the_name_should_be 'Trailing Number'
4
+ the_description_should_be 'Detects a trailing cache busting number'
5
+
6
+ despamilator_should_apply_the_filter_for('hello 123 ')
7
+
8
+ a_single_match_of('hello 123', should_score: 0.1)
9
+
10
+ end
@@ -5,7 +5,7 @@ describe DespamilatorFilter::URLs do
5
5
 
6
6
  despamilator_should_apply_the_filter_for('zt')
7
7
 
8
- a_single_match_of('http://www.blah.com', should_score: 0.2)
9
- a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [0.4, 2.times])
8
+ a_single_match_of('http://www.blah.com', should_score: 0.5)
9
+ a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [1.0, 2.times])
10
10
 
11
11
  end
metadata CHANGED
@@ -4,8 +4,8 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 0
8
- version: "1.0"
7
+ - 1
8
+ version: "1.1"
9
9
  platform: ruby
10
10
  authors:
11
11
  - Stephen Hardisty
@@ -13,7 +13,7 @@ autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
15
 
16
- date: 2011-01-09 00:00:00 +11:00
16
+ date: 2011-01-26 00:00:00 +11:00
17
17
  default_executable:
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
@@ -74,6 +74,7 @@ files:
74
74
  - lib/despamilator.rb
75
75
  - lib/despamilator/filter.rb
76
76
  - lib/despamilator/filter/funky_consonant.rb
77
+ - lib/despamilator/filter/gtubs_test_filter.rb
77
78
  - lib/despamilator/filter/html_tags.rb
78
79
  - lib/despamilator/filter/ip_address_url.rb
79
80
  - lib/despamilator/filter/long_words.rb
@@ -83,12 +84,14 @@ files:
83
84
  - lib/despamilator/filter/script_tag.rb
84
85
  - lib/despamilator/filter/shouting.rb
85
86
  - lib/despamilator/filter/square_brackets.rb
87
+ - lib/despamilator/filter/trailing_number.rb
86
88
  - lib/despamilator/filter/urls.rb
87
89
  - lib/despamilator/filter_base.rb
88
90
  - scripts/despamilator_score.rb
89
91
  - spec/despamilator_spec.rb
90
92
  - spec/filter_base_spec.rb
91
93
  - spec/filters/funky_consonant_spec.rb
94
+ - spec/filters/gtubs_test_filter_spec.rb
92
95
  - spec/filters/html_tags_spec.rb
93
96
  - spec/filters/ip_address_url_spec.rb
94
97
  - spec/filters/long_words_spec.rb
@@ -98,6 +101,7 @@ files:
98
101
  - spec/filters/script_tag_spec.rb
99
102
  - spec/filters/shouting_spec.rb
100
103
  - spec/filters/square_brackets_spec.rb
104
+ - spec/filters/trailing_number_spec.rb
101
105
  - spec/filters/urls_spec.rb
102
106
  - spec/helpers/corpus_helper.rb
103
107
  - spec/helpers/filter_helper.rb