despamilator 1.0 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -46,3 +46,13 @@
46
46
  * Split out rails plugin (now despamilator-rails).
47
47
 
48
48
  * Added rdoc.
49
+
50
+ === 1.1
51
+
52
+ * Added new filter to detect a cache busting technique.
53
+
54
+ * Significantly increased aggressiveness of URI and HTML filters.
55
+
56
+ * Added a heap of new spam and clean samples.
57
+
58
+ * Added test string ("gtubs") that will score 100. For use in tests.
data/Manifest.txt CHANGED
@@ -11,6 +11,7 @@ despamilator.gemspec
11
11
  lib/despamilator.rb
12
12
  lib/despamilator/filter.rb
13
13
  lib/despamilator/filter/funky_consonant.rb
14
+ lib/despamilator/filter/gtubs_test_filter.rb
14
15
  lib/despamilator/filter/html_tags.rb
15
16
  lib/despamilator/filter/ip_address_url.rb
16
17
  lib/despamilator/filter/long_words.rb
@@ -20,12 +21,14 @@ lib/despamilator/filter/numbers_and_words.rb
20
21
  lib/despamilator/filter/script_tag.rb
21
22
  lib/despamilator/filter/shouting.rb
22
23
  lib/despamilator/filter/square_brackets.rb
24
+ lib/despamilator/filter/trailing_number.rb
23
25
  lib/despamilator/filter/urls.rb
24
26
  lib/despamilator/filter_base.rb
25
27
  scripts/despamilator_score.rb
26
28
  spec/despamilator_spec.rb
27
29
  spec/filter_base_spec.rb
28
30
  spec/filters/funky_consonant_spec.rb
31
+ spec/filters/gtubs_test_filter_spec.rb
29
32
  spec/filters/html_tags_spec.rb
30
33
  spec/filters/ip_address_url_spec.rb
31
34
  spec/filters/long_words_spec.rb
@@ -35,6 +38,7 @@ spec/filters/numbers_and_words_spec.rb
35
38
  spec/filters/script_tag_spec.rb
36
39
  spec/filters/shouting_spec.rb
37
40
  spec/filters/square_brackets_spec.rb
41
+ spec/filters/trailing_number_spec.rb
38
42
  spec/filters/urls_spec.rb
39
43
  spec/helpers/corpus_helper.rb
40
44
  spec/helpers/filter_helper.rb
data/Rakefile CHANGED
@@ -21,8 +21,7 @@ end
21
21
  require 'newgem/tasks'
22
22
  Dir['tasks/**/*.rake'].each { |t| load t }
23
23
 
24
- # TODO - want other tests/tasks run by default? Add them to the list
25
- # remove_task :default
24
+ desc "Run tests"
26
25
  task :test => [:spec]
27
26
  task :default => [:test]
28
27
  task :install => [:install_gem]
data/despamilator.gemspec CHANGED
@@ -2,17 +2,17 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{despamilator}
5
- s.version = "1.0"
5
+ s.version = "1.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Stephen Hardisty"]
9
- s.date = %q{2010-12-11}
9
+ s.date = %q{2011-01-26}
10
10
  s.description = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
11
11
  Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator will apply
12
12
  some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.}
13
13
  s.email = ["moowahaha@hotmail.com"]
14
14
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "PostInstall.txt"]
15
- s.files = [".rspec", ".rvmrc", "Gemfile", "Gemfile.lock", "History.txt", "Manifest.txt", "PostInstall.txt", "README.rdoc", "Rakefile", "despamilator.gemspec", "lib/despamilator.rb", "lib/despamilator/filter.rb", "lib/despamilator/filter/funky_consonant.rb", "lib/despamilator/filter/html_tags.rb", "lib/despamilator/filter/ip_address_url.rb", "lib/despamilator/filter/long_words.rb", "lib/despamilator/filter/naughty_q.rb", "lib/despamilator/filter/naughty_words.rb", "lib/despamilator/filter/numbers_and_words.rb", "lib/despamilator/filter/script_tag.rb", "lib/despamilator/filter/shouting.rb", "lib/despamilator/filter/square_brackets.rb", "lib/despamilator/filter/urls.rb", "lib/despamilator/filter_base.rb", "scripts/despamilator_score.rb", "spec/despamilator_spec.rb", "spec/filter_base_spec.rb", "spec/filters/funky_consonant_spec.rb", "spec/filters/html_tags_spec.rb", "spec/filters/ip_address_url_spec.rb", "spec/filters/long_words_spec.rb", "spec/filters/naughty_q_spec.rb", "spec/filters/naughty_words_spec.rb", "spec/filters/numbers_and_words_spec.rb", "spec/filters/script_tag_spec.rb", "spec/filters/shouting_spec.rb", "spec/filters/square_brackets_spec.rb", "spec/filters/urls_spec.rb", "spec/helpers/corpus_helper.rb", "spec/helpers/filter_helper.rb", "spec/helpers/spec_helper.rb", "tasks/test.rake"]
15
+ s.files = [".rspec", ".rvmrc", "Gemfile", "Gemfile.lock", "History.txt", "Manifest.txt", "PostInstall.txt", "README.rdoc", "Rakefile", "despamilator.gemspec", "lib/despamilator.rb", "lib/despamilator/filter.rb", "lib/despamilator/filter/funky_consonant.rb", "lib/despamilator/filter/gtubs_test_filter.rb", "lib/despamilator/filter/html_tags.rb", "lib/despamilator/filter/ip_address_url.rb", "lib/despamilator/filter/long_words.rb", "lib/despamilator/filter/naughty_q.rb", "lib/despamilator/filter/naughty_words.rb", "lib/despamilator/filter/numbers_and_words.rb", "lib/despamilator/filter/script_tag.rb", "lib/despamilator/filter/shouting.rb", "lib/despamilator/filter/square_brackets.rb", "lib/despamilator/filter/trailing_number.rb", "lib/despamilator/filter/urls.rb", "lib/despamilator/filter_base.rb", "scripts/despamilator_score.rb", "spec/despamilator_spec.rb", "spec/filter_base_spec.rb", "spec/filters/funky_consonant_spec.rb", "spec/filters/gtubs_test_filter_spec.rb", "spec/filters/html_tags_spec.rb", "spec/filters/ip_address_url_spec.rb", "spec/filters/long_words_spec.rb", "spec/filters/naughty_q_spec.rb", "spec/filters/naughty_words_spec.rb", "spec/filters/numbers_and_words_spec.rb", "spec/filters/script_tag_spec.rb", "spec/filters/shouting_spec.rb", "spec/filters/square_brackets_spec.rb", "spec/filters/trailing_number_spec.rb", "spec/filters/urls_spec.rb", "spec/helpers/corpus_helper.rb", "spec/helpers/filter_helper.rb", "spec/helpers/spec_helper.rb", "tasks/test.rake"]
16
16
  s.homepage = %q{http://github.com/moowahaha/despamilator}
17
17
  s.post_install_message = %q{PostInstall.txt}
18
18
  s.rdoc_options = ["--main", "README.rdoc"]
@@ -26,11 +26,14 @@ some commonly used heuristics from the world of anti-spam to help you decide whe
26
26
  s.specification_version = 3
27
27
 
28
28
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
29
+ s.add_development_dependency(%q<rubyforge>, [">= 2.0.4"])
29
30
  s.add_development_dependency(%q<hoe>, [">= 2.7.0"])
30
31
  else
32
+ s.add_dependency(%q<rubyforge>, [">= 2.0.4"])
31
33
  s.add_dependency(%q<hoe>, [">= 2.7.0"])
32
34
  end
33
35
  else
36
+ s.add_dependency(%q<rubyforge>, [">= 2.0.4"])
34
37
  s.add_dependency(%q<hoe>, [">= 2.7.0"])
35
38
  end
36
39
  end
data/lib/despamilator.rb CHANGED
@@ -14,7 +14,7 @@ require 'despamilator/filter'
14
14
  # dspam.matched_by #=> array of matching filters
15
15
 
16
16
  class Despamilator
17
- VERSION = "1.0"
17
+ VERSION = "1.1"
18
18
 
19
19
  # Constructor. Takes the text you wish to parse and score.
20
20
 
@@ -0,0 +1,21 @@
1
+ require 'despamilator/filter_base'
2
+
3
+ module DespamilatorFilter
4
+
5
+ class GtubsTestFilter < Despamilator::FilterBase
6
+
7
+ def name
8
+ 'GTubs Test Filter'
9
+ end
10
+
11
+ def description
12
+ 'Detects the special test string (Despamilator.gtubs_test_string) and assigns a big score.'
13
+ end
14
+
15
+ def parse text
16
+ self.append_score = 100 if text == Despamilator.gtubs_test_string
17
+ end
18
+
19
+ end
20
+
21
+ end
@@ -9,7 +9,7 @@ module DespamilatorFilter
9
9
 
10
10
  html_tags.each do |tag|
11
11
  if text.match(/<\s*#{tag}\W/) || text.match(/<\n*#{tag}\W/) || text.match(/\W#{tag}\s*\//) || text.match(/\W#{tag}\n*\//)
12
- self.append_score = 0.3
12
+ self.append_score = 0.45
13
13
  end
14
14
  end
15
15
  end
@@ -39,6 +39,8 @@ module DespamilatorFilter
39
39
  cock
40
40
  pussy
41
41
  clit
42
+ preteen
43
+ lolita
42
44
  }
43
45
  end
44
46
 
@@ -0,0 +1,21 @@
1
+ require 'despamilator/filter_base'
2
+
3
+ module DespamilatorFilter
4
+
5
+ class TrailingNumber < Despamilator::FilterBase
6
+
7
+ def name
8
+ 'Trailing Number'
9
+ end
10
+
11
+ def description
12
+ 'Detects a trailing cache busting number'
13
+ end
14
+
15
+ def parse text
16
+ self.append_score = 0.1 if text =~ /\b\d+\s*$/
17
+ end
18
+
19
+ end
20
+
21
+ end
@@ -18,7 +18,7 @@ module DespamilatorFilter
18
18
  text.gsub!(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
19
19
 
20
20
  1.upto(text.scan(/http:\/\//).length) do
21
- self.append_score = 0.20
21
+ self.append_score = 0.5
22
22
  end
23
23
  end
24
24
 
@@ -0,0 +1,9 @@
1
+ describe DespamilatorFilter::GtubsTestFilter do
2
+ the_name_should_be 'GTubs Test Filter'
3
+ the_description_should_be 'Detects the special test string (Despamilator.gtubs_test_string) and assigns a big score.'
4
+
5
+ despamilator_should_apply_the_filter_for(Despamilator.gtubs_test_string)
6
+
7
+ a_single_match_of(Despamilator.gtubs_test_string, should_score: 100)
8
+ a_multiple_match_of(Despamilator.gtubs_test_string, should_score: [100, 1.times])
9
+ end
@@ -5,8 +5,8 @@ describe DespamilatorFilter::HtmlTags do
5
5
 
6
6
  despamilator_should_apply_the_filter_for('<xmp>')
7
7
 
8
- a_single_match_of('<xmp>', should_score: 0.3)
9
- a_multiple_match_of('<h1></h1> <h2></h2>', should_score: [0.6, 2.times])
8
+ a_single_match_of('<xmp>', should_score: 0.45)
9
+ a_multiple_match_of('<h1></h1> <h2></h2>', should_score: [0.9, 2.times])
10
10
 
11
11
  [
12
12
  '!--',
@@ -117,7 +117,7 @@ describe DespamilatorFilter::HtmlTags do
117
117
  it "should detect '#{tag}'" do
118
118
  dspam = DespamilatorFilter::HtmlTags.new
119
119
  dspam.parse(tag)
120
- dspam.score.should == 0.3
120
+ dspam.score.should == 0.45
121
121
  end
122
122
 
123
123
  end
@@ -0,0 +1,10 @@
1
+ describe DespamilatorFilter::TrailingNumber do
2
+
3
+ the_name_should_be 'Trailing Number'
4
+ the_description_should_be 'Detects a trailing cache busting number'
5
+
6
+ despamilator_should_apply_the_filter_for('hello 123 ')
7
+
8
+ a_single_match_of('hello 123', should_score: 0.1)
9
+
10
+ end
@@ -5,7 +5,7 @@ describe DespamilatorFilter::URLs do
5
5
 
6
6
  despamilator_should_apply_the_filter_for('zt')
7
7
 
8
- a_single_match_of('http://www.blah.com', should_score: 0.2)
9
- a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [0.4, 2.times])
8
+ a_single_match_of('http://www.blah.com', should_score: 0.5)
9
+ a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [1.0, 2.times])
10
10
 
11
11
  end
metadata CHANGED
@@ -4,8 +4,8 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 0
8
- version: "1.0"
7
+ - 1
8
+ version: "1.1"
9
9
  platform: ruby
10
10
  authors:
11
11
  - Stephen Hardisty
@@ -13,7 +13,7 @@ autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
15
 
16
- date: 2011-01-09 00:00:00 +11:00
16
+ date: 2011-01-26 00:00:00 +11:00
17
17
  default_executable:
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
@@ -74,6 +74,7 @@ files:
74
74
  - lib/despamilator.rb
75
75
  - lib/despamilator/filter.rb
76
76
  - lib/despamilator/filter/funky_consonant.rb
77
+ - lib/despamilator/filter/gtubs_test_filter.rb
77
78
  - lib/despamilator/filter/html_tags.rb
78
79
  - lib/despamilator/filter/ip_address_url.rb
79
80
  - lib/despamilator/filter/long_words.rb
@@ -83,12 +84,14 @@ files:
83
84
  - lib/despamilator/filter/script_tag.rb
84
85
  - lib/despamilator/filter/shouting.rb
85
86
  - lib/despamilator/filter/square_brackets.rb
87
+ - lib/despamilator/filter/trailing_number.rb
86
88
  - lib/despamilator/filter/urls.rb
87
89
  - lib/despamilator/filter_base.rb
88
90
  - scripts/despamilator_score.rb
89
91
  - spec/despamilator_spec.rb
90
92
  - spec/filter_base_spec.rb
91
93
  - spec/filters/funky_consonant_spec.rb
94
+ - spec/filters/gtubs_test_filter_spec.rb
92
95
  - spec/filters/html_tags_spec.rb
93
96
  - spec/filters/ip_address_url_spec.rb
94
97
  - spec/filters/long_words_spec.rb
@@ -98,6 +101,7 @@ files:
98
101
  - spec/filters/script_tag_spec.rb
99
102
  - spec/filters/shouting_spec.rb
100
103
  - spec/filters/square_brackets_spec.rb
104
+ - spec/filters/trailing_number_spec.rb
101
105
  - spec/filters/urls_spec.rb
102
106
  - spec/helpers/corpus_helper.rb
103
107
  - spec/helpers/filter_helper.rb