despamilator 2.0.1 → 2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/History.txt +7 -0
  2. data/lib/despamilator.rb +38 -7
  3. data/lib/despamilator/filter.rb +39 -23
  4. data/lib/despamilator/filter/gtubs_test_filter.rb +4 -4
  5. data/lib/despamilator/filter/html_tags.rb +9 -7
  6. data/lib/despamilator/filter/ip_address_url.rb +6 -4
  7. data/lib/despamilator/filter/long_words.rb +7 -5
  8. data/lib/despamilator/filter/mixed_case.rb +21 -0
  9. data/lib/despamilator/filter/naughty_words.rb +5 -5
  10. data/lib/despamilator/filter/numbers_and_words.rb +19 -11
  11. data/lib/despamilator/filter/obfuscated_urls.rb +41 -0
  12. data/lib/despamilator/filter/prices.rb +19 -0
  13. data/lib/despamilator/filter/script_tag.rb +4 -4
  14. data/lib/despamilator/filter/shouting.rb +9 -6
  15. data/lib/despamilator/filter/spammy_tlds.rb +22 -0
  16. data/lib/despamilator/filter/square_brackets.rb +5 -5
  17. data/lib/despamilator/filter/trailing_number.rb +4 -4
  18. data/lib/despamilator/filter/unusual_characters.rb +5 -5
  19. data/lib/despamilator/filter/urls.rb +7 -9
  20. data/lib/despamilator/filter/very_long_domain_name.rb +27 -0
  21. data/lib/despamilator/filter/weird_punctuation.rb +44 -0
  22. data/lib/despamilator/subject.rb +30 -0
  23. data/lib/despamilator/subject/text.rb +32 -0
  24. data/lib/despamilator/version.rb +3 -0
  25. metadata +29 -75
  26. data/.rspec +0 -2
  27. data/.rvmrc +0 -1
  28. data/Gemfile +0 -12
  29. data/Gemfile.lock +0 -47
  30. data/Manifest.txt +0 -46
  31. data/PostInstall.txt +0 -1
  32. data/Rakefile +0 -39
  33. data/conf/unusual_characters.txt +0 -6674
  34. data/despamilator.gemspec +0 -38
  35. data/lib/despamilator/filter_base.rb +0 -82
  36. data/scripts/despamilator_score.rb +0 -25
  37. data/scripts/from_file.rb +0 -26
  38. data/spec/despamilator_spec.rb +0 -13
  39. data/spec/filter_base_spec.rb +0 -30
  40. data/spec/filters/gtubs_test_filter_spec.rb +0 -9
  41. data/spec/filters/html_tags_spec.rb +0 -129
  42. data/spec/filters/ip_address_url_spec.rb +0 -11
  43. data/spec/filters/long_words_spec.rb +0 -11
  44. data/spec/filters/naughty_words_spec.rb +0 -11
  45. data/spec/filters/numbers_and_words_spec.rb +0 -34
  46. data/spec/filters/script_tag_spec.rb +0 -22
  47. data/spec/filters/shouting_spec.rb +0 -45
  48. data/spec/filters/square_brackets_spec.rb +0 -11
  49. data/spec/filters/trailing_number_spec.rb +0 -10
  50. data/spec/filters/unusual_characters_spec.rb +0 -9
  51. data/spec/filters/urls_spec.rb +0 -11
  52. data/spec/helpers/corpus_helper.rb +0 -5
  53. data/spec/helpers/filter_helper.rb +0 -59
  54. data/spec/helpers/spec_helper.rb +0 -6
  55. data/tasks/test.rake +0 -6
@@ -0,0 +1,22 @@
1
+ require 'despamilator/filter'
2
+
3
module DespamilatorFilter

  # Flags text that mentions hosts under top-level domains that are commonly
  # associated with spam.
  class SpammyTLDs < Despamilator::Filter

    # A label of five or more word characters followed by ".info" or ".biz".
    # Case-insensitive because host names are: "CHEAPPILLS.BIZ" must score the
    # same as "cheappills.biz".
    SPAMMY_TLD_PATTERN = /\w{5,}\.(info|biz)\b/i

    # Human-readable name of this filter.
    def name
      'Spammy TLDs'
    end

    # One-line explanation of what this filter looks for.
    def description
      'Detects TLDs that are more commonly associated with spam.'
    end

    # Counts spammy-TLD hosts in the subject's text and registers one match
    # worth 0.05 per hit. Nothing is registered when there are no hits.
    #
    # subject - object exposing +text+ (whose +count+ accepts a Regexp) and
    #           +register_match!+.
    def parse subject
      matches = subject.text.count(SPAMMY_TLD_PATTERN)
      subject.register_match!({:score => 0.05 * matches, :filter => self}) if matches > 0
    end

  end

end
@@ -1,8 +1,8 @@
1
- require 'despamilator/filter_base'
1
+ require 'despamilator/filter'
2
2
 
3
3
  module DespamilatorFilter
4
4
 
5
- class SquareBrackets < Despamilator::FilterBase
5
+ class SquareBrackets < Despamilator::Filter
6
6
 
7
7
  def name
8
8
  'Square Brackets'
@@ -12,9 +12,9 @@ module DespamilatorFilter
12
12
  'Detects each square bracket in a string'
13
13
  end
14
14
 
15
- def parse text
16
- text.downcase.scan(/(\[|\])/).each do |match|
17
- self.append_score = 0.05
15
+ def parse subject
16
+ subject.text.downcase.scan(/(\[|\])/).each do |match|
17
+ subject.register_match!({:score => 0.05, :filter => self})
18
18
  end
19
19
  end
20
20
 
@@ -1,8 +1,8 @@
1
- require 'despamilator/filter_base'
1
+ require 'despamilator/filter'
2
2
 
3
3
  module DespamilatorFilter
4
4
 
5
- class TrailingNumber < Despamilator::FilterBase
5
+ class TrailingNumber < Despamilator::Filter
6
6
 
7
7
  def name
8
8
  'Trailing Number'
@@ -12,8 +12,8 @@ module DespamilatorFilter
12
12
  'Detects a trailing cache busting number'
13
13
  end
14
14
 
15
- def parse text
16
- self.append_score = 0.1 if text =~ /\b\d+\s*$/
15
+ def parse subject
16
+ subject.register_match!({:score => 0.1, :filter => self}) if subject.text.without_uris =~ /\b\d+\s*$/
17
17
  end
18
18
 
19
19
  end
@@ -1,8 +1,8 @@
1
- require 'despamilator/filter_base'
1
+ require 'despamilator/filter'
2
2
 
3
3
  module DespamilatorFilter
4
4
 
5
- class UnusualCharacters < Despamilator::FilterBase
5
+ class UnusualCharacters < Despamilator::Filter
6
6
 
7
7
  def name
8
8
  'Unusual Characters'
@@ -12,10 +12,10 @@ module DespamilatorFilter
12
12
  'Detects and scores each occurrence of an unusual 2 or 3 character combination'
13
13
  end
14
14
 
15
- def parse text
15
+ def parse subject
16
16
  initialize_combos
17
- tokenize(text).each do |token|
18
- self.append_score = 0.05 if @@combos[token.to_sym]
17
+ tokenize(subject.text.without_uris).each do |token|
18
+ subject.register_match!({:score => 0.05, :filter => self}) if @@combos[token.to_sym]
19
19
  end
20
20
  end
21
21
 
@@ -1,8 +1,8 @@
1
- require 'despamilator/filter_base'
1
+ require 'despamilator/filter'
2
2
 
3
3
  module DespamilatorFilter
4
4
 
5
- class URLs < Despamilator::FilterBase
5
+ class URLs < Despamilator::Filter
6
6
 
7
7
  def name
8
8
  'URLs'
@@ -12,13 +12,11 @@ module DespamilatorFilter
12
12
  'Detects each url in a string'
13
13
  end
14
14
 
15
- def parse text
16
- text.downcase!
17
-
18
- text.gsub!(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
19
-
20
- 1.upto(text.scan(/http:\/\//).length) do
21
- self.append_score = 0.4
15
+ def parse subject
16
+ text = subject.text.downcase.gsub(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
17
+ matches = text.count(/https?:\/\//)
18
+ 1.upto(matches > 2 ? 2 : matches) do
19
+ subject.register_match!({:score => 0.4, :filter => self})
22
20
  end
23
21
  end
24
22
 
@@ -0,0 +1,27 @@
1
+ require 'despamilator/filter'
2
+ require 'domainatrix'
3
+
4
module DespamilatorFilter

  # Scores URIs found in the text whose domain is unusually long.
  class VeryLongDomainName < Despamilator::Filter

    # Human-readable name of this filter.
    def name
      'Very Long Domain Name'
    end

    # One-line explanation of what this filter looks for.
    def description
      'Detects unusually long domain names.'
    end

    # Scans the subject's text with URI.regexp and registers a 0.4 match for
    # every URI whose +domain+, as reported by Domainatrix, is longer than 20
    # characters.
    def parse subject
      subject.text.scan(URI.regexp).each do |captures|
        # Drop the groups that did not participate; the second survivor is
        # what gets handed to Domainatrix (presumably the host part).
        captures.compact!
        candidate = captures[1]
        next unless candidate && candidate =~ /(\w|-){5,}\.\w{2,5}/

        parsed = Domainatrix.parse('http://' + candidate)
        next unless parsed.domain.length > 20

        subject.register_match!({:score => 0.4, :filter => self})
      end
    end

  end

end
@@ -0,0 +1,44 @@
1
+ require 'despamilator/filter'
2
+
3
module DespamilatorFilter

  # Scores punctuation that appears in places ordinary prose rarely puts it.
  class WeirdPunctuation < Despamilator::Filter

    # Human-readable name of this filter.
    def name
      'Weird Punctuation'
    end

    # One-line explanation of what this filter looks for.
    def description
      'Detects unusual use of punctuation.'
    end

    # Works on a lowercased copy of the text with URIs stripped, neutralises
    # a few harmless punctuation uses, then counts (and removes) each
    # suspicious pattern in turn. Registers a single match worth 0.03 per hit.
    def parse subject
      text = subject.text.without_uris.downcase

      # Harmless cases are rewritten first so they are not counted below:
      # an ampersand between word characters, a "!" or "?" ending a word,
      # and runs of twenty or more punctuation characters.
      text.gsub!(/\w&\w/, 'xx')
      text.gsub!(/[a-z](!|\?)(\s|$)/, 'x')
      text.gsub!(/(?:#{punctuation}){20,}/, '')

      # Order matters: every pattern removes what it matched, so later
      # patterns only see what earlier ones left behind.
      suspicious = [
        /(?:\W|\s|^)(#{punctuation})/,
        /\w,\w/,
        /\w\w\.\w/,
        /\w\.\w\w/,
        /(#{punctuation})(#{punctuation})/,
        /(#{punctuation})$/,
        /(?:\W|\s|^)\d+(#{punctuation})/
      ]

      matches = suspicious.inject(0) do |total, pattern|
        total + text.remove_and_count!(pattern)
      end

      subject.register_match!({:score => 0.03 * matches, :filter => self}) if matches > 0
    end

    private

    # Regexp alternation source of the escaped punctuation characters,
    # built once per filter instance.
    def punctuation
      @punctuation ||= %w{~ ` ! @ # $ % ^ & * _ - + = , / ? | \\ : ;}.map { |character| Regexp.escape(character) }.join('|')
    end

  end

end
@@ -0,0 +1,30 @@
1
+ require 'despamilator/subject/text'
2
+
3
class Despamilator
  # The text under inspection together with the matches filters have
  # registered against it and the running total of their scores.
  class Subject
    attr_reader :score, :text

    # text - the raw string to inspect; wrapped in a Despamilator::Subject::Text.
    def initialize text
      @score = 0.0
      @matches = {}
      @text = Despamilator::Subject::Text.new(text)
    end

    # Records a filter hit.
    #
    # details - Hash with :score (amount to add) and :filter (the filter
    #           object responsible).
    #
    # Both keys are validated before any state is touched, so a rejected
    # call leaves the total score and the per-filter scores unchanged.
    #
    # Returns the accumulated score for the given filter.
    # Raises RuntimeError when :score or :filter is missing.
    def register_match! details
      amount = details[:score] || raise('A score must be supplied')
      filter = details[:filter] || raise('A filter must be supplied')

      @score += amount
      @matches[filter] ||= 0.0
      @matches[filter] += amount
    end

    # Returns an Array of {:filter => ..., :score => ...} hashes, one per
    # filter that registered a match, highest accumulated score first.
    def matches
      @matches.map do |filter, score|
        {:filter => filter, :score => score}
      end.sort do |a, b|
        b[:score] <=> a[:score]
      end
    end

  end
end
@@ -0,0 +1,32 @@
1
+ require 'uri'
2
+
3
class Despamilator
  class Subject
    # The text being inspected: a frozen String with a few helpers used by
    # the filters. The instance itself is immutable; filters that need to
    # edit the text work on the unfrozen copies returned by +without_uris+
    # and +downcase+.
    class Text < String

      def initialize text
        super text
        freeze
      end

      # Returns an unfrozen Text copy with http(s), mailto and ftp URIs
      # removed (each match runs up to and including the next whitespace
      # character, or to the end of the line).
      def without_uris
        derive(gsub(/\b(?:https?|mailto|ftp):.+?(\s|$)/i, ''))
      end

      # Returns an unfrozen, lowercased Text copy.
      #
      # Overridden because on Ruby 3.0 and later the built-in String methods
      # return plain String objects even when called on a subclass, which
      # would lose +count+ and +remove_and_count!+ in chains such as
      # +text.without_uris.downcase+.
      def downcase(*args)
        derive(super)
      end

      # Splits the text on runs of non-word characters.
      def words
        split(/\W+/)
      end

      # Counts occurrences of +pattern+ (a Regexp).
      #
      # NOTE: this replaces String#count. For a pattern with capture groups,
      # every non-nil captured group is counted, not every match.
      def count pattern
        scan(pattern).flatten.compact.length
      end

      # Counts occurrences of +pattern+ as +count+ does, deletes them from
      # the receiver in place and returns the count. Only usable on unfrozen
      # copies, not on the frozen original.
      def remove_and_count! pattern
        found = count(pattern)
        gsub!(pattern, '')
        found
      end

      private

      # Wraps +string+ in a new, unfrozen instance of this class.
      # +allocate+ bypasses +initialize+, which would freeze the copy.
      def derive string
        self.class.allocate.replace(string)
      end

    end
  end
end
@@ -0,0 +1,3 @@
1
class Despamilator
  # Release version of the library. Kept as a String rather than a Float so
  # that multi-part versions (such as the earlier 2.0.1 release) can be
  # expressed and so that "2.10" stays distinct from "2.1".
  VERSION = '2.1'.freeze
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: despamilator
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: '2.1'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,102 +9,58 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-08-11 00:00:00.000000000 -04:00
13
- default_executable:
12
+ date: 2011-09-11 00:00:00.000000000Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
- name: rubyforge
17
- requirement: &2730850 !ruby/object:Gem::Requirement
15
+ name: domainatrix
16
+ requirement: &70312466224620 !ruby/object:Gem::Requirement
18
17
  none: false
19
18
  requirements:
20
19
  - - ! '>='
21
20
  - !ruby/object:Gem::Version
22
- version: 2.0.4
23
- type: :development
21
+ version: '0'
22
+ type: :runtime
24
23
  prerelease: false
25
- version_requirements: *2730850
26
- - !ruby/object:Gem::Dependency
27
- name: hoe
28
- requirement: &2730610 !ruby/object:Gem::Requirement
29
- none: false
30
- requirements:
31
- - - ! '>='
32
- - !ruby/object:Gem::Version
33
- version: 2.7.0
34
- type: :development
35
- prerelease: false
36
- version_requirements: *2730610
24
+ version_requirements: *70312466224620
37
25
  description: ! 'Despamilator is a plugin based spam detector designed for use on your
38
- web forms borne out of two annoyances:
39
-
40
- Spam being submitted in my web forms and CAPTCHAS being intrusive. Despamilator
41
- will apply
42
-
43
- some commonly used heuristics from the world of anti-spam to help you decide whether
44
- your users are human or machine.'
26
+ web forms borne out of two annoyances: Spam being submitted in my web forms and
27
+ CAPTCHAS being intrusive. Despamilator will apply some commonly used heuristics
28
+ from the world of anti-spam to help you decide whether your users are human or machine.'
45
29
  email:
46
30
  - moowahaha@hotmail.com
47
31
  executables: []
48
32
  extensions: []
49
- extra_rdoc_files:
50
- - History.txt
51
- - Manifest.txt
52
- - PostInstall.txt
53
- - conf/unusual_characters.txt
33
+ extra_rdoc_files: []
54
34
  files:
55
- - .rspec
56
- - .rvmrc
57
- - Gemfile
58
- - Gemfile.lock
59
- - History.txt
60
- - Manifest.txt
61
- - PostInstall.txt
62
- - README.rdoc
63
- - Rakefile
64
- - conf/unusual_characters.txt
65
- - despamilator.gemspec
66
- - lib/despamilator.rb
67
- - lib/despamilator/filter.rb
68
35
  - lib/despamilator/filter/gtubs_test_filter.rb
69
36
  - lib/despamilator/filter/html_tags.rb
70
37
  - lib/despamilator/filter/ip_address_url.rb
71
38
  - lib/despamilator/filter/long_words.rb
39
+ - lib/despamilator/filter/mixed_case.rb
72
40
  - lib/despamilator/filter/naughty_words.rb
73
41
  - lib/despamilator/filter/numbers_and_words.rb
42
+ - lib/despamilator/filter/obfuscated_urls.rb
43
+ - lib/despamilator/filter/prices.rb
74
44
  - lib/despamilator/filter/script_tag.rb
75
45
  - lib/despamilator/filter/shouting.rb
46
+ - lib/despamilator/filter/spammy_tlds.rb
76
47
  - lib/despamilator/filter/square_brackets.rb
77
48
  - lib/despamilator/filter/trailing_number.rb
78
49
  - lib/despamilator/filter/unusual_characters.rb
79
50
  - lib/despamilator/filter/urls.rb
80
- - lib/despamilator/filter_base.rb
81
- - scripts/despamilator_score.rb
82
- - scripts/from_file.rb
83
- - spec/despamilator_spec.rb
84
- - spec/filter_base_spec.rb
85
- - spec/filters/gtubs_test_filter_spec.rb
86
- - spec/filters/html_tags_spec.rb
87
- - spec/filters/ip_address_url_spec.rb
88
- - spec/filters/long_words_spec.rb
89
- - spec/filters/naughty_words_spec.rb
90
- - spec/filters/numbers_and_words_spec.rb
91
- - spec/filters/script_tag_spec.rb
92
- - spec/filters/shouting_spec.rb
93
- - spec/filters/square_brackets_spec.rb
94
- - spec/filters/trailing_number_spec.rb
95
- - spec/filters/unusual_characters_spec.rb
96
- - spec/filters/urls_spec.rb
97
- - spec/helpers/corpus_helper.rb
98
- - spec/helpers/filter_helper.rb
99
- - spec/helpers/spec_helper.rb
100
- - tasks/test.rake
101
- has_rdoc: true
51
+ - lib/despamilator/filter/very_long_domain_name.rb
52
+ - lib/despamilator/filter/weird_punctuation.rb
53
+ - lib/despamilator/filter.rb
54
+ - lib/despamilator/subject/text.rb
55
+ - lib/despamilator/subject.rb
56
+ - lib/despamilator/version.rb
57
+ - lib/despamilator.rb
58
+ - README.rdoc
59
+ - History.txt
102
60
  homepage: http://github.com/moowahaha/despamilator
103
61
  licenses: []
104
- post_install_message: PostInstall.txt
105
- rdoc_options:
106
- - --main
107
- - README.rdoc
62
+ post_install_message:
63
+ rdoc_options: []
108
64
  require_paths:
109
65
  - lib
110
66
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -118,13 +74,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
74
  requirements:
119
75
  - - ! '>='
120
76
  - !ruby/object:Gem::Version
121
- version: '0'
77
+ version: 1.3.6
122
78
  requirements: []
123
79
  rubyforge_project: despamilator
124
- rubygems_version: 1.5.2
80
+ rubygems_version: 1.8.6
125
81
  signing_key:
126
82
  specification_version: 3
127
- summary: ! 'Despamilator is a plugin based spam detector designed for use on your
128
- web forms borne out of two annoyances: Spam being submitted in my web forms and
129
- CAPTCHAS being intrusive'
83
+ summary: Stop web form Spam!
130
84
  test_files: []
data/.rspec DELETED
@@ -1,2 +0,0 @@
1
- --color
2
- --require=./spec/helpers/spec_helper.rb
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm --create use 1.9.2@despamilator
data/Gemfile DELETED
@@ -1,12 +0,0 @@
1
- # A sample Gemfile
2
- source "http://rubygems.org"
3
-
4
- gem 'hoe', '>= 2.7.0'
5
- gem 'newgem', '>= 1.5.3'
6
- gem 'rdoc', '>= 3.2'
7
-
8
- group :test do
9
- gem 'rspec', '>= 2.0.1'
10
- gem 'simplecov', '>= 0.3.7'
11
- gem 'one_hundred_percent_coverage'
12
- end