despamilator 2.0.1 → 2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/lib/despamilator.rb +38 -7
- data/lib/despamilator/filter.rb +39 -23
- data/lib/despamilator/filter/gtubs_test_filter.rb +4 -4
- data/lib/despamilator/filter/html_tags.rb +9 -7
- data/lib/despamilator/filter/ip_address_url.rb +6 -4
- data/lib/despamilator/filter/long_words.rb +7 -5
- data/lib/despamilator/filter/mixed_case.rb +21 -0
- data/lib/despamilator/filter/naughty_words.rb +5 -5
- data/lib/despamilator/filter/numbers_and_words.rb +19 -11
- data/lib/despamilator/filter/obfuscated_urls.rb +41 -0
- data/lib/despamilator/filter/prices.rb +19 -0
- data/lib/despamilator/filter/script_tag.rb +4 -4
- data/lib/despamilator/filter/shouting.rb +9 -6
- data/lib/despamilator/filter/spammy_tlds.rb +22 -0
- data/lib/despamilator/filter/square_brackets.rb +5 -5
- data/lib/despamilator/filter/trailing_number.rb +4 -4
- data/lib/despamilator/filter/unusual_characters.rb +5 -5
- data/lib/despamilator/filter/urls.rb +7 -9
- data/lib/despamilator/filter/very_long_domain_name.rb +27 -0
- data/lib/despamilator/filter/weird_punctuation.rb +44 -0
- data/lib/despamilator/subject.rb +30 -0
- data/lib/despamilator/subject/text.rb +32 -0
- data/lib/despamilator/version.rb +3 -0
- metadata +29 -75
- data/.rspec +0 -2
- data/.rvmrc +0 -1
- data/Gemfile +0 -12
- data/Gemfile.lock +0 -47
- data/Manifest.txt +0 -46
- data/PostInstall.txt +0 -1
- data/Rakefile +0 -39
- data/conf/unusual_characters.txt +0 -6674
- data/despamilator.gemspec +0 -38
- data/lib/despamilator/filter_base.rb +0 -82
- data/scripts/despamilator_score.rb +0 -25
- data/scripts/from_file.rb +0 -26
- data/spec/despamilator_spec.rb +0 -13
- data/spec/filter_base_spec.rb +0 -30
- data/spec/filters/gtubs_test_filter_spec.rb +0 -9
- data/spec/filters/html_tags_spec.rb +0 -129
- data/spec/filters/ip_address_url_spec.rb +0 -11
- data/spec/filters/long_words_spec.rb +0 -11
- data/spec/filters/naughty_words_spec.rb +0 -11
- data/spec/filters/numbers_and_words_spec.rb +0 -34
- data/spec/filters/script_tag_spec.rb +0 -22
- data/spec/filters/shouting_spec.rb +0 -45
- data/spec/filters/square_brackets_spec.rb +0 -11
- data/spec/filters/trailing_number_spec.rb +0 -10
- data/spec/filters/unusual_characters_spec.rb +0 -9
- data/spec/filters/urls_spec.rb +0 -11
- data/spec/helpers/corpus_helper.rb +0 -5
- data/spec/helpers/filter_helper.rb +0 -59
- data/spec/helpers/spec_helper.rb +0 -6
- data/tasks/test.rake +0 -6
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'despamilator/filter'
|
2
|
+
|
3
|
+
module DespamilatorFilter

  # Filter that scores text mentioning hosts under the .info or .biz TLDs.
  class SpammyTLDs < Despamilator::Filter

    # Human-readable filter name.
    def name
      'Spammy TLDs'
    end

    # One-line explanation of what the filter looks for.
    def description
      'Detects TLDs that are more commonly associated with spam.'
    end

    # Counts occurrences of "<5+ word chars>.info" / ".biz" in the subject's
    # text and registers a single match worth 0.05 per occurrence.
    # Nothing is registered when there are no occurrences.
    def parse(subject)
      hits = subject.text.count(/\w{5,}\.(info|biz)\b/)
      return if hits.zero?

      subject.register_match!({:score => 0.05 * hits, :filter => self})
    end

  end

end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
require 'despamilator/
|
1
|
+
require 'despamilator/filter'
|
2
2
|
|
3
3
|
module DespamilatorFilter
|
4
4
|
|
5
|
-
class SquareBrackets < Despamilator::
|
5
|
+
class SquareBrackets < Despamilator::Filter
|
6
6
|
|
7
7
|
def name
|
8
8
|
'Square Brackets'
|
@@ -12,9 +12,9 @@ module DespamilatorFilter
|
|
12
12
|
'Detects each square bracket in a string'
|
13
13
|
end
|
14
14
|
|
15
|
-
def parse
|
16
|
-
text.downcase.scan(/(\[|\])/).each do |match|
|
17
|
-
|
15
|
+
def parse subject
|
16
|
+
subject.text.downcase.scan(/(\[|\])/).each do |match|
|
17
|
+
subject.register_match!({:score => 0.05, :filter => self})
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
require 'despamilator/
|
1
|
+
require 'despamilator/filter'
|
2
2
|
|
3
3
|
module DespamilatorFilter
|
4
4
|
|
5
|
-
class TrailingNumber < Despamilator::
|
5
|
+
class TrailingNumber < Despamilator::Filter
|
6
6
|
|
7
7
|
def name
|
8
8
|
'Trailing Number'
|
@@ -12,8 +12,8 @@ module DespamilatorFilter
|
|
12
12
|
'Detects a trailing cache busting number'
|
13
13
|
end
|
14
14
|
|
15
|
-
def parse
|
16
|
-
|
15
|
+
def parse subject
|
16
|
+
subject.register_match!({:score => 0.1, :filter => self}) if subject.text.without_uris =~ /\b\d+\s*$/
|
17
17
|
end
|
18
18
|
|
19
19
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
require 'despamilator/
|
1
|
+
require 'despamilator/filter'
|
2
2
|
|
3
3
|
module DespamilatorFilter
|
4
4
|
|
5
|
-
class UnusualCharacters < Despamilator::
|
5
|
+
class UnusualCharacters < Despamilator::Filter
|
6
6
|
|
7
7
|
def name
|
8
8
|
'Unusual Characters'
|
@@ -12,10 +12,10 @@ module DespamilatorFilter
|
|
12
12
|
'Detects and scores each occurrence of an unusual 2 or 3 character combination'
|
13
13
|
end
|
14
14
|
|
15
|
-
def parse
|
15
|
+
def parse subject
|
16
16
|
initialize_combos
|
17
|
-
tokenize(text).each do |token|
|
18
|
-
|
17
|
+
tokenize(subject.text.without_uris).each do |token|
|
18
|
+
subject.register_match!({:score => 0.05, :filter => self}) if @@combos[token.to_sym]
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
require 'despamilator/
|
1
|
+
require 'despamilator/filter'
|
2
2
|
|
3
3
|
module DespamilatorFilter
|
4
4
|
|
5
|
-
class URLs < Despamilator::
|
5
|
+
class URLs < Despamilator::Filter
|
6
6
|
|
7
7
|
def name
|
8
8
|
'URLs'
|
@@ -12,13 +12,11 @@ module DespamilatorFilter
|
|
12
12
|
'Detects each url in a string'
|
13
13
|
end
|
14
14
|
|
15
|
-
def parse
|
16
|
-
text.downcase
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
1.upto(text.scan(/http:\/\//).length) do
|
21
|
-
self.append_score = 0.4
|
15
|
+
def parse subject
|
16
|
+
text = subject.text.downcase.gsub(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
|
17
|
+
matches = text.count(/https?:\/\//)
|
18
|
+
1.upto(matches > 2 ? 2 : matches) do
|
19
|
+
subject.register_match!({:score => 0.4, :filter => self})
|
22
20
|
end
|
23
21
|
end
|
24
22
|
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'despamilator/filter'
|
2
|
+
require 'domainatrix'
|
3
|
+
|
4
|
+
module DespamilatorFilter

  # Filter that penalises URLs whose registered domain is unusually long.
  class VeryLongDomainName < Despamilator::Filter

    # Human-readable filter name.
    def name
      'Very Long Domain Name'
    end

    # One-line explanation of what the filter looks for.
    def description
      'Detects unusually long domain names.'
    end

    # Scans the subject's text with URI.regexp. For every hit, the second
    # non-nil capture is treated as the host candidate; candidates that do
    # not look like "<5+ chars>.<2-5 chars>" are skipped. The rest are parsed
    # with Domainatrix and a 0.4 match is registered when the domain part is
    # longer than 20 characters.
    def parse(subject)
      subject.text.scan(URI.regexp).each do |captures|
        captures.compact!
        candidate = captures[1]
        next unless candidate && candidate =~ /(\w|-){5,}\.\w{2,5}/

        parsed = Domainatrix.parse('http://' + candidate)
        next unless parsed.domain.length > 20

        subject.register_match!({:score => 0.4, :filter => self})
      end
    end

  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'despamilator/filter'
|
2
|
+
|
3
|
+
module DespamilatorFilter

  # Filter that scores punctuation appearing in unusual positions.
  class WeirdPunctuation < Despamilator::Filter

    # Human-readable filter name.
    def name
      'Weird Punctuation'
    end

    # One-line explanation of what the filter looks for.
    def description
      'Detects unusual use of punctuation.'
    end

    # Works on a lower-cased copy of the subject's text with URIs removed.
    # Three substitutions first neutralise patterns that should not count
    # (letter&letter, a letter followed by a sentence-ending ! or ?, and runs
    # of 20+ punctuation characters). The remaining patterns are then removed
    # one after another, in order, and their counts summed; a single match
    # worth 0.03 per count is registered when the sum is positive.
    def parse(subject)
      text = subject.text.without_uris.downcase

      text.gsub!(/\w&\w/, 'xx')
      text.gsub!(/[a-z](!|\?)(\s|$)/, 'x')
      text.gsub!(/(?:#{punctuation}){20,}/, '')

      # Order matters: each removal changes what later patterns can see.
      suspicious = [
        /(?:\W|\s|^)(#{punctuation})/,
        /\w,\w/,
        /\w\w\.\w/,
        /\w\.\w\w/,
        /(#{punctuation})(#{punctuation})/,
        /(#{punctuation})$/,
        /(?:\W|\s|^)\d+(#{punctuation})/
      ]
      total = suspicious.inject(0) { |sum, pattern| sum + text.remove_and_count!(pattern) }

      subject.register_match!({:score => 0.03 * total, :filter => self}) if total > 0
    end

    private

    # Regexp alternation source ("a|b|c") of the escaped punctuation
    # characters this filter cares about; built once per instance.
    def punctuation
      @punctuation ||= %w{~ ` ! @ # $ % ^ & * _ - + = , / ? | \\ : ;}.map { |char| Regexp.escape(char) }.join('|')
    end

  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'despamilator/subject/text'
|
2
|
+
|
3
|
+
class Despamilator
  # Holds the text under inspection together with the running spam score
  # and a per-filter breakdown of that score.
  class Subject
    attr_reader :score, :text

    # text - the raw string to inspect; wrapped in Despamilator::Subject::Text.
    def initialize text
      @score = 0.0
      @matches = {}
      @text = Despamilator::Subject::Text.new(text)
    end

    # Records a hit from a filter.
    #
    # details - Hash with :score (numeric amount to add) and :filter (the
    #           object that produced the hit; used as the breakdown key).
    #
    # Raises RuntimeError when either key is missing. Both keys are checked
    # before any state is touched, so a rejected call leaves the total score
    # and the breakdown unchanged.
    #
    # Returns the accumulated score for the given filter.
    def register_match! details
      amount = details[:score] || raise('A score must be supplied')
      filter = details[:filter] || raise('A filter must be supplied')

      @score += amount
      @matches[filter] = (@matches[filter] || 0.0) + amount
    end

    # Returns an Array of {:filter => ..., :score => ...} hashes, one per
    # filter that registered a match, ordered by score, highest first.
    def matches
      breakdown = @matches.map do |filter, score|
        {:filter => filter, :score => score}
      end

      breakdown.sort { |a, b| b[:score] <=> a[:score] }
    end

  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
class Despamilator
  class Subject
    # String subclass carrying the text of a Subject plus a few helpers used
    # by the filters. Instances built through .new are frozen.
    class Text < String

      # text - the String to wrap. The new instance is frozen immediately.
      def initialize text
        super text
        freeze
      end

      # Returns a copy of the text with http(s)/mailto/ftp URIs removed (each
      # URI is removed up to and including the whitespace that ends it).
      #
      # The copy is built from #dup rather than #gsub so that it is an
      # unfrozen Text on every Ruby version: since Ruby 3.0 String#gsub
      # returns a plain String for subclass receivers, which would make
      # chained calls to #count(regexp) or #remove_and_count! fail.
      def without_uris
        stripped = dup
        stripped.gsub!(/\b(?:https?|mailto|ftp):.+?(\s|$)/i, '')
        stripped
      end

      # Splits the text on runs of non-word characters.
      def words
        split(/\W+/)
      end

      # Counts occurrences of pattern. NOTE: this replaces String#count.
      # When the pattern has capture groups, every non-nil capture is
      # counted, so a pattern with two groups that both participate counts
      # twice per match.
      def count pattern
        scan(pattern).flatten.compact.length
      end

      # Removes every occurrence of pattern from the receiver in place and
      # returns the #count taken before removal. Fails on a frozen receiver
      # (i.e. one created through .new) when there is something to remove.
      def remove_and_count! pattern
        total = count(pattern)
        gsub!(pattern, '')
        total
      end

    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: despamilator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: '2.1'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,102 +9,58 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
13
|
-
default_executable:
|
12
|
+
date: 2011-09-11 00:00:00.000000000Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
17
|
-
requirement: &
|
15
|
+
name: domainatrix
|
16
|
+
requirement: &70312466224620 !ruby/object:Gem::Requirement
|
18
17
|
none: false
|
19
18
|
requirements:
|
20
19
|
- - ! '>='
|
21
20
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
23
|
-
type: :
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
24
23
|
prerelease: false
|
25
|
-
version_requirements: *
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: hoe
|
28
|
-
requirement: &2730610 !ruby/object:Gem::Requirement
|
29
|
-
none: false
|
30
|
-
requirements:
|
31
|
-
- - ! '>='
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 2.7.0
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: *2730610
|
24
|
+
version_requirements: *70312466224620
|
37
25
|
description: ! 'Despamilator is a plugin based spam detector designed for use on your
|
38
|
-
web forms borne out of two annoyances:
|
39
|
-
|
40
|
-
|
41
|
-
will apply
|
42
|
-
|
43
|
-
some commonly used heuristics from the world of anti-spam to help you decide whether
|
44
|
-
your users are human or machine.'
|
26
|
+
web forms borne out of two annoyances: Spam being submitted in my web forms and
|
27
|
+
CAPTCHAS being intrusive. Despamilator will apply some commonly used heuristics
|
28
|
+
from the world of anti-spam to help you decide whether your users are human or machine.'
|
45
29
|
email:
|
46
30
|
- moowahaha@hotmail.com
|
47
31
|
executables: []
|
48
32
|
extensions: []
|
49
|
-
extra_rdoc_files:
|
50
|
-
- History.txt
|
51
|
-
- Manifest.txt
|
52
|
-
- PostInstall.txt
|
53
|
-
- conf/unusual_characters.txt
|
33
|
+
extra_rdoc_files: []
|
54
34
|
files:
|
55
|
-
- .rspec
|
56
|
-
- .rvmrc
|
57
|
-
- Gemfile
|
58
|
-
- Gemfile.lock
|
59
|
-
- History.txt
|
60
|
-
- Manifest.txt
|
61
|
-
- PostInstall.txt
|
62
|
-
- README.rdoc
|
63
|
-
- Rakefile
|
64
|
-
- conf/unusual_characters.txt
|
65
|
-
- despamilator.gemspec
|
66
|
-
- lib/despamilator.rb
|
67
|
-
- lib/despamilator/filter.rb
|
68
35
|
- lib/despamilator/filter/gtubs_test_filter.rb
|
69
36
|
- lib/despamilator/filter/html_tags.rb
|
70
37
|
- lib/despamilator/filter/ip_address_url.rb
|
71
38
|
- lib/despamilator/filter/long_words.rb
|
39
|
+
- lib/despamilator/filter/mixed_case.rb
|
72
40
|
- lib/despamilator/filter/naughty_words.rb
|
73
41
|
- lib/despamilator/filter/numbers_and_words.rb
|
42
|
+
- lib/despamilator/filter/obfuscated_urls.rb
|
43
|
+
- lib/despamilator/filter/prices.rb
|
74
44
|
- lib/despamilator/filter/script_tag.rb
|
75
45
|
- lib/despamilator/filter/shouting.rb
|
46
|
+
- lib/despamilator/filter/spammy_tlds.rb
|
76
47
|
- lib/despamilator/filter/square_brackets.rb
|
77
48
|
- lib/despamilator/filter/trailing_number.rb
|
78
49
|
- lib/despamilator/filter/unusual_characters.rb
|
79
50
|
- lib/despamilator/filter/urls.rb
|
80
|
-
- lib/despamilator/
|
81
|
-
-
|
82
|
-
-
|
83
|
-
-
|
84
|
-
-
|
85
|
-
-
|
86
|
-
-
|
87
|
-
-
|
88
|
-
-
|
89
|
-
- spec/filters/naughty_words_spec.rb
|
90
|
-
- spec/filters/numbers_and_words_spec.rb
|
91
|
-
- spec/filters/script_tag_spec.rb
|
92
|
-
- spec/filters/shouting_spec.rb
|
93
|
-
- spec/filters/square_brackets_spec.rb
|
94
|
-
- spec/filters/trailing_number_spec.rb
|
95
|
-
- spec/filters/unusual_characters_spec.rb
|
96
|
-
- spec/filters/urls_spec.rb
|
97
|
-
- spec/helpers/corpus_helper.rb
|
98
|
-
- spec/helpers/filter_helper.rb
|
99
|
-
- spec/helpers/spec_helper.rb
|
100
|
-
- tasks/test.rake
|
101
|
-
has_rdoc: true
|
51
|
+
- lib/despamilator/filter/very_long_domain_name.rb
|
52
|
+
- lib/despamilator/filter/weird_punctuation.rb
|
53
|
+
- lib/despamilator/filter.rb
|
54
|
+
- lib/despamilator/subject/text.rb
|
55
|
+
- lib/despamilator/subject.rb
|
56
|
+
- lib/despamilator/version.rb
|
57
|
+
- lib/despamilator.rb
|
58
|
+
- README.rdoc
|
59
|
+
- History.txt
|
102
60
|
homepage: http://github.com/moowahaha/despamilator
|
103
61
|
licenses: []
|
104
|
-
post_install_message:
|
105
|
-
rdoc_options:
|
106
|
-
- --main
|
107
|
-
- README.rdoc
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options: []
|
108
64
|
require_paths:
|
109
65
|
- lib
|
110
66
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -118,13 +74,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
74
|
requirements:
|
119
75
|
- - ! '>='
|
120
76
|
- !ruby/object:Gem::Version
|
121
|
-
version:
|
77
|
+
version: 1.3.6
|
122
78
|
requirements: []
|
123
79
|
rubyforge_project: despamilator
|
124
|
-
rubygems_version: 1.
|
80
|
+
rubygems_version: 1.8.6
|
125
81
|
signing_key:
|
126
82
|
specification_version: 3
|
127
|
-
summary:
|
128
|
-
web forms borne out of two annoyances: Spam being submitted in my web forms and
|
129
|
-
CAPTCHAS being intrusive'
|
83
|
+
summary: Stop web form Spam!
|
130
84
|
test_files: []
|
data/.rspec
DELETED
data/.rvmrc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
rvm --create use 1.9.2@despamilator
|
data/Gemfile
DELETED