despamilator 2.0.1 → 2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/lib/despamilator.rb +38 -7
- data/lib/despamilator/filter.rb +39 -23
- data/lib/despamilator/filter/gtubs_test_filter.rb +4 -4
- data/lib/despamilator/filter/html_tags.rb +9 -7
- data/lib/despamilator/filter/ip_address_url.rb +6 -4
- data/lib/despamilator/filter/long_words.rb +7 -5
- data/lib/despamilator/filter/mixed_case.rb +21 -0
- data/lib/despamilator/filter/naughty_words.rb +5 -5
- data/lib/despamilator/filter/numbers_and_words.rb +19 -11
- data/lib/despamilator/filter/obfuscated_urls.rb +41 -0
- data/lib/despamilator/filter/prices.rb +19 -0
- data/lib/despamilator/filter/script_tag.rb +4 -4
- data/lib/despamilator/filter/shouting.rb +9 -6
- data/lib/despamilator/filter/spammy_tlds.rb +22 -0
- data/lib/despamilator/filter/square_brackets.rb +5 -5
- data/lib/despamilator/filter/trailing_number.rb +4 -4
- data/lib/despamilator/filter/unusual_characters.rb +5 -5
- data/lib/despamilator/filter/urls.rb +7 -9
- data/lib/despamilator/filter/very_long_domain_name.rb +27 -0
- data/lib/despamilator/filter/weird_punctuation.rb +44 -0
- data/lib/despamilator/subject.rb +30 -0
- data/lib/despamilator/subject/text.rb +32 -0
- data/lib/despamilator/version.rb +3 -0
- metadata +29 -75
- data/.rspec +0 -2
- data/.rvmrc +0 -1
- data/Gemfile +0 -12
- data/Gemfile.lock +0 -47
- data/Manifest.txt +0 -46
- data/PostInstall.txt +0 -1
- data/Rakefile +0 -39
- data/conf/unusual_characters.txt +0 -6674
- data/despamilator.gemspec +0 -38
- data/lib/despamilator/filter_base.rb +0 -82
- data/scripts/despamilator_score.rb +0 -25
- data/scripts/from_file.rb +0 -26
- data/spec/despamilator_spec.rb +0 -13
- data/spec/filter_base_spec.rb +0 -30
- data/spec/filters/gtubs_test_filter_spec.rb +0 -9
- data/spec/filters/html_tags_spec.rb +0 -129
- data/spec/filters/ip_address_url_spec.rb +0 -11
- data/spec/filters/long_words_spec.rb +0 -11
- data/spec/filters/naughty_words_spec.rb +0 -11
- data/spec/filters/numbers_and_words_spec.rb +0 -34
- data/spec/filters/script_tag_spec.rb +0 -22
- data/spec/filters/shouting_spec.rb +0 -45
- data/spec/filters/square_brackets_spec.rb +0 -11
- data/spec/filters/trailing_number_spec.rb +0 -10
- data/spec/filters/unusual_characters_spec.rb +0 -9
- data/spec/filters/urls_spec.rb +0 -11
- data/spec/helpers/corpus_helper.rb +0 -5
- data/spec/helpers/filter_helper.rb +0 -59
- data/spec/helpers/spec_helper.rb +0 -6
- data/tasks/test.rake +0 -6
@@ -1,45 +0,0 @@
|
|
1
|
-
# Specification for the Shouting filter: checks its name and description,
# that Despamilator applies it, and the scores expected for text containing
# different proportions of upper-cased words.
describe DespamilatorFilter::Shouting do
  the_name_should_be('Shouting')
  the_description_should_be('Detects and scores shouting (all caps)')

  despamilator_should_apply_the_filter_for 'this lil string is 50 PERCENT SHOUTING'

  a_single_match_of 'this lil string is 50 PERCENT SHOUTING', should_score: 0.25
  a_multiple_match_of 'HELLO THERE!! THIS IS SHOUTING!!', should_score: [0.5, 1.times]

  describe "exceptions" do
    # A single filter instance is shared by the examples in this group.
    before(:all) { @filter = DespamilatorFilter::Shouting.new }

    it "should strip out HTML" do
      @filter.parse('<H1>this is a flipping html tag whose contents is very long</h1>')
      @filter.score.should == 0
    end

    it "should ignore strings less than 20 characters long" do
      @filter.parse('ABCD EFG HIJKLM NOP')
      @filter.score.should == 0
    end
  end

  # Input text mapped to the score the filter is expected to report for it.
  {
    'this is a lowercased string'             => 0,
    'This is a String with Capital Letters'   => 0,
    'this lil string is 50 PERCENT SHOUTING'  => 0.25,
    'THIS LIL STRING IS 100 PERCENT SHOUTING' => 0.5
  }.each do |text, score|
    it "should score the string '#{text}' based on a percentage of uppercase words" do
      shouting = DespamilatorFilter::Shouting.new

      shouting.parse(text)
      shouting.score.should == score
    end
  end
end
|
@@ -1,11 +0,0 @@
|
|
1
|
-
# Specification for the SquareBrackets filter: metadata, registration with
# Despamilator, and the score for one and for two brackets.
describe DespamilatorFilter::SquareBrackets do
  the_name_should_be('Square Brackets')
  the_description_should_be('Detects each square bracket in a string')

  despamilator_should_apply_the_filter_for '['

  a_single_match_of '[', should_score: 0.05
  a_multiple_match_of '[]', should_score: [0.1, 2.times]
end
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# Specification for the TrailingNumber filter: metadata, registration with
# Despamilator, and the score for a single trailing number.
describe DespamilatorFilter::TrailingNumber do
  the_name_should_be('Trailing Number')
  the_description_should_be('Detects a trailing cache busting number')

  # Note: this input deliberately keeps its trailing space.
  despamilator_should_apply_the_filter_for 'hello 123 '

  a_single_match_of 'hello 123', should_score: 0.1
end
|
@@ -1,9 +0,0 @@
|
|
1
|
-
# Specification for the UnusualCharacters filter: metadata, registration with
# Despamilator, and the score for one and for two unusual combinations.
describe DespamilatorFilter::UnusualCharacters do
  the_name_should_be('Unusual Characters')
  the_description_should_be('Detects and scores each occurrence of an unusual 2 or 3 character combination')

  despamilator_should_apply_the_filter_for 'sx'

  a_single_match_of 'sx', should_score: 0.05
  a_multiple_match_of 'sxsx', should_score: [0.1, 2.times]
end
|
data/spec/filters/urls_spec.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
# Specification for the URLs filter: metadata, registration with
# Despamilator, and the score for one and for two URLs.
describe DespamilatorFilter::URLs do
  the_name_should_be('URLs')
  the_description_should_be('Detects each url in a string')

  despamilator_should_apply_the_filter_for 'http://www.blah.com'

  a_single_match_of 'http://www.blah.com', should_score: 0.4
  a_multiple_match_of 'http://www.blah.com http://www.poop.com', should_score: [0.8, 2.times]
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# Declares an example asserting that a new instance of the described class
# reports +expected_name+ as its name.
def the_name_should_be(expected_name)
  it "should have a name" do
    filter = described_class.new
    filter.name.should == expected_name
  end
end
|
6
|
-
|
7
|
-
# Declares an example asserting that a new instance of the described class
# reports +expected_description+ as its description.
def the_description_should_be(expected_description)
  it "should have a description" do
    filter = described_class.new
    filter.description.should == expected_description
  end
end
|
12
|
-
|
13
|
-
# Declares a nested example group that parses +string+ with a new instance of
# the described class and asserts that it matched exactly once and that its
# score equals +expectation[:should_score]+.
def a_single_match_of(string, expectation)
  describe 'detecting a single match' do
    # Parsed once; both examples below inspect the same filter instance.
    before(:all) do
      @filter = described_class.new
      @filter.parse(string)
    end

    it("should only match once") { @filter.matches.should == 1 }

    it "should have a score" do
      expected_score = expectation[:should_score]
      @filter.score.should == expected_score
    end
  end
end
|
31
|
-
|
32
|
-
# Declares a nested example group that parses +string+ with a new instance of
# the described class. +expectation[:should_score]+ is a collection whose
# first element is the expected score and whose last element is counted
# (e.g. +2.times+) to obtain the expected number of matches.
def a_multiple_match_of(string, expectation)
  describe 'detecting a multiple matches' do
    # Parsed once; both examples below inspect the same filter instance.
    before(:all) do
      @filter = described_class.new
      @filter.parse(string)
    end

    it "should match many times" do
      expected_matches = expectation[:should_score].last.count
      @filter.matches.should == expected_matches
    end

    it "should have a score" do
      expected_score = expectation[:should_score].first
      @filter.score.should == expected_score
    end
  end
end
|
50
|
-
|
51
|
-
# Declares an example asserting that running Despamilator over +string+
# reports the described filter among the filters that matched.
def despamilator_should_apply_the_filter_for(string)
  it "should be applied during filtering" do
    filter_name = described_class.new.name
    despamilator = Despamilator.new(string)

    # Use select, not collect: collect yields one boolean per matched filter,
    # so its result is non-empty whenever *any* filter matched and the
    # assertion could never detect that this particular filter was missing.
    despamilator.matched_by.select { |f| f.name == filter_name }.should_not be_empty
  end
end
|
data/spec/helpers/spec_helper.rb
DELETED