despamilator 2.0.1 → 2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/lib/despamilator.rb +38 -7
- data/lib/despamilator/filter.rb +39 -23
- data/lib/despamilator/filter/gtubs_test_filter.rb +4 -4
- data/lib/despamilator/filter/html_tags.rb +9 -7
- data/lib/despamilator/filter/ip_address_url.rb +6 -4
- data/lib/despamilator/filter/long_words.rb +7 -5
- data/lib/despamilator/filter/mixed_case.rb +21 -0
- data/lib/despamilator/filter/naughty_words.rb +5 -5
- data/lib/despamilator/filter/numbers_and_words.rb +19 -11
- data/lib/despamilator/filter/obfuscated_urls.rb +41 -0
- data/lib/despamilator/filter/prices.rb +19 -0
- data/lib/despamilator/filter/script_tag.rb +4 -4
- data/lib/despamilator/filter/shouting.rb +9 -6
- data/lib/despamilator/filter/spammy_tlds.rb +22 -0
- data/lib/despamilator/filter/square_brackets.rb +5 -5
- data/lib/despamilator/filter/trailing_number.rb +4 -4
- data/lib/despamilator/filter/unusual_characters.rb +5 -5
- data/lib/despamilator/filter/urls.rb +7 -9
- data/lib/despamilator/filter/very_long_domain_name.rb +27 -0
- data/lib/despamilator/filter/weird_punctuation.rb +44 -0
- data/lib/despamilator/subject.rb +30 -0
- data/lib/despamilator/subject/text.rb +32 -0
- data/lib/despamilator/version.rb +3 -0
- metadata +29 -75
- data/.rspec +0 -2
- data/.rvmrc +0 -1
- data/Gemfile +0 -12
- data/Gemfile.lock +0 -47
- data/Manifest.txt +0 -46
- data/PostInstall.txt +0 -1
- data/Rakefile +0 -39
- data/conf/unusual_characters.txt +0 -6674
- data/despamilator.gemspec +0 -38
- data/lib/despamilator/filter_base.rb +0 -82
- data/scripts/despamilator_score.rb +0 -25
- data/scripts/from_file.rb +0 -26
- data/spec/despamilator_spec.rb +0 -13
- data/spec/filter_base_spec.rb +0 -30
- data/spec/filters/gtubs_test_filter_spec.rb +0 -9
- data/spec/filters/html_tags_spec.rb +0 -129
- data/spec/filters/ip_address_url_spec.rb +0 -11
- data/spec/filters/long_words_spec.rb +0 -11
- data/spec/filters/naughty_words_spec.rb +0 -11
- data/spec/filters/numbers_and_words_spec.rb +0 -34
- data/spec/filters/script_tag_spec.rb +0 -22
- data/spec/filters/shouting_spec.rb +0 -45
- data/spec/filters/square_brackets_spec.rb +0 -11
- data/spec/filters/trailing_number_spec.rb +0 -10
- data/spec/filters/unusual_characters_spec.rb +0 -9
- data/spec/filters/urls_spec.rb +0 -11
- data/spec/helpers/corpus_helper.rb +0 -5
- data/spec/helpers/filter_helper.rb +0 -59
- data/spec/helpers/spec_helper.rb +0 -6
- data/tasks/test.rake +0 -6
@@ -1,45 +0,0 @@
|
|
1
|
-
# Specification for DespamilatorFilter::Shouting: checks its name and
# description, that Despamilator applies it, the single/multiple match
# scores, and the score produced for a handful of sample strings.
describe DespamilatorFilter::Shouting do
  the_name_should_be('Shouting')
  the_description_should_be('Detects and scores shouting (all caps)')

  despamilator_should_apply_the_filter_for('this lil string is 50 PERCENT SHOUTING')

  a_single_match_of('this lil string is 50 PERCENT SHOUTING', should_score: 0.25)
  a_multiple_match_of('HELLO THERE!! THIS IS SHOUTING!!', should_score: [0.5, 1.times])

  describe "exceptions" do
    # One filter instance is built once and reused by both examples below.
    before(:all) { @filter = DespamilatorFilter::Shouting.new }

    it "should strip out HTML" do
      @filter.parse('<H1>this is a flipping html tag whose contents is very long</h1>')
      @filter.score.should == 0
    end

    it "should ignore strings less than 20 characters long" do
      @filter.parse('ABCD EFG HIJKLM NOP')
      @filter.score.should == 0
    end
  end

  # Sample text => score the filter is expected to report for it.
  {
    'this is a lowercased string' => 0,
    'This is a String with Capital Letters' => 0,
    'this lil string is 50 PERCENT SHOUTING' => 0.25,
    'THIS LIL STRING IS 100 PERCENT SHOUTING' => 0.5
  }.each do |string, expected_score|
    it "should score the string '#{string}' based on a percentage of uppercase words" do
      shouting = DespamilatorFilter::Shouting.new
      shouting.parse(string)
      shouting.score.should == expected_score
    end
  end
end
|
@@ -1,11 +0,0 @@
|
|
1
|
-
# Specification for DespamilatorFilter::SquareBrackets: name, description,
# application by Despamilator, and single/multiple match scoring.
describe DespamilatorFilter::SquareBrackets do
  the_name_should_be('Square Brackets')
  the_description_should_be('Detects each square bracket in a string')

  despamilator_should_apply_the_filter_for('[')

  a_single_match_of('[', should_score: 0.05)
  a_multiple_match_of('[]', should_score: [0.1, 2.times])
end
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# Specification for DespamilatorFilter::TrailingNumber: name, description,
# application by Despamilator, and the score of a single match.
describe DespamilatorFilter::TrailingNumber do
  the_name_should_be('Trailing Number')
  the_description_should_be('Detects a trailing cache busting number')

  # Note the trailing space in this sample; the single-match sample has none.
  despamilator_should_apply_the_filter_for('hello 123 ')

  a_single_match_of('hello 123', should_score: 0.1)
end
|
@@ -1,9 +0,0 @@
|
|
1
|
-
# Specification for DespamilatorFilter::UnusualCharacters: name, description,
# application by Despamilator, and single/multiple match scoring.
describe DespamilatorFilter::UnusualCharacters do
  the_name_should_be('Unusual Characters')
  the_description_should_be('Detects and scores each occurrence of an unusual 2 or 3 character combination')

  despamilator_should_apply_the_filter_for('sx')

  a_single_match_of('sx', should_score: 0.05)
  a_multiple_match_of('sxsx', should_score: [0.1, 2.times])
end
|
data/spec/filters/urls_spec.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
# Specification for DespamilatorFilter::URLs: name, description,
# application by Despamilator, and single/multiple match scoring.
describe DespamilatorFilter::URLs do
  the_name_should_be('URLs')
  the_description_should_be('Detects each url in a string')

  despamilator_should_apply_the_filter_for('http://www.blah.com')

  a_single_match_of('http://www.blah.com', should_score: 0.4)
  a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [0.8, 2.times])
end
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# Defines an example asserting that a freshly built instance of the
# described class reports +expected_name+ from its #name method.
def the_name_should_be(expected_name)
  it "should have a name" do
    subject_under_test = described_class.new
    subject_under_test.name.should == expected_name
  end
end
|
6
|
-
|
7
|
-
# Defines an example asserting that a freshly built instance of the
# described class reports +expected_description+ from its #description method.
def the_description_should_be(expected_description)
  it "should have a description" do
    subject_under_test = described_class.new
    subject_under_test.description.should == expected_description
  end
end
|
12
|
-
|
13
|
-
# Defines a nested example group that parses +string+ once with a new
# instance of the described class and then checks the outcome.
#
# string      - text handed to the filter's #parse.
# expectation - Hash whose :should_score entry is the score the filter
#               must report.
def a_single_match_of(string, expectation)
  describe 'detecting a single match' do
    # Parse once up front; both examples inspect the same filter instance.
    before(:all) do
      @filter = described_class.new
      @filter.parse(string)
    end

    it "should only match once" do
      @filter.matches.should == 1
    end

    it "should have a score" do
      wanted_score = expectation[:should_score]
      @filter.score.should == wanted_score
    end
  end
end
|
31
|
-
|
32
|
-
# Defines a nested example group that parses +string+ once with a new
# instance of the described class and checks match count and score.
#
# string      - text handed to the filter's #parse.
# expectation - Hash whose :should_score entry is a list: its first item is
#               the expected score and its last item answers #count with the
#               expected number of matches (callers in this suite pass
#               values such as 2.times).
def a_multiple_match_of(string, expectation)
  describe 'detecting a multiple matches' do
    # Parse once up front; both examples inspect the same filter instance.
    before(:all) do
      @filter = described_class.new
      @filter.parse(string)
    end

    it "should match many times" do
      score_and_times = expectation[:should_score]
      @filter.matches.should == score_and_times.last.count
    end

    it "should have a score" do
      score_and_times = expectation[:should_score]
      @filter.score.should == score_and_times.first
    end
  end
end
|
50
|
-
|
51
|
-
# Defines an example asserting that the described filter is among the
# filters Despamilator reports (via #matched_by) for +string+.
#
# string - text passed to Despamilator.new.
#
# The matched filters are narrowed with #select so that only filters whose
# name equals the described filter's name remain. Mapping each filter to a
# boolean instead would produce a non-empty list whenever *any* filter
# matched, letting the example pass even if this filter never fired.
def despamilator_should_apply_the_filter_for(string)
  it "should be applied during filtering" do
    filter_name = described_class.new.name
    despamilator = Despamilator.new(string)
    despamilator.matched_by.select { |f| f.name == filter_name }.should_not be_empty
  end
end
|
data/spec/helpers/spec_helper.rb
DELETED