despamilator 2.0.1 → 2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. data/History.txt +7 -0
  2. data/lib/despamilator.rb +38 -7
  3. data/lib/despamilator/filter.rb +39 -23
  4. data/lib/despamilator/filter/gtubs_test_filter.rb +4 -4
  5. data/lib/despamilator/filter/html_tags.rb +9 -7
  6. data/lib/despamilator/filter/ip_address_url.rb +6 -4
  7. data/lib/despamilator/filter/long_words.rb +7 -5
  8. data/lib/despamilator/filter/mixed_case.rb +21 -0
  9. data/lib/despamilator/filter/naughty_words.rb +5 -5
  10. data/lib/despamilator/filter/numbers_and_words.rb +19 -11
  11. data/lib/despamilator/filter/obfuscated_urls.rb +41 -0
  12. data/lib/despamilator/filter/prices.rb +19 -0
  13. data/lib/despamilator/filter/script_tag.rb +4 -4
  14. data/lib/despamilator/filter/shouting.rb +9 -6
  15. data/lib/despamilator/filter/spammy_tlds.rb +22 -0
  16. data/lib/despamilator/filter/square_brackets.rb +5 -5
  17. data/lib/despamilator/filter/trailing_number.rb +4 -4
  18. data/lib/despamilator/filter/unusual_characters.rb +5 -5
  19. data/lib/despamilator/filter/urls.rb +7 -9
  20. data/lib/despamilator/filter/very_long_domain_name.rb +27 -0
  21. data/lib/despamilator/filter/weird_punctuation.rb +44 -0
  22. data/lib/despamilator/subject.rb +30 -0
  23. data/lib/despamilator/subject/text.rb +32 -0
  24. data/lib/despamilator/version.rb +3 -0
  25. metadata +29 -75
  26. data/.rspec +0 -2
  27. data/.rvmrc +0 -1
  28. data/Gemfile +0 -12
  29. data/Gemfile.lock +0 -47
  30. data/Manifest.txt +0 -46
  31. data/PostInstall.txt +0 -1
  32. data/Rakefile +0 -39
  33. data/conf/unusual_characters.txt +0 -6674
  34. data/despamilator.gemspec +0 -38
  35. data/lib/despamilator/filter_base.rb +0 -82
  36. data/scripts/despamilator_score.rb +0 -25
  37. data/scripts/from_file.rb +0 -26
  38. data/spec/despamilator_spec.rb +0 -13
  39. data/spec/filter_base_spec.rb +0 -30
  40. data/spec/filters/gtubs_test_filter_spec.rb +0 -9
  41. data/spec/filters/html_tags_spec.rb +0 -129
  42. data/spec/filters/ip_address_url_spec.rb +0 -11
  43. data/spec/filters/long_words_spec.rb +0 -11
  44. data/spec/filters/naughty_words_spec.rb +0 -11
  45. data/spec/filters/numbers_and_words_spec.rb +0 -34
  46. data/spec/filters/script_tag_spec.rb +0 -22
  47. data/spec/filters/shouting_spec.rb +0 -45
  48. data/spec/filters/square_brackets_spec.rb +0 -11
  49. data/spec/filters/trailing_number_spec.rb +0 -10
  50. data/spec/filters/unusual_characters_spec.rb +0 -9
  51. data/spec/filters/urls_spec.rb +0 -11
  52. data/spec/helpers/corpus_helper.rb +0 -5
  53. data/spec/helpers/filter_helper.rb +0 -59
  54. data/spec/helpers/spec_helper.rb +0 -6
  55. data/tasks/test.rake +0 -6
data/spec/filters/shouting_spec.rb DELETED
@@ -1,45 +0,0 @@
1
- describe DespamilatorFilter::Shouting do
2
-
3
- the_name_should_be 'Shouting'
4
- the_description_should_be 'Detects and scores shouting (all caps)'
5
-
6
- despamilator_should_apply_the_filter_for('this lil string is 50 PERCENT SHOUTING')
7
-
8
- a_single_match_of('this lil string is 50 PERCENT SHOUTING', should_score: 0.25)
9
- a_multiple_match_of('HELLO THERE!! THIS IS SHOUTING!!', should_score: [0.5, 1.times])
10
-
11
- describe "exceptions" do
12
-
13
- before :all do
14
- @filter = DespamilatorFilter::Shouting.new
15
- end
16
-
17
- it "should strip out HTML" do
18
- @filter.parse('<H1>this is a flipping html tag whose contents is very long</h1>')
19
- @filter.score.should == 0
20
- end
21
-
22
- it "should ignore strings less than 20 characters long" do
23
- @filter.parse('ABCD EFG HIJKLM NOP')
24
- @filter.score.should == 0
25
- end
26
-
27
- end
28
-
29
- [
30
- ['this is a lowercased string', 0],
31
- ['This is a String with Capital Letters', 0],
32
- ['this lil string is 50 PERCENT SHOUTING', 0.25],
33
- ['THIS LIL STRING IS 100 PERCENT SHOUTING', 0.5]
34
- ].each do |string, expected_score|
35
-
36
- it "should score the string '#{string}' based on a percentage of uppercase words" do
37
- filter = DespamilatorFilter::Shouting.new
38
-
39
- filter.parse(string)
40
- filter.score.should == expected_score
41
- end
42
-
43
- end
44
-
45
- end
data/spec/filters/square_brackets_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- describe DespamilatorFilter::SquareBrackets do
2
-
3
- the_name_should_be 'Square Brackets'
4
- the_description_should_be 'Detects each square bracket in a string'
5
-
6
- despamilator_should_apply_the_filter_for('[')
7
-
8
- a_single_match_of('[', should_score: 0.05)
9
- a_multiple_match_of('[]', should_score: [0.1, 2.times])
10
-
11
- end
data/spec/filters/trailing_number_spec.rb DELETED
@@ -1,10 +0,0 @@
1
- describe DespamilatorFilter::TrailingNumber do
2
-
3
- the_name_should_be 'Trailing Number'
4
- the_description_should_be 'Detects a trailing cache busting number'
5
-
6
- despamilator_should_apply_the_filter_for('hello 123 ')
7
-
8
- a_single_match_of('hello 123', should_score: 0.1)
9
-
10
- end
data/spec/filters/unusual_characters_spec.rb DELETED
@@ -1,9 +0,0 @@
1
- describe DespamilatorFilter::UnusualCharacters do
2
- the_name_should_be 'Unusual Characters'
3
- the_description_should_be 'Detects and scores each occurrence of an unusual 2 or 3 character combination'
4
-
5
- despamilator_should_apply_the_filter_for('sx')
6
-
7
- a_single_match_of('sx', should_score: 0.05)
8
- a_multiple_match_of('sxsx', should_score: [0.1, 2.times])
9
- end
data/spec/filters/urls_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- describe DespamilatorFilter::URLs do
2
-
3
- the_name_should_be 'URLs'
4
- the_description_should_be 'Detects each url in a string'
5
-
6
- despamilator_should_apply_the_filter_for('http://www.blah.com')
7
-
8
- a_single_match_of('http://www.blah.com', should_score: 0.4)
9
- a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [0.8, 2.times])
10
-
11
- end
data/spec/helpers/corpus_helper.rb DELETED
@@ -1,5 +0,0 @@
1
- require 'zlib'
2
-
3
- def unzip_file filename
4
- Zlib::GzipReader.open(filename).read
5
- end
data/spec/helpers/filter_helper.rb DELETED
@@ -1,59 +0,0 @@
1
- def the_name_should_be expected_name
2
- it "should have a name" do
3
- described_class.new.name.should == expected_name
4
- end
5
- end
6
-
7
- def the_description_should_be expected_description
8
- it "should have a description" do
9
- described_class.new.description.should == expected_description
10
- end
11
- end
12
-
13
- def a_single_match_of string, expectation
14
- describe 'detecting a single match' do
15
-
16
- before :all do
17
- @filter = described_class.new
18
- @filter.parse(string)
19
- end
20
-
21
- it "should only match once" do
22
- @filter.matches.should == 1
23
- end
24
-
25
- it "should have a score" do
26
- @filter.score.should == expectation[:should_score]
27
- end
28
-
29
- end
30
- end
31
-
32
- def a_multiple_match_of string, expectation
33
- describe 'detecting a multiple matches' do
34
-
35
- before :all do
36
- @filter = described_class.new
37
- @filter.parse(string)
38
- end
39
-
40
- it "should match many times" do
41
- @filter.matches.should == expectation[:should_score].last.count
42
- end
43
-
44
- it "should have a score" do
45
- @filter.score.should == expectation[:should_score].first
46
- end
47
-
48
- end
49
- end
50
-
51
- def despamilator_should_apply_the_filter_for string
52
-
53
- it "should be applied during filtering" do
54
- filter_name = described_class.new.name
55
- despamilator = Despamilator.new(string)
56
- despamilator.matched_by.collect { |f| f.name == filter_name }.should_not be_empty
57
- end
58
-
59
- end
data/spec/helpers/spec_helper.rb DELETED
@@ -1,6 +0,0 @@
1
- require 'one_hundred_percent_coverage' if ENV['WITH_COVERAGE'].to_i == 1
2
- require File.join(File.dirname(__FILE__), '..', '..', 'lib', 'despamilator')
3
-
4
- Dir.glob(File.join(File.dirname(__FILE__), '*.rb')).each do |file|
5
- require file
6
- end
data/tasks/test.rake DELETED
@@ -1,6 +0,0 @@
1
- ENV['WITH_COVERAGE'] = '1'
2
-
3
- desc "Run the spec tests with coverage"
4
- task :test do
5
- Rake::Task[:spec].invoke
6
- end