RubyGems - despamilator - Versions diffs - 2.0.1 → 2.1 - Mend

despamilator 2.0.1 → 2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

data/History.txt +7 -0
data/lib/despamilator.rb +38 -7
data/lib/despamilator/filter.rb +39 -23
data/lib/despamilator/filter/gtubs_test_filter.rb +4 -4
data/lib/despamilator/filter/html_tags.rb +9 -7
data/lib/despamilator/filter/ip_address_url.rb +6 -4
data/lib/despamilator/filter/long_words.rb +7 -5
data/lib/despamilator/filter/mixed_case.rb +21 -0
data/lib/despamilator/filter/naughty_words.rb +5 -5
data/lib/despamilator/filter/numbers_and_words.rb +19 -11
data/lib/despamilator/filter/obfuscated_urls.rb +41 -0
data/lib/despamilator/filter/prices.rb +19 -0
data/lib/despamilator/filter/script_tag.rb +4 -4
data/lib/despamilator/filter/shouting.rb +9 -6
data/lib/despamilator/filter/spammy_tlds.rb +22 -0
data/lib/despamilator/filter/square_brackets.rb +5 -5
data/lib/despamilator/filter/trailing_number.rb +4 -4
data/lib/despamilator/filter/unusual_characters.rb +5 -5
data/lib/despamilator/filter/urls.rb +7 -9
data/lib/despamilator/filter/very_long_domain_name.rb +27 -0
data/lib/despamilator/filter/weird_punctuation.rb +44 -0
data/lib/despamilator/subject.rb +30 -0
data/lib/despamilator/subject/text.rb +32 -0
data/lib/despamilator/version.rb +3 -0
metadata +29 -75
data/.rspec +0 -2
data/.rvmrc +0 -1
data/Gemfile +0 -12
data/Gemfile.lock +0 -47
data/Manifest.txt +0 -46
data/PostInstall.txt +0 -1
data/Rakefile +0 -39
data/conf/unusual_characters.txt +0 -6674
data/despamilator.gemspec +0 -38
data/lib/despamilator/filter_base.rb +0 -82
data/scripts/despamilator_score.rb +0 -25
data/scripts/from_file.rb +0 -26
data/spec/despamilator_spec.rb +0 -13
data/spec/filter_base_spec.rb +0 -30
data/spec/filters/gtubs_test_filter_spec.rb +0 -9
data/spec/filters/html_tags_spec.rb +0 -129
data/spec/filters/ip_address_url_spec.rb +0 -11
data/spec/filters/long_words_spec.rb +0 -11
data/spec/filters/naughty_words_spec.rb +0 -11
data/spec/filters/numbers_and_words_spec.rb +0 -34
data/spec/filters/script_tag_spec.rb +0 -22
data/spec/filters/shouting_spec.rb +0 -45
data/spec/filters/square_brackets_spec.rb +0 -11
data/spec/filters/trailing_number_spec.rb +0 -10
data/spec/filters/unusual_characters_spec.rb +0 -9
data/spec/filters/urls_spec.rb +0 -11
data/spec/helpers/corpus_helper.rb +0 -5
data/spec/helpers/filter_helper.rb +0 -59
data/spec/helpers/spec_helper.rb +0 -6
data/tasks/test.rake +0 -6

data/spec/filters/shouting_spec.rb DELETED

@@ -1,45 +0,0 @@
-describe DespamilatorFilter::Shouting do
-  the_name_should_be 'Shouting'
-  the_description_should_be 'Detects and scores shouting (all caps)'
-  despamilator_should_apply_the_filter_for('this lil string is 50 PERCENT SHOUTING')
-  a_single_match_of('this lil string is 50 PERCENT SHOUTING', should_score: 0.25)
-  a_multiple_match_of('HELLO THERE!! THIS IS SHOUTING!!', should_score: [0.5, 1.times])
-  describe "exceptions" do
-    before :all do
-      @filter = DespamilatorFilter::Shouting.new
-    end
-    it "should strip out HTML" do
-      @filter.parse('<H1>this is a flipping html tag whose contents is very long</h1>')
-      @filter.score.should == 0
-    end
-    it "should ignore strings less than 20 characters long" do
-      @filter.parse('ABCD EFG HIJKLM NOP')
-      @filter.score.should == 0
-    end
-  end
-  [
-          ['this is a lowercased string', 0],
-          ['This is a String with Capital Letters', 0],
-          ['this lil string is 50 PERCENT SHOUTING', 0.25],
-          ['THIS LIL STRING IS 100 PERCENT SHOUTING', 0.5]
-  ].each do |string, expected_score|
-    it "should score the string '#{string}' based on a percentage of uppercase words" do
-      filter = DespamilatorFilter::Shouting.new
-      filter.parse(string)
-      filter.score.should == expected_score
-    end
-  end
-end

data/spec/filters/square_brackets_spec.rb DELETED

@@ -1,11 +0,0 @@
-describe DespamilatorFilter::SquareBrackets do
-  the_name_should_be 'Square Brackets'
-  the_description_should_be 'Detects each square bracket in a string'
-  despamilator_should_apply_the_filter_for('[')
-  a_single_match_of('[', should_score: 0.05)
-  a_multiple_match_of('[]', should_score: [0.1, 2.times])
-end

data/spec/filters/trailing_number_spec.rb DELETED

@@ -1,10 +0,0 @@
-describe DespamilatorFilter::TrailingNumber do
-  the_name_should_be 'Trailing Number'
-  the_description_should_be 'Detects a trailing cache busting number'
-  despamilator_should_apply_the_filter_for('hello 123 ')
-  a_single_match_of('hello 123', should_score: 0.1)
-end

data/spec/filters/unusual_characters_spec.rb DELETED

@@ -1,9 +0,0 @@
-describe DespamilatorFilter::UnusualCharacters do
-  the_name_should_be 'Unusual Characters'
-  the_description_should_be 'Detects and scores each occurrence of an unusual 2 or 3 character combination'
-  despamilator_should_apply_the_filter_for('sx')
-  a_single_match_of('sx', should_score: 0.05)
-  a_multiple_match_of('sxsx', should_score: [0.1, 2.times])
-end

data/spec/filters/urls_spec.rb DELETED

@@ -1,11 +0,0 @@
-describe DespamilatorFilter::URLs do
-  the_name_should_be 'URLs'
-  the_description_should_be 'Detects each url in a string'
-  despamilator_should_apply_the_filter_for('http://www.blah.com')
-  a_single_match_of('http://www.blah.com', should_score: 0.4)
-  a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [0.8, 2.times])
-end

data/spec/helpers/corpus_helper.rb DELETED

@@ -1,5 +0,0 @@
-require 'zlib'
-def unzip_file filename
-  Zlib::GzipReader.open(filename).read
-end

data/spec/helpers/filter_helper.rb DELETED

@@ -1,59 +0,0 @@
-def the_name_should_be expected_name
-  it "should have a name" do
-    described_class.new.name.should == expected_name
-  end
-end
-def the_description_should_be expected_description
-  it "should have a description" do
-    described_class.new.description.should == expected_description
-  end
-end
-def a_single_match_of string, expectation
-  describe 'detecting a single match' do
-    before :all do
-      @filter = described_class.new
-      @filter.parse(string)
-    end
-    it "should only match once" do
-      @filter.matches.should == 1
-    end
-    it "should have a score" do
-      @filter.score.should == expectation[:should_score]
-    end
-  end
-end
-def a_multiple_match_of string, expectation
-  describe 'detecting a multiple matches' do
-    before :all do
-      @filter = described_class.new
-      @filter.parse(string)
-    end
-    it "should match many times" do
-      @filter.matches.should == expectation[:should_score].last.count
-    end
-    it "should have a score" do
-      @filter.score.should == expectation[:should_score].first
-    end
-  end
-end
-def despamilator_should_apply_the_filter_for string
-    it "should be applied during filtering" do
-      filter_name = described_class.new.name
-      despamilator = Despamilator.new(string)
-      despamilator.matched_by.collect { |f| f.name == filter_name }.should_not be_empty
-    end
-end

data/spec/helpers/spec_helper.rb DELETED

@@ -1,6 +0,0 @@
-require 'one_hundred_percent_coverage' if ENV['WITH_COVERAGE'].to_i == 1
-require File.join(File.dirname(__FILE__), '..', '..', 'lib', 'despamilator')
-Dir.glob(File.join(File.dirname(__FILE__), '*.rb')).each do |file|
-  require file
-end

data/tasks/test.rake DELETED

@@ -1,6 +0,0 @@
-ENV['WITH_COVERAGE'] = '1'
-desc "Run the spec tests with coverage"
-task :test do
-  Rake::Task[:spec].invoke
-end