RubyGems - gluttonberg-core - Versions diffs - 2.5.5 → 2.5.6 - Mend

gluttonberg-core 2.5.5 → 2.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

data/lib/gluttonberg/content/despamilator/filter/unusual_characters.rb ADDED Viewed

@@ -0,0 +1,51 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class UnusualCharacters < Despamilator::Filter
+        def name
+          'Unusual Characters'
+        end
+        def description
+          'Detects and scores each occurrence of an unusual 2 or 3 character combination'
+        end
+        def parse subject
+          initialize_combos
+          tokenize(subject.text.without_uris).each do |token|
+            subject.register_match!({:score => 0.05, :filter => self}) if @@combos[token.to_sym]
+          end
+        end
+        private
+        def tokenize text
+          tokens = []
+          text.downcase.split(/[^a-z]/).each do |word|
+            word.chars.each_with_index do |c, i|
+              substr = word[i,i+3]
+              tokens << substr.to_sym if substr.length == 3
+              tokens << substr[0,2].to_sym if substr.length > 1
+            end
+          end
+          tokens
+        end
+        def initialize_combos
+          @@combos ||= {}
+          return @@combos unless @@combos.empty?
+          File.open(File.join(File.dirname(__FILE__), %w{.. conf unusual_characters.txt}), 'r').each do |line|
+            @@combos[line.strip.to_sym] = true
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/urls.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class URLs < Despamilator::Filter
+        def name
+          'URLs'
+        end
+        def description
+          'Detects each url in a string'
+        end
+        def parse subject
+          text = subject.text.downcase.gsub(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
+          matches = text.count(/https?:\/\//)
+          comment_number_of_urls_allowed = Gluttonberg::Setting.get_setting("comment_number_of_urls_allowed")
+          score_for_one_url = 0.4
+          if !comment_number_of_urls_allowed.blank? && comment_number_of_urls_allowed.to_i > 0
+            comment_number_of_urls_allowed = comment_number_of_urls_allowed.to_i
+            score_for_one_url = 1.0 / comment_number_of_urls_allowed.to_i
+          end
+          1.upto(matches > 2 ? 2 : matches) do
+            subject.register_match!({:score => score_for_one_url, :filter => self})
+          end
+          comment_email_as_spam = Gluttonberg::Setting.get_setting("comment_email_as_spam")
+          if comment_email_as_spam == "Yes"
+            text_temp = text.strip
+            extracted_urls = URI.extract(text_temp)
+            subject.register_match!({
+             :score => 1.0, :filter => self
+            }) if extracted_urls.length > 0 && extracted_urls[0] == text_temp
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/very_long_domain_name.rb ADDED Viewed

@@ -0,0 +1,31 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    require 'domainatrix'
+    module DespamilatorFilter
+      class VeryLongDomainName < Despamilator::Filter
+        def name
+          'Very Long Domain Name'
+        end
+        def description
+          'Detects unusually long domain names.'
+        end
+        def parse subject
+          subject.text.scan(URI.regexp).each do |url_parts|
+            url_parts.compact!
+            next if !url_parts[1] or url_parts[1] !~ /(\w|-){5,}\.\w{2,5}/
+            url = Domainatrix.parse('http://' + url_parts[1])
+            subject.register_match!({:score => 0.4, :filter => self}) if url.domain.length > 20
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/weird_punctuation.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class WeirdPunctuation < Despamilator::Filter
+        def name
+          'Weird Punctuation'
+        end
+        def description
+          'Detects unusual use of punctuation.'
+        end
+        def parse subject
+          text = subject.text.without_uris.downcase
+          text.gsub!(/\w&\w/, 'xx')
+          text.gsub!(/[a-z](!|\?)(\s|$)/, 'x')
+          text.gsub!(/(?:#{punctuation}){20,}/, '')
+          matches = text.remove_and_count!(/(?:\W|\s|^)(#{punctuation})/)
+          matches += text.remove_and_count!(/\w,\w/)
+          matches += text.remove_and_count!(/\w\w\.\w/)
+          matches += text.remove_and_count!(/\w\.\w\w/)
+          matches += text.remove_and_count!(/(#{punctuation})(#{punctuation})/)
+          matches += text.remove_and_count!(/(#{punctuation})$/)
+          matches += text.remove_and_count!(/(?:\W|\s|^)\d+(#{punctuation})/)
+          subject.register_match!({:score => 0.03 * matches, :filter => self}) if matches > 0
+        end
+        private
+        def punctuation
+          @punctuation ||= %w{~ ` ! @ # $ % ^ & * _ - + = , / ? | \\ : ;}.map do |punctuation_character|
+            Regexp.escape(punctuation_character)
+          end.join('|')
+          @punctuation
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter.rb ADDED Viewed

@@ -0,0 +1,57 @@
+module Gluttonberg
+  module Content
+    class Despamilator
+    #This class is the base class of all the despamilator filters.
+    #
+    #== EXAMPLE:
+    #
+    #This example is to detect the letter "a". Put the code in
+    #lib/despamilator/filter/detect_letter_a.rb:
+    #
+    #  require 'despamilator/filter_base'
+    #
+    #  module DespamilatorFilter
+    #
+    #    class DetectLetterA < Despamilator::FilterBase
+    #
+    #      def name
+    #        'Detecting the letter A'
+    #      end
+    #
+    #      def description
+    #        'Detects the letter "a" in a string for no reason other than a demo'
+    #      end
+    #
+    #      def parse text
+    #        if text.downcase.scan(/a/)
+    #        # add 0.1 to the score of the text
+    #        self.append_score = 0.1
+    #      end
+    #    end
+    #  end
+      class Filter
+        # The nice description of the filter. Usually no more than a sentence.
+        def description
+          raise "No description defined for #{self.class}"
+        end
+        # This method parses some text. The score is assigned to the same instance.
+        def parse text
+          raise "No parser defined for #{self.class}"
+        end
+        # The one or two word name for the filter.
+        def name
+          raise "No name defined for #{self.class}"
+        end
+      end
+    end
+  end #content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/subject/text.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module Gluttonberg
+  module Content
+    require 'uri'
+    class Despamilator
+      class Subject
+        class Text < String
+          def initialize text
+            super text
+            freeze
+          end
+          def without_uris
+            gsub(/\b(?:https?|mailto|ftp):.+?(\s|$)/i, '')
+          end
+          def words
+            split(/\W+/)
+          end
+          def count pattern
+            scan(pattern).flatten.compact.length
+          end
+          def remove_and_count! pattern
+            count = count(pattern)
+            gsub!(pattern, '')
+            count
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/subject.rb ADDED Viewed

@@ -0,0 +1,34 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/subject/text'
+    class Despamilator
+      class Subject
+        attr_reader :score, :text
+        def initialize text
+          @score = 0.0
+          @matches = {}
+          @text = Despamilator::Subject::Text.new(text)
+        end
+        def register_match! details
+          @score += details[:score] || raise('A score must be supplied')
+          filter = details[:filter] || raise('A filter must be supplied')
+          @matches[filter] ||= 0.0
+          @matches[filter] += details[:score]
+        end
+        def matches
+          @matches.map do |filter, score|
+            {:filter => filter, :score => score}
+          end.sort do |a, b|
+            b[:score] <=> a[:score]
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/version.rb ADDED Viewed

@@ -0,0 +1,7 @@
+module Gluttonberg
+  module Content
+    class Despamilator
+      VERSION = '2.1.4' # NOTE(review): presumably the upstream despamilator release this code was taken from - confirm
+    end
+  end
+end

data/lib/gluttonberg/content/despamilator.rb ADDED Viewed

@@ -0,0 +1,79 @@
+module Gluttonberg
+  module Content
+    $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
+    require 'despamilator/filter'
+    Dir.glob(File.join(File.dirname(__FILE__), 'despamilator', 'filter', '*.rb')).each do |filter_file|
+      require filter_file
+    end
+    require 'despamilator/subject'
+    require 'ostruct'
+    #== SYNOPSIS:
+    #
+    #  require 'despamilator'
+    #
+    #  # some time later...
+    #
+    #  dspam = Gluttonberg::Content::Despamilator.new('some text with an <h2> tag qthhg')
+    #
+    #  dspam.score #=> the total score for this string (1 is normally my threshold).
+    #  dspam.matches #=> array of hashes containing matching filters and their score.
+    class Despamilator
+      # Constructor. Takes the text you which to parse and score.
+      def initialize text
+        @subject = Despamilator::Subject.new text
+        run_filters @subject
+      end
+      # Returns the total score as a Float.
+      def score
+        @subject.score
+      end
+      def matched_by
+        warn 'Despamilator.matched_by is deprecated, please use Despamilator.matches by 2011-12-31.'
+        matches.map do |match|
+          filter = match[:filter]
+          OpenStruct.new(
+              :name => filter.name,
+              :description => filter.description,
+              :score => match[:score]
+          )
+        end
+      end
+      # Returns an array of scores and filters that have matched and contributed to the score.
+      # Each element is a a child of the Despamilator::FilterBase class.
+      def matches
+        @subject.matches
+      end
+      # Generic Test for Unsolicited Bulk Submissions. Similar to SpamAssassin's GTUBE.
+      # A string that will result in a spam score of at least 100. Handy for testing.
+      def self.gtubs_test_string
+        '89913b8a065b7092721fe995877e097681683af9d3ab767146d5d6fd050fc0bda7ab99f4232d94a1'
+      end
+      private
+      def run_filters subject
+        filter_namespace = Gluttonberg::Content.const_get('DespamilatorFilter')
+        filter_namespace.constants.each do |filter_class|
+          filter = filter_namespace.const_get(filter_class).new
+          filter.parse(subject)
+        end
+      end
+    end
+  end #content
+end # Gluttonberg

data/lib/gluttonberg/content.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 content = Pathname(__FILE__).dirname.expand_path
-require File.join(content, 'content', 'slug_management')
+require File.join(content, "content", "slug_management")
 require File.join(content, "content", "block")
 require File.join(content, "content", "block_localization")
 require File.join(content, "content", "localization")
@@ -8,9 +8,10 @@ require File.join(content, "content", "publishable")
 require File.join(content, "content", "versioning")
 require File.join(content, "content", "clean_html")
 require File.join(content, "content", "import_export_csv")
+require File.join(content, "content", "despamilator")
 module Gluttonberg
-  # The content module contains a whole bunch classes and mixins related to the
+  # The content module contains a whole bunch classes and mixins related to the
   # pages, localizations, content models and helpers for rendering content
   # inside of views.
   module Content
@@ -19,7 +20,7 @@ module Gluttonberg
     @@localizations = {}
     @@localization_associations = nil
     @@localization_classes = nil
     # This is called after the application loads so that we can define any
     # extra associations or do house-keeping once everything is required and
     # running
@@ -29,10 +30,10 @@ module Gluttonberg
       @@localization_classes = @@localizations.values
       @@content_associations = Block.classes.collect { |k| k.association_name }
     end
     # For each content class that is registered, a corresponding association is
     # declared against the Page model. We need to keep track of these, which
-    # is what this method does. It just returns an array of the association
+    # is what this method does. It just returns an array of the association
     # names.
     def self.non_localized_associations
       @@non_localized_associations ||= begin
@@ -40,24 +41,24 @@ module Gluttonberg
         non_localized.collect {|c| c.association_name }
       end
     end
     # Return the collection of content association names.
     def self.content_associations
       @@content_associations
     end
-    # If a content class has the is_localized declaration, this method is used
+    # If a content class has the is_localized declaration, this method is used
     # to register it so we can keep track of all localized content.
     def self.register_localization(assoc_name, klass)
       @@localizations[assoc_name] = klass
     end
-    # Returns a hash of content classes that are localized, keyed to the
+    # Returns a hash of content classes that are localized, keyed to the
     # association name.
     def self.localizations
       @@localizations
     end
     # Returns an array of the localization association names.
     def self.localization_associations
       @@localization_associations