RubyGems - gluttonberg-core - Versions diffs - 2.5.5 → 2.5.6 - Mend

gluttonberg-core 2.5.5 → 2.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

data/lib/gluttonberg/content/despamilator/filter/emails.rb ADDED Viewed

@@ -0,0 +1,49 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class Emails < Despamilator::Filter
+        def name
+          'Emails'
+        end
+        def description
+          'Detects each emails in a string'
+        end
+        def parse subject
+          @email_regex ||= begin
+            email_name_regex  = '[A-Z0-9_\.%\+\-\']+'
+            domain_head_regex = '(?:[A-Z0-9\-]+\.)+'
+            domain_tld_regex  = '(?:[A-Z]{2,4}|museum|travel)'
+            /\A#{email_name_regex}@#{domain_head_regex}#{domain_tld_regex}\z/i
+          end
+          comment_email_as_spam = Gluttonberg::Setting.get_setting("comment_email_as_spam")
+          if comment_email_as_spam == "Yes"
+            text = subject.text.strip
+            subject.register_match!({
+             :score => 1.0, :filter => self
+            }) if @email_regex.match(text)
+          end
+          comment_number_of_emails_allowed = Gluttonberg::Setting.get_setting("comment_number_of_emails_allowed")
+          if !comment_number_of_emails_allowed.blank? && comment_number_of_emails_allowed.to_i > 0
+            comment_number_of_emails_allowed = comment_number_of_emails_allowed.to_i
+            subject.text.split(/%s/).each do |word|
+              subject.register_match!({
+               :score => (1.0/comment_number_of_emails_allowed), :filter => self
+              }) if @email_regex.match(word)
+            end
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/gtubs_test_filter.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class GtubsTestFilter < Despamilator::Filter
+        def name
+          'GTubs Test Filter'
+        end
+        def description
+          'Detects the special test string (Despamilator.gtubs_test_string) and assigns a big score.'
+        end
+        def parse subject
+          subject.register_match!({:score => 100, :filter => self}) if subject.text == Despamilator.gtubs_test_string
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/html_tags.rb ADDED Viewed

@@ -0,0 +1,134 @@
+module Gluttonberg
+  module Content
+    module DespamilatorFilter
+      class HtmlTags < Despamilator::Filter
+        def parse subject
+          text = subject.text.downcase
+          html_tags.each do |tag|
+            opening_elements = text.count(/<\s*#{tag}\W/)
+            closing_elements = text.count(/\W#{tag}\s*\/>/)
+            if opening_elements > 0 or closing_elements > 0
+              safest_element_count = opening_elements > closing_elements ? opening_elements : closing_elements
+              subject.register_match!({:score => 0.6 * safest_element_count, :filter => self})
+            end
+          end
+        end
+        def name
+          'HTML tags'
+        end
+        def description
+          'Detects HTML tags in text'
+        end
+        def html_tags
+          # make sure these are lowercase, in order to save processing
+          [
+                  '!--',
+                  '!doctype',
+                  'a',
+                  'abbr',
+                  'acronym',
+                  'address',
+                  'applet',
+                  'area',
+                  'b',
+                  'base',
+                  'basefont',
+                  'bdo',
+                  'big',
+                  'blockquote',
+                  'body',
+                  'br',
+                  'button',
+                  'caption',
+                  'center',
+                  'cite',
+                  'code',
+                  'col',
+                  'colgroup',
+                  'dd',
+                  'del',
+                  'dfn',
+                  'dir',
+                  'div',
+                  'dl',
+                  'dt',
+                  'em',
+                  'fieldset',
+                  'font',
+                  'form',
+                  'frame',
+                  'frameset',
+                  'h1',
+                  'h2',
+                  'h3',
+                  'h4',
+                  'h5',
+                  'h6',
+                  'head',
+                  'hr',
+                  'html',
+                  'i',
+                  'iframe',
+                  'img',
+                  'input',
+                  'ins',
+                  'isindex',
+                  'kbd',
+                  'label',
+                  'legend',
+                  'li',
+                  'link',
+                  'map',
+                  'menu',
+                  'meta',
+                  'noframes',
+                  'noscript',
+                  'object',
+                  'ol',
+                  'optgroup',
+                  'option',
+                  'p',
+                  'param',
+                  'pre',
+                  'q',
+                  's',
+                  'samp',
+                  'select',
+                  'small',
+                  'span',
+                  'strike',
+                  'strong',
+                  'style',
+                  'sub',
+                  'sup',
+                  'table',
+                  'tbody',
+                  'td',
+                  'textarea',
+                  'tfoot',
+                  'th',
+                  'thead',
+                  'title',
+                  'tr',
+                  'tt',
+                  'u',
+                  'ul',
+                  'var',
+                  'xmp'
+          ]
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/ip_address_url.rb ADDED Viewed

@@ -0,0 +1,27 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class IPAddressURL < Despamilator::Filter
+        def name
+          'IP Address URL'
+        end
+        def description
+          'Detects IP address URLs'
+        end
+        def parse subject
+          subject.register_match!({
+              :score => 0.5, :filter => self
+          }) if subject.text.downcase.count(/http:\/\/\d+\.\d+\.\d+\.\d+/) > 0
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/long_words.rb ADDED Viewed

@@ -0,0 +1,29 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class LongWords < Despamilator::Filter
+        def name
+          'Long Words'
+        end
+        def description
+          'Detects long and unbroken strings'
+        end
+        def parse subject
+          subject.text.without_uris.words.each do |word|
+            subject.register_match!({
+              :score => 0.1, :filter => self
+            }) if word.length > 20
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/mixed_case.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module Gluttonberg
+  module Content
+    module DespamilatorFilter
+      class MixedCase < Despamilator::Filter
+        def name
+          'Mixed Case String'
+        end
+        def description
+          'Detects mixed case strings.'
+        end
+        def parse subject
+          text = subject.text.without_uris
+          count = text.remove_and_count!(/[a-z][A-Z]/)
+          count += text.remove_and_count!(/[a-z][A-Z][a-z]/)
+          subject.register_match!({:score => 0.1 * count, :filter => self}) if count > 0
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/naughty_words.rb ADDED Viewed

@@ -0,0 +1,80 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class NaughtyWords < Despamilator::Filter
+        def name
+          'Naughty Words'
+        end
+        def description
+          'Detects cheeky words'
+        end
+        def parse subject
+          text = subject.text.downcase
+          naughty_words.each do |word|
+            subject.register_match!({:score => 0.1, :filter => self}) if text =~ /\b#{word}s?\b/
+          end
+          gb_blacklist_settings = Gluttonberg::Setting.get_setting("comment_blacklist")
+          unless gb_blacklist_settings.blank?
+            gb_blacklist_settings_words = gb_blacklist_settings.split(",")
+            gb_blacklist_settings_words.each do |word|
+              subject.register_match!({:score => 1.0, :filter => self}) if text =~ /\b#{word.strip.downcase}s?\b/
+            end
+          end
+        end
+        def local_parse subject
+          local_score = 0.0
+          unless subject.blank?
+            text = subject.downcase
+            naughty_words.each do |word|
+              local_score += 0.1 if text =~ /\b#{word}s?\b/
+            end
+            gb_blacklist_settings = Gluttonberg::Setting.get_setting("comment_blacklist")
+            unless gb_blacklist_settings.blank?
+              gb_blacklist_settings_words = gb_blacklist_settings.split(",")
+              gb_blacklist_settings_words.each do |word|
+                local_score += 1.0 if text.include?(word.strip.downcase)
+              end
+            end
+          end
+          local_score
+        end
+        def naughty_words
+          words = %w{
+            underage
+            penis
+            viagra
+            bondage
+            cunt
+            fuck
+            shit
+            dick
+            tits
+            nude
+            dicks
+            shemale
+            dildo
+            porn
+            cock
+            pussy
+            clit
+            preteen
+            lolita
+           }
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/no_vowels.rb ADDED Viewed

@@ -0,0 +1,28 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class NoVowels < Despamilator::Filter
+        NO_VOWELS_REGEX = /^[b-df-hj-np-tv-xzB-DF-HJ-NP-TV-XZ]+$/
+        def name
+          'No Vowels'
+        end
+        def description
+          'Detects things that are all letters but no vowels and separated by spaces'
+        end
+        def parse(subject)
+          words = subject.text.split(/\s+/).select{|str| str.match(NO_VOWELS_REGEX)}
+          unless words.empty?
+            subject.register_match!({:score => ((words.length ** 2).to_f / 100) , :filter => self})
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/numbers_and_words.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class NumbersAndWords < Despamilator::Filter
+        def parse subject
+          text = tidy_text(subject)
+          [
+                  /\w\d+/,
+                  /\d+\w/,
+                  /\d+($|\b)/
+          ].each do |regexp|
+            matches = text.scan(regexp)
+            next if matches.empty?
+            matches.each do |to_remove|
+              to_remove = to_remove.to_s
+              text.sub!(to_remove, '') unless to_remove.empty?
+              subject.register_match!({:score => 0.1, :filter => self})
+            end
+          end
+        end
+        def name
+          'Numbers next to words'
+        end
+        def description
+          'Detects unusual number/word combinations'
+        end
+        private
+        def tidy_text subject
+          text = subject.text.without_uris
+          text.downcase!
+          # strip out "good numbers"
+          text.gsub!(/h[1-6]/, '')
+          text.gsub!(/(^|\b)\d+($|\b)/, '')
+          text.gsub!(/(^|\b)\d+(,|\.)\d+($|\b)/, '')
+          text.gsub!(/(^|\b)\d+(st|nd|rd|th)($|\b)/, '')
+          text
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/obfuscated_urls.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module Gluttonberg
+  module Content
+    module DespamilatorFilter
+      class ObfuscatedURLs < Despamilator::Filter
+        def name
+          'Obfuscated URLs'
+        end
+        def description
+          'Finds lame attempts at obfuscating urls.'
+        end
+        def parse subject
+          text = subject.text.without_uris.downcase
+          count = find_space_separated_parts text
+          count += find_space_separated_characters text
+          # weird maths below is due to some issue with ruby 1.9.2 multiplying floats by 3 (?!)
+          subject.register_match!({:score => (4.0 * count) / 10, :filter => self}) if count > 0
+        end
+        private
+        def find_space_separated_parts text
+          text.count(/www\s+\w+\s+com/)
+        end
+        def find_space_separated_characters text
+          count = 0
+          text.split(/[a-z][a-z]/).each do |candidate|
+            candidate.strip!
+            candidate.gsub!(/\s+/, '')
+            count += 1 if candidate =~ /\w{5,}\.\w{2,3}/
+          end
+          count
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/prices.rb ADDED Viewed

@@ -0,0 +1,23 @@
+module Gluttonberg
+  module Content
+    module DespamilatorFilter
+      class Prices < Despamilator::Filter
+        def name
+          'Prices'
+        end
+        def description
+          'Detects prices in text.'
+        end
+        def parse subject
+          price_count = subject.text.count(/\$\s*\d+/)
+          subject.register_match!({:score => 0.075 * price_count, :filter => self}) if price_count > 0
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/script_tag.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class ScriptTag < Despamilator::Filter
+        def parse subject
+          subject.register_match!({:score => 1, :filter => self}) if subject.text.downcase.match(/<\/?script(>|\s+|\n|\r)/)
+        end
+        def name
+          'Script tag'
+        end
+        def description
+          'Searches for variations for the HTML script tag'
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/shouting.rb ADDED Viewed

@@ -0,0 +1,38 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class Shouting < Despamilator::Filter
+        def name
+          'Shouting'
+        end
+        def description
+          'Detects and scores shouting (all caps)'
+        end
+        def parse subject
+          # strip HTML
+          text = subject.text.gsub(/<\/?[^>]*>/, "")
+          return if text.length < 20
+          uppercased = text.scan(/[A-Z][A-Z]+/).join.length
+          lowercased = text.count(/[a-z]/)
+          if uppercased > 0
+            subject.register_match!({
+                :score => (uppercased.to_f / (uppercased + lowercased)) * 0.5,
+                :filter => self
+            })
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/spammy_tlds.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class SpammyTLDs < Despamilator::Filter
+        def name
+          'Spammy TLDs'
+        end
+        def description
+          'Detects TLDs that are more commonly associated with spam.'
+        end
+        def parse subject
+          matches = subject.text.count(/\w{5,}\.(info|biz|xxx)\b/)
+          subject.register_match!({:score => 0.05 * matches, :filter => self}) if matches > 0
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/square_brackets.rb ADDED Viewed

@@ -0,0 +1,27 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class SquareBrackets < Despamilator::Filter
+        def name
+          'Square Brackets'
+        end
+        def description
+          'Detects each square bracket in a string'
+        end
+        def parse subject
+          subject.text.downcase.scan(/(\[|\])/).each do |match|
+            subject.register_match!({:score => 0.05, :filter => self})
+          end
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg

data/lib/gluttonberg/content/despamilator/filter/trailing_number.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module Gluttonberg
+  module Content
+    require 'despamilator/filter'
+    module DespamilatorFilter
+      class TrailingNumber < Despamilator::Filter
+        def name
+          'Trailing Number'
+        end
+        def description
+          'Detects a trailing cache busting number'
+        end
+        def parse subject
+          subject.register_match!({:score => 0.1, :filter => self}) if subject.text.without_uris =~ /\b\d+\s*$/
+        end
+      end
+    end
+  end #Content
+end #Gluttonberg