despamilator 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +44 -0
  3. data/PostInstall.txt +1 -0
  4. data/README.rdoc +107 -0
  5. data/Rakefile +33 -0
  6. data/despamilator.gemspec +42 -0
  7. data/lib/despamilator/filter/html_tags.rb +116 -0
  8. data/lib/despamilator/filter/naughty_q.rb +17 -0
  9. data/lib/despamilator/filter/numbers_and_words.rb +33 -0
  10. data/lib/despamilator/filter/script_tag.rb +13 -0
  11. data/lib/despamilator/filter.rb +52 -0
  12. data/lib/despamilator/filter_base.rb +37 -0
  13. data/lib/despamilator.rb +19 -0
  14. data/pkg/despamilator-0.1/History.txt +4 -0
  15. data/pkg/despamilator-0.1/Manifest.txt +21 -0
  16. data/pkg/despamilator-0.1/PostInstall.txt +1 -0
  17. data/pkg/despamilator-0.1/README.rdoc +107 -0
  18. data/pkg/despamilator-0.1/Rakefile +33 -0
  19. data/pkg/despamilator-0.1/despamilator.gemspec +42 -0
  20. data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +116 -0
  21. data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +17 -0
  22. data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +33 -0
  23. data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +13 -0
  24. data/pkg/despamilator-0.1/lib/despamilator/filter.rb +52 -0
  25. data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +37 -0
  26. data/pkg/despamilator-0.1/lib/despamilator.rb +19 -0
  27. data/pkg/despamilator-0.1/spec/despamilator_spec.rb +15 -0
  28. data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +144 -0
  29. data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +39 -0
  30. data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +59 -0
  31. data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +32 -0
  32. data/pkg/despamilator-0.1/spec/spec.opts +1 -0
  33. data/pkg/despamilator-0.1/spec/spec_helper.rb +10 -0
  34. data/pkg/despamilator-0.1/tasks/rspec.rake +21 -0
  35. data/pkg/despamilator-0.1.gem +0 -0
  36. data/pkg/despamilator-0.1.tgz +0 -0
  37. data/spec/despamilator_spec.rb +15 -0
  38. data/spec/filters/html_tags_spec.rb +144 -0
  39. data/spec/filters/naughty_q_spec.rb +39 -0
  40. data/spec/filters/numbers_and_words_spec.rb +59 -0
  41. data/spec/filters/script_tag_spec.rb +32 -0
  42. data/spec/spec.opts +1 -0
  43. data/spec/spec_helper.rb +10 -0
  44. data/tasks/rspec.rake +21 -0
  45. metadata +155 -0
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2010-01-30
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,44 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ despamilator.gemspec
7
+ lib/despamilator.rb
8
+ lib/despamilator/filter.rb
9
+ lib/despamilator/filter/html_tags.rb
10
+ lib/despamilator/filter/naughty_q.rb
11
+ lib/despamilator/filter/numbers_and_words.rb
12
+ lib/despamilator/filter/script_tag.rb
13
+ lib/despamilator/filter_base.rb
14
+ pkg/despamilator-0.1.gem
15
+ pkg/despamilator-0.1.tgz
16
+ pkg/despamilator-0.1/History.txt
17
+ pkg/despamilator-0.1/Manifest.txt
18
+ pkg/despamilator-0.1/PostInstall.txt
19
+ pkg/despamilator-0.1/README.rdoc
20
+ pkg/despamilator-0.1/Rakefile
21
+ pkg/despamilator-0.1/despamilator.gemspec
22
+ pkg/despamilator-0.1/lib/despamilator.rb
23
+ pkg/despamilator-0.1/lib/despamilator/filter.rb
24
+ pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
25
+ pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
26
+ pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
27
+ pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
28
+ pkg/despamilator-0.1/lib/despamilator/filter_base.rb
29
+ pkg/despamilator-0.1/spec/despamilator_spec.rb
30
+ pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
31
+ pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
32
+ pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
33
+ pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
34
+ pkg/despamilator-0.1/spec/spec.opts
35
+ pkg/despamilator-0.1/spec/spec_helper.rb
36
+ pkg/despamilator-0.1/tasks/rspec.rake
37
+ spec/despamilator_spec.rb
38
+ spec/filters/html_tags_spec.rb
39
+ spec/filters/naughty_q_spec.rb
40
+ spec/filters/numbers_and_words_spec.rb
41
+ spec/filters/script_tag_spec.rb
42
+ spec/spec.opts
43
+ spec/spec_helper.rb
44
+ tasks/rspec.rake
data/PostInstall.txt ADDED
@@ -0,0 +1 @@
1
+ For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamilator
data/README.rdoc ADDED
@@ -0,0 +1,107 @@
1
+ = Despamilator
2
+
3
+ * http://github.com/moowahaha/despamilator
4
+
5
+ == DESCRIPTION:
6
+
7
+ Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
8
+ Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
9
+ some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
10
+
11
+ == FEATURES/PROBLEMS:
12
+
13
+ * rspec will be run twice. How annoying?!
14
+
15
+ == SYNOPSIS:
16
+
17
+ # using Despamilator
18
+ dspam = Despamilator.new('some text with an <h2> tag qthhg')
19
+
20
+ dspam.score #=> the total score for this string (1 is considered high)
21
+ dspam.matched_by #=> array of matching filters
22
+ first_match = dspam.matched_by.first #=> first matching filter
23
+ first_match.name #=> some string with the name of the filter
24
+ first_match.description #=> some string to describe
25
+ first_match.score #=> the individual score assigned by this filter
26
+
27
+ # adding a new filter! example: detecting the letter "a"
28
+ # put the following code in lib/despamilator/filter/detect_letter_a.rb
29
+ def name
30
+ 'Detecting the letter A'
31
+ end
32
+
33
+ def description
34
+ 'Detects the letter "a" in a string for no reason other than a demo'
35
+ end
36
+
37
+ def parse
38
+ if self.text.downcase.scan(/a/)
39
+ # add 0.1 to the score of the text
40
+ self.append_score = 0.1
41
+ end
42
+ end
43
+
44
+ == FILTERING:
45
+
46
+ As stated, this is a heuristic scanner so it's up to the user to decide the thresholds of the scanner. I usually
47
+ say "it's spam" if the score reaches 1.
48
+
49
+ The score will be added to incrementally by each matching filter. So if there is some HTML in there, it will be added
50
+ to the score. If there is also a script tag of some sort, that will add more.
51
+
52
+ Each filter decides how much of a score it assigns. For example, detecting a number next to a letter
53
+ (the numbers_and_words filter) is only a mild hint compared with a script tag (detected by the script_tag filter).
54
+
55
+ === NEW FILTERS:
56
+
57
+ I absolutely welcome new filters and experiments. New filters should be put in the 'lib/despamilator/filter/' directory.
58
+ The core filtering code will detect and use what is in there so you only need to drop the code in.
59
+ Filters should be simple, no classes etc wrapped around them and should try to perform one simple task.
60
+ They should always supply the following methods:
61
+
62
+ * name #=> the name of your filter.
63
+ * description #=> what your filter will look for.
64
+ * parse #=> the method that will be called when parsing.
65
+
66
+ Alongside the above, the following methods are made available to each filter:
67
+
68
+ * text #=> a copy of the text your parser will parse
69
+ * append_score= #=> method to append a score to the text if there are matches in your parser.
70
+ * matched? #=> whether or not any filter has so far detected something suspect
71
+ * score #=> the current score assigned to the text
72
+
73
+ spec tests are an absolute must!
74
+
75
+
76
+ == REQUIREMENTS:
77
+
78
+ * hoe
79
+ * rspec
80
+
81
+
82
+ == INSTALL:
83
+
84
+ $ sudo gem install despamilator
85
+
86
+ == LICENSE:
87
+
88
+ Copyright (c) 2010 Stephen Hardisty
89
+
90
+ Permission is hereby granted, free of charge, to any person obtaining
91
+ a copy of this software and associated documentation files (the
92
+ 'Software'), to deal in the Software without restriction, including
93
+ without limitation the rights to use, copy, modify, merge, publish,
94
+ distribute, sublicense, and/or sell copies of the Software, and to
95
+ permit persons to whom the Software is furnished to do so, subject to
96
+ the following conditions:
97
+
98
+ The above copyright notice and this permission notice shall be
99
+ included in all copies or substantial portions of the Software.
100
+
101
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
102
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
103
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
104
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
105
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
106
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
107
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,33 @@
1
# Build script for the despamilator gem: declares the Hoe specification,
# loads the project's own rake tasks and wires up the default targets.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/despamilator'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'despamilator' do
  self.developer 'Stephen Hardisty', 'moowahaha@hotmail.com'
  # RubyGems prints this attribute verbatim after installation, so it has to
  # hold the message text itself rather than the name of the file holding it.
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  self.rubyforge_name = self.name # TODO this is default value
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
task :test => [:spec]
task :default => [:test]
task :install => [:install_gem]

# Regenerates Manifest.txt (by applying the diff printed by check_manifest)
# and rewrites <directory name>.gemspec from the output of debug_gem.
task :cultivate do
  system "touch Manifest.txt; rake check_manifest | grep -v \"(in \" | patch"
  system "rake debug_gem | grep -v \"(in \" > `basename \\`pwd\\``.gemspec"
end
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for despamilator 0.2.
Gem::Specification.new do |s|
  s.name = %q{despamilator}
  s.version = "0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Stephen Hardisty"]
  s.date = %q{2010-03-28}
  s.description = "Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:\n" \
    "Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply\n" \
    "some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine."
  s.email = ["moowahaha@hotmail.com"]
  s.extra_rdoc_files = %w[
    History.txt
    Manifest.txt
    PostInstall.txt
    pkg/despamilator-0.1/History.txt
    pkg/despamilator-0.1/Manifest.txt
    pkg/despamilator-0.1/PostInstall.txt
  ]
  # NOTE(review): this list mirrors Manifest.txt, including the packaged 0.1
  # artifacts under pkg/ - confirm those are really meant to ship in the gem.
  s.files = %w[
    History.txt
    Manifest.txt
    PostInstall.txt
    README.rdoc
    Rakefile
    despamilator.gemspec
    lib/despamilator.rb
    lib/despamilator/filter.rb
    lib/despamilator/filter/html_tags.rb
    lib/despamilator/filter/naughty_q.rb
    lib/despamilator/filter/numbers_and_words.rb
    lib/despamilator/filter/script_tag.rb
    lib/despamilator/filter_base.rb
    pkg/despamilator-0.1.gem
    pkg/despamilator-0.1.tgz
    pkg/despamilator-0.1/History.txt
    pkg/despamilator-0.1/Manifest.txt
    pkg/despamilator-0.1/PostInstall.txt
    pkg/despamilator-0.1/README.rdoc
    pkg/despamilator-0.1/Rakefile
    pkg/despamilator-0.1/despamilator.gemspec
    pkg/despamilator-0.1/lib/despamilator.rb
    pkg/despamilator-0.1/lib/despamilator/filter.rb
    pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb
    pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb
    pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb
    pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb
    pkg/despamilator-0.1/lib/despamilator/filter_base.rb
    pkg/despamilator-0.1/spec/despamilator_spec.rb
    pkg/despamilator-0.1/spec/filters/html_tags_spec.rb
    pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb
    pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb
    pkg/despamilator-0.1/spec/filters/script_tag_spec.rb
    pkg/despamilator-0.1/spec/spec.opts
    pkg/despamilator-0.1/spec/spec_helper.rb
    pkg/despamilator-0.1/tasks/rspec.rake
    spec/despamilator_spec.rb
    spec/filters/html_tags_spec.rb
    spec/filters/naughty_q_spec.rb
    spec/filters/numbers_and_words_spec.rb
    spec/filters/script_tag_spec.rb
    spec/spec.opts
    spec/spec_helper.rb
    tasks/rspec.rake
  ]
  s.homepage = %q{http://github.com/moowahaha/despamliator}
  s.post_install_message = %q{PostInstall.txt}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded like the other optional attributes so the spec still loads on a
  # RubyGems that no longer provides this writer.
  s.rubyforge_project = %q{despamilator} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms AND CAPTCHAS being intrusive}

  # Tools needed to build/release the gem, declared once instead of per branch.
  build_tools = [
    ['rubyforge', '>= 2.0.3'],
    ['gemcutter', '>= 0.5.0'],
    ['hoe',       '>= 2.5.0']
  ]

  # Prefer Gem::VERSION and fall back to the older Gem::RubyGemsVersion name,
  # so the spec loads whichever of the two constants the running RubyGems has.
  running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion
  has_spec_version = s.respond_to? :specification_version
  dev_deps_supported = has_spec_version &&
                       Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0')

  s.specification_version = 3 if has_spec_version

  build_tools.each do |gem_name, requirement|
    if dev_deps_supported
      s.add_development_dependency(gem_name, [requirement])
    else
      # RubyGems without development dependencies: declare them as ordinary ones.
      s.add_dependency(gem_name, [requirement])
    end
  end
end
@@ -0,0 +1,116 @@
1
# Adds 0.3 to the score once for every known HTML tag that appears in the
# text, either as an opening form ("<tag" followed by a non-word character)
# or as a closing/self-closing form (the tag name followed by "/").
# The text is lower-cased first because the tag list is lower case.
def parse
  html = self.text.downcase

  html_tags.each do |tag|
    # Escape the tag so entries such as "!--" are always matched literally.
    escaped = Regexp.escape(tag)
    # \s already covers newlines, so one pattern per form is sufficient.
    opening = /<\s*#{escaped}\W/
    closing = /\W#{escaped}\s*\//

    self.append_score = 0.3 if html =~ opening || html =~ closing
  end
end
10
+
11
# Human readable name of this filter.
def name
  'Detects HTML tags in text'
end

# Short explanation of what this filter looks for.
def description
  'Searches for various HTML tags'
end
18
+
19
# The HTML tag names (plus the comment and doctype openers) this filter
# searches for. Returns a fresh Array of Strings on every call.
def html_tags
  # make sure these are lowercase, in order to save processing
  %w[
    !-- !doctype
    a abbr acronym address applet area
    b base basefont bdo big blockquote body br button
    caption center cite code col colgroup
    dd del dfn dir div dl dt
    em
    fieldset font form frame frameset
    h1 h2 h3 h4 h5 h6 head hr html
    i iframe img input ins isindex
    kbd
    label legend li link
    map menu meta
    noframes noscript
    object ol optgroup option
    p param pre
    q
    s samp select small span strike strong style sub sup
    table tbody td textarea tfoot th thead title tr tt
    u ul
    var
    xmp
  ]
end
@@ -0,0 +1,17 @@
1
# Human readable name of this filter.
def name
  'Naughty Q'
end

# Short explanation of what this filter looks for.
def description
  'Detects possible misuse of the letter Q (English language)'
end
8
+
9
# Adds 0.2 to the score for every "q" that is followed by a character other
# than "u", "a" or whitespace. The text is lower-cased first; a "q" that ends
# the text or a line is not followed by a matchable character and is ignored.
def parse
  # String#scan always returns an Array, so no nil guard is required.
  self.text.downcase.scan(/q./).each do |pair|
    next if pair == 'qu' || pair == 'qa' || pair =~ /q\s/

    self.append_score = 0.2
  end
end
@@ -0,0 +1,33 @@
1
# Adds 0.1 to the score for every number that is glued to a word character
# (for example "abc123" or "4ever"). Works on a lower-cased copy of the text:
# harmless numbers are stripped first, then each suspicious pattern is
# scanned for, scored and removed so a later pattern cannot count it again.
def parse
  string = self.text.downcase

  # strip out "good numbers"
  string.gsub!(/h[1-6]/, '')                          # heading tag names (h1..h6)
  string.gsub!(/(^|\b)\d+($|\b)/, '')                 # standalone numbers
  string.gsub!(/(^|\b)\d+(,|\.)\d+($|\b)/, '')        # 1,000 or 3.14
  string.gsub!(/(^|\b)\d+(st|nd|rd|th)($|\b)/, '')    # ordinals such as 3rd

  [
    /\w\d+/,
    /\d+\w/,
    # Non-capturing group: with a capture group String#scan yields arrays of
    # captures instead of the matched text, so the removal below would do nothing.
    /\d+(?:$|\b)/
  ].each do |regexp|
    string.scan(regexp).each do |to_remove|
      string.sub!(to_remove, '')
      self.append_score = 0.1
    end
  end
end
26
+
27
# Human readable name of this filter.
def name
  'Numbers next to words'
end

# Short explanation of what this filter looks for.
def description
  'Detects unusual number/word combinations'
end
@@ -0,0 +1,13 @@
1
# Adds 1 to the score when the lower-cased text contains an opening or
# closing script tag, i.e. "<script" or "</script" followed by ">" or by
# any whitespace character.
def parse
  lowered = self.text.downcase
  self.append_score = 1 if lowered =~ /<\/?script(>|\s+|\n|\r)/
end
6
+
7
# Human readable name of this filter.
def name
  'Detects script tags in text'
end

# Short explanation of what this filter looks for.
def description
  'Searches for variations for the HTML script tag'
end
@@ -0,0 +1,52 @@
1
class Despamilator
  # Runs every filter script found in the "filter" directory next to this
  # file against a piece of text.
  #
  # Each script is wrapped in a generated subclass of Despamilator::FilterBase
  # named after the file (html_tags.rb becomes HtmlTags). After construction,
  # +matches+ holds the filters that reported a match and +score+ holds the
  # sum of their individual scores.
  class Filter
    attr_accessor :matches, :score

    # text:: the text to examine; it is converted with to_s and every filter
    #        receives its own copy.
    def initialize(text)
      # Compiled filter classes, shared by all instances so each filter file
      # is read and evaluated only once per process.
      @@loaded ||= {}
      @filters = []
      @matches = []
      @score = 0
      load_filters(text)
      run_filters
    end

    private

    # Instantiates one filter object per script in the filter directory.
    def load_filters(text)
      # Sorted so filters run (and appear in +matches+) in a stable order
      # regardless of how the file system lists the directory.
      filter_files = Dir.glob(File.join(File.dirname(__FILE__), 'filter', '*.rb')).sort

      filter_files.each do |filter_file|
        filter_name = classify_filename(filter_file)
        container = (@@loaded[filter_name] ||= compile_filter(filter_name, filter_file))

        @filters.push(container.const_get(filter_name).new(text.to_s.dup, File.basename(filter_file)))
      end
    end

    # Reads a filter script and evaluates it as the body of a FilterBase
    # subclass called +filter_name+. Returns the anonymous class that holds
    # the generated subclass as a constant.
    def compile_filter(filter_name, filter_file)
      # File.read opens read-only and closes the handle when done.
      filter_code = File.read(filter_file)
      container = Class.new
      container.class_eval(
        "require 'despamilator/filter_base'\nclass #{filter_name} < Despamilator::FilterBase\n#{filter_code}\nend"
      )
      container
    end

    # Runs each loaded filter, collecting those that matched and adding their
    # score to the running total.
    def run_filters
      @filters.each do |filter|
        filter.parse

        if filter.matched?
          @matches.push(filter)
          @score += filter.score
        end
      end
    end

    # Converts a script path into a class name:
    # "filter/numbers_and_words.rb" => "NumbersAndWords".
    def classify_filename(filename)
      File.basename(filename).sub(/\.rb$/, '').split('_').map { |part| part.capitalize }.join
    end
  end
end