despamilator 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +44 -0
  3. data/PostInstall.txt +1 -0
  4. data/README.rdoc +107 -0
  5. data/Rakefile +33 -0
  6. data/despamilator.gemspec +42 -0
  7. data/lib/despamilator/filter/html_tags.rb +116 -0
  8. data/lib/despamilator/filter/naughty_q.rb +17 -0
  9. data/lib/despamilator/filter/numbers_and_words.rb +33 -0
  10. data/lib/despamilator/filter/script_tag.rb +13 -0
  11. data/lib/despamilator/filter.rb +52 -0
  12. data/lib/despamilator/filter_base.rb +37 -0
  13. data/lib/despamilator.rb +19 -0
  14. data/pkg/despamilator-0.1/History.txt +4 -0
  15. data/pkg/despamilator-0.1/Manifest.txt +21 -0
  16. data/pkg/despamilator-0.1/PostInstall.txt +1 -0
  17. data/pkg/despamilator-0.1/README.rdoc +107 -0
  18. data/pkg/despamilator-0.1/Rakefile +33 -0
  19. data/pkg/despamilator-0.1/despamilator.gemspec +42 -0
  20. data/pkg/despamilator-0.1/lib/despamilator/filter/html_tags.rb +116 -0
  21. data/pkg/despamilator-0.1/lib/despamilator/filter/naughty_q.rb +17 -0
  22. data/pkg/despamilator-0.1/lib/despamilator/filter/numbers_and_words.rb +33 -0
  23. data/pkg/despamilator-0.1/lib/despamilator/filter/script_tag.rb +13 -0
  24. data/pkg/despamilator-0.1/lib/despamilator/filter.rb +52 -0
  25. data/pkg/despamilator-0.1/lib/despamilator/filter_base.rb +37 -0
  26. data/pkg/despamilator-0.1/lib/despamilator.rb +19 -0
  27. data/pkg/despamilator-0.1/spec/despamilator_spec.rb +15 -0
  28. data/pkg/despamilator-0.1/spec/filters/html_tags_spec.rb +144 -0
  29. data/pkg/despamilator-0.1/spec/filters/naughty_q_spec.rb +39 -0
  30. data/pkg/despamilator-0.1/spec/filters/numbers_and_words_spec.rb +59 -0
  31. data/pkg/despamilator-0.1/spec/filters/script_tag_spec.rb +32 -0
  32. data/pkg/despamilator-0.1/spec/spec.opts +1 -0
  33. data/pkg/despamilator-0.1/spec/spec_helper.rb +10 -0
  34. data/pkg/despamilator-0.1/tasks/rspec.rake +21 -0
  35. data/pkg/despamilator-0.1.gem +0 -0
  36. data/pkg/despamilator-0.1.tgz +0 -0
  37. data/spec/despamilator_spec.rb +15 -0
  38. data/spec/filters/html_tags_spec.rb +144 -0
  39. data/spec/filters/naughty_q_spec.rb +39 -0
  40. data/spec/filters/numbers_and_words_spec.rb +59 -0
  41. data/spec/filters/script_tag_spec.rb +32 -0
  42. data/spec/spec.opts +1 -0
  43. data/spec/spec_helper.rb +10 -0
  44. data/tasks/rspec.rake +21 -0
  45. metadata +155 -0
@@ -0,0 +1,37 @@
1
class Despamilator
  # Base class holding the state and default behaviour shared by filters.
  #
  # It stores the text being examined, the file name the filter came from
  # (used only in error messages), a running score, and a counter of how many
  # times a score has been appended. The +name+, +description+ and +parse+
  # defaults raise, naming the offending file, so a filter that fails to
  # supply one of them is reported clearly.
  class FilterBase
    attr_accessor :text, :score, :filename, :matches

    # text     - the text the filter will examine.
    # filename - the file name reported in the "No ... defined" errors.
    def initialize(text, filename)
      @text = text
      @filename = filename
      @score = 0
      @matches = 0
    end

    # Raises RuntimeError unless overridden.
    def description
      raise "No description defined in #{filename}"
    end

    # Raises RuntimeError unless overridden.
    #
    # The argument is optional (and ignored) so that a zero-argument call
    # reaches the explanatory error below rather than failing with an
    # ArgumentError about the argument count.
    def parse(blah = nil)
      raise "No parser defined in #{filename}"
    end

    # Raises RuntimeError unless overridden.
    def name
      raise "No name defined in #{filename}"
    end

    # True once the accumulated score is above zero.
    def matched?
      @score > 0
    end

    protected

    # Adds +new_score+ to the running score and counts one more match.
    # Written as a setter so filter code can call
    # <tt>self.append_score = 0.1</tt>.
    def append_score=(new_score)
      @matches += 1
      @score += new_score
    end
  end
end
@@ -0,0 +1,19 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'despamilator/filter'
4
+
5
# Entry point of the library: wraps a Despamilator::Filter built from the
# given text and exposes its results.
class Despamilator
  VERSION = "0.2"

  # text - the text to be checked; handed straight to Despamilator::Filter.
  def initialize(text)
    @filters = Despamilator::Filter.new(text)
  end

  # The score reported by the underlying filter object.
  def score
    @filters.score
  end

  # Whatever the underlying filter object reports as its matches.
  def matched_by
    @filters.matches
  end
end
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2010-01-30
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
@@ -0,0 +1,21 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ despamilator.gemspec
7
+ lib/despamilator.rb
8
+ lib/despamilator/filter.rb
9
+ lib/despamilator/filter/html_tags.rb
10
+ lib/despamilator/filter/naughty_q.rb
11
+ lib/despamilator/filter/numbers_and_words.rb
12
+ lib/despamilator/filter/script_tag.rb
13
+ lib/despamilator/filter_base.rb
14
+ spec/despamilator_spec.rb
15
+ spec/filters/html_tags_spec.rb
16
+ spec/filters/naughty_q_spec.rb
17
+ spec/filters/numbers_and_words_spec.rb
18
+ spec/filters/script_tag_spec.rb
19
+ spec/spec.opts
20
+ spec/spec_helper.rb
21
+ tasks/rspec.rake
@@ -0,0 +1 @@
1
+ For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamliator
@@ -0,0 +1,107 @@
1
+ = Despamilator
2
+
3
+ * http://github.com/moowahaha/despamliator
4
+
5
+ == DESCRIPTION:
6
+
7
+ Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
8
+ Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
9
+ some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.
10
+
11
+ == FEATURES/PROBLEMS:
12
+
13
+ * rspec will be run twice. How annoying?!
14
+
15
+ == SYNOPSIS:
16
+
17
+ # using Despamilator
18
+ dspam = Despamilator.new('some text with an <h2> tag qthhg')
19
+
20
+ dspam.score #=> the total score for this string (1 is considered high)
21
+ dspam.matched_by #=> array of matching filters
22
+ first_match = dspam.matched_by.first #=> first matching filter
23
+ first_match.name #=> some string with the name of the filter
24
+ first_match.description #=> some string to describe
25
+ first_match.score #=> the individual score assigned by this filter
26
+
27
+ # adding a new filter! example: detecting the letter "a"
28
+ # put the following code in lib/despamilator/filter/detect_letter_a.rb
29
+ def name
30
+ 'Detecting the letter A'
31
+ end
32
+
33
+ def description
34
+ 'Detects the letter "a" in a string for no reason other than a demo'
35
+ end
36
+
37
+ def parse
38
+ if self.text.downcase =~ /a/
39
+ # add 0.1 to the score of the text
40
+ self.append_score = 0.1
41
+ end
42
+ end
43
+
44
+ == FILTERING:
45
+
46
+ As stated, this is a heuristic scanner so it's up to the user to decide the thresholds of the scanner. I usually
47
+ say "it's spam" if the score reaches 1.
48
+
49
+ The score will be added to incrementally by each matching filter. So if there is some HTML in there, it will be added
50
+ to the score. If there is also a script tag of some sort, that will add more.
51
+
52
+ Each filter decides how much of a score it assigns. For example, detecting a number next to a letter
53
+ (the numbers_and_words filter) is only a mild hint compared with a script tag (detected by the script_tag filter).
54
+
55
+ === NEW FILTERS:
56
+
57
+ I absolutely welcome new filters and experiments. New filters should be put in the 'lib/despamilator/filter/' directory.
58
+ The core filtering code will detect and use what is in there so you only need to drop the code in.
59
+ Filters should be simple, no classes etc wrapped around them and should try to perform one simple task.
60
+ They should always supply the following methods:
61
+
62
+ * name #=> the name of your filter.
63
+ * description #=> what your filter will look for.
64
+ * parse #=> the method that will be called when parsing.
65
+
66
+ Alongside the above, the following methods are made available to each filter:
67
+
68
+ * text #=> a copy of the text your parser will parse
69
+ * append_score= #=> method to append a score to the text if there are matches in your parser.
70
+ * matched? #=> whether or not any filter has so far detected something suspect
71
+ * score #=> the current score assigned to the text
72
+
73
+ spec tests are an absolute must!
74
+
75
+
76
+ == REQUIREMENTS:
77
+
78
+ * hoe
79
+ * rspec
80
+
81
+
82
+ == INSTALL:
83
+
84
+ $ sudo gem install despamilator
85
+
86
+ == LICENSE:
87
+
88
+ Copyright (c) 2010 Stephen Hardisty
89
+
90
+ Permission is hereby granted, free of charge, to any person obtaining
91
+ a copy of this software and associated documentation files (the
92
+ 'Software'), to deal in the Software without restriction, including
93
+ without limitation the rights to use, copy, modify, merge, publish,
94
+ distribute, sublicense, and/or sell copies of the Software, and to
95
+ permit persons to whom the Software is furnished to do so, subject to
96
+ the following conditions:
97
+
98
+ The above copyright notice and this permission notice shall be
99
+ included in all copies or substantial portions of the Software.
100
+
101
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
102
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
103
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
104
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
105
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
106
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
107
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
# Build script: declares the gem through Hoe (with the newgem plugin), loads
# any extra rake tasks under tasks/, and wires up the default tasks.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/despamilator'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'despamilator' do
  self.developer 'Stephen Hardisty', 'moowahaha@hotmail.com'
  # Use the contents of PostInstall.txt as the message. Assigning the bare
  # file name makes the generated gemspec carry the literal text
  # "PostInstall.txt" as the post-install message.
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  self.rubyforge_name = self.name # TODO this is default value
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
task :test => [:spec]
task :default => [:test]
task :install => [:install_gem]

# Refreshes Manifest.txt from `rake check_manifest` and regenerates the
# gemspec (named after the current directory) from `rake debug_gem`.
task :cultivate do
  system "touch Manifest.txt; rake check_manifest | grep -v \"(in \" | patch"
  system "rake debug_gem | grep -v \"(in \" > `basename \\`pwd\\``.gemspec"
end
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for despamilator 0.2.
#
# Differences from the previously generated version:
# * post_install_message carries the actual PostInstall.txt text instead of
#   the literal file name.
# * the pkg/ build artefacts of the 0.1 release are no longer listed in
#   files / extra_rdoc_files.
# * the running RubyGems version is read from Gem::VERSION when available,
#   falling back to the older Gem::RubyGemsVersion constant.
Gem::Specification.new do |s|
  s.name = %q{despamilator}
  s.version = "0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Stephen Hardisty"]
  s.date = %q{2010-03-28}
  s.description = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances:
Spam being submitted in my web forms AND CAPTCHAS being intrusive. Despamilator will apply
some commonly used heuristics from the world of anti-spam to help you decide whether your users are human or machine.}
  s.email = ["moowahaha@hotmail.com"]
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "PostInstall.txt"]
  s.files = [
    "History.txt",
    "Manifest.txt",
    "PostInstall.txt",
    "README.rdoc",
    "Rakefile",
    "despamilator.gemspec",
    "lib/despamilator.rb",
    "lib/despamilator/filter.rb",
    "lib/despamilator/filter/html_tags.rb",
    "lib/despamilator/filter/naughty_q.rb",
    "lib/despamilator/filter/numbers_and_words.rb",
    "lib/despamilator/filter/script_tag.rb",
    "lib/despamilator/filter_base.rb",
    "spec/despamilator_spec.rb",
    "spec/filters/html_tags_spec.rb",
    "spec/filters/naughty_q_spec.rb",
    "spec/filters/numbers_and_words_spec.rb",
    "spec/filters/script_tag_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "tasks/rspec.rake"
  ]
  s.homepage = %q{http://github.com/moowahaha/despamliator}
  s.post_install_message = %q{For more information on despamilator or to contribute more filters, see http://github.com/moowahaha/despamliator}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded like the other optional setters in this file, in case the
  # running RubyGems does not provide this attribute.
  s.rubyforge_project = %q{despamilator} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms AND CAPTCHAS being intrusive}

  # Development dependencies are only declared as such when the running
  # RubyGems understands them (>= 1.2.0); otherwise they are added as plain
  # dependencies.
  supports_development_deps = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion
    supports_development_deps = Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0')
  end

  [
    [%q<rubyforge>, ">= 2.0.3"],
    [%q<gemcutter>, ">= 0.5.0"],
    [%q<hoe>, ">= 2.5.0"]
  ].each do |gem_name, requirement|
    if supports_development_deps
      s.add_development_dependency(gem_name, [requirement])
    else
      s.add_dependency(gem_name, [requirement])
    end
  end
end
@@ -0,0 +1,116 @@
1
# Adds 0.3 to the score for every tag from html_tags that the text appears
# to contain (once per tag, however often it occurs).
#
# The text is lower-cased once and each tag is looked for in two shapes:
# * opening:      "<", optional whitespace, the tag, a non-word character
# * self-closing: a non-word character, the tag, optional whitespace, "/"
def parse
  html = self.text.downcase

  html_tags.each do |tag|
    escaped = Regexp.escape(tag)
    # \s already matches newlines, so no separate \n* variants are needed.
    self.append_score = 0.3 if html =~ /<\s*#{escaped}\W|\W#{escaped}\s*\//
  end
end

def name
  'Detects HTML tags in text'
end

def description
  'Searches for various HTML tags'
end

# The tag names parse looks for.
def html_tags
  # make sure these are lowercase, in order to save processing
  %w[
    !-- !doctype
    a abbr acronym address applet area
    b base basefont bdo big blockquote body br button
    caption center cite code col colgroup
    dd del dfn dir div dl dt
    em
    fieldset font form frame frameset
    h1 h2 h3 h4 h5 h6 head hr html
    i iframe img input ins isindex
    kbd
    label legend li link
    map menu meta
    noframes noscript
    object ol optgroup option
    p param pre
    q
    s samp select small span strike strong style sub sup
    table tbody td textarea tfoot th thead title tr tt
    u ul
    var
    xmp
  ]
end
@@ -0,0 +1,17 @@
1
def name
  'Naughty Q'
end

def description
  'Detects possible misuse of the letter Q (English language)'
end

# Adds 0.2 to the score for every "q" in the lower-cased text whose next
# character is not "u", not "a" and not whitespace.
#
# A "q" at the very end of the text, or directly before a newline, is never
# picked up because the pattern needs a following character and "." does not
# match a newline.
def parse
  self.text.downcase.scan(/q./).each do |pair|
    next if pair == 'qu' || pair == 'qa' || pair =~ /q\s/

    self.append_score = 0.2
  end
end
@@ -0,0 +1,33 @@
1
# Adds 0.1 to the score for every number found glued to a word.
#
# Works on a lower-cased copy of the text. "Good" numbers are stripped first
# (h1-h6, free-standing integers, decimals / thousands, ordinals); then each
# suspicious pattern is scanned in turn, and every fragment found is removed
# from the working copy (first occurrence) and scored.
def parse
  string = self.text.downcase

  # strip out "good numbers"
  [
    /h[1-6]/,
    /(^|\b)\d+($|\b)/,
    /(^|\b)\d+(,|\.)\d+($|\b)/,
    /(^|\b)\d+(st|nd|rd|th)($|\b)/
  ].each { |harmless| string.gsub!(harmless, '') }

  [
    /\w\d+/,
    /\d+\w/,
    # Non-capturing group: with a capture group, scan would hand back the
    # (empty) captures rather than the matched digits.
    /\d+(?:$|\b)/
  ].each do |suspicious|
    string.scan(suspicious).each do |fragment|
      string.sub!(fragment, '')
      self.append_score = 0.1
    end
  end
end

def name
  'Numbers next to words'
end

def description
  'Detects unusual number/word combinations'
end