spamham 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/README.rdoc +3 -0
- data/Rakefile +2 -0
- data/init.rb +1 -0
- data/lib/spamham/configure.rb +25 -0
- data/lib/spamham/link.rb +21 -0
- data/lib/spamham/scan.rb +32 -0
- data/lib/spamham/string.rb +10 -0
- data/lib/spamham/version.rb +3 -0
- data/lib/spamham/wordlist.rb +18 -0
- data/lib/spamham.rb +21 -0
- data/spamham.gemspec +21 -0
- data/spec/spamham_spec.rb +35 -0
- metadata +81 -0
data/Gemfile
ADDED
data/README.rdoc
ADDED
data/Rakefile
ADDED
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'lib/spamham.rb'
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Configuration handling for SpamHam.
class SpamHam
  # Builds @config from the given options, filling in defaults for any
  # setting that is missing, then applies config/spamham.rb if that file
  # exists relative to the current working directory.
  #
  # The supplied hash is copied first, so the caller's hash is never modified
  # and a frozen hash may be passed.
  #
  # config - Hash of settings keyed by Symbol, or nil for all defaults.
  #
  # Returns nil when there is no config file, otherwise whatever
  # user_config returns.
  def conf(config = nil)
    @config = config ? config.dup : {}
    load_config_from_defaults
    load_config_from_file if File.exist?("config/spamham.rb")
  end

  private

  # Loads config/spamham.rb and then calls user_config.
  # NOTE(review): user_config is not defined in this file; presumably the
  # loaded file defines it on SpamHam - confirm.
  def load_config_from_file
    load 'config/spamham.rb'
    user_config
  end

  # Sets any unset values to their defaults.
  # Uses ||=, so a setting explicitly given as nil or false is also replaced.
  def load_config_from_defaults
    @config[:trigger_weight] ||= 30
    @config[:link_weight] ||= 10
    @config[:link_multiple] ||= 1.1
    @config[:multiple_link_style_weight] ||= 20
    @config[:link_styles] ||= [/<a href="(.*?)">(.*?)<\/a>/,/\[url=(.*?)\](.*?)\[\/url\]/]
  end
end
|
data/lib/spamham/link.rb
ADDED
data/lib/spamham/scan.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Scoring for SpamHam: turns @string into a numeric @string_weight using the
# link patterns and weights held in @config plus the word list.
class SpamHam
  # Recomputes @string_weight for the current @string, starting from zero.
  # Link weights are added first, then one point per word-list occurrence.
  def scan
    @string_weight = 0
    build_link_weights
    build_word_list_weights
  end

  private

  # Adds the weight of every matched link. Within one link style the n-th
  # match (1-based) adds link_weight * (link_multiple * n), so later links
  # cost more. The position comes from each_with_index rather than
  # Array#index, so repeated identical links are counted at their real
  # position instead of all being treated as the first match.
  #
  # Adds multiple_link_style_weight when links returns two or more groups.
  def build_link_weights
    groups = links # scan once; the result is needed for both steps below
    groups.each do |linkstyle|
      linkstyle.each_with_index do |_link, position|
        @string_weight += @config[:link_weight] * (@config[:link_multiple] * (position + 1))
      end
    end
    @string_weight += @config[:multiple_link_style_weight] if groups.size >= 2
  end

  # Returns one array of matches per configured link style, in style order.
  #
  # NOTE(review): an empty match list is only skipped when an empty list has
  # already been collected, so the first style that matches nothing still
  # contributes an empty group. With two styles this means any text holding
  # at least one link yields two groups. This is kept as-is because the
  # trigger arithmetic depends on it - confirm before changing.
  def links
    matched_links = []
    @config[:link_styles].each do |style|
      found = @string.scan(style)
      matched_links.push(found) unless found.empty? && matched_links.include?(found)
    end
    matched_links
  end

  # Adds 1 to the weight for every occurrence of every word in wordlist.
  # Each word is interpolated into a Regexp unescaped, so matching is
  # case-sensitive and regexp metacharacters in a word are treated as pattern
  # syntax.
  def build_word_list_weights
    wordlist.each do |word|
      @string_weight += @string.scan(/#{word}/).count
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Word list used by the SpamHam scanner.
class SpamHam
  # Returns a new Array holding the built-in trigger words followed by any
  # words supplied under @config[:additional_words].
  def wordlist
    built_in = [
      "viagra", "fake watches", "enlargement", "penis",
      "designer", "brands", "buy", "online slots",
      "valium", "ambien", "dosage", "propecia"
    ]
    extras = @config[:additional_words] || []
    built_in + extras
  end
end
|
data/lib/spamham.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spamham/configure.rb'
|
2
|
+
require 'spamham/scan.rb'
|
3
|
+
require 'spamham/string.rb'
|
4
|
+
require 'spamham/wordlist.rb'
|
5
|
+
|
6
|
+
# Public entry point of the filter: configure on creation, then classify
# strings and expose the weight of the last one checked.
class SpamHam
  # Sets up the configuration by calling conf with no arguments.
  def initialize
    conf
  end

  # Scans the given string and reports whether its weight reaches
  # @config[:trigger_weight]. The string is only held in @string for the
  # duration of the scan and is cleared again before the comparison.
  def spam?(string)
    @string = string
    scan
    @string = nil

    threshold = @config[:trigger_weight]
    @string_weight >= threshold
  end

  # Weight computed by the most recent scan (nil before any scan has run).
  def weight
    @string_weight
  end
end
|
data/spamham.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "spamham/version"
|
4
|
+
|
5
|
+
# Gem specification for spamham. The file lists are taken from whatever git
# currently tracks, so this must be evaluated inside the git checkout.
Gem::Specification.new do |spec|
  spec.name        = "spamham"
  spec.version     = SpamHam::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Adam "Arcath" Laycock']
  spec.email       = ["adam@arcath.net"]
  spec.homepage    = "http://www.arcath.net"
  spec.summary     = "Provides Spam filtering for ruby and rails"
  spec.description = "..."

  spec.rubyforge_project = "spamham"

  # Everything tracked by git ships with the gem.
  spec.files = `git ls-files`.split("\n")
  # Tracked files under test/, spec/ or features/.
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name only.
  tracked_bins = `git ls-files -- bin/*`.split("\n")
  spec.executables = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'lib/spamham.rb'
|
2
|
+
|
3
|
+
# Examples for SpamHam#spam?, each run against a freshly built SpamHam.
describe SpamHam, "#spam?" do
  it "Should return false for \"not spam\"" do
    SpamHam.new.spam?("not spam").should == false
  end

  # Text containing both <a href> and [url] style links.
  it "Should return true for \"<a href=\"host\">text</a> [url=host1]text1[/url] \"<a href=\"host2\">text2</a>\"" do
    SpamHam.new.spam?("<a href=\"host\">text</a> [url=host1]text1[/url] \"<a href=\"host2\">text2</a>").should == true
  end

  # Text containing a single [url] style link.
  it "Should return true for \"ixxdzakarvtstdldkpxt, [url=http://www.amosboergoats.com/]Buy Cialis[/url], hgOvsFa.\"" do
    SpamHam.new.spam?("ixxdzakarvtstdldkpxt, [url=http://www.amosboergoats.com/]Buy Cialis[/url], hgOvsFa.").should == true
  end
end
|
16
|
+
|
17
|
+
# Examples for SpamHam#weight: the value reported after spam? has scanned a
# string.
describe SpamHam, "#weight" do
  # Scans the text with a fresh SpamHam and returns the resulting weight.
  weight_of = lambda do |text|
    filter = SpamHam.new
    filter.spam?(text)
    filter.weight
  end

  it "Should return 1 for \"penis\"" do
    weight_of.call("penis").should == 1
  end

  # NOTE(review): the description names 3.6 but the expectation is only an
  # upper bound of 3.7 - confirm which one is intended.
  it "Should return 3.6 for \"designer brands\"" do
    weight_of.call("designer brands").should <= 3.7
  end
end
|
30
|
+
|
31
|
+
# Example for the spam? method called directly on a String.
# NOTE(review): String#spam? is not defined in the visible source; presumably
# it comes from lib/spamham/string.rb - confirm.
describe String, "#spam?" do
  it "Should return false for \"string of characters\"" do
    "string of characters".spam?.should == false
  end
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: spamham
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Adam "Arcath" Laycock
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-23 00:00:00 +00:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: ...
|
23
|
+
email:
|
24
|
+
- adam@arcath.net
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- README.rdoc
|
35
|
+
- Rakefile
|
36
|
+
- init.rb
|
37
|
+
- lib/spamham.rb
|
38
|
+
- lib/spamham/configure.rb
|
39
|
+
- lib/spamham/link.rb
|
40
|
+
- lib/spamham/scan.rb
|
41
|
+
- lib/spamham/string.rb
|
42
|
+
- lib/spamham/version.rb
|
43
|
+
- lib/spamham/wordlist.rb
|
44
|
+
- spamham.gemspec
|
45
|
+
- spec/spamham_spec.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://www.arcath.net
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 3
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
hash: 3
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
requirements: []
|
74
|
+
|
75
|
+
rubyforge_project: spamham
|
76
|
+
rubygems_version: 1.3.7
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: Provides Spam filtering for ruby and rails
|
80
|
+
test_files: []
|
81
|
+
|