spamham 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/README.rdoc +3 -0
- data/Rakefile +2 -0
- data/init.rb +1 -0
- data/lib/spamham/configure.rb +25 -0
- data/lib/spamham/link.rb +21 -0
- data/lib/spamham/scan.rb +32 -0
- data/lib/spamham/string.rb +10 -0
- data/lib/spamham/version.rb +3 -0
- data/lib/spamham/wordlist.rb +18 -0
- data/lib/spamham.rb +21 -0
- data/spamham.gemspec +21 -0
- data/spec/spamham_spec.rb +35 -0
- metadata +81 -0
data/Gemfile
ADDED
data/README.rdoc
ADDED
data/Rakefile
ADDED
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Plugin entry point: loads the SpamHam library.
#
# The library directory is resolved relative to this file rather than the
# current working directory, because "." is not on the load path in
# Ruby 1.9.2 and later. Putting lib/ on the load path also lets
# lib/spamham.rb resolve its own `require 'spamham/...'` lines.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'spamham'
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Configuration handling for SpamHam.
class SpamHam
  # Optional user configuration file, relative to the current working
  # directory.
  CONFIG_FILE = "config/spamham.rb"

  # Builds the configuration used by the scanner and stores it in @config.
  #
  # config - optional Hash of settings. It is copied, so the caller's hash is
  #          never modified (and may be frozen). Keys left unset receive the
  #          defaults from #load_config_from_defaults.
  #
  # When CONFIG_FILE exists it is loaded after the defaults have been applied
  # and its `user_config` method is called.
  def conf(config = nil)
    # Work on a copy: the defaults below are written into @config and must not
    # leak into the hash the caller handed in.
    @config = (config || {}).dup
    load_config_from_defaults
    load_config_from_file if File.exist?(CONFIG_FILE)
  end

  private

  # Loads the user configuration file and runs the `user_config` method that
  # the file is expected to define.
  def load_config_from_file
    # Load by absolute path so the file that was checked with File.exist? is
    # the one that gets loaded; a bare relative name is looked up on
    # $LOAD_PATH first.
    load File.expand_path(CONFIG_FILE)
    user_config
  end

  # Sets any unset values to their defaults.
  def load_config_from_defaults
    @config[:trigger_weight] ||= 30
    @config[:link_weight] ||= 10
    @config[:link_multiple] ||= 1.1
    @config[:multiple_link_style_weight] ||= 20
    @config[:link_styles] ||= [/<a href="(.*?)">(.*?)<\/a>/,/\[url=(.*?)\](.*?)\[\/url\]/]
  end
end
|
data/lib/spamham/link.rb
ADDED
data/lib/spamham/scan.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Scoring logic for SpamHam.
class SpamHam
  # Recomputes @string_weight for the text currently held in @string, using
  # the settings in @config. The weight starts at 0 and is increased by the
  # link score and then by the word-list score.
  #
  # The return value is not meaningful; read the result from @string_weight.
  def scan
    @string_weight = 0
    build_link_weights
    build_word_list_weights
  end

  private

  # Adds the link score to @string_weight.
  #
  # Within each link style the n-th match (1-based) adds
  # link_weight * (link_multiple * n), so every further link costs more than
  # the one before it. The position comes from each_with_index rather than
  # Array#index, which returned the position of the first equal element and
  # therefore under-weighted repeated identical links.
  #
  # multiple_link_style_weight is added once when #links returns two or more
  # entries.
  def build_link_weights
    matched = links
    matched.each do |style_matches|
      style_matches.each_with_index do |_link, position|
        @string_weight = @string_weight + (@config[:link_weight] * (@config[:link_multiple] * (position + 1)))
      end
    end
    @string_weight = @string_weight + @config[:multiple_link_style_weight] if matched.size >= 2
  end

  # Returns one array of matches per configured link style, in configuration
  # order. Each entry is the result of String#scan for that style.
  #
  # NOTE(review): an empty result is only dropped when an equal (empty) result
  # has already been collected, so the first style without matches still
  # contributes an empty entry. With two styles configured, any text that
  # contains a link therefore yields two entries and receives
  # multiple_link_style_weight. This looks unintended ("or" rather than
  # "and"), but the existing spec for a single [url=...] link relies on the
  # resulting score, so the behaviour is kept as is - confirm before changing.
  def links
    matched_links = []
    @config[:link_styles].each do |style|
      matches = @string.scan(style)
      next if matches.empty? && matched_links.include?(matches)
      matched_links.push(matches)
    end
    matched_links
  end

  # Adds 1 to @string_weight for every occurrence of every word in the word
  # list. Matching is case-sensitive.
  #
  # String entries are matched literally, so words containing regular
  # expression metacharacters (for example "$$$" or "(free") neither raise nor
  # match unrelated text. Regexp entries are used as given.
  def build_word_list_weights
    wordlist.each do |word|
      pattern = word.is_a?(Regexp) ? word : word.to_s
      @string_weight = @string_weight + @string.scan(pattern).count
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Word list used by the SpamHam scanner.
class SpamHam
  # Built-in spam words. Defined once and frozen so the list is not rebuilt
  # on every call to #wordlist.
  DEFAULT_WORDS = [
    "viagra",
    "fake watches",
    "enlargement",
    "penis",
    "designer",
    "brands",
    "buy",
    "online slots",
    "valium",
    "ambien",
    "dosage",
    "propecia"
  ].each { |word| word.freeze }.freeze

  # Returns a new Array holding the built-in words followed by the entries of
  # @config[:additional_words].
  #
  # :additional_words may be absent or nil (no extra words), an Array, or a
  # single value, which is treated as a one-element list.
  def wordlist
    DEFAULT_WORDS + Array(@config[:additional_words])
  end
end
|
data/lib/spamham.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spamham/configure.rb'
|
2
|
+
require 'spamham/scan.rb'
|
3
|
+
require 'spamham/string.rb'
|
4
|
+
require 'spamham/wordlist.rb'
|
5
|
+
|
6
|
+
# Public entry points of the spam filter.
class SpamHam
  # Creates a filter.
  #
  # config - optional Hash of settings, passed straight to #conf. Omitting it
  #          behaves exactly like the previous argument-less constructor.
  def initialize(config = nil)
    conf(config)
  end

  # Scores the given text and reports whether it counts as spam.
  #
  # string - the text to check; converted with to_s, so nil is treated as
  #          empty text instead of raising.
  #
  # Returns true when the computed weight is greater than or equal to
  # @config[:trigger_weight], false otherwise. The weight stays available
  # through #weight afterwards.
  def spam?(string)
    @string = string.to_s
    scan
    @string_weight >= @config[:trigger_weight]
  ensure
    # Drop the reference to the text even when scanning raises.
    @string = nil
  end

  # Returns the weight computed by the most recent #spam? call (nil before
  # the first call).
  def weight
    @string_weight
  end
end
|
data/spamham.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for spamham.

# Make lib/ loadable so the version constant can be required below.
$:.push File.expand_path("../lib", __FILE__)
require "spamham/version"

Gem::Specification.new do |s|
  s.name        = "spamham"
  s.version     = SpamHam::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Adam \"Arcath\" Laycock"]
  s.email       = ["adam@arcath.net"]
  s.homepage    = "http://www.arcath.net"
  s.summary     = %q{Provides Spam filtering for ruby and rails}
  # TODO: replace the placeholder description.
  s.description = %q{...}

  s.rubyforge_project = "spamham"

  # Package every file tracked by git.
  s.files         = `git ls-files`.split("\n")
  # Derive the test files and executables from s.files instead of a second
  # `git ls-files -- {test,spec,features}/*` call: that pattern depends on
  # shell brace expansion, which a plain `sh` does not perform, and the
  # published gem was built with an empty test_files list as a result.
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.executables   = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Specs for SpamHam and the String#spam? extension.
#
# The library is located relative to this file because "." is not on the
# load path in Ruby 1.9.2 and later; lib/ is added to the load path so that
# lib/spamham.rb can resolve its own requires.
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'spamham'

describe SpamHam, "#spam?" do
  it "Should return false for \"not spam\"" do
    SpamHam.new.spam?("not spam").should == false
  end

  # Text using both link styles.
  it "Should return true for \"<a href=\"host\">text</a> [url=host1]text1[/url] \"<a href=\"host2\">text2</a>\"" do
    SpamHam.new.spam?("<a href=\"host\">text</a> [url=host1]text1[/url] \"<a href=\"host2\">text2</a>").should == true
  end

  # A single [url=...] link surrounded by filler text.
  it "Should return true for \"ixxdzakarvtstdldkpxt, [url=http://www.amosboergoats.com/]Buy Cialis[/url], hgOvsFa.\"" do
    SpamHam.new.spam?("ixxdzakarvtstdldkpxt, [url=http://www.amosboergoats.com/]Buy Cialis[/url], hgOvsFa.").should == true
  end
end

describe SpamHam, "#weight" do
  it "Should return 1 for \"penis\"" do
    ham = SpamHam.new
    ham.spam?("penis")
    ham.weight.should == 1
  end

  # The assertion is an upper bound, so the description states one too.
  it "Should return at most 3.7 for \"designer brands\"" do
    ham = SpamHam.new
    ham.spam?("designer brands")
    ham.weight.should <= 3.7
  end
end

describe String, "#spam?" do
  it "Should return false for \"string of characters\"" do
    "string of characters".spam?.should == false
  end
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: spamham
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Adam "Arcath" Laycock
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-23 00:00:00 +00:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: ...
|
23
|
+
email:
|
24
|
+
- adam@arcath.net
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- README.rdoc
|
35
|
+
- Rakefile
|
36
|
+
- init.rb
|
37
|
+
- lib/spamham.rb
|
38
|
+
- lib/spamham/configure.rb
|
39
|
+
- lib/spamham/link.rb
|
40
|
+
- lib/spamham/scan.rb
|
41
|
+
- lib/spamham/string.rb
|
42
|
+
- lib/spamham/version.rb
|
43
|
+
- lib/spamham/wordlist.rb
|
44
|
+
- spamham.gemspec
|
45
|
+
- spec/spamham_spec.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://www.arcath.net
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 3
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
hash: 3
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
requirements: []
|
74
|
+
|
75
|
+
rubyforge_project: spamham
|
76
|
+
rubygems_version: 1.3.7
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: Provides Spam filtering for ruby and rails
|
80
|
+
test_files: []
|
81
|
+
|