swallow 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -0
- data/bin/swallow +7 -0
- data/lib/email.rb +54 -0
- data/lib/maildir.rb +109 -0
- data/lib/ruleset.rb +50 -0
- data/lib/spamdetector.rb +130 -0
- data/lib/swallow.rb +44 -0
- data/lib/time_extentions.rb +98 -0
- data/test/test_folder.rb +9 -0
- data/test/test_ruleset.rb +8 -0
- data/test/test_spam.rb +12 -0
- metadata +63 -0
data/README
ADDED
data/bin/swallow
ADDED
data/lib/email.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'time_extentions'
|
4
|
+
|
5
|
+
module Swallow
  # A single message file stored inside a Maildir sub-folder.
  #
  # The file name is parsed once on construction; the message body is only
  # read from disk when it is asked for.
  class Email
    attr_accessor :id, :weird_id, :host, :order
    attr_accessor :filename

    # filename - path of the message file. Only the last path component is
    #            parsed for metadata; the full path is kept for later I/O.
    def initialize(filename)
      @filename = filename
      parse_filename(File.basename(filename))
    end

    # Splits a message file name into its parts.
    #
    # The name is split on commas: the first piece is the unique name, the
    # second is stored in @order and the third in @read (presumably the
    # Maildir flag letters -- verify against the delivering MTA).
    # The unique name is split on dots into id, weird_id and host.
    def parse_filename(file)
      uppername, @order, @read = file.split(/,/)
      # Limit the split to three pieces so a host name that itself contains
      # dots (e.g. "mail.example.com") is kept whole.
      @id, @weird_id, @host = uppername.split(/\./, 3)
    end

    # True when the file's change time is after (today - limit days).
    # Relies on File#newer_than? from time_extentions.
    def newer_than?(limit)
      # Block form so the file handle is closed immediately.
      File.open(@filename) { |f| f.newer_than?(Date.today - limit) }
    end

    # True when the file's change time is before (today - limit days).
    # Relies on File#older_than? from time_extentions.
    def older_than?(limit)
      File.open(@filename) { |f| f.older_than?(Date.today - limit) }
    end

    # True when the flag field parsed from the file name contains "S".
    # Other flag letters may accompany it (e.g. "RS").
    def read?
      !@read.nil? && @read.include?("S")
    end

    # Not implemented yet.
    def mark_read
      # modify filename to "S" on end
    end

    # Returns the complete contents of the message file.
    def to_s
      File.read(@filename)
    end

    # Removes the message file from disk.
    def delete
      File.delete(@filename)
    end
  end
end
|
45
|
+
|
46
|
+
# Self test: describe the message file named by the first command-line argument.
if __FILE__ == $0
  include Swallow
  message = Email.new(ARGV[0])
  puts message
  puts "The email is unread" unless message.read?
  puts "Email ID: #{message.id}"
  if message.newer_than?(1)
    puts "The email is newer than one day"
  end
  if message.older_than?(1)
    puts "The email is older than one day"
  end
end
|
data/lib/maildir.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'email'
|
4
|
+
require 'time_extentions'
|
5
|
+
|
6
|
+
module Swallow
  # One Maildir folder on disk, made up of the "cur", "new" and "tmp"
  # sub-directories.
  class MailDirFolder
    # folder_path - directory that contains the cur/new/tmp sub-directories.
    def initialize(folder_path)
      @folder_path = folder_path
      @sub_folders = ["cur", "new", "tmp"]
    end

    # Returns the sorted names of the hidden sub-directories (entries whose
    # name starts with ".") of this folder, excluding "." and "..".
    #
    # The listing is done against @folder_path directly, so the process
    # working directory is left untouched.
    def enum
      Dir.entries(@folder_path).sort.select do |dir|
        dir.start_with?(".") && dir != "." && dir != ".." &&
          File.directory?(File.join(@folder_path, dir))
      end
    end

    # The path this folder was created with.
    def path
      @folder_path
    end

    # Total number of entries across cur, new and tmp.
    def count
      @sub_folders.inject(0) { |total, dir| total + length(dir) }
    end

    # Number of entries in "new".
    def unread_count
      length("new")
    end

    # Number of entries in "cur".
    def read_count
      length("cur")
    end

    # Number of entries in "tmp".
    def unknown_count
      length("tmp")
    end

    # Number of entries (other than "." and "..") in the given sub-directory.
    def length(sub_path)
      (Dir.entries(File.join(@folder_path, sub_path)) - [".", ".."]).length
    end

    # Returns an Email for every non-directory entry in cur, new and tmp.
    def emails
      @sub_folders.map { |sub_path|
        path = File.join(@folder_path, sub_path)
        files = Dir.entries(path).map { |f| File.join(path, f) }
        files.reject { |filename| File.directory?(filename) }.
          map { |filename| Email.new(filename) }
      }.flatten
    end

    # Deletes every message in the folder and prints how many were removed.
    def delete_all_emails
      # List the folder once so the reported count matches what was deleted.
      doomed = emails
      doomed.each { |email| email.delete }
      puts "Deleted #{doomed.length} emails"
    end

    # Messages for which Email#newer_than?(limit) is true.
    def emails_newer_than(limit)
      emails.select { |e| e.newer_than?(limit) }
    end

    # Messages for which Email#older_than?(limit) is true.
    def emails_older_than(limit)
      emails.select { |e| e.older_than?(limit) }
    end

    # Runs Dir#sweep!(limit) (from time_extentions) on cur, new and tmp.
    # Does nothing and returns nil unless limit is an Integer.
    def sweep!(limit)
      return unless limit.kind_of? Integer
      @sub_folders.each do |sub_path|
        # Block form closes the directory handle when the sweep is done.
        Dir.open(File.join(@folder_path, sub_path)) { |dir| dir.sweep!(limit) }
      end
    end

    # One-line summary of the unread / read / tmp counts.
    def to_s
      "#{@folder_path} has #{unread_count} unread messages and #{read_count} read messages (#{unknown_count})."
    end
  end
end
|
98
|
+
|
99
|
+
# Self test: summarise the current user's Maildir and each hidden sub-folder.
if __FILE__ == $0
  include Swallow
  root = "/home/#{ENV['USER']}/Maildir/"
  top = MailDirFolder.new(root)
  puts top.to_s
  top.enum.each do |name|
    child = MailDirFolder.new(top.path + name)
    puts child.to_s
    puts child.enum
  end
end
|
data/lib/ruleset.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module Swallow
  # A list of sorting rules loaded from a YAML file.
  #
  # Each rule is a Hash keyed by Symbols (:folder, :domains, :to, :from).
  class RuleSet
    attr_reader :rules

    # Keys whose string lists are escaped for later use inside a regexp.
    ESCAPED_KEYS = [:domains, :to, :from]

    # filename - path of the YAML rules file.
    #
    # For every rule that names a :folder, each entry of its :domains, :to
    # and :from lists is replaced by its Regexp.quote'd form (still a String,
    # with regexp metacharacters escaped).
    def initialize(filename)
      # An empty YAML file loads as a falsy value; treat it as "no rules".
      @rules = YAML.load_file(filename) || []
      @rules.each do |rule|
        next if rule[:folder].nil?
        ESCAPED_KEYS.each do |key|
          next if rule[key].nil?
          rule[key] = rule[key].map { |i| Regexp.quote(i) }
        end
      end
    end

    # Concatenates the folder, :to, :from and :domains values of every rule
    # after a "Rules Hash Holds:" header line.
    def to_s
      s = String.new("Rules Hash Holds:\n")
      @rules.each do |rule|
        s << "#{rule[:folder]}"
        s << "#{rule[:to]}" unless rule[:to].nil?
        s << "#{rule[:from]}" unless rule[:from].nil?
        # Must look at the current rule, not the whole rules Array.
        s << "#{rule[:domains]}" unless rule[:domains].nil?
      end
      s
    end
  end
end
|
40
|
+
|
41
|
+
# Self Test
|
42
|
+
# Prints the folder and domain list of every rule in the file given as the
# first command-line argument.
if $0 == __FILE__
  # RuleSet lives in the Swallow module and exposes its data through #rules.
  r = Swallow::RuleSet.new(ARGV[0])
  r.rules.each { |rules|
    puts "Folder: #{rules[:folder]}"
    rules[:domains].each { |domain|
      puts "Domain: #{domain}"
    } unless rules[:domains] == nil
  }
end
|
data/lib/spamdetector.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
require 'maildir'
|
3
|
+
|
4
|
+
# Runs +program+, writes +text+ to its standard input and waits for it to end.
#
# program - command line handed to IO.popen.
# text    - data written to the child's standard input.
#
# Returns the child's exit status as an Integer (nil if the child did not
# exit normally, e.g. it was killed by a signal).
def pipe(program, text)
  IO.popen(program, "w") do |f|
    f.print(text)
  end
  # popen's block form sets $? once the child has been reaped.
  $?.exitstatus
end
|
10
|
+
|
11
|
+
# Runs +program+ as a filter: writes +text+ to its standard input and returns
# everything it prints on standard output.
#
# program - command line handed to IO.popen.
# text    - data written to the child's standard input.
#
# Returns the child's output as a String.
def process(program, text)
  # "r+" opens the pipe for both writing and reading.
  IO.popen(program, "r+") do |f|
    f.print(text)
    # Signal end-of-input, otherwise a filter that reads until EOF never
    # finishes and the read below blocks forever.
    f.close_write
    f.read
  end
end
|
17
|
+
|
18
|
+
module Swallow
  # Base class for spam-filter front ends. Messages are handed to an external
  # command through the top-level pipe helper; subclasses decide which command
  # line to use by overriding generate_cmd.
  class SpamDetector
    def initialize
      @innocent_cmd = "cat"
      @training_cmd = "cat"
    end

    # Pipes every message in +folder+ through +cmd+ and prints a summary line.
    #
    # cmd           - command line to feed each message to.
    # folder        - a MailDirFolder, or a String naming a directory.
    # should_delete - messages are deleted after processing only when this is
    #                 exactly true.
    def process_folder(cmd, folder, should_delete = false)
      count = 0
      if folder.kind_of? MailDirFolder
        # List the folder once; the same list is counted and processed.
        messages = folder.emails
        count = messages.length
        messages.each do |email|
          pipe(cmd, email.to_s)
          email.delete if should_delete == true
        end
      end
      if folder.kind_of? String
        Dir.entries(folder).each do |f|
          filename = File.join(folder, f)
          # Test the entry inside +folder+, not relative to the working dir.
          next if File.directory?(filename)
          pipe(cmd, File.read(filename))
          count += 1
          File.delete(filename) if should_delete == true
        end
      end
      puts "Processed #{count} messages in #{folder}"
    end

    # Returns the command line for the given parameters. The base
    # implementation returns "echo" for a Hash and nil otherwise.
    def generate_cmd(params)
      if params.kind_of? Hash
        return "echo"
      end
    end

    # Trains every message in +folder+ as spam, deleting each one afterwards.
    def train_and_clean_folder(folder)
      process_folder(generate_cmd(:source => "corpus", :class => "spam"), folder, true)
    end

    # Trains every message in +folder+ as innocent and leaves it in place.
    def train_and_retain_folder(folder)
      process_folder(generate_cmd(:source => "corpus", :class => "innocent"), folder)
    end

    # Not implemented; returns nil.
    def is_spam?(message_text)
    end

    # Not implemented; returns nil.
    def is_innocent?(message_text)
    end

    # this function takes an array of email objects and trains them as innocent
    def train_emails_as_innocent(emails)
      train_emails(emails, "innocent")
    end

    # this function takes an array of email objects and trains them as spam
    def train_emails_as_spam(emails)
      train_emails(emails, "spam")
    end

    private

    # Pipes each Email in +emails+ to the training command for +klass+ and
    # prints how many were trained. Anything that is not an Array is ignored,
    # as are Array elements that are not Email objects.
    def train_emails(emails, klass)
      return unless emails.kind_of? Array
      cmd = generate_cmd(:source => "corpus", :class => klass)
      trained = emails.select { |email| email.kind_of? Email }
      trained.each { |email| pipe(cmd, email.to_s) }
      puts "Trained #{trained.length} emails as #{klass}"
    end
  end

  # SpamDetector that drives the dspam command-line client.
  class DSpamEngine < SpamDetector
    # add_params - optional Hash; :path overrides the executable ("dspam" by
    #              default) and :mode overrides the training mode ("teft" by
    #              default).
    #
    # Raises IOError when running "<executable> --version" reports that the
    # executable cannot be found.
    def initialize(add_params = nil)
      super()
      @base_cmd = "dspam"
      # Work on a copy so the caller's Hash is not modified.
      @cmd_params = add_params.nil? ? Hash.new : add_params.dup
      @cmd_params[:mode] = "teft" if @cmd_params[:mode].nil?
      @base_cmd = @cmd_params[:path] unless @cmd_params[:path].nil?
      begin
        status = pipe(@base_cmd + " --version", "")
      rescue Errno::ENOENT
        # popen raises instead of returning 127 when no shell is involved.
        status = 127
      end
      raise IOError, "cannot execute #{@base_cmd}" if status == 127
    end

    # Builds the dspam command line.
    #
    # params - Hash with :source and :class; any other value yields a command
    #          carrying only the --mode option.
    #
    # NOTE(review): the values are interpolated into a shell command line
    # without escaping; confirm they never come from untrusted input.
    def generate_cmd(params)
      if params.kind_of? Hash
        return "#{@base_cmd} --mode=#{@cmd_params[:mode]} --source=#{params[:source]} --class=#{params[:class]} --feature=noise --user=#{ENV['USER']}"
      else
        return "#{@base_cmd} --mode=#{@cmd_params[:mode]}"
      end
    end
  end
end
|
124
|
+
|
125
|
+
# Self test: train the folder named by the first argument as spam, then empty it.
if __FILE__ == $0
  include Swallow
  exit(1) if ARGV.length <= 0
  engine = DSpamEngine.new
  engine.train_and_clean_folder(ARGV[0])
end
|
data/lib/swallow.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'maildir'
|
4
|
+
require 'spamdetector'
|
5
|
+
require 'ruleset'
|
6
|
+
require 'yaml'
|
7
|
+
|
8
|
+
module Swallow
  # Ties the pieces together: reads the YAML configuration and runs the
  # training passes over the configured Maildir folders.
  class Admin
    # config_file - path of the YAML configuration; defaults to
    #               ~/.swallow-config when nil.
    #
    # The configuration is a Hash with Symbol keys; this class reads
    # :dspam_path, :maildir, :junk, :corpus, :inbox and :limit.
    def initialize(config_file = nil)
      @config_path = config_file.nil? ? "#{ENV['HOME']}/.swallow-config" : config_file
      @config = YAML.load_file(@config_path)
      @dspam = DSpamEngine.new(:path => @config[:dspam_path])
    end

    # Feeds the junk and corpus folders to the filter via
    # train_and_clean_folder.
    def process_junk
      # Create Folder Objects
      junk = folder(:junk)
      corpus = folder(:corpus)

      # Clean out the Junk Folder
      @dspam.train_and_clean_folder(junk)
      @dspam.train_and_clean_folder(corpus)
    end

    # Feed new messages under the limit in the inbox to the filter as innocent corpus
    def process_inbox
      inbox = folder(:inbox)
      @dspam.train_emails_as_innocent(inbox.emails_newer_than(@config[:limit]))
    end

    private

    # Builds the MailDirFolder for the configured folder +key+.
    # File.join gives the same path whether or not :maildir ends in a slash.
    def folder(key)
      MailDirFolder.new(File.join(@config[:maildir], @config[key]))
    end
  end
end
|
38
|
+
|
39
|
+
# Run as a script: process the junk folders, then the inbox, using the
# default configuration file.
if __FILE__ == $0
  runner = Swallow::Admin.new
  runner.process_junk
  runner.process_inbox
end
|
44
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
############################################
|
3
|
+
# Taken from Ajax Scripts
|
4
|
+
# @see http://ajax.stealthsettings.com/ruby/converting-between-time-and-datetime-objects/
|
5
|
+
require 'date'
|
6
|
+
class Time
  # Returns a DateTime for the same instant, keeping this Time's UTC offset
  # and its sub-second (microsecond) precision.
  def to_a_datetime
    # Whole seconds plus the microsecond part expressed as a Rational.
    fractional_seconds = Rational(usec, 1_000_000) + sec
    # utc_offset is in seconds; DateTime wants a fraction of a day.
    day_fraction = Rational(utc_offset, 86_400)
    DateTime.civil(year, month, day, hour, min, fractional_seconds, day_fraction)
  end
end
|
16
|
+
|
17
|
+
class Date
  # Returns a UTC Time for the same instant as the receiver.
  #
  # The receiver has to provide new_offset, hour, min, sec and sec_fraction,
  # i.e. this is meant to be called on DateTime objects.
  def to_gm_time
    utc = new_offset(0)
    # sec_fraction is the fractional part of the second on current Ruby.
    usec = (utc.sec_fraction * 1_000_000).to_i
    Time.gm(utc.year, utc.month, utc.day, utc.hour, utc.min, utc.sec, usec)
  end

  # Returns a Time in the local time zone for the same instant as the
  # receiver, whatever offset the receiver itself carries.
  def to_local_time
    to_gm_time.getlocal
  end

  # No private to_time helper is defined here on purpose: a method of that
  # name would replace the standard library's Date#to_time.
end
|
33
|
+
|
34
|
+
# End Snippet
|
35
|
+
############################################
|
36
|
+
|
37
|
+
class File
  # True when this file's ctime falls before +date+.
  # Uses Time#to_a_datetime for the comparison.
  def older_than?(date)
    changed_at = ctime.to_a_datetime
    changed_at < date
  end

  # True when this file's ctime falls after +date+.
  # Uses Time#to_a_datetime for the comparison.
  def newer_than?(date)
    changed_at = ctime.to_a_datetime
    changed_at > date
  end
end
|
45
|
+
|
46
|
+
class Dir
  # Returns the paths of the non-directory entries whose change time is
  # before (today - limit days). Relies on File#older_than?.
  def files_older_than(limit)
    cutoff = Date.today - limit
    swallow_select_files { |file| file.older_than?(cutoff) }
  end

  # Returns the paths of the non-directory entries whose change time is
  # after (today - limit days). Relies on File#newer_than?.
  def files_newer_than(limit)
    cutoff = Date.today - limit
    swallow_select_files { |file| file.newer_than?(cutoff) }
  end

  # sweep directory of anything over a certain limit old
  #
  # Returns the list of paths that were deleted.
  def sweep!(limit)
    files_older_than(limit).each { |f| File.delete(f) }
  end

  private

  # Yields an open File for every non-directory entry of this directory and
  # returns the paths for which the block answered true.
  def swallow_select_files
    # Dir.entries re-reads the directory each time, so the result does not
    # depend on how far this Dir object has already been iterated.
    Dir.entries(path).map { |f| File.join(path, f) }.select do |filename|
      next false if File.directory?(filename)
      # Block form closes each file as soon as it has been inspected.
      File.open(filename) { |file| yield file }
    end
  end
end
|
78
|
+
|
79
|
+
class Regexp
  # Returns a case-insensitive copy of this pattern, keeping its other options.
  #
  # The copy is built from #source: interpolating #to_s would embed the
  # original flags as "(?-mix:...)" and cancel the ignore-case option.
  def to_reg
    Regexp.new(source, options | IGNORECASE)
  end
end
|
84
|
+
|
85
|
+
# Self test: list the files in a directory (first argument) that are older,
# then newer, than the given number of days (second argument).
if __FILE__ == $0
  dir = ARGV[0]
  limit = Integer(ARGV[1])
  puts "Files Older than #{limit} day(s) old"
  Dir.open(dir).files_older_than(limit).each do |name|
    puts "#{name}"
  end
  puts "Files Newer than #{limit} days(s) old"
  Dir.open(dir).files_newer_than(limit).each do |name|
    puts "#{name}"
  end
end
|
97
|
+
|
98
|
+
|
data/test/test_folder.rb
ADDED
data/test/test_spam.rb
ADDED
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: swallow
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2007-10-29 00:00:00 -05:00
|
8
|
+
summary: A backend IMAP/Maildir Email Sorting/Routing/Cleanup system
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: swallow-dev@erik.rainey.name
|
12
|
+
homepage: http://erik.rainey.name/swallow
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: swallow
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Erik Rainey
|
31
|
+
files:
|
32
|
+
- bin/swallow
|
33
|
+
- lib/time_extentions.rb
|
34
|
+
- lib/maildir.rb
|
35
|
+
- lib/ruleset.rb
|
36
|
+
- lib/email.rb
|
37
|
+
- lib/swallow.rb
|
38
|
+
- lib/spamdetector.rb
|
39
|
+
- README
|
40
|
+
test_files:
|
41
|
+
- test/test_spam.rb
|
42
|
+
- test/test_ruleset.rb
|
43
|
+
- test/test_folder.rb
|
44
|
+
rdoc_options: []
|
45
|
+
|
46
|
+
extra_rdoc_files:
|
47
|
+
- README
|
48
|
+
executables:
|
49
|
+
- swallow
|
50
|
+
extensions: []
|
51
|
+
|
52
|
+
requirements: []
|
53
|
+
|
54
|
+
dependencies:
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: gurgitate-mail
|
57
|
+
version_requirement:
|
58
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.8.5
|
63
|
+
version:
|