swallow 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -0
- data/bin/swallow +7 -0
- data/lib/email.rb +54 -0
- data/lib/maildir.rb +109 -0
- data/lib/ruleset.rb +50 -0
- data/lib/spamdetector.rb +130 -0
- data/lib/swallow.rb +44 -0
- data/lib/time_extentions.rb +98 -0
- data/test/test_folder.rb +9 -0
- data/test/test_ruleset.rb +8 -0
- data/test/test_spam.rb +12 -0
- metadata +63 -0
data/README
ADDED
data/bin/swallow
ADDED
data/lib/email.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'time_extentions'
|
4
|
+
|
5
|
+
module Swallow
  # One message file on disk, identified by its path.
  #
  # The file's base name is split on "," into three pieces. The first piece
  # is split again on "." into +id+, +weird_id+ and +host+; the second piece
  # is exposed as +order+; the third piece is kept as the flag string that
  # #read? inspects.
  # NOTE(review): this presumably targets Maildir names of the form
  # "id.unique.host,S=<size>:2,<flags>" -- confirm against the mail server
  # that produces the files.
  class Email
    attr_accessor :id, :weird_id, :host, :order
    attr_accessor :filename

    # filename - path of the message file. Only the last path component is
    #            parsed; the full path is kept for later file access.
    def initialize(filename)
      @filename = filename
      parse_filename(File.basename(filename))
    end

    # Splits a bare file name into its components (see the class comment).
    # Missing components are left as nil instead of raising.
    def parse_filename(file)
      uppername, @order, @read = file.to_s.split(/,/)
      @id, @weird_id, @host = uppername.to_s.split(/\./)
    end

    # True when the file is newer than +limit+ days before today.
    # Relies on File#newer_than? being defined (time_extentions).
    # The block form closes the handle again; the age check only needs it
    # for the duration of the call.
    def newer_than?(limit)
      File.open(@filename) { |f| f.newer_than?(Date.today - limit) }
    end

    # True when the file is older than +limit+ days before today.
    # Relies on File#older_than? being defined (time_extentions).
    def older_than?(limit)
      File.open(@filename) { |f| f.older_than?(Date.today - limit) }
    end

    # True when the flag piece of the file name carries the "S" (seen) flag.
    # Several flags may be present at once (e.g. "RS"), so the flag string
    # is searched for "S" rather than compared with it. Always returns
    # true or false.
    def read?
      (@read.to_s =~ /\A[A-Za-z]*S[A-Za-z]*\z/) ? true : false
    end

    # Not implemented yet.
    def mark_read
      # modify filename to "S" on end
    end

    # Returns the complete content of the message file.
    def to_s
      File.read(@filename)
    end

    # Removes the message file from disk.
    def delete
      File.delete(@filename)
    end
  end
end
|
45
|
+
|
46
|
+
# Self test: dump the message named on the command line and report on it.
if __FILE__ == $0
  include Swallow

  message = Email.new(ARGV[0])
  one_day = 1

  puts message
  if message.read? == false
    puts "The email is unread"
  end
  puts "Email ID: #{message.id}"
  if message.newer_than?(one_day)
    puts "The email is newer than one day"
  end
  if message.older_than?(one_day)
    puts "The email is older than one day"
  end
end
|
data/lib/maildir.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'email'
|
4
|
+
require 'time_extentions'
|
5
|
+
|
6
|
+
module Swallow
  # A mail folder on disk made of the three sub-directories "cur", "new"
  # and "tmp" below +folder_path+.
  class MailDirFolder
    # folder_path - directory that contains the cur/new/tmp sub-directories.
    def initialize(folder_path)
      @folder_path = folder_path
      @sub_folders = ["cur", "new", "tmp"]
    end

    # Returns the sorted names of the hidden sub-directories (names starting
    # with ".") directly inside the folder, excluding "." and "..".
    # The listing is done with joined paths, so the working directory of the
    # process is left untouched.
    def enum
      Dir.entries(@folder_path).sort.select do |name|
        name.start_with?(".") && name != "." && name != ".." &&
          File.directory?(File.join(@folder_path, name))
      end
    end

    # The path this folder was created with.
    def path
      @folder_path
    end

    # Total number of entries in cur, new and tmp together.
    def count
      @sub_folders.inject(0) { |sum, dir| sum + length(dir) }
    end

    # Number of entries in "new".
    def unread_count
      length("new")
    end

    # Number of entries in "cur".
    def read_count
      length("cur")
    end

    # Number of entries in "tmp".
    def unknown_count
      length("tmp")
    end

    # Number of entries (files and directories alike, minus "." and "..")
    # inside the sub-directory +sub_path+ of this folder.
    def length(sub_path)
      Dir.entries(File.join(@folder_path, sub_path)).reject { |name|
        name == "." || name == ".."
      }.length
    end

    # Returns an Email object for every non-directory entry found in
    # cur, new and tmp (in that order).
    def emails
      list = []
      @sub_folders.each do |sub_path|
        dir = File.join(@folder_path, sub_path)
        Dir.entries(dir).each do |name|
          filename = File.join(dir, name)
          next if File.directory?(filename)
          list << Email.new(filename)
        end
      end
      list
    end

    # Deletes every message in the folder and prints how many were removed.
    def delete_all_emails
      messages = emails # scan the folder once; count and deletion share the list
      messages.each { |email| email.delete }
      puts "Deleted #{messages.length} emails"
    end

    # Messages for which Email#newer_than?(limit) is true.
    def emails_newer_than(limit)
      emails.find_all { |e| e.newer_than?(limit) }
    end

    # Messages for which Email#older_than?(limit) is true.
    def emails_older_than(limit)
      emails.find_all { |e| e.older_than?(limit) }
    end

    # Calls Dir#sweep!(limit) (defined in time_extentions) on cur, new and
    # tmp. Does nothing unless +limit+ is an Integer.
    def sweep!(limit)
      return unless limit.kind_of? Integer
      @sub_folders.each do |sub_path|
        # block form so the directory handle is closed after the sweep
        Dir.open(File.join(@folder_path, sub_path)) { |dir| dir.sweep!(limit) }
      end
    end

    def to_s
      "#{@folder_path} has #{unread_count} unread messages and #{read_count} read messages (#{unknown_count})."
    end
  end
end
|
98
|
+
|
99
|
+
# Self test: summarise the current user's Maildir and each hidden sub-folder.
if __FILE__ == $0
  include Swallow

  top = MailDirFolder.new("/home/#{ENV['USER']}/Maildir/")
  puts top.to_s

  top.enum.each do |name|
    child = MailDirFolder.new(top.path + name)
    puts child.to_s
    puts child.enum
  end
end
|
data/lib/ruleset.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module Swallow
  # A list of rules loaded from a YAML file.
  #
  # Each rule is accessed with the symbol keys :folder, :domains, :to and
  # :from. For rules that have a :folder, the entries of :domains, :to and
  # :from are passed through Regexp.quote, so they are stored as escaped
  # pattern strings (not Regexp objects).
  class RuleSet
    attr_reader :rules

    # filename - path of the YAML rule file. An empty file yields an empty
    #            rule list.
    def initialize(filename)
      @rules = YAML.load_file(filename) || []
      @rules.each do |rule|
        next if rule[:folder].nil?
        [:domains, :to, :from].each do |key|
          next if rule[key].nil?
          # escape regexp metacharacters so the entries match literally
          rule[key] = rule[key].map { |i| Regexp.quote(i) }
        end
      end
    end

    # Returns a header line followed by, for every rule, its folder and the
    # :to, :from and :domains lists that are present.
    def to_s
      s = String.new("Rules Hash Holds:\n")
      @rules.each do |rule|
        s << "#{rule[:folder]}"
        [:to, :from, :domains].each do |key|
          s << "#{rule[key]}" unless rule[key].nil?
        end
      end
      s
    end
  end
end
|
40
|
+
|
41
|
+
# Self Test
|
42
|
+
# Prints the folder and escaped domain patterns of every rule in the YAML
# file named by the first command-line argument.
if $0 == __FILE__
  # RuleSet lives in the Swallow module and exposes its data through #rules.
  r = Swallow::RuleSet.new(ARGV[0])
  r.rules.each do |rule|
    puts "Folder: #{rule[:folder]}"
    next if rule[:domains].nil?
    rule[:domains].each { |domain| puts "Domain: #{domain}" }
  end
end
|
data/lib/spamdetector.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
require 'maildir'
|
3
|
+
|
4
|
+
# Runs +program+, writes +text+ to its standard input and waits for it to
# finish.
#
# program - command line handed to IO.popen.
# text    - data printed to the child's stdin.
#
# Returns the child's exit status. 127 is returned when the program cannot
# be found, whether the lookup fails in a shell or in IO.popen itself
# (which raises Errno::ENOENT). The result is nil if the child did not exit
# normally (Process::Status#exitstatus).
def pipe(program, text)
  IO.popen(program, "w") do |f|
    f.print(text)
  end
  $?.exitstatus
rescue Errno::ENOENT
  127
end
|
10
|
+
|
11
|
+
# Runs +program+ as a filter: writes +text+ to its standard input and
# returns everything it prints on standard output.
#
# program - command line handed to IO.popen.
# text    - data printed to the child's stdin.
#
# The pipe is opened read/write ("r+"), and the write side is closed before
# reading so that filters which wait for end-of-input can finish.
def process(program, text)
  IO.popen(program, "r+") do |f|
    f.print(text)
    f.close_write
    f.read
  end
end
|
17
|
+
|
18
|
+
module Swallow
  # Base class for spam filters that are trained by piping message text
  # into an external command (see the top-level pipe helper). The command
  # line comes from #generate_cmd, which subclasses override.
  class SpamDetector
    def initialize
      @innocent_cmd = "cat"
      @training_cmd = "cat"
    end

    # Pipes every message of +folder+ into +cmd+ and prints how many
    # messages were handled.
    #
    # cmd           - command line passed to pipe.
    # folder        - a MailDirFolder, or a String naming a directory whose
    #                 plain files are the messages.
    # should_delete - when true, each message is removed after it was piped.
    def process_folder(cmd, folder, should_delete = false)
      count = 0
      if folder.kind_of? MailDirFolder
        messages = folder.emails # list the folder once
        count = messages.length
        messages.each do |email|
          pipe(cmd, email.to_s)
          email.delete if should_delete
        end
      end
      if folder.kind_of? String
        Dir.entries(folder).each do |name|
          path = File.join(folder, name)
          # test the joined path; a bare entry name would be resolved
          # against the current working directory instead of +folder+
          next if File.directory?(path)
          pipe(cmd, File.read(path))
          count += 1
          File.delete(path) if should_delete
        end
      end
      puts "Processed #{count} messages in #{folder}"
    end

    # Returns the command line used for training. This base implementation
    # returns "echo" for a Hash of parameters and nil for anything else.
    def generate_cmd(params)
      return "echo" if params.kind_of? Hash
    end

    # Trains every message of +folder+ as spam and deletes it afterwards.
    def train_and_clean_folder(folder)
      process_folder(generate_cmd(:source => "corpus", :class => "spam"), folder, true)
    end

    # Trains every message of +folder+ as innocent and keeps it.
    def train_and_retain_folder(folder)
      process_folder(generate_cmd(:source => "corpus", :class => "innocent"), folder)
    end

    # Not implemented.
    def is_spam?(message_text)
    end

    # Not implemented.
    def is_innocent?(message_text)
    end

    # this function takes an array of email objects and trains them as innocent
    def train_emails_as_innocent(emails)
      train_emails(emails, "innocent")
    end

    # this function takes an array of email objects and trains them as spam
    def train_emails_as_spam(emails)
      train_emails(emails, "spam")
    end

    private

    # Pipes each Email in +emails+ into the training command for +klass+
    # ("spam" or "innocent") and prints the size of the array. Anything
    # that is not an Array is ignored; non-Email elements are skipped.
    def train_emails(emails, klass)
      return unless emails.kind_of? Array
      cmd = generate_cmd(:source => "corpus", :class => klass)
      emails.each do |email|
        pipe(cmd, email.to_s) if email.kind_of? Email
      end
      puts "Trained #{emails.length} emails as #{klass}"
    end
  end

  # SpamDetector that drives the dspam command-line tool.
  class DSpamEngine < SpamDetector
    # add_params - optional Hash; :mode selects the dspam mode (default
    #              "teft") and :path overrides the "dspam" executable.
    #              The hash is copied, the caller's object is not modified.
    #
    # Raises IOError when running "<executable> --version" reports exit
    # status 127.
    def initialize(add_params = nil)
      super()
      @base_cmd = "dspam"
      @cmd_params = add_params.nil? ? Hash.new : add_params.dup
      @cmd_params[:mode] = "teft" if @cmd_params[:mode].nil?
      @base_cmd = @cmd_params[:path] unless @cmd_params[:path].nil?
      if pipe(@base_cmd + " --version", "") == 127
        raise IOError, "cannot execute #{@base_cmd}"
      end
    end

    # Builds the dspam command line. With a Hash, :source and :class are
    # added along with the noise feature and the current user; otherwise
    # only the mode is passed.
    def generate_cmd(params)
      if params.kind_of? Hash
        return "#{@base_cmd} --mode=#{@cmd_params[:mode]} --source=#{params[:source]} --class=#{params[:class]} --feature=noise --user=#{ENV['USER']}"
      else
        return "#{@base_cmd} --mode=#{@cmd_params[:mode]}"
      end
    end
  end
end
|
124
|
+
|
125
|
+
# Self test: train the folder named on the command line as spam and empty it.
if __FILE__ == $0
  include Swallow

  exit(1) if ARGV.length <= 0

  engine = DSpamEngine.new()
  engine.train_and_clean_folder(ARGV[0])
end
|
data/lib/swallow.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'maildir'
|
4
|
+
require 'spamdetector'
|
5
|
+
require 'ruleset'
|
6
|
+
require 'yaml'
|
7
|
+
|
8
|
+
module Swallow
  # Drives the spam-training run described by a YAML configuration file.
  # The configuration is read with the symbol keys :dspam_path, :maildir,
  # :junk, :corpus, :inbox and :limit.
  class Admin
    # config_file - path of the YAML configuration; when nil,
    #               ".swallow-config" in the HOME directory is used.
    def initialize(config_file = nil)
      @config_path = config_file.nil? ? "#{ENV['HOME']}/.swallow-config" : config_file
      @config = YAML.load_file(@config_path)
      @dspam = DSpamEngine.new(:path => @config[:dspam_path])
    end

    # Builds the junk and corpus folders, then runs train_and_clean_folder
    # on each of them (junk first).
    def process_junk
      junk_folder, corpus_folder = [:junk, :corpus].map do |key|
        MailDirFolder.new(@config[:maildir] + @config[key])
      end

      @dspam.train_and_clean_folder(junk_folder)
      @dspam.train_and_clean_folder(corpus_folder)
    end

    # Feeds the inbox messages newer than the configured :limit to the
    # filter as innocent corpus.
    def process_inbox
      inbox_path = @config[:maildir] + @config[:inbox]
      recent = MailDirFolder.new(inbox_path).emails_newer_than(@config[:limit])
      @dspam.train_emails_as_innocent(recent)
    end
  end
end
|
38
|
+
|
39
|
+
# Entry point: process the junk folders first, then the inbox, using the
# default configuration file.
if __FILE__ == $0
  runner = Swallow::Admin.new
  runner.process_junk
  runner.process_inbox
end
|
44
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
############################################
|
3
|
+
# Taken from Ajax Scripts
|
4
|
+
# @see http://ajax.stealthsettings.com/ruby/converting-between-time-and-datetime-objects/
|
5
|
+
require 'date'
|
6
|
+
class Time
  # Returns a DateTime with the same calendar fields, sub-second part and
  # UTC offset as this Time.
  def to_a_datetime
    # utc_offset is a number of seconds; DateTime wants a fraction of a day
    day_fraction = Rational(utc_offset, 60 * 60 * 24)
    # whole seconds plus the microseconds expressed as a fraction of a second
    fractional_seconds = Rational(usec, 10**6) + sec
    DateTime.new(year, month, day, hour, min, fractional_seconds, day_fraction)
  end
end
|
16
|
+
|
17
|
+
class Date
  # Returns the receiver as a UTC Time.
  #
  # A DateTime is first moved to offset 0 and its fields, including the
  # sub-second part, are handed to Time.gm. A plain Date has no time of
  # day and becomes midnight UTC of that day.
  #
  # No helper named to_time is defined here any more: the standard library
  # already provides Date#to_time and DateTime#to_time, and redefining it
  # with a different signature broke both this method and regular
  # Date#to_time callers.
  def to_gm_time
    return Time.gm(year, month, day) unless kind_of?(DateTime)
    utc = new_offset(0)
    # sec_fraction is a fraction of a second; scale it to microseconds
    usec = (utc.sec_fraction * (10**6)).to_i
    Time.gm(utc.year, utc.month, utc.day, utc.hour, utc.min, utc.sec, usec)
  end

  # Returns the receiver as a Time in the local time zone.
  #
  # A DateTime keeps its instant and is only re-expressed in local time.
  # A plain Date becomes local midnight of that day.
  def to_local_time
    return Time.local(year, month, day) unless kind_of?(DateTime)
    to_gm_time.getlocal
  end
end
|
33
|
+
|
34
|
+
# End Snippet
|
35
|
+
############################################
|
36
|
+
|
37
|
+
class File
  # True when the file's ctime, converted with Time#to_a_datetime, lies
  # before +date+.
  def older_than?(date)
    changed_at = ctime.to_a_datetime
    changed_at < date
  end

  # True when the file's ctime, converted with Time#to_a_datetime, lies
  # after +date+.
  def newer_than?(date)
    changed_at = ctime.to_a_datetime
    changed_at > date
  end
end
|
45
|
+
|
46
|
+
class Dir
  # Returns the paths of the non-directory entries whose File#older_than?
  # is true for the date +limit+ days before today.
  def files_older_than(limit)
    swallow_files_by_age(limit) { |file, cutoff| file.older_than?(cutoff) }
  end

  # Returns the paths of the non-directory entries whose File#newer_than?
  # is true for the date +limit+ days before today.
  def files_newer_than(limit)
    swallow_files_by_age(limit) { |file, cutoff| file.newer_than?(cutoff) }
  end

  # sweep directory of anything over a certain limit old
  def sweep!(limit)
    files_older_than(limit).each { |f|
      File.delete(f)
    }
  end

  private

  # Yields an open File and the cutoff date for every non-directory entry
  # and collects the paths for which the block returns true. Each file is
  # opened in block form so its handle is closed before the next entry is
  # examined.
  def swallow_files_by_age(limit)
    cutoff = Date.today - limit
    list = []
    entries.each do |name|
      filename = File.join(path, name)
      next if File.directory?(filename)
      list << filename if File.open(filename) { |file| yield(file, cutoff) }
    end
    list
  end
end
|
78
|
+
|
79
|
+
class Regexp
  # Returns a case-insensitive copy of this pattern.
  #
  # The copy is built from #source plus the existing option flags.
  # Interpolating #to_s would embed the original flags as "(?-mix:...)",
  # which switches case-insensitivity off again inside the group.
  def to_reg
    flags = options & (Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
    Regexp.new(source, flags | Regexp::IGNORECASE)
  end
end
|
84
|
+
|
85
|
+
# Self test: list the files of a directory that are older, then newer,
# than a number of days.
# Usage: time_extentions.rb DIRECTORY DAYS
if $0 == __FILE__ then
  abort "usage: #{$0} DIRECTORY DAYS" unless ARGV.length >= 2
  dir = ARGV[0]
  limit = Integer(ARGV[1])
  # block form closes the directory handle once both listings are printed
  Dir.open(dir) do |d|
    puts "Files Older than #{limit} day(s) old"
    d.files_older_than(limit).each { |file| puts "#{file}" }
    puts "Files Newer than #{limit} day(s) old"
    d.files_newer_than(limit).each { |file| puts "#{file}" }
  end
end
|
97
|
+
|
98
|
+
|
data/test/test_folder.rb
ADDED
data/test/test_spam.rb
ADDED
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: swallow
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2007-10-29 00:00:00 -05:00
|
8
|
+
summary: A backend IMAP/Maildir Email Sorting/Routing/Cleanup system
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: swallow-dev@erik.rainey.name
|
12
|
+
homepage: http://erik.rainey.name/swallow
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: swallow
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Erik Rainey
|
31
|
+
files:
|
32
|
+
- bin/swallow
|
33
|
+
- lib/time_extentions.rb
|
34
|
+
- lib/maildir.rb
|
35
|
+
- lib/ruleset.rb
|
36
|
+
- lib/email.rb
|
37
|
+
- lib/swallow.rb
|
38
|
+
- lib/spamdetector.rb
|
39
|
+
- README
|
40
|
+
test_files:
|
41
|
+
- test/test_spam.rb
|
42
|
+
- test/test_ruleset.rb
|
43
|
+
- test/test_folder.rb
|
44
|
+
rdoc_options: []
|
45
|
+
|
46
|
+
extra_rdoc_files:
|
47
|
+
- README
|
48
|
+
executables:
|
49
|
+
- swallow
|
50
|
+
extensions: []
|
51
|
+
|
52
|
+
requirements: []
|
53
|
+
|
54
|
+
dependencies:
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: gurgitate-mail
|
57
|
+
version_requirement:
|
58
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.8.5
|
63
|
+
version:
|