swallow 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,2 @@
1
+ Install this package with `gem install swallow`. Be sure GEM_HOME and GEM_PATH are set before you do.
2
+
data/bin/swallow ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require "swallow"
4
+
5
+ admin = Swallow::Admin.new
6
+ admin.process_junk
7
+ admin.process_inbox
data/lib/email.rb ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'time_extentions'
4
+
5
module Swallow
  # A single message stored as one file inside a Maildir sub-folder.
  #
  # The file's base name is split on commas into a unique part, an "order"
  # field and a third field; the unique part is split on dots into +id+,
  # +weird_id+ and +host+.
  class Email
    attr_accessor :id, :weird_id, :host, :order
    attr_accessor :filename

    # filename:: path of the message file; only its last component is parsed.
    def initialize(filename)
      @filename = filename
      parse_filename(File.basename(filename))
    end

    # Populate id, weird_id, host, order and the read marker from +file+
    # (a bare file name, no directory part).
    def parse_filename(file)
      uppername, @order, @read = file.split(",")
      # Limit the split to three fields so a dotted host name such as
      # "mail.example.com" is kept whole instead of being cut at "mail".
      @id, @weird_id, @host = uppername.to_s.split(".", 3)
      # Maildir puts the flag letters after ":2,"; remember them when present
      # so that combinations such as "RS" are still recognised as seen.
      @flags = file[/:2,([A-Za-z]*)\z/, 1]
    end

    # True when the file changed more recently than +limit+ days ago.
    # Relies on File#newer_than? from the time_extentions library; the block
    # form of File.open closes the handle as soon as the check is done.
    def newer_than?(limit)
      File.open(@filename) { |f| f.newer_than?(Date.today - limit) }
    end

    # True when the file last changed more than +limit+ days ago.
    # Relies on File#older_than? from the time_extentions library.
    def older_than?(limit)
      File.open(@filename) { |f| f.older_than?(Date.today - limit) }
    end

    # True when the message carries the "S" (seen) marker. Falls back to the
    # third comma-separated field when the name has no ":2," flag section.
    def read?
      @flags ? @flags.include?("S") : @read == "S"
    end

    def mark_read
      # TODO: not implemented - should rename the file so its flags include "S"
    end

    # The full content of the message file.
    def to_s
      File.read(@filename)
    end

    # Remove the message file from disk.
    def delete
      File.delete(@filename)
    end
  end
end
45
+
46
# Manual check: run this file directly with the path of a message file.
if __FILE__ == $0
  include Swallow

  message = Email.new(ARGV[0])
  puts message
  puts "The email is unread" unless message.read?
  puts "Email ID: #{message.id}"
  puts "The email is newer than one day" if message.newer_than?(1)
  puts "The email is older than one day" if message.older_than?(1)
end
data/lib/maildir.rb ADDED
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'email'
4
+ require 'time_extentions'
5
+
6
module Swallow
  # One Maildir folder on disk: a directory that holds the "cur", "new" and
  # "tmp" sub-directories.
  class MailDirFolder
    # folder_path:: directory containing the cur/new/tmp sub-directories.
    def initialize(folder_path)
      @folder_path = folder_path
      @sub_folders = ["cur", "new", "tmp"]
    end

    # Sorted names of the hidden (dot-prefixed) directories directly inside
    # this folder, excluding "." and "..". The names are relative to the
    # folder. The directory is listed by path, so the process working
    # directory is left untouched.
    def enum
      Dir.entries(@folder_path).sort.select do |name|
        name[0, 1] == "." && name != "." && name != ".." &&
          File.directory?(File.join(@folder_path, name))
      end
    end

    # The path this object was created with.
    def path
      @folder_path
    end

    # Total number of entries across cur, new and tmp.
    def count
      @sub_folders.inject(0) { |total, dir| total + length(dir) }
    end

    # Number of entries in "new".
    def unread_count
      length("new")
    end

    # Number of entries in "cur".
    def read_count
      length("cur")
    end

    # Number of entries in "tmp".
    def unknown_count
      length("tmp")
    end

    # Number of entries (anything except "." and "..") in +sub_path+.
    def length(sub_path)
      (Dir.entries(File.join(@folder_path, sub_path)) - [".", ".."]).length
    end

    # An Email object for every non-directory entry in cur, new and tmp.
    def emails
      list = []
      @sub_folders.each do |sub_path|
        dir = File.join(@folder_path, sub_path)
        Dir.entries(dir).each do |name|
          filename = File.join(dir, name)
          list << Email.new(filename) unless File.directory?(filename)
        end
      end
      list
    end

    # Delete every message in the folder and print how many were removed.
    def delete_all_emails
      # List once so the reported count matches what was actually deleted.
      doomed = emails
      doomed.each { |email| email.delete }
      puts "Deleted #{doomed.length} emails"
    end

    # Messages for which Email#newer_than?(limit) holds.
    def emails_newer_than(limit)
      emails.select { |e| e.newer_than?(limit) }
    end

    # Messages for which Email#older_than?(limit) holds.
    def emails_older_than(limit)
      emails.select { |e| e.older_than?(limit) }
    end

    # Run Dir#sweep!(limit) (from the time_extentions library) on each of
    # cur, new and tmp. Does nothing unless +limit+ is an Integer.
    def sweep!(limit)
      return unless limit.kind_of?(Integer)
      @sub_folders.each do |sub_path|
        # Block form closes the directory handle once it has been swept.
        Dir.open(File.join(@folder_path, sub_path)) { |dir| dir.sweep!(limit) }
      end
    end

    # One-line summary of the unread, read and tmp counts.
    def to_s
      "#{@folder_path} has #{unread_count} unread messages and #{read_count} read messages (#{unknown_count})."
    end
  end
end
98
+
99
# Manual check: summarise the current user's Maildir, then every hidden
# sub-folder found inside it.
if __FILE__ == $0
  include Swallow

  root = "/home/#{ENV['USER']}/Maildir/"
  top = MailDirFolder.new(root)
  puts top.to_s
  top.enum.each do |name|
    child = MailDirFolder.new(top.path + name)
    puts child.to_s
    puts child.enum
  end
end
data/lib/ruleset.rb ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'yaml'
4
+
5
module Swallow
  # Sorting rules loaded from a YAML file.
  #
  # The code indexes each rule with the symbol keys :folder, :domains, :to
  # and :from, so the file is expected to hold a list of such hashes.
  class RuleSet
    # Keys whose string lists are regexp-escaped when the rules are loaded.
    PATTERN_KEYS = [:domains, :to, :from].freeze

    attr_reader :rules

    # filename:: path of the YAML rule file.
    def initialize(filename)
      # An empty YAML document loads as nil/false; treat it as "no rules".
      @rules = YAML.load_file(filename) || []
      @rules.each do |rule|
        next if rule[:folder].nil?
        PATTERN_KEYS.each do |key|
          next if rule[key].nil?
          # Regexp.quote returns an escaped String (not a Regexp object), so
          # the entries can later be embedded in a pattern literally.
          rule[key] = rule[key].map { |i| Regexp.quote(i) }
        end
      end
    end

    # Header line followed by each rule's folder and its to/from/domains
    # lists, concatenated in that order.
    def to_s
      s = String.new("Rules Hash Holds:\n")
      @rules.each do |rule|
        s << "#{rule[:folder]}"
        # Index the current rule here; indexing the whole rule list with a
        # Symbol raises TypeError.
        [:to, :from, :domains].each do |key|
          s << "#{rule[key]}" unless rule[key].nil?
        end
      end
      s
    end
  end
end
40
+
41
+ # Self Test
42
# Print each rule's folder and domain entries for the rule file named by
# the first command-line argument.
if $0 == __FILE__
  # RuleSet lives in the Swallow namespace and exposes its data through the
  # +rules+ reader; it has no getRuleset method.
  r = Swallow::RuleSet.new(ARGV[0])
  r.rules.each do |rule|
    puts "Folder: #{rule[:folder]}"
    (rule[:domains] || []).each { |domain| puts "Domain: #{domain}" }
  end
end
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/ruby -w
2
+ require 'maildir'
3
+
4
# Run +program+ (a command line handed to IO.popen), write +text+ to its
# standard input and return the command's exit status.
def pipe(program, text)
  IO.popen(program, "w") { |io| io.print(text) }
  # The block form of popen waits for the child and sets $?. Ask for the
  # exit status directly instead of bit-shifting the Process::Status object.
  $?.exitstatus
end
10
+
11
# Run +program+, feed it +text+ on standard input and return everything it
# wrote to standard output.
def process(program, text)
  # "r+" opens the pipe in both directions ("wr" is not a valid mode string).
  IO.popen(program, "r+") do |io|
    io.print(text)
    # Close our write end so the child sees end-of-input and can finish.
    io.close_write
    io.read
  end
end
17
+
18
module Swallow
  # Base class for spam-filter front ends. It feeds message text to an
  # external command through the top-level +pipe+ helper; subclasses
  # override +generate_cmd+ to build the real command line.
  class SpamDetector
    def initialize
      @innocent_cmd = "cat"
      @training_cmd = "cat"
    end

    # Pipe every message in +folder+ through +cmd+.
    #
    # cmd::           command line handed to +pipe+
    # folder::        a MailDirFolder, or a String naming a plain directory
    # should_delete:: each message is removed after piping only when this is
    #                 exactly +true+
    def process_folder(cmd, folder, should_delete = false)
      count = 0
      if folder.kind_of? MailDirFolder
        # List the folder once so the count and the loop see the same set.
        messages = folder.emails
        count = messages.length
        messages.each do |email|
          pipe(cmd, email.to_s)
          email.delete if should_delete == true
        end
      end
      if folder.kind_of? String
        Dir.entries(folder).each do |f|
          full_path = File.join(folder, f)
          # Test the joined path: a bare entry name would be resolved against
          # the current working directory rather than +folder+.
          next if File.directory?(full_path)
          pipe(cmd, File.read(full_path))
          count += 1
          File.delete(full_path) if should_delete == true
        end
      end
      puts "Processed #{count} messages in #{folder}"
    end

    # Command line for the given options; this base version returns "echo"
    # for a Hash and nil for anything else.
    def generate_cmd(params)
      return "echo" if params.kind_of? Hash
    end

    # Pipe the folder's messages through the "spam" command, deleting each.
    def train_and_clean_folder(folder)
      process_folder(generate_cmd(:source => "corpus", :class => "spam"), folder, true)
    end

    # Pipe the folder's messages through the "innocent" command, keeping them.
    def train_and_retain_folder(folder)
      process_folder(generate_cmd(:source => "corpus", :class => "innocent"), folder)
    end

    # Not implemented; returns nil.
    def is_spam?(message_text)
    end

    # Not implemented; returns nil.
    def is_innocent?(message_text)
    end

    # Takes an array of Email objects and trains them as innocent.
    def train_emails_as_innocent(emails)
      train_emails(emails, "innocent")
    end

    # Takes an array of Email objects and trains them as spam.
    def train_emails_as_spam(emails)
      train_emails(emails, "spam")
    end

    private

    # Pipe each Email in +emails+ through the corpus command for +klass+ and
    # print how many were trained. Anything that is not an Array is ignored,
    # and array members that are not Email objects are skipped and not
    # counted.
    def train_emails(emails, klass)
      return unless emails.kind_of? Array
      trainable = emails.select { |email| email.kind_of? Email }
      cmd = generate_cmd(:source => "corpus", :class => klass)
      trainable.each { |email| pipe(cmd, email.to_s) }
      puts "Trained #{trainable.length} emails as #{klass}"
    end
  end

  # SpamDetector that builds command lines for the dspam executable.
  class DSpamEngine < SpamDetector
    # add_params:: optional Hash; :mode defaults to "teft", and :path (when
    #              set) replaces the default "dspam" executable name.
    #
    # Raises IOError when running "<cmd> --version" reports exit status 127.
    def initialize(add_params = nil)
      @base_cmd = "dspam"
      # Work on a copy so the defaults filled in below never leak into the
      # caller's hash.
      @cmd_params = add_params.nil? ? {} : add_params.dup
      @cmd_params[:mode] = "teft" if @cmd_params[:mode].nil?
      @base_cmd = @cmd_params[:path] unless @cmd_params[:path].nil?
      if pipe(@base_cmd + " --version", "") == 127
        # raise, not throw: throw is for catch/throw control flow and would
        # surface as an uncaught-throw error instead of an IOError.
        raise IOError, "unable to run #{@base_cmd}"
      end
    end

    # Full dspam command line for a Hash with :source and :class; for any
    # other argument, just the executable and mode.
    def generate_cmd(params)
      if params.kind_of? Hash
        "#{@base_cmd} --mode=#{@cmd_params[:mode]} --source=#{params[:source]} --class=#{params[:class]} --feature=noise --user=#{ENV['USER']}"
      else
        "#{@base_cmd} --mode=#{@cmd_params[:mode]}"
      end
    end
  end
end
124
+
125
# Manual run: hand the directory named by the first argument to
# DSpamEngine#train_and_clean_folder. Exits with status 1 when no argument
# is given.
if __FILE__ == $0
  include Swallow

  exit(1) if ARGV.empty?
  engine = DSpamEngine.new()
  engine.train_and_clean_folder(ARGV[0])
end
data/lib/swallow.rb ADDED
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'maildir'
4
+ require 'spamdetector'
5
+ require 'ruleset'
6
+ require 'yaml'
7
+
8
module Swallow
  # Runs the maintenance passes over the Maildir folders named in a YAML
  # configuration file.
  class Admin
    # config_file:: path of the YAML configuration; when nil the file
    #               ".swallow-config" under ENV['HOME'] is used.
    def initialize(config_file = nil)
      @config_path = config_file.nil? ? "#{ENV['HOME']}/.swallow-config" : config_file
      @config = YAML.load_file(@config_path)
      @dspam = DSpamEngine.new(:path => @config[:dspam_path])
    end

    # Hand the junk folder, then the corpus folder, to
    # DSpamEngine#train_and_clean_folder.
    def process_junk
      # Build both folder objects before touching either, so a bad
      # configuration entry fails before anything is processed.
      junk, corpus = [:junk, :corpus].map { |key| folder_for(key) }

      @dspam.train_and_clean_folder(junk)
      @dspam.train_and_clean_folder(corpus)
    end

    # Feed the inbox messages newer than the configured :limit to the filter
    # as innocent corpus.
    def process_inbox
      recent = folder_for(:inbox).emails_newer_than(@config[:limit])
      @dspam.train_emails_as_innocent(recent)
    end

    private

    # MailDirFolder for the configured :maildir prefix plus the entry
    # stored under +key+ (plain string concatenation).
    def folder_for(key)
      MailDirFolder.new(@config[:maildir] + @config[key])
    end
  end
end
38
+
39
# When this file is run directly, do one full pass: junk folders first,
# then the inbox.
if __FILE__ == $0
  swallow_admin = Swallow::Admin.new
  swallow_admin.process_junk
  swallow_admin.process_inbox
end
44
+
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/ruby -w
2
+ ############################################
3
+ # Taken from Ajax Scripts
4
+ # @see http://ajax.stealthsettings.com/ruby/converting-between-time-and-datetime-objects/
5
+ require 'date'
6
class Time
  # Build a DateTime with the same calendar fields, sub-second part and
  # UTC offset as this Time.
  def to_a_datetime
    # utc_offset is in seconds; DateTime.new takes the offset as a fraction
    # of a day.
    day_fraction = Rational(utc_offset, 60 * 60 * 24)
    # Fold the microseconds into the seconds field as an exact rational.
    exact_seconds = sec + Rational(usec, 10**6)
    DateTime.new(year, month, day, hour, min, exact_seconds, day_fraction)
  end
end
16
+
17
class Date
  # This date (or DateTime) as a Time in UTC.
  def to_gm_time
    # to_datetime gives plain Date objects the time fields and new_offset
    # they lack; for a DateTime it is a no-op.
    swallow_build_time(to_datetime.new_offset(0), :gm)
  end

  # This date (or DateTime) as a Time in the local time zone.
  def to_local_time
    # Convert the UTC instant and let Time apply the zone rules in force on
    # that date, rather than assuming today's UTC offset.
    to_gm_time.getlocal
  end

  private

  # Build a Time through Time.gm / Time.local from +dest+'s fields.
  # The helper has its own name so that the standard library's public
  # Date#to_time is not replaced by a private two-argument method.
  def swallow_build_time(dest, method)
    # sec_fraction is a fraction of a second (Ruby 1.9 and later), so scale
    # it straight to microseconds.
    usec = (dest.sec_fraction * (10**6)).to_i
    Time.send(method, dest.year, dest.month, dest.day, dest.hour, dest.min, dest.sec, usec)
  end
end
33
+
34
+ # End Snippet
35
+ ############################################
36
+
37
class File
  # True when this file's ctime, converted with Time#to_a_datetime, is
  # strictly earlier than +date+.
  def older_than?(date)
    changed_at = ctime.to_a_datetime
    changed_at < date
  end

  # True when this file's ctime, converted with Time#to_a_datetime, is
  # strictly later than +date+.
  def newer_than?(date)
    changed_at = ctime.to_a_datetime
    changed_at > date
  end
end
45
+
46
class Dir
  # Paths of the non-directory entries for which
  # File#older_than?(Date.today - limit) holds.
  def files_older_than(limit)
    files_matching_age(limit) { |file, cutoff| file.older_than?(cutoff) }
  end

  # Paths of the non-directory entries for which
  # File#newer_than?(Date.today - limit) holds.
  def files_newer_than(limit)
    files_matching_age(limit) { |file, cutoff| file.newer_than?(cutoff) }
  end

  # Delete every file reported by files_older_than(limit).
  def sweep!(limit)
    files_older_than(limit).each { |f| File.delete(f) }
  end

  private

  # Shared scan behind files_older_than / files_newer_than: yields each
  # non-directory entry as an open File together with the cutoff date, and
  # collects the paths for which the block returns true.
  def files_matching_age(limit)
    cutoff = Date.today - limit
    Dir.entries(path).map { |name| File.join(path, name) }.select do |filename|
      next false if File.directory?(filename)
      # Block form closes each handle at once; a large mail directory would
      # otherwise hold one open descriptor per message until GC runs.
      File.open(filename) { |file| yield(file, cutoff) }
    end
  end
end
78
+
79
class Regexp
  # A case-insensitive copy of this pattern; the extended and multiline
  # flags are carried over.
  def to_reg
    # Interpolating to_s would embed "(?-mix:...)", which pins the original
    # flags inside the group and makes an outer /i a no-op. Rebuild from the
    # pattern source instead.
    kept = options & (Regexp::EXTENDED | Regexp::MULTILINE)
    Regexp.new(source, kept | Regexp::IGNORECASE)
  end
end
84
+
85
# Manual check: list the files of a directory (first argument) on either
# side of an age limit in days (second argument).
if __FILE__ == $0
  dir = ARGV[0]
  limit = Integer(ARGV[1])

  puts "Files Older than #{limit} day(s) old"
  older = Dir.open(dir).files_older_than(limit)
  older.each do |file|
    puts "#{file}"
  end

  puts "Files Newer than #{limit} days(s) old"
  newer = Dir.open(dir).files_newer_than(limit)
  newer.each do |file|
    puts "#{file}"
  end
end
97
+
98
+
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require '../lib/maildir'
4
+
5
# Deletes every message in the Maildir folder named by the first argument.
if $0 == __FILE__ then
  include Swallow
  # Same guard as test_spam.rb: without a folder argument exit with status
  # 1 instead of failing inside MailDirFolder on a nil path.
  exit(1) unless ARGV.length > 0
  m = MailDirFolder.new(ARGV[0])
  m.delete_all_emails
end
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/ruby
2
+
3
require 'rubygems'
gem 'swallow'
# Load the library explicitly so the Swallow module is defined before it is
# mixed in. NOTE(review): assumes activating the gem does not load it on
# its own - confirm against the RubyGems version in use.
require 'swallow'
# include takes the module itself; passing its name as a String raises
# TypeError.
include Swallow

# Load the rule file from the gurgitate directory under ENV['HOME'].
rules = RuleSet.new("#{ENV['HOME']}/gurgitate/ruleset.yaml")
8
+
data/test/test_spam.rb ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require '../lib/maildir'
4
+ require '../lib/spamdetector'
5
+
6
# Hands the Maildir folder named by the first argument to
# DSpamEngine#train_and_clean_folder. Exits with status 1 when no argument
# is given.
if __FILE__ == $0
  include Swallow

  exit(1) if ARGV.empty?
  junk_folder = MailDirFolder.new(ARGV[0])
  engine = DSpamEngine.new
  engine.train_and_clean_folder(junk_folder)
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: swallow
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.0.1
7
+ date: 2007-10-29 00:00:00 -05:00
8
+ summary: A backend IMAP/Maildir Email Sorting/Routing/Cleanup system
9
+ require_paths:
10
+ - lib
11
+ email: swallow-dev@erik.rainey.name
12
+ homepage: http://erik.rainey.name/swallow
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: swallow
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Erik Rainey
31
+ files:
32
+ - bin/swallow
33
+ - lib/time_extentions.rb
34
+ - lib/maildir.rb
35
+ - lib/ruleset.rb
36
+ - lib/email.rb
37
+ - lib/swallow.rb
38
+ - lib/spamdetector.rb
39
+ - README
40
+ test_files:
41
+ - test/test_spam.rb
42
+ - test/test_ruleset.rb
43
+ - test/test_folder.rb
44
+ rdoc_options: []
45
+
46
+ extra_rdoc_files:
47
+ - README
48
+ executables:
49
+ - swallow
50
+ extensions: []
51
+
52
+ requirements: []
53
+
54
+ dependencies:
55
+ - !ruby/object:Gem::Dependency
56
+ name: gurgitate-mail
57
+ version_requirement:
58
+ version_requirements: !ruby/object:Gem::Version::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.8.5
63
+ version: