sup 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sup might be problematic. Click here for more details.

data/lib/sup/imap.rb CHANGED
@@ -31,6 +31,8 @@ require 'time'
31
31
  module Redwood
32
32
 
33
33
  class IMAP < Source
34
+ SCAN_INTERVAL = 60 # seconds
35
+
34
36
  attr_reader_cloned :labels
35
37
  attr_accessor :username, :password
36
38
 
@@ -46,26 +48,51 @@ class IMAP < Source
46
48
  @imap = nil
47
49
  @imap_ids = {}
48
50
  @ids = []
51
+ @last_scan = nil
49
52
  @labels = [:unread]
50
53
  @labels << :inbox unless archived?
51
54
  @labels << mailbox.intern unless mailbox =~ /inbox/i
52
55
  @mutex = Mutex.new
53
56
  end
54
57
 
55
- def say s
56
- @say_id = BufferManager.say s, @say_id if BufferManager.instantiated?
57
- Redwood::log s
58
+ def host; @parsed_uri.host; end
59
+ def port; @parsed_uri.port || (ssl? ? 993 : 143); end
60
+ def mailbox
61
+ x = @parsed_uri.path[1..-1]
62
+ x.nil? || x.empty? ? 'INBOX' : x
58
63
  end
59
- def shutup
60
- BufferManager.clear @say_id if BufferManager.instantiated?
61
- @say_id = nil
64
+ def ssl?; @parsed_uri.scheme == 'imaps' end
65
+
66
+ def load_header id
67
+ MBox::read_header StringIO.new(raw_header(id))
68
+ end
69
+
70
+ def load_message id
71
+ RMail::Parser.read raw_full_message(id)
72
+ end
73
+
74
+ def raw_header id
75
+ @mutex.synchronize do
76
+ connect
77
+ header, flags = get_imap_fields id, 'RFC822.HEADER', 'FLAGS'
78
+ header = "Status: RO\n" + header if flags.include? :Seen # fake an mbox-style read header
79
+ header.gsub(/\r\n/, "\n")
80
+ end
81
+ end
82
+
83
+ def raw_full_message id
84
+ @mutex.synchronize do
85
+ connect
86
+ get_imap_fields(id, 'RFC822').first.gsub(/\r\n/, "\n")
87
+ end
62
88
  end
63
- private :say, :shutup
64
89
 
65
90
  def connect
66
91
  return false if broken?
67
92
  return true if @imap
68
93
 
94
+ say "Connecting to IMAP server #{host}:#{port}..."
95
+
69
96
  ## ok, this is FUCKING ANNOYING.
70
97
  ##
71
98
  ## what imap.rb likes to do is, if an exception occurs, catch it
@@ -79,88 +106,120 @@ class IMAP < Source
79
106
  ##
80
107
  ## FUCK!!!!!!!!!
81
108
 
82
- say "Connecting to IMAP server #{host}:#{port}..."
83
-
109
+ exception = nil
84
110
  Redwood::reporting_thread do
85
111
  begin
86
112
  #raise Net::IMAP::ByeResponseError, "simulated imap failure"
87
- @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl?
113
+ @imap = Net::IMAP.new host, port, ssl?
88
114
  say "Logging in..."
89
115
  @imap.authenticate 'LOGIN', @username, @password
90
- say "Sizing mailbox..."
91
- @imap.examine mailbox
92
- last_id = @imap.responses["EXISTS"][-1]
93
-
94
- say "Reading headers (because IMAP sucks)..."
95
- values = @imap.fetch(1 .. last_id, ['RFC822.SIZE', 'INTERNALDATE'])
96
-
97
- say "Successfully connected to #{@parsed_uri}"
98
-
99
- values.each do |v|
100
- id = make_id v
101
- @ids << id
102
- @imap_ids[id] = v.seqno
103
- end
116
+ scan_mailbox
117
+ say "Successfully connected to #{@parsed_uri}."
104
118
  rescue SocketError, Net::IMAP::Error, SourceError => e
105
- self.broken_msg = e.message.chomp # fucking chomp! fuck!!!
106
- @imap = nil
107
- Redwood::log "error connecting to IMAP server: #{self.broken_msg}"
119
+ exception = e
108
120
  ensure
109
121
  shutup
110
122
  end
111
123
  end.join
112
124
 
113
- !!@imap
125
+ die_from exception, :while => "connecting" if exception
114
126
  end
115
- private :connect
116
127
 
117
- def make_id imap_stuff
118
- msize, mdate = imap_stuff.attr['RFC822.SIZE'], Time.parse(imap_stuff.attr["INTERNALDATE"])
119
- sprintf("%d%07d", mdate.to_i, msize).to_i
120
- end
121
- private :make_id
128
+ def each
129
+ @mutex.synchronize { connect or raise SourceError, broken_msg }
122
130
 
123
- def host; @parsed_uri.host; end
124
- def port; @parsed_uri.port || (ssl? ? 993 : 143); end
125
- def mailbox
126
- x = @parsed_uri.path[1..-1]
127
- x.nil? || x.empty? ? 'INBOX' : x
128
- end
129
- def ssl?; @parsed_uri.scheme == 'imaps' end
131
+ start = @ids.index(cur_offset || start_offset) or die_from "Unknown message id #{cur_offset || start_offset}.", :suggest_rebuild => true # couldn't find the most recent email
130
132
 
131
- def load_header id
132
- MBox::read_header StringIO.new(raw_header(id))
133
+ start.upto(@ids.length - 1) do |i|
134
+ id = @ids[i]
135
+ self.cur_offset = id
136
+ yield id, labels
137
+ end
133
138
  end
134
139
 
135
- def load_message id
136
- RMail::Parser.read raw_full_message(id)
140
+ def start_offset
141
+ @mutex.synchronize { connect }
142
+ @ids.first
137
143
  end
138
144
 
139
- ## load the full header text
140
- def raw_header id
145
+ def end_offset
141
146
  @mutex.synchronize do
142
- connect or raise SourceError, broken_msg
143
- get_imap_field(id, 'RFC822.HEADER').gsub(/\r\n/, "\n")
147
+ begin
148
+ connect
149
+ scan_mailbox
150
+ rescue SocketError, Net::IMAP::Error => e
151
+ die_from e, :while => "scanning mailbox"
152
+ end
144
153
  end
154
+ @ids.last
145
155
  end
146
156
 
147
- def raw_full_message id
148
- @mutex.synchronize do
149
- connect or raise SourceError, broken_msg
150
- get_imap_field(id, 'RFC822').gsub(/\r\n/, "\n")
157
+ def pct_done; 100.0 * (@ids.index(cur_offset) || 0).to_f / (@ids.length - 1).to_f; end
158
+
159
+ private
160
+
161
+ def say s
162
+ @say_id = BufferManager.say s, @say_id if BufferManager.instantiated?
163
+ Redwood::log s
164
+ end
165
+
166
+ def shutup
167
+ BufferManager.clear @say_id if BufferManager.instantiated?
168
+ @say_id = nil
169
+ end
170
+
171
+ def scan_mailbox
172
+ return if @last_scan && (Time.now - @last_scan) < SCAN_INTERVAL
173
+
174
+ @imap.examine mailbox
175
+ last_id = @imap.responses["EXISTS"].last
176
+ @last_scan = Time.now
177
+ return if last_id == @ids.length
178
+ Redwood::log "fetching IMAP headers #{(@ids.length + 1) .. last_id}"
179
+ values = @imap.fetch((@ids.length + 1) .. last_id, ['RFC822.SIZE', 'INTERNALDATE'])
180
+ values.each do |v|
181
+ id = make_id v
182
+ @ids << id
183
+ @imap_ids[id] = v.seqno
151
184
  end
152
185
  end
153
186
 
154
- def get_imap_field id, field
187
+ def die_from e, opts={}
188
+ @imap = nil
189
+
190
+ message =
191
+ case e
192
+ when Exception
193
+ "Error while #{opts[:while]}: #{e.message.chomp}."
194
+ when String
195
+ e
196
+ end
197
+
198
+ message += " It is likely that messages have been deleted from this IMAP mailbox. Please run sup-import --rebuild #{to_s} to correct this problem." if opts[:suggest_rebuild]
199
+
200
+ self.broken_msg = message
201
+ Redwood::log message
202
+ BufferManager.flash "Error communicating with IMAP server. See log for details."
203
+ raise SourceError, message
204
+ end
205
+
206
+ ## build a fake unique id
207
+ def make_id imap_stuff
208
+ # use 7 digits for the size. why 7? seems nice.
209
+ msize, mdate = imap_stuff.attr['RFC822.SIZE'] % 10000000, Time.parse(imap_stuff.attr["INTERNALDATE"])
210
+ sprintf("%d%07d", mdate.to_i, msize).to_i
211
+ end
212
+
213
+ def get_imap_fields id, *fields
155
214
  retries = 0
156
215
  f = nil
157
- imap_id = @imap_ids[id] or raise SourceError, "Unknown message id #{id}. It is likely that messages have been deleted from this IMAP mailbox."
216
+ imap_id = @imap_ids[id] or die_from "Unknown message id #{id}.", :suggest_rebuild => true
158
217
  begin
159
- f = @imap.fetch imap_id, [field, 'RFC822.SIZE', 'INTERNALDATE']
218
+ f = @imap.fetch imap_id, (fields + ['RFC822.SIZE', 'INTERNALDATE']).uniq
160
219
  got_id = make_id f[0]
161
- raise SourceError, "IMAP message mismatch: requested #{id}, got #{got_id}. It is likely the IMAP mailbox has been modified." unless got_id == id
162
- rescue Net::IMAP::Error => e
163
- raise SourceError, e.message
220
+ die_from "IMAP message mismatch: requested #{id}, got #{got_id}.", :suggest_rebuild => true unless id == got_id
221
+ rescue SocketError, Net::IMAP::Error => e
222
+ die_from e, :while => "communicating with IMAP server"
164
223
  rescue Errno::EPIPE
165
224
  if (retries += 1) <= 3
166
225
  @imap = nil
@@ -168,31 +227,9 @@ class IMAP < Source
168
227
  retry
169
228
  end
170
229
  end
171
- raise SourceError, "null IMAP field '#{field}' for message with id #{id} imap id #{imap_id}" if f.nil?
172
-
173
- f[0].attr[field]
174
- end
175
- private :get_imap_field
176
-
177
- def each
178
- @mutex.synchronize { connect or raise SourceError, broken_msg }
179
-
180
- start = @ids.index(cur_offset || start_offset)
181
- start.upto(@ids.length - 1) do |i|
182
- id = @ids[i]
183
- self.cur_offset = id
184
- yield id, labels
185
- end
186
- end
230
+ die_from "Null IMAP field '#{field}' for message with id #{id} imap id #{imap_id}." if f.nil?
187
231
 
188
- def start_offset
189
- @mutex.synchronize { connect or raise SourceError, broken_msg }
190
- @ids.first
191
- end
192
-
193
- def end_offset
194
- @mutex.synchronize { connect or raise SourceError, broken_msg }
195
- @ids.last
232
+ fields.map { |field| f[0].attr[field] }
196
233
  end
197
234
  end
198
235
 
data/lib/sup/index.rb CHANGED
@@ -136,7 +136,7 @@ class Index
136
136
  ## significant slowdown.
137
137
  ##
138
138
  ## TODO: make this configurable, i guess
139
- if false
139
+ if true
140
140
  date_min = m.date - (SAME_SUBJECT_DATE_LIMIT * 12 * 3600)
141
141
  date_max = m.date + (SAME_SUBJECT_DATE_LIMIT * 12 * 3600)
142
142
 
@@ -146,6 +146,7 @@ class Index
146
146
  sq.add_term t
147
147
  end
148
148
  q.add_query sq, :must
149
+ q.add_query Ferret::Search::TermQuery.new(:label, "spam"), :must_not
149
150
  q.add_query Ferret::Search::RangeQuery.new(:date, :>= => date_min.to_indexable_s, :<= => date_max.to_indexable_s), :must
150
151
 
151
152
  pending = @index.search(q).hits.map { |hit| @index[hit.doc][:message_id] }
@@ -29,7 +29,7 @@ class Loader < Source
29
29
 
30
30
  def start_offset; 0; end
31
31
  def end_offset; File.size @f; end
32
- def total; end_offset; end
32
+ def pct_done; 100.0 * cur_offset.to_f / end_offset.to_f; end
33
33
 
34
34
  def load_header offset
35
35
  header = nil
@@ -66,6 +66,7 @@ class Buffer
66
66
  x = @buf.index(what, start - @start)
67
67
  x.nil? ? nil : x + @start
68
68
  end
69
+
69
70
  def rindex what, start=0
70
71
  x = @buf.rindex(what, start - @start)
71
72
  x.nil? ? nil : x + @start
@@ -91,6 +92,9 @@ class SSHFile
91
92
  REASONABLE_TRANSFER_SIZE = 1024 * 32
92
93
  SIZE_CHECK_INTERVAL = 60 * 1 # seconds
93
94
 
95
+ @@shells = {}
96
+ @@shells_mutex = Mutex.new
97
+
94
98
  def initialize host, fn, ssh_opts={}
95
99
  @buf = Buffer.new
96
100
  @host = host
@@ -102,6 +106,7 @@ class SSHFile
102
106
  @broken_msg = nil
103
107
  @shell = nil
104
108
  @shell_mutex = Mutex.new
109
+ @buf_mutex = Mutex.new
105
110
  end
106
111
 
107
112
  def to_s; "mbox+ssh://#@host/#@fn"; end ## TODO: remove thisis EVILness
@@ -113,28 +118,32 @@ class SSHFile
113
118
  Redwood::log s
114
119
  end
115
120
  def shutup
116
- BufferManager.clear @say_id if BufferManager.instantiated?
121
+ BufferManager.clear @say_id if BufferManager.instantiated? && @say_id
117
122
  @say_id = nil
118
123
  end
119
124
  private :say, :shutup
120
125
 
121
126
  def connect
122
127
  raise SSHFileError, @broken_msg if broken?
128
+ return if @shell
123
129
 
124
- @shell_mutex.synchronize do
125
- return if @shell
126
-
127
- begin
128
- say "Opening SSH connection to #{@host}..."
129
- #raise SSHFileError, "simulated SSH file error"
130
- session = Net::SSH.start @host, @ssh_opts
131
- say "Starting SSH shell..."
132
- @shell = session.shell.sync
133
- say "Checking for #@fn..."
134
- raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0
135
- ensure
136
- shutup
130
+ @key = [@host, @ssh_opts[:username]]
131
+ begin
132
+ @shell = @@shells_mutex.synchronize do
133
+ unless @@shells.member? @key
134
+ say "Opening SSH connection to #{@host} for #@fn..."
135
+ #raise SSHFileError, "simulated SSH file error"
136
+ session = Net::SSH.start @host, @ssh_opts
137
+ say "Starting SSH shell..."
138
+ @@shells[@key] = session.shell.sync
139
+ end
140
+ @@shells[@key]
137
141
  end
142
+
143
+ say "Checking for #@fn..."
144
+ @shell_mutex.synchronize { raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0 }
145
+ ensure
146
+ shutup
138
147
  end
139
148
  end
140
149
 
@@ -154,15 +163,19 @@ class SSHFile
154
163
 
155
164
  def gets
156
165
  return nil if eof?
157
- make_buf_include @offset
158
- expand_buf_forward while @buf.index("\n", @offset).nil? && @buf.endd < size
159
- returning(@buf[@offset .. (@buf.index("\n", @offset) || -1)]) { |line| @offset += line.length }
166
+ @buf_mutex.synchronize do
167
+ make_buf_include @offset
168
+ expand_buf_forward while @buf.index("\n", @offset).nil? && @buf.endd < size
169
+ returning(@buf[@offset .. (@buf.index("\n", @offset) || -1)]) { |line| @offset += line.length }
170
+ end
160
171
  end
161
172
 
162
173
  def read n
163
174
  return nil if eof?
164
- make_buf_include @offset, n
165
- @buf[@offset ... (@offset += n)]
175
+ @buf_mutex.synchronize do
176
+ make_buf_include @offset, n
177
+ @buf[@offset ... (@offset += n)]
178
+ end
166
179
  end
167
180
 
168
181
  private
@@ -173,14 +186,17 @@ private
173
186
  connect
174
187
  # MBox::debug "sending command: #{cmd.inspect}"
175
188
  begin
176
- result = @shell.send_command cmd
189
+ result = @shell_mutex.synchronize { x = @shell.send_command cmd; sleep 0.25; x }
177
190
  raise SSHFileError, "Failure during remote command #{cmd.inspect}: #{(result.stderr || result.stdout || "")[0 .. 100]}" unless result.status == 0
178
191
  rescue Net::SSH::Exception # these happen occasionally for no apparent reason. gotta love that nondeterminism!
179
192
  retry if (retries += 1) <= 3
180
193
  raise
181
194
  rescue Errno::EPIPE
182
195
  if (retries += 1) <= e
183
- @shell = nil
196
+ @@shells_mutex.synchronize do
197
+ @shell = nil
198
+ @@shells[@key] = nil
199
+ end
184
200
  connect
185
201
  retry
186
202
  end
data/lib/sup/message.rb CHANGED
@@ -82,9 +82,9 @@ class Message
82
82
 
83
83
  attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source,
84
84
  :cc, :bcc, :labels, :list_address, :recipient_email, :replyto,
85
- :source_info, :status
85
+ :source_info
86
86
 
87
- bool_reader :dirty
87
+ bool_reader :dirty, :source_marked_read
88
88
 
89
89
  ## if you specify a :header, will use values from that. otherwise, will try and
90
90
  ## load the header from the source.
@@ -92,6 +92,7 @@ class Message
92
92
  @source = opts[:source] or raise ArgumentError, "source can't be nil"
93
93
  @source_info = opts[:source_info] or raise ArgumentError, "source_info can't be nil"
94
94
  @snippet = opts[:snippet] || ""
95
+ @have_snippet = !opts[:snippet].nil?
95
96
  @labels = opts[:labels] || []
96
97
  @dirty = false
97
98
 
@@ -129,8 +130,8 @@ class Message
129
130
  nil
130
131
  end
131
132
 
132
- @recipient_email = header["delivered-to"]
133
- @status = header["status"]
133
+ @recipient_email = header["x-original-to"] || header["envelope-to"] || header["delivered-to"]
134
+ @source_marked_read = header["status"] == "RO"
134
135
  end
135
136
  private :read_header
136
137
 
@@ -177,9 +178,15 @@ class Message
177
178
  [Text.new(error_message(@source.broken_msg.split("\n")))]
178
179
  else
179
180
  begin
181
+ ## we need to re-read the header because it contains information
182
+ ## that we don't store in the index. actually i think it's just
183
+ ## the mailing list address (if any), so this is kinda overkill.
184
+ ## i could just store that in the index, but i think there might
185
+ ## be other things like that in the future, and i'd rather not
186
+ ## bloat the index.
180
187
  read_header @source.load_header(@source_info)
181
188
  message_to_chunks @source.load_message(@source_info)
182
- rescue SourceError, SocketError => e
189
+ rescue SourceError, SocketError, MessageFormatError => e
183
190
  [Text.new(error_message(e.message))]
184
191
  end
185
192
  end
@@ -246,9 +253,7 @@ private
246
253
  ret = [] <<
247
254
  case m.header.content_type
248
255
  when "text/plain", nil
249
- raise MessageFormatError, "no message body before decode (source #@source info #@source_info)" unless
250
- m.body
251
- body = m.decode or raise MessageFormatError, "no message body"
256
+ m.body && body = m.decode or raise MessageFormatError, "for some bizarre reason, RubyMail was unable to parse this message."
252
257
  text_to_chunks body.normalize_whitespace.split("\n")
253
258
  when /^multipart\//
254
259
  nil
@@ -322,8 +327,7 @@ private
322
327
  chunk_lines << line
323
328
  end
324
329
 
325
- if state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) &&
326
- line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
330
+ if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
327
331
  @snippet += " " unless @snippet.empty?
328
332
  @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
329
333
  @snippet = @snippet[0 ... SNIPPET_LEN].chomp