sup 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sup might be problematic. Click here for more details.

data/lib/sup/imap.rb CHANGED
@@ -25,14 +25,16 @@ require 'time'
25
25
  ## slow for large mailboxes, and we'll just have to hope that there
26
26
  ## are no collisions. ho ho! a perfectly reasonable solution!
27
27
 
28
- ## fuck you, imap committee. you managed to design something as shitty
29
- ## as mbox but goddamn THIRTY YEARS LATER.
30
-
28
+ ## fuck you, imap committee. you managed to design something nearly as
29
+ ## shitty as mbox but goddamn THIRTY YEARS LATER.
31
30
  module Redwood
32
31
 
33
32
  class IMAP < Source
34
33
  SCAN_INTERVAL = 60 # seconds
35
34
 
35
+ ## upon these errors we'll try to reconnect a few times
36
+ RECOVERABLE_ERRORS = [ Errno::EPIPE, Errno::ETIMEDOUT ]
37
+
36
38
  attr_reader_cloned :labels
37
39
  attr_accessor :username, :password
38
40
 
@@ -62,6 +64,7 @@ class IMAP < Source
62
64
  x.nil? || x.empty? ? 'INBOX' : x
63
65
  end
64
66
  def ssl?; @parsed_uri.scheme == 'imaps' end
67
+ def == o; o.is_a?(IMAP) && o.uri == self.uri && o.username == self.username; end
65
68
 
66
69
  def load_header id
67
70
  MBox::read_header StringIO.new(raw_header(id))
@@ -72,42 +75,87 @@ class IMAP < Source
72
75
  end
73
76
 
74
77
  def raw_header id
75
- @mutex.synchronize do
76
- connect
77
- header, flags = get_imap_fields id, 'RFC822.HEADER', 'FLAGS'
78
- header = "Status: RO\n" + header if flags.include? :Seen # fake an mbox-style read header
79
- header.gsub(/\r\n/, "\n")
80
- end
78
+ unsynchronized_scan_mailbox
79
+ header, flags = get_imap_fields id, 'RFC822.HEADER', 'FLAGS'
80
+ header = header + "Status: RO\n" if flags.include? :Seen # fake an mbox-style read header # TODO: improve source-marked-as-read reporting system
81
+ header.gsub(/\r\n/, "\n")
81
82
  end
83
+ synchronized :raw_header
82
84
 
83
85
  def raw_full_message id
84
- @mutex.synchronize do
85
- connect
86
- get_imap_fields(id, 'RFC822').first.gsub(/\r\n/, "\n")
87
- end
86
+ unsynchronized_scan_mailbox
87
+ get_imap_fields(id, 'RFC822').first.gsub(/\r\n/, "\n")
88
88
  end
89
+ synchronized :raw_full_message
89
90
 
90
91
  def connect
91
- return false if broken?
92
- return true if @imap
92
+ return if broken? || @imap
93
+ safely { } # do nothing!
94
+ end
95
+ synchronized :connect
93
96
 
94
- say "Connecting to IMAP server #{host}:#{port}..."
97
+ def scan_mailbox
98
+ return if @last_scan && (Time.now - @last_scan) < SCAN_INTERVAL
99
+ last_id = safely do
100
+ @imap.examine mailbox
101
+ @imap.responses["EXISTS"].last
102
+ end
103
+ @last_scan = Time.now
104
+
105
+ return if last_id == @ids.length
106
+
107
+ Redwood::log "fetching IMAP headers #{(@ids.length + 1) .. last_id}"
108
+ values = safely { @imap.fetch((@ids.length + 1) .. last_id, ['RFC822.SIZE', 'INTERNALDATE']) }
109
+ values.each do |v|
110
+ id = make_id v
111
+ @ids << id
112
+ @imap_ids[id] = v.seqno
113
+ end
114
+ end
115
+ synchronized :scan_mailbox
116
+
117
+ def each
118
+ ids =
119
+ @mutex.synchronize do
120
+ unsynchronized_scan_mailbox
121
+ @ids
122
+ end
95
123
 
96
- ## ok, this is FUCKING ANNOYING.
97
- ##
98
- ## what imap.rb likes to do is, if an exception occurs, catch it
99
- ## and re-raise it on the calling thread. seems reasonable. but
100
- ## what that REALLY means is that the only way to reasonably
101
- ## initialize imap is in its own thread, because otherwise, you
102
- ## will never be able to catch the exception it raises on the
103
- ## calling thread, and the backtrace will not make any sense at
104
- ## all, and you will waste HOURS of your life on this fucking
105
- ## problem.
106
- ##
107
- ## FUCK!!!!!!!!!
124
+ start = ids.index(cur_offset || start_offset) or die_from "Unknown message id #{cur_offset || start_offset}.", :suggest_rebuild => true # couldn't find the most recent email
108
125
 
126
+ start.upto(ids.length - 1) do |i|
127
+ id = ids[i]
128
+ self.cur_offset = id
129
+ yield id, labels
130
+ end
131
+ end
132
+
133
+ def start_offset
134
+ unsynchronized_scan_mailbox
135
+ @ids.first
136
+ end
137
+ synchronized :start_offset
138
+
139
+ def end_offset
140
+ unsynchronized_scan_mailbox
141
+ @ids.last
142
+ end
143
+ synchronized :end_offset
144
+
145
+ def pct_done; 100.0 * (@ids.index(cur_offset) || 0).to_f / (@ids.length - 1).to_f; end
146
+
147
+ private
148
+
149
+ def unsafe_connect
150
+ say "Connecting to IMAP server #{host}:#{port}..."
151
+
152
+ ## apparently imap.rb does a lot of threaded stuff internally and
153
+ ## if an exception occurs, it will catch it and re-raise it on the
154
+ ## calling thread. but i can't seem to catch that exception, so
155
+ ## i've resorted to initializing it in its own thread. surely
156
+ ## there's a better way.
109
157
  exception = nil
110
- Redwood::reporting_thread do
158
+ ::Thread.new do
111
159
  begin
112
160
  #raise Net::IMAP::ByeResponseError, "simulated imap failure"
113
161
  @imap = Net::IMAP.new host, port, ssl?
@@ -115,7 +163,7 @@ class IMAP < Source
115
163
 
116
164
  ## although RFC1730 claims that "If an AUTHENTICATE command
117
165
  ## fails with a NO response, the client may try another", in
118
- ## practice it seems like they will also send BAD responses.
166
+ ## practice it seems like they can also send a BAD response.
119
167
  begin
120
168
  @imap.authenticate 'CRAM-MD5', @username, @password
121
169
  rescue Net::IMAP::BadResponseError, Net::IMAP::NoResponseError => e
@@ -127,51 +175,17 @@ class IMAP < Source
127
175
  @imap.login @username, @password
128
176
  end
129
177
  end
130
- scan_mailbox
131
178
  say "Successfully connected to #{@parsed_uri}."
132
- rescue SocketError, Net::IMAP::Error, SourceError => e
179
+ rescue Exception => e
133
180
  exception = e
134
181
  ensure
135
182
  shutup
136
183
  end
137
184
  end.join
138
185
 
139
- die_from exception, :while => "connecting" if exception
140
- end
141
-
142
- def each
143
- @mutex.synchronize { connect or raise SourceError, broken_msg }
144
-
145
- start = @ids.index(cur_offset || start_offset) or die_from "Unknown message id #{cur_offset || start_offset}.", :suggest_rebuild => true # couldn't find the most recent email
146
-
147
- start.upto(@ids.length - 1) do |i|
148
- id = @ids[i]
149
- self.cur_offset = id
150
- yield id, labels
151
- end
152
- end
153
-
154
- def start_offset
155
- @mutex.synchronize { connect }
156
- @ids.first
186
+ raise exception if exception
157
187
  end
158
188
 
159
- def end_offset
160
- @mutex.synchronize do
161
- begin
162
- connect
163
- scan_mailbox
164
- rescue SocketError, Net::IMAP::Error => e
165
- die_from e, :while => "scanning mailbox"
166
- end
167
- end
168
- @ids.last
169
- end
170
-
171
- def pct_done; 100.0 * (@ids.index(cur_offset) || 0).to_f / (@ids.length - 1).to_f; end
172
-
173
- private
174
-
175
189
  def say s
176
190
  @say_id = BufferManager.say s, @say_id if BufferManager.instantiated?
177
191
  Redwood::log s
@@ -182,22 +196,6 @@ private
182
196
  @say_id = nil
183
197
  end
184
198
 
185
- def scan_mailbox
186
- return if @last_scan && (Time.now - @last_scan) < SCAN_INTERVAL
187
-
188
- @imap.examine mailbox
189
- last_id = @imap.responses["EXISTS"].last
190
- @last_scan = Time.now
191
- return if last_id == @ids.length
192
- Redwood::log "fetching IMAP headers #{(@ids.length + 1) .. last_id}"
193
- values = @imap.fetch((@ids.length + 1) .. last_id, ['RFC822.SIZE', 'INTERNALDATE'])
194
- values.each do |v|
195
- id = make_id v
196
- @ids << id
197
- @imap_ids[id] = v.seqno
198
- end
199
- end
200
-
201
199
  def die_from e, opts={}
202
200
  @imap = nil
203
201
 
@@ -225,26 +223,38 @@ private
225
223
  end
226
224
 
227
225
  def get_imap_fields id, *fields
228
- retries = 0
229
- f = nil
226
+ raise SourceError, broken_msg if broken?
230
227
  imap_id = @imap_ids[id] or die_from "Unknown message id #{id}.", :suggest_rebuild => true
228
+
229
+ retried = false
230
+ results = safely { @imap.fetch imap_id, (fields + ['RFC822.SIZE', 'INTERNALDATE']).uniq }.first
231
+ got_id = make_id results
232
+ die_from "IMAP message mismatch: requested #{id}, got #{got_id}.", :suggest_rebuild => true unless got_id == id
233
+
234
+ fields.map { |f| results.attr[f] }
235
+ end
236
+
237
+ ## execute a block, connecting first if not yet connected, reconnecting
238
+ ## and retrying up to 3 times if a recoverable error occurs, and dying
239
+ ## properly if an unrecoverable error occurs.
240
+ def safely
241
+ retries = 0
231
242
  begin
232
- f = @imap.fetch imap_id, (fields + ['RFC822.SIZE', 'INTERNALDATE']).uniq
233
- got_id = make_id f[0]
234
- die_from "IMAP message mismatch: requested #{id}, got #{got_id}.", :suggest_rebuild => true unless id == got_id
235
- rescue SocketError, Net::IMAP::Error => e
236
- die_from e, :while => "communicating with IMAP server"
237
- rescue Errno::EPIPE
238
- if (retries += 1) <= 3
239
- @imap = nil
240
- connect
241
- retry
243
+ begin
244
+ unsafe_connect unless @imap
245
+ yield
246
+ rescue *RECOVERABLE_ERRORS
247
+ if (retries += 1) <= 3
248
+ @imap = nil
249
+ retry
250
+ end
251
+ raise
242
252
  end
253
+ rescue Net, SocketError, Net::IMAP::Error, SystemCallError => e
254
+ die_from e, :while => "communicating with IMAP server"
243
255
  end
244
- die_from "Null IMAP field '#{field}' for message with id #{id} imap id #{imap_id}." if f.nil?
245
-
246
- fields.map { |field| f[0].attr[field] }
247
256
  end
257
+
248
258
  end
249
259
 
250
260
  Redwood::register_yaml(IMAP, %w(uri username password cur_offset usual archived id))
data/lib/sup/index.rb CHANGED
@@ -3,7 +3,6 @@
3
3
  require 'thread'
4
4
  require 'fileutils'
5
5
  require 'ferret'
6
- #require_gem 'ferret', ">= 0.10.13"
7
6
 
8
7
  module Redwood
9
8
 
@@ -45,7 +44,7 @@ class Index
45
44
  @sources[source.id] = source
46
45
  end
47
46
 
48
- def source_for name; @sources.values.find { |s| s.is_source_for? name }; end
47
+ def source_for uri; @sources.values.find { |s| s.is_source_for? uri }; end
49
48
  def usual_sources; @sources.values.find_all { |s| s.usual? }; end
50
49
  def sources; @sources.values; end
51
50
 
@@ -73,24 +72,29 @@ class Index
73
72
  end
74
73
  end
75
74
 
76
- ## update the message by deleting and re-adding
77
- def update_message m, source=nil, source_info=nil
78
- docid, entry = load_entry_for_id m.id
79
- if entry
80
- source ||= entry[:source_id].to_i
81
- source_info ||= entry[:source_info].to_i
75
+ ## Update the message state on disk, by deleting and re-adding it.
76
+ ## The message must exist in the index. docid and entry are found
77
+ ## unless given.
78
+ ##
79
+ ## Overwrites the labels on disk with the new labels in 'm', so that
80
+ ## we can actually change message state.
81
+ def update_message m, docid=nil, entry=nil
82
+ unless docid && entry
83
+ docid, entry = load_entry_for_id m.id
84
+ raise ArgumentError, "cannot find #{m.id} in the index" unless entry
82
85
  end
83
86
 
84
- ## this happens sometimes. i'm not sure why. ferret bug?
85
- raise "no entry and no source info for message #{m.id}: source #{source.inspect}, info #{source_info.inspect}, entry #{entry.inspect}, query #{Ferret::Search::TermQuery.new(:message_id, m.id)}, results #{@index.search(Ferret::Search::TermQuery.new(:message_id, m.id)).inspect}" unless source && source_info
87
+ raise "no entry and no source info for message #{m.id}" unless m.source && m.source_info
86
88
 
87
89
  raise "deleting non-corresponding entry #{docid}" unless @index[docid][:message_id] == m.id
90
+
88
91
  @index.delete docid
89
92
  add_message m
93
+ docid, entry = load_entry_for_id m.id
90
94
  end
91
95
 
92
96
  def save_index fn=File.join(@dir, "ferret")
93
- # don't have to do anything apparently
97
+ # don't have to do anything, apparently
94
98
  end
95
99
 
96
100
  def contains_id? id
@@ -100,8 +104,7 @@ class Index
100
104
  def size; @index.size; end
101
105
 
102
106
  ## you should probably not call this on a block that doesn't break
103
- ## rather quickly because the results will probably be, as we say
104
- ## in scotland, frikkin' huuuge.
107
+ ## rather quickly because the results can be very large.
105
108
  EACH_BY_DATE_NUM = 100
106
109
  def each_id_by_date opts={}
107
110
  return if @index.size == 0 # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
@@ -123,20 +126,20 @@ class Index
123
126
  end
124
127
 
125
128
  ## yield all messages in the thread containing 'm' by repeatedly
126
- ## querying the index. yields pairs of message ids and
129
+ ## querying the index. yields pairs of message ids and
127
130
  ## message-building lambdas, so that building an unwanted message
128
131
  ## can be skipped in the block if desired.
132
+ ##
133
+ ## stops loading any thread if a message with a :killed flag is found.
134
+
129
135
  SAME_SUBJECT_DATE_LIMIT = 7
130
136
  def each_message_in_thread_for m, opts={}
131
137
  messages = {}
132
138
  searched = {}
133
139
  num_queries = 0
134
140
 
135
- ## temporarily disabling subject searching because it's a
136
- ## significant slowdown.
137
- ##
138
- ## TODO: make this configurable, i guess
139
- if true
141
+ ## todo: make subject querying configurable
142
+ if true # do subject queries
140
143
  date_min = m.date - (SAME_SUBJECT_DATE_LIMIT * 12 * 3600)
141
144
  date_max = m.date + (SAME_SUBJECT_DATE_LIMIT * 12 * 3600)
142
145
 
@@ -146,9 +149,10 @@ class Index
146
149
  sq.add_term t
147
150
  end
148
151
  q.add_query sq, :must
149
- q.add_query Ferret::Search::TermQuery.new(:label, "spam"), :must_not
150
152
  q.add_query Ferret::Search::RangeQuery.new(:date, :>= => date_min.to_indexable_s, :<= => date_max.to_indexable_s), :must
151
153
 
154
+ q = build_query :qobj => q
155
+
152
156
  pending = @index.search(q).hits.map { |hit| @index[hit.doc][:message_id] }
153
157
  Redwood::log "found #{pending.size} results for subject query #{q}"
154
158
  else
@@ -163,9 +167,12 @@ class Index
163
167
  q.add_query Ferret::Search::TermQuery.new(:message_id, id), :should
164
168
  q.add_query Ferret::Search::TermQuery.new(:refs, id), :should
165
169
 
170
+ q = build_query :qobj => q, :load_killed => true
171
+
166
172
  num_queries += 1
167
173
  @index.search_each(q, :limit => :all) do |docid, score|
168
174
  break if opts[:limit] && messages.size >= opts[:limit]
175
+ break if @index[docid][:label].split(/\s+/).include? "killed" unless opts[:load_killed]
169
176
  mid = @index[docid][:message_id]
170
177
  unless messages.member? mid
171
178
  messages[mid] ||= lambda { build_message docid }
@@ -174,7 +181,7 @@ class Index
174
181
  end
175
182
  end
176
183
  end
177
- Redwood::log "ran #{num_queries} queries to build thread of #{messages.size} messages for #{m.id}"
184
+ Redwood::log "ran #{num_queries} queries to build thread of #{messages.size} messages for #{m.id}" if num_queries > 0
178
185
  messages.each { |mid, builder| yield mid, builder }
179
186
  end
180
187
 
@@ -203,8 +210,10 @@ class Index
203
210
  def wrap_subj subj; "__START_SUBJECT__ #{subj} __END_SUBJECT__"; end
204
211
  def unwrap_subj subj; subj =~ /__START_SUBJECT__ (.*?) __END_SUBJECT__/ && $1; end
205
212
 
213
+ ## Adds a message to the index. The message cannot already exist in
214
+ ## the index.
206
215
  def add_message m
207
- return false if contains? m
216
+ raise ArgumentError, "index already contains #{m.id}" if contains? m
208
217
 
209
218
  source_id =
210
219
  if m.source.is_a? Integer
@@ -230,9 +239,9 @@ class Index
230
239
 
231
240
  @index.add_document d
232
241
 
233
- ## TODO: figure out why this is sometimes triggered
234
- #docid, entry = load_entry_for_id m.id
235
- #raise "just added message #{m.id} but couldn't find it in a search" unless docid
242
+ docid, entry = load_entry_for_id m.id
243
+ ## this hasn't been triggered in a long time. TODO: decide whether it's still a problem.
244
+ raise "just added message #{m.id} but couldn't find it in a search" unless docid
236
245
  true
237
246
  end
238
247
 
@@ -294,6 +303,7 @@ protected
294
303
  end
295
304
 
296
305
  query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam)
306
+ query.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless opts[:load_deleted] || labels.include?(:deleted)
297
307
  query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not unless opts[:load_killed] || labels.include?(:killed)
298
308
  query
299
309
  end
data/lib/sup/label.rb CHANGED
@@ -5,10 +5,10 @@ class LabelManager
5
5
 
6
6
  ## labels that have special semantics. user will be unable to
7
7
  ## add/remove these via normal label mechanisms.
8
- RESERVED_LABELS = [ :starred, :spam, :draft, :unread, :killed, :sent ]
8
+ RESERVED_LABELS = [ :starred, :spam, :draft, :unread, :killed, :sent, :deleted ]
9
9
 
10
10
  ## labels which it nonetheless makes sense to search for by
11
- LISTABLE_LABELS = [ :starred, :spam, :draft, :sent ]
11
+ LISTABLE_LABELS = [ :starred, :spam, :draft, :sent, :killed, :deleted ]
12
12
 
13
13
  ## labels that will never be displayed to the user
14
14
  HIDDEN_LABELS = [ :starred, :unread ]
data/lib/sup/mbox.rb CHANGED
@@ -16,24 +16,29 @@ module MBox
16
16
  ## when scanning over large mbox files.
17
17
  while(line = f.gets)
18
18
  case line
19
- when /^From:\s+(.*)$/i: header[last = "From"] = $1
20
- when /^To:\s+(.*)$/i: header[last = "To"] = $1
21
- when /^Cc:\s+(.*)$/i: header[last = "Cc"] = $1
22
- when /^Bcc:\s+(.*)$/i: header[last = "Bcc"] = $1
23
- when /^Subject:\s+(.*)$/i: header[last = "Subject"] = $1
24
- when /^Date:\s+(.*)$/i: header[last = "Date"] = $1
25
- when /^Message-Id:\s+<(.*)>$/i: header[last = "Message-Id"] = $1
26
- when /^References:\s+(.*)$/i: header[last = "References"] = $1
27
- when /^In-Reply-To:\s+(.*)$/i: header[last = "In-Reply-To"] = $1
28
- when /^List-Post:\s+(.*)$/i: header[last = "List-Post"] = $1
29
- when /^Reply-To:\s+(.*)$/i: header[last = "Reply-To"] = $1
30
- when /^Status:\s+(.*)$/i: header[last = "Status"] = $1
31
- when /^Delivered-To:\s+(.*)$/i
32
- header[last = "Delivered-To"] = $1 unless header["Delivered-To"]
19
+ when /^(From):\s+(.*)$/i,
20
+ /^(To):\s+(.*)$/i,
21
+ /^(Cc):\s+(.*)$/i,
22
+ /^(Bcc):\s+(.*)$/i,
23
+ /^(Subject):\s+(.*)$/i,
24
+ /^(Date):\s+(.*)$/i,
25
+ /^(Message-Id):\s+<(.*)>$/i,
26
+ /^(References):\s+(.*)$/i,
27
+ /^(In-Reply-To):\s+(.*)$/i,
28
+ /^(Reply-To):\s+(.*)$/i,
29
+ /^(List-Post):\s+(.*)$/i,
30
+ /^(Status):\s+(.*)$/i: header[last = $1] = $2
31
+
32
+ ## these next three can occur multiple times, and we want the
33
+ ## first one
34
+ when /^(Delivered-To):\s+(.*)$/i,
35
+ /^(X-Original-To):\s+(.*)$/i,
36
+ /^(Envelope-To):\s+(.*)$/i: header[last = $1.downcase] ||= $2
37
+
33
38
  when /^$/: break
34
39
  when /:/: last = nil
35
40
  else
36
- header[last] += line.gsub(/^\s+/, "") if last
41
+ header[last] += " " + line.chomp.gsub(/^\s+/, "") if last
37
42
  end
38
43
  end
39
44
  header