sup 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sup might be problematic. Click here for more details.

@@ -49,13 +49,8 @@ class DraftLoader < Source
49
49
  def id; DraftManager.source_id; end
50
50
  def to_s; DraftManager.source_name; end
51
51
 
52
- def next
53
- ret = nil
54
- begin
55
- ret = cur_offset
56
- self.cur_offset = cur_offset + 1
57
- end until File.exists? fn_for_offset(ret)
58
- [ret, [:draft]]
52
+ def each
53
+ Dir.entries(@dir).select { |x| x =~ /^\d+$/ }.sort_by { |x| x.to_i }.each { |id| yield [id, [:draft]] }
59
54
  end
60
55
 
61
56
  def gen_offset
@@ -101,7 +96,7 @@ class DraftLoader < Source
101
96
  end
102
97
 
103
98
  def start_offset; 0; end
104
- def end_offset; Dir.new(@dir).entries.sort.last.to_i + 1; end
99
+ def end_offset; Dir.new(@dir).entries.sort.last.to_i; end
105
100
  end
106
101
 
107
102
  Redwood::register_yaml(DraftLoader, %w(cur_offset))
@@ -1,32 +1,70 @@
1
1
  require 'uri'
2
2
  require 'net/imap'
3
3
  require 'stringio'
4
+ require 'time'
5
+
6
+ ## fucking imap fucking sucks. what the FUCK kind of committee of
7
+ ## dunces designed this shit.
8
+
9
+ ## imap talks about 'unique ids' for messages, to be used for
10
+ ## cross-session identification. great---just what sup needs! except
11
+ ## it turns out the uids can be invalidated every time the
12
+ ## 'uidvalidity' value changes on the server, and 'uidvalidity' can
13
+ ## change without restriction. it can change any time you log in. it
14
+ ## can change EVERY time you log in. of course the imap spec "strongly
15
+ ## recommends" that it never change, but there's nothing to stop
16
+ ## people from just setting it to the current timestamp, and in fact
17
+ ## that's exactly what the one imap server i have at my disposal
18
+ ## does. thus the so-called uids are absolutely useless and imap
19
+ ## provides no cross-session way of uniquely identifying a
20
+ ## message. but thanks for the "strong recommendation", guys!
21
+
22
+ ## so right now i'm using the 'internal date' and the size of each
23
+ ## message to uniquely identify it, and i scan over the entire mailbox
24
+ ## each time i open it to map those things to message ids. that can be
25
+ ## slow for large mailboxes, and we'll just have to hope that there
26
+ ## are no collisions. ho ho! a perfectly reasonable solution!
27
+
28
+ ## fuck you, imap committee. you managed to design something as shitty
29
+ ## as mbox but goddamn THIRTY YEARS LATER.
4
30
 
5
31
  module Redwood
6
32
 
7
33
  class IMAP < Source
8
- attr_reader :labels
9
-
10
- def initialize uri, username, password, last_uid=nil, usual=true, archived=false, id=nil
34
+ attr_reader_cloned :labels
35
+ attr_accessor :username, :password
36
+
37
+ def initialize uri, username, password, last_idate=nil, usual=true, archived=false, id=nil
11
38
  raise ArgumentError, "username and password must be specified" unless username && password
39
+ raise ArgumentError, "not an imap uri" unless uri =~ %r!imaps?://!
12
40
 
13
- super uri, last_uid, usual, archived, id
41
+ super uri, last_idate, usual, archived, id
14
42
 
15
43
  @parsed_uri = URI(uri)
16
44
  @username = username
17
45
  @password = password
18
46
  @imap = nil
47
+ @imap_ids = {}
48
+ @ids = []
19
49
  @labels = [:unread]
20
- @labels << mailbox.intern unless mailbox =~ /inbox/i || mailbox.empty?
21
50
  @labels << :inbox unless archived?
51
+ @labels << mailbox.intern unless mailbox =~ /inbox/i || mailbox.nil?
52
+ @mutex = Mutex.new
53
+ end
22
54
 
23
- connect
55
+ def say s
56
+ @say_id = BufferManager.say s, @say_id if BufferManager.instantiated?
57
+ Redwood::log s
24
58
  end
59
+ def shutup
60
+ BufferManager.clear @say_id if BufferManager.instantiated?
61
+ @say_id = nil
62
+ end
63
+ private :say, :shutup
25
64
 
26
65
  def connect
27
66
  return false if broken?
28
67
  return true if @imap
29
- Redwood::log "connecting to #{@parsed_uri.host} port #{ssl? ? 993 : 143}, ssl=#{ssl?} ..."
30
68
 
31
69
  ## ok, this is FUCKING ANNOYING.
32
70
  ##
@@ -40,17 +78,35 @@ class IMAP < Source
40
78
  ## problem.
41
79
  ##
42
80
  ## FUCK!!!!!!!!!
43
- ::Thread.new do
81
+
82
+ say "Connecting to IMAP server #{host}:#{port}..."
83
+
84
+ Redwood::reporting_thread do
44
85
  begin
45
86
  #raise Net::IMAP::ByeResponseError, "simulated imap failure"
46
- @imap = Net::IMAP.new @parsed_uri.host, ssl? ? 993 : 143, ssl?
87
+ @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl?
88
+ say "Logging in..."
47
89
  @imap.authenticate 'LOGIN', @username, @password
90
+ say "Sizing mailbox..."
48
91
  @imap.examine mailbox
49
- Redwood::log "successfully connected to #{@parsed_uri}, mailbox #{mailbox}"
50
- rescue Exception => e
92
+ last_id = @imap.responses["EXISTS"][-1]
93
+
94
+ say "Reading headers (because IMAP sucks)..."
95
+ values = @imap.fetch(1 .. last_id, ['RFC822.SIZE', 'INTERNALDATE'])
96
+
97
+ say "Successfully connected to #{@parsed_uri}"
98
+
99
+ values.each do |v|
100
+ id = make_id v
101
+ @ids << id
102
+ @imap_ids[id] = v.seqno
103
+ end
104
+ rescue SocketError, Net::IMAP::Error, SourceError => e
51
105
  self.broken_msg = e.message.chomp # fucking chomp! fuck!!!
52
106
  @imap = nil
53
107
  Redwood::log "error connecting to IMAP server: #{self.broken_msg}"
108
+ ensure
109
+ shutup
54
110
  end
55
111
  end.join
56
112
 
@@ -58,50 +114,78 @@ class IMAP < Source
58
114
  end
59
115
  private :connect
60
116
 
61
- def mailbox; @parsed_uri.path[1..-1] end ##XXXX TODO handle nil
117
+ def make_id imap_stuff
118
+ msize, mdate = imap_stuff.attr['RFC822.SIZE'], Time.parse(imap_stuff.attr["INTERNALDATE"])
119
+ sprintf("%d%07d", mdate.to_i, msize).to_i
120
+ end
121
+ private :make_id
122
+
123
+ def host; @parsed_uri.host; end
124
+ def port; @parsed_uri.port || (ssl? ? 993 : 143); end
125
+ def mailbox; @parsed_uri.path[1..-1] || 'INBOX'; end
62
126
  def ssl?; @parsed_uri.scheme == 'imaps' end
63
127
 
64
- def load_header uid=nil
65
- MBox::read_header StringIO.new(raw_header(uid))
128
+ def load_header id
129
+ MBox::read_header StringIO.new(raw_header(id))
66
130
  end
67
131
 
68
- def load_message uid
69
- RMail::Parser.read raw_full_message(uid)
132
+ def load_message id
133
+ RMail::Parser.read raw_full_message(id)
70
134
  end
71
135
 
72
136
  ## load the full header text
73
- def raw_header uid
74
- connect or return broken_msg
75
- begin
76
- connect or return broken_msg
77
- rescue Exception => e
78
- raise "wtf: #{e.inspect}"
137
+ def raw_header id
138
+ @mutex.synchronize do
139
+ connect or raise SourceError, broken_msg
140
+ get_imap_field(id, 'RFC822.HEADER').gsub(/\r\n/, "\n")
79
141
  end
80
- @imap.uid_fetch(uid, 'RFC822.HEADER')[0].attr['RFC822.HEADER'].gsub(/\r\n/, "\n")
81
142
  end
82
143
 
83
- def raw_full_message uid
84
- connect or return broken_msg
85
- @imap.uid_fetch(uid, 'RFC822')[0].attr['RFC822'].gsub(/\r\n/, "\n")
144
+ def raw_full_message id
145
+ @mutex.synchronize do
146
+ connect or raise SourceError, broken_msg
147
+ get_imap_field(id, 'RFC822').gsub(/\r\n/, "\n")
148
+ end
86
149
  end
150
+
151
+ def get_imap_field id, field
152
+ f = nil
153
+ imap_id = @imap_ids[id] or raise SourceError, "Unknown message id #{id}. It is likely that messages have been deleted from this IMAP mailbox."
154
+ begin
155
+ f = @imap.fetch imap_id, [field, 'RFC822.SIZE', 'INTERNALDATE']
156
+ got_id = make_id f[0]
157
+ raise SourceError, "IMAP message mismatch: requested #{id}, got #{got_id}. It is likely the IMAP mailbox has been modified." unless got_id == id
158
+ rescue Net::IMAP::Error => e
159
+ raise SourceError, e.message
160
+ end
161
+ raise SourceError, "null IMAP field '#{field}' for message with id #{id} imap id #{imap_id}" if f.nil?
162
+
163
+ f[0].attr[field]
164
+ end
165
+ private :get_imap_field
87
166
 
88
167
  def each
89
- connect or return broken_msg
90
- uids = @imap.uid_search ['UID', "#{cur_offset}:#{end_offset}"]
91
- uids.each do |uid|
92
- @last_uid = uid
93
- @dirty = true
94
- yield uid, labels
168
+ @mutex.synchronize { connect or raise SourceError, broken_msg }
169
+
170
+ start = @ids.index(cur_offset || start_offset)
171
+ start.upto(@ids.length - 1) do |i|
172
+ id = @ids[i]
173
+ self.cur_offset = id
174
+ yield id, labels
95
175
  end
96
176
  end
97
177
 
98
- def start_offset; 1; end
178
+ def start_offset
179
+ @mutex.synchronize { connect or raise SourceError, broken_msg }
180
+ @ids.first
181
+ end
182
+
99
183
  def end_offset
100
- connect or return start_offset
101
- @imap.uid_search(['ALL']).last
184
+ @mutex.synchronize { connect or raise SourceError, broken_msg }
185
+ @ids.last
102
186
  end
103
187
  end
104
188
 
105
- Redwood::register_yaml(IMAP, %w(uri username password offset usual archived id))
189
+ Redwood::register_yaml(IMAP, %w(uri username password cur_offset usual archived id))
106
190
 
107
191
  end
@@ -2,30 +2,26 @@
2
2
 
3
3
  require 'thread'
4
4
  require 'fileutils'
5
- require_gem 'ferret', ">= 0.10.13"
5
+ require 'ferret'
6
+ #require_gem 'ferret', ">= 0.10.13"
6
7
 
7
8
  module Redwood
8
9
 
9
- class IndexError < StandardError
10
- attr_reader :source
11
-
12
- def initialize source, s
13
- super s
14
- @source = source
15
- end
16
- end
17
-
18
10
  class Index
19
11
  include Singleton
20
12
 
21
- attr_reader :index # debugging only
22
-
13
+ attr_reader :index
23
14
  def initialize dir=BASE_DIR
24
15
  @dir = dir
25
- @mutex = Mutex.new
26
16
  @sources = {}
27
17
  @sources_dirty = false
28
18
 
19
+ wsa = Ferret::Analysis::WhiteSpaceAnalyzer.new false
20
+ sa = Ferret::Analysis::StandardAnalyzer.new Ferret::Analysis::FULL_ENGLISH_STOP_WORDS, true
21
+ @analyzer = Ferret::Analysis::PerFieldAnalyzer.new wsa
22
+ @analyzer[:body] = sa
23
+ @qparser ||= Ferret::QueryParser.new :default_field => :body, :analyzer => @analyzer
24
+
29
25
  self.class.i_am_the_instance self
30
26
  end
31
27
 
@@ -51,18 +47,15 @@ class Index
51
47
 
52
48
  def source_for name; @sources.values.find { |s| s.is_source_for? name }; end
53
49
  def usual_sources; @sources.values.find_all { |s| s.usual? }; end
50
+ def sources; @sources.values; end
54
51
 
55
52
  def load_index dir=File.join(@dir, "ferret")
56
- wsa = Ferret::Analysis::WhiteSpaceAnalyzer.new false
57
- sa = Ferret::Analysis::StandardAnalyzer.new
58
- analyzer = Ferret::Analysis::PerFieldAnalyzer.new wsa
59
- analyzer[:body] = sa
60
-
61
53
  if File.exists? dir
62
- Redwood::log "loading index"
63
- @index = Ferret::Index::Index.new(:path => dir, :analyzer => analyzer)
54
+ Redwood::log "loading index..."
55
+ @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer)
56
+ Redwood::log "loaded index of #{@index.size} messages"
64
57
  else
65
- Redwood::log "creating index"
58
+ Redwood::log "creating index..."
66
59
  field_infos = Ferret::Index::FieldInfos.new :store => :yes
67
60
  field_infos.add_field :message_id
68
61
  field_infos.add_field :source_id
@@ -76,7 +69,7 @@ class Index
76
69
  field_infos.add_field :refs
77
70
  field_infos.add_field :snippet, :index => :no, :term_vector => :no
78
71
  field_infos.create_index dir
79
- @index = Ferret::Index::Index.new(:path => dir, :analyzer => analyzer)
72
+ @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer)
80
73
  end
81
74
  end
82
75
 
@@ -87,7 +80,9 @@ class Index
87
80
  source ||= entry[:source_id].to_i
88
81
  source_info ||= entry[:source_info].to_i
89
82
  end
90
- raise "no entry and no source info for message #{m.id}" unless source && source_info
83
+
84
+ ## this happens sometimes. i'm not sure why. ferret bug?
85
+ raise "no entry and no source info for message #{m.id}: source #{source.inspect}, info #{source_info.inspect}, entry #{entry.inspect}, query #{Ferret::Search::TermQuery.new(:message_id, m.id)}, results #{@index.search(Ferret::Search::TermQuery.new(:message_id, m.id)).inspect}" unless source && source_info
91
86
 
92
87
  raise "deleting non-corresponding entry #{docid}" unless @index[docid][:message_id] == m.id
93
88
  @index.delete docid
@@ -109,7 +104,7 @@ class Index
109
104
  ## in scotland, frikkin' huuuge.
110
105
  EACH_BY_DATE_NUM = 100
111
106
  def each_id_by_date opts={}
112
- return if @index.size == 0 # otherwise ferret barfs
107
+ return if @index.size == 0 # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
113
108
  query = build_query opts
114
109
  offset = 0
115
110
  while true
@@ -122,12 +117,15 @@ class Index
122
117
  end
123
118
 
124
119
  def num_results_for opts={}
125
- query = build_query opts
126
- x = @index.search(query).total_hits
127
- Redwood::log "num_results_for: have #{x} for query #{query}"
128
- x
120
+ return 0 if @index.size == 0 # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
121
+ q = build_query opts
122
+ index.search(q).total_hits
129
123
  end
130
124
 
125
+ ## yield all messages in the thread containing 'm' by repeatedly
126
+ ## querying the index. yields pairs of message ids and
127
+ ## message-building lambdas, so that building an unwanted message
128
+ ## can be skipped in the block if desired.
131
129
  SAME_SUBJECT_DATE_LIMIT = 7
132
130
  def each_message_in_thread_for m, opts={}
133
131
  messages = {}
@@ -186,44 +184,18 @@ class Index
186
184
  #puts "building message #{doc[:message_id]} (#{source}##{doc[:source_info]})"
187
185
  raise "invalid source #{doc[:source_id]}" unless source
188
186
 
189
- m =
190
- if source.broken?
191
- nil
192
- else
193
- begin
194
- Message.new :source => source, :source_info => doc[:source_info].to_i,
195
- :labels => doc[:label].split(" ").map { |s| s.intern },
196
- :snippet => doc[:snippet]
197
- rescue MessageFormatError => e
198
- raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}")
199
- rescue SourceError => e
200
- nil
201
- end
202
- end
187
+ fake_header = {
188
+ "date" => Time.at(doc[:date].to_i),
189
+ "subject" => unwrap_subj(doc[:subject]),
190
+ "from" => doc[:from],
191
+ "to" => doc[:to],
192
+ "message-id" => doc[:message_id],
193
+ "references" => doc[:refs],
194
+ }
203
195
 
204
- unless m
205
- fake_header = {
206
- "date" => Time.at(doc[:date].to_i),
207
- "subject" => unwrap_subj(doc[:subject]),
208
- "from" => doc[:from],
209
- "to" => doc[:to],
210
- "message-id" => doc[:message_id],
211
- "references" => doc[:refs],
212
- }
213
-
214
- m = Message.new :labels => doc[:label].split(" ").map { |s| s.intern },
215
- :snippet => doc[:snippet], :header => fake_header,
216
- :body => <<EOS
217
- #{doc[:snippet]}...
218
-
219
- An error occurred while loading this message. It is possible that the source
220
- has changed, or (in the case of remote sources) is down.
221
-
222
- The error message was:
223
- #{source.broken_msg}
224
- EOS
225
- end
226
- m
196
+ Message.new :source => source, :source_info => doc[:source_info].to_i,
197
+ :labels => doc[:label].split(" ").map { |s| s.intern },
198
+ :snippet => doc[:snippet], :header => fake_header
227
199
  end
228
200
 
229
201
  def fresh_thread_id; @next_thread_id += 1; end
@@ -237,7 +209,7 @@ EOS
237
209
  if m.source.is_a? Integer
238
210
  m.source
239
211
  else
240
- m.source.id or raise "unregistered source #{m.source}"
212
+ m.source.id or raise "unregistered source #{m.source} (id #{m.source.id.inspect})"
241
213
  end
242
214
 
243
215
  to = (m.to + m.cc + m.bcc).map { |x| x.email }.join(" ")
@@ -305,21 +277,23 @@ EOS
305
277
 
306
278
  protected
307
279
 
308
- ## TODO: convert this to query objects rather than strings
280
+ def parse_user_query_string str; @qparser.parse str; end
309
281
  def build_query opts
310
- query = ""
311
- query += opts[:labels].map { |t| "+label:#{t}" }.join(" ") if opts[:labels]
312
- query += " +label:#{opts[:label]}" if opts[:label]
313
- query += " #{opts[:content]}" if opts[:content]
282
+ query = Ferret::Search::BooleanQuery.new
283
+ query.add_query opts[:qobj], :must if opts[:qobj]
284
+ labels = ([opts[:label]] + (opts[:labels] || [])).compact
285
+ labels.each { |t| query.add_query Ferret::Search::TermQuery.new("label", t.to_s), :must }
314
286
  if opts[:participants]
315
- query += "+(" +
316
- opts[:participants].map { |p| "from:#{p.email} OR to:#{p.email}" }.join(" OR ") + ")"
287
+ q2 = Ferret::Search::BooleanQuery.new
288
+ opts[:participants].each do |p|
289
+ q2.add_query Ferret::Search::TermQuery.new("from", p.email), :should
290
+ q2.add_query Ferret::Search::TermQuery.new("to", p.email), :should
291
+ end
292
+ query.add_query q2, :must
317
293
  end
318
294
 
319
- query += " -label:spam" unless opts[:load_spam] || opts[:labels] == :spam ||
320
- (opts[:labels] && opts[:labels].include?(:spam))
321
- query += " -label:killed" unless opts[:load_killed] || opts[:labels] == :killed ||
322
- (opts[:labels] && opts[:labels].include?(:killed))
295
+ query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam)
296
+ query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not unless opts[:load_killed] || labels.include?(:killed)
323
297
  query
324
298
  end
325
299
 
@@ -335,7 +309,7 @@ protected
335
309
  File.chmod 0600, fn
336
310
  FileUtils.mv fn, bakfn, :force => true unless File.exists?(bakfn) && File.size(bakfn) > File.size(fn)
337
311
  end
338
- Redwood::save_yaml_obj @sources.values, fn
312
+ Redwood::save_yaml_obj @sources.values, fn
339
313
  File.chmod 0600, fn
340
314
  end
341
315
  @sources_dirty = false