sup 0.8.1 → 0.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sup might be problematic. Click here for more details.

Files changed (67) hide show
  1. data/CONTRIBUTORS +13 -6
  2. data/History.txt +19 -0
  3. data/ReleaseNotes +35 -0
  4. data/bin/sup +82 -77
  5. data/bin/sup-add +7 -7
  6. data/bin/sup-config +104 -85
  7. data/bin/sup-dump +4 -5
  8. data/bin/sup-recover-sources +9 -10
  9. data/bin/sup-sync +121 -100
  10. data/bin/sup-sync-back +18 -15
  11. data/bin/sup-tweak-labels +24 -21
  12. data/lib/sup.rb +53 -33
  13. data/lib/sup/account.rb +0 -2
  14. data/lib/sup/buffer.rb +47 -22
  15. data/lib/sup/colormap.rb +6 -6
  16. data/lib/sup/contact.rb +0 -2
  17. data/lib/sup/crypto.rb +34 -23
  18. data/lib/sup/draft.rb +6 -14
  19. data/lib/sup/ferret_index.rb +471 -0
  20. data/lib/sup/hook.rb +30 -43
  21. data/lib/sup/hook.rb.BACKUP.8625.rb +158 -0
  22. data/lib/sup/hook.rb.BACKUP.8681.rb +158 -0
  23. data/lib/sup/hook.rb.BASE.8625.rb +155 -0
  24. data/lib/sup/hook.rb.BASE.8681.rb +155 -0
  25. data/lib/sup/hook.rb.LOCAL.8625.rb +142 -0
  26. data/lib/sup/hook.rb.LOCAL.8681.rb +142 -0
  27. data/lib/sup/hook.rb.REMOTE.8625.rb +145 -0
  28. data/lib/sup/hook.rb.REMOTE.8681.rb +145 -0
  29. data/lib/sup/imap.rb +18 -8
  30. data/lib/sup/index.rb +70 -528
  31. data/lib/sup/interactive-lock.rb +74 -0
  32. data/lib/sup/keymap.rb +26 -26
  33. data/lib/sup/label.rb +2 -4
  34. data/lib/sup/logger.rb +54 -35
  35. data/lib/sup/maildir.rb +41 -6
  36. data/lib/sup/mbox.rb +1 -1
  37. data/lib/sup/mbox/loader.rb +18 -6
  38. data/lib/sup/mbox/ssh-file.rb +1 -7
  39. data/lib/sup/message-chunks.rb +36 -23
  40. data/lib/sup/message.rb +126 -46
  41. data/lib/sup/mode.rb +3 -2
  42. data/lib/sup/modes/console-mode.rb +108 -0
  43. data/lib/sup/modes/edit-message-mode.rb +15 -5
  44. data/lib/sup/modes/inbox-mode.rb +2 -4
  45. data/lib/sup/modes/label-list-mode.rb +1 -1
  46. data/lib/sup/modes/line-cursor-mode.rb +18 -18
  47. data/lib/sup/modes/log-mode.rb +29 -16
  48. data/lib/sup/modes/poll-mode.rb +7 -9
  49. data/lib/sup/modes/reply-mode.rb +5 -3
  50. data/lib/sup/modes/scroll-mode.rb +2 -2
  51. data/lib/sup/modes/search-results-mode.rb +9 -11
  52. data/lib/sup/modes/text-mode.rb +2 -2
  53. data/lib/sup/modes/thread-index-mode.rb +26 -16
  54. data/lib/sup/modes/thread-view-mode.rb +84 -39
  55. data/lib/sup/person.rb +6 -8
  56. data/lib/sup/poll.rb +46 -47
  57. data/lib/sup/rfc2047.rb +1 -5
  58. data/lib/sup/sent.rb +27 -20
  59. data/lib/sup/source.rb +90 -13
  60. data/lib/sup/textfield.rb +4 -4
  61. data/lib/sup/thread.rb +15 -13
  62. data/lib/sup/undo.rb +0 -1
  63. data/lib/sup/update.rb +0 -1
  64. data/lib/sup/util.rb +51 -43
  65. data/lib/sup/xapian_index.rb +566 -0
  66. metadata +57 -46
  67. data/lib/sup/suicide.rb +0 -36
@@ -0,0 +1,566 @@
require 'xapian'
require 'set'

module Redwood

# This index implementation uses Xapian both for searching and for storage:
# each message's metadata hash is marshalled into the data field of its Xapian
# document. It tends to be slightly faster than Ferret for indexing and
# significantly faster for searching due to precomputing thread membership.
class XapianIndex < BaseIndex
  STEM_LANGUAGE = "english"
  INDEX_VERSION = '1'

  ## dates are converted to integers for xapian, and are used for document ids,
  ## so we must ensure they're reasonably valid. this typically only affects
  ## spam.
  MIN_DATE = Time.at 0
  MAX_DATE = Time.at(2**31-1)

  HookManager.register "custom-search", <<EOS
Executes before a string search is applied to the index,
returning a new search string.
Variables:
subs: The string being searched.
EOS

  ## Sets up the index rooted at +dir+ (arguments are forwarded unchanged to
  ## the superclass) and creates the monitor that guards all access to the
  ## Xapian handles. The database itself is opened by #load_index.
  def initialize dir=BASE_DIR
    super

    @index_mutex = Monitor.new
  end

  ## Opens the Xapian database under <dir>/xapian, creating it (and stamping
  ## it with INDEX_VERSION) if it does not exist yet. Fails if an existing
  ## database carries a different version. Also builds the term generator and
  ## the enquire object used by the rest of the class.
  def load_index
    path = File.join(@dir, 'xapian')
    # File.exist? rather than the deprecated File.exists? (removed in Ruby 3.2)
    if File.exist? path
      @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_OPEN)
      db_version = @xapian.get_metadata 'version'
      db_version = '0' if db_version.empty?
      if db_version != INDEX_VERSION
        fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please downgrade to your previous version and dump your labels before upgrading to this version (then run sup-sync --restore)."
      end
    else
      @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
      @xapian.set_metadata 'version', INDEX_VERSION
    end
    @term_generator = Xapian::TermGenerator.new()
    @term_generator.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
    @enquire = Xapian::Enquire.new @xapian
    @enquire.weighting_scheme = Xapian::BoolWeight.new
    @enquire.docid_order = Xapian::Enquire::ASCENDING
  end

  ## Intentionally a no-op in this implementation.
  def save_index
  end

  ## Intentionally a no-op in this implementation.
  def optimize
  end

  ## Number of documents in the database.
  def size
    synchronize { @xapian.doccount }
  end

  ## Returns true if a document exists for message id +id+, nil otherwise.
  def contains_id? id
    synchronize { find_docid(id) && true }
  end

  ## Returns the source id stored in the entry for message id +id+. Raises if
  ## the message is not in the index (the entry is nil in that case).
  def source_for_id id
    synchronize { get_entry(id)[:source_id] }
  end

  ## Deletes the document(s) carrying the msgid term for +id+.
  def delete id
    synchronize { @xapian.delete_document mkterm(:msgid, id) }
  end

  ## Rebuilds a Message from the stored entry for +id+. Returns nil if the
  ## message is not indexed; raises if the entry names an unknown source.
  def build_message id
    entry = synchronize { get_entry id }
    return unless entry

    source = SourceManager[entry[:source_id]]
    raise "invalid source #{entry[:source_id]}" unless source

    m = Message.new :source => source, :source_info => entry[:source_info],
                    :labels => entry[:labels], :snippet => entry[:snippet]

    # people are stored as [email, name] pairs (see #sync_message); Person.new
    # is called with them reversed, i.e. (name, email)
    mk_person = lambda { |x| Person.new(*x.reverse!) }
    entry[:from] = mk_person[entry[:from]]
    entry[:to].map!(&mk_person)
    entry[:cc].map!(&mk_person)
    entry[:bcc].map!(&mk_person)

    m.load_from_index! entry
    m
  end

  ## All three entry points funnel into #sync_message.
  def add_message m; sync_message m end
  def update_message m; sync_message m end
  def update_message_state m; sync_message m end

  ## Writes +m+ to the index. Fields already present in a stored entry (date,
  ## people, refs, replytos) are kept unless opts[:force_overwrite] is set;
  ## snippet, labels, subject and source information always come from +m+.
  ## Always returns true.
  def sync_message m, opts={}
    entry = synchronize { get_entry m.id }
    snippet = m.snippet
    entry ||= {}
    labels = m.labels
    entry = {} if opts[:force_overwrite]

    d = {
      :message_id => m.id,
      :source_id => m.source.id,
      :source_info => m.source_info,
      :date => (entry[:date] || m.date),
      :snippet => snippet,
      :labels => labels,
      :from => (entry[:from] || [m.from.email, m.from.name]),
      :to => (entry[:to] || m.to.map { |p| [p.email, p.name] }),
      :cc => (entry[:cc] || m.cc.map { |p| [p.email, p.name] }),
      :bcc => (entry[:bcc] || m.bcc.map { |p| [p.email, p.name] }),
      :subject => m.subj,
      :refs => (entry[:refs] || m.refs),
      :replytos => (entry[:replytos] || m.replytos),
    }

    labels.each { |l| LabelManager << l }

    synchronize do
      index_message m, d, opts
    end
    true
  end
  private :sync_message

  ## Estimated number of matches for +query+ (a hash as accepted by
  ## #build_xapian_query). No results are fetched; Xapian is asked to check at
  ## least 100 documents when producing the estimate.
  def num_results_for query={}
    xapian_query = build_xapian_query query
    matchset = run_query xapian_query, 0, 0, 100
    matchset.matches_estimated
  end

  EACH_ID_PAGE = 100

  ## Yields the message id of every match for +query+, fetching results
  ## EACH_ID_PAGE at a time until a short page is returned.
  def each_id query={}
    offset = 0
    page = EACH_ID_PAGE

    xapian_query = build_xapian_query query
    while true
      ids = run_query_ids xapian_query, offset, (offset+page)
      ids.each { |id| yield id }
      break if ids.size < page
      offset += page
    end
  end

  ## Yields each matching message id together with a lambda that builds the
  ## Message on demand. Ordering is whatever #each_id produces.
  def each_id_by_date query={}
    each_id(query) { |id| yield id, lambda { build_message id } }
  end

  ## Walks every thread that +m+ is (transitively) linked to and yields each
  ## member's message id plus a lambda that builds the Message on demand.
  ## Returns nil if +m+ is not indexed, false (without yielding anything) if
  ## any thread reached during the walk is killed, and true otherwise.
  def each_message_in_thread_for m, opts={}
    # TODO thread by subject
    # TODO handle killed threads
    return unless doc = find_doc(m.id)
    queue = doc.value(THREAD_VALUENO).split(',')
    msgids = [m.id]
    seen_threads = Set.new
    seen_messages = Set.new [m.id]
    while not queue.empty?
      thread_id = queue.pop
      next if seen_threads.member? thread_id
      return false if thread_killed? thread_id
      seen_threads << thread_id
      docs = term_docids(mkterm(:thread, thread_id)).map { |x| @xapian.document x }
      # block parameter deliberately not named +doc+, so it cannot clobber the
      # outer local on Rubies where block parameters share the method scope
      docs.each do |member|
        msgid = member.value MSGID_VALUENO
        next if seen_messages.member? msgid
        msgids << msgid
        seen_messages << msgid
        queue.concat member.value(THREAD_VALUENO).split(',')
      end
    end
    msgids.each { |id| yield id, lambda { build_message id } }
    true
  end

  ## Collects up to opts[:num] (default 20) distinct people appearing in
  ## messages that involve any of +emails+, and returns them as Person objects.
  def load_contacts emails, opts={}
    contacts = Set.new
    num = opts[:num] || 20
    each_id_by_date :participants => emails do |id,b|
      break if contacts.size >= num
      m = b.call
      ([m.from]+m.to+m.cc+m.bcc).compact.each { |p| contacts << [p.name, p.email] }
    end
    contacts.to_a.compact.map { |n,e| Person.new n, e }[0...num]
  end

  ## Translates the user-level search string +s+ into a query hash:
  ##   :qobj          the parsed Xapian::Query
  ##   :text          the original string +s+
  ##   :limit         set when a limit:N term was present
  ##   :load_spam / :load_deleted
  ##                  set when the string mentions those labels
  ## Raises ParseError for unparsable dates, non-numeric limits, or an empty
  ## resulting query.
  # TODO share code with the Ferret index
  def parse_query s
    query = {}

    subs = HookManager.run("custom-search", :subs => s) || s
    # operate on +subs+ (not +s+) so the hook's rewritten string is not thrown away
    subs = subs.gsub(/\b(to|from):(\S+)\b/) do
      field, name = $1, $2
      if(contact = ContactManager.contact_for(name))
        [field, contact.email]
      elsif name == "me"
        [field, "(" + AccountManager.user_emails.join("||") + ")"]
      else
        [field, name]
      end.join(":")
    end

    ## if we see a label:deleted or a label:spam term anywhere in the query
    ## string, we set the extra load_spam or load_deleted options to true.
    ## bizarre? well, because the query allows arbitrary parenthesized boolean
    ## expressions, without fully parsing the query, we can't tell whether
    ## the user is explicitly directing us to search spam messages or not.
    ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
    ## search spam messages or not?
    ##
    ## so, we rely on the fact that turning these extra options ON turns OFF
    ## the adding of "-label:deleted" or "-label:spam" terms at the very
    ## final stage of query processing. if the user wants to search spam
    ## messages, not adding that is the right thing; if he doesn't want to
    ## search spam messages, then not adding it won't have any effect.
    query[:load_spam] = true if subs =~ /\blabel:spam\b/
    query[:load_deleted] = true if subs =~ /\blabel:deleted\b/

    ## gmail style "is" operator
    subs = subs.gsub(/\b(is|has):(\S+)\b/) do
      label = $2
      case label
      when "read"
        "-label:unread"
      when "spam"
        query[:load_spam] = true
        "label:spam"
      when "deleted"
        query[:load_deleted] = true
        "label:deleted"
      else
        "label:#{label}"
      end
    end

    ## gmail style attachments "filename" and "filetype" searches
    subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
      field, name = $1, ($3 || $4)
      case field
      when "filename"
        debug "filename: translated #{field}:#{name} to attachment:\"#{name.downcase}\""
        "attachment:\"#{name.downcase}\""
      when "filetype"
        debug "filetype: translated #{field}:#{name} to attachment_extension:#{name.downcase}"
        "attachment_extension:#{name.downcase}"
      end
    end

    if $have_chronic
      # open-ended upper bound for "after:" ranges. this is the value the old
      # spelling `2<<32 - 1` actually evaluated to (`-` binds tighter than
      # `<<`, giving 2<<31); it lies above MAX_DATE.
      lastdate = 2**32
      firstdate = 0
      subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do
        field, datestr = $1, ($3 || $4)
        realdate = Chronic.parse datestr, :guess => false, :context => :past
        if realdate
          case field
          when "after"
            debug "chronic: translated #{field}:#{datestr} to #{realdate.end}"
            "date:#{realdate.end.to_i}..#{lastdate}"
          when "before"
            # NOTE(review): the log line reports realdate.begin but the range
            # actually ends at realdate.end, so "before:X" includes X itself.
            # Confirm which is intended before changing.
            debug "chronic: translated #{field}:#{datestr} to #{realdate.begin}"
            "date:#{firstdate}..#{realdate.end.to_i}"
          else
            debug "chronic: translated #{field}:#{datestr} to #{realdate}"
            "date:#{realdate.begin.to_i}..#{realdate.end.to_i}"
          end
        else
          raise ParseError, "can't understand date #{datestr.inspect}"
        end
      end
    end

    ## limit:42 restrict the search to 42 results
    subs = subs.gsub(/\blimit:(\S+)\b/) do
      lim = $1
      if lim =~ /^\d+$/
        query[:limit] = lim.to_i
        ''
      else
        raise ParseError, "non-numeric limit #{lim.inspect}"
      end
    end

    qp = Xapian::QueryParser.new
    qp.database = @xapian
    qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
    qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
    qp.default_op = Xapian::Query::OP_AND
    qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
    NORMAL_PREFIX.each { |k,v| qp.add_prefix k, v }
    BOOLEAN_PREFIX.each { |k,v| qp.add_boolean_prefix k, v }
    xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD, PREFIX['body'])

    raise ParseError if xapian_query.nil? or xapian_query.empty?
    query[:qobj] = xapian_query
    query[:text] = s
    query
  end

  private

  # Stemmed
  NORMAL_PREFIX = {
    'subject' => 'S',
    'body' => 'B',
    'from_name' => 'FN',
    'to_name' => 'TN',
    'name' => 'N',
    'attachment' => 'A',
  }

  # Unstemmed
  BOOLEAN_PREFIX = {
    'type' => 'K',
    'from_email' => 'FE',
    'to_email' => 'TE',
    'email' => 'E',
    'date' => 'D',
    'label' => 'L',
    'source_id' => 'I',
    'attachment_extension' => 'O',
    'msgid' => 'Q',
    'thread' => 'H',
    'ref' => 'R',
  }

  PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX

  # Xapian value slots used on every document
  MSGID_VALUENO = 0
  THREAD_VALUENO = 1
  DATE_VALUENO = 2

  # terms longer than this are skipped (see #index_message) or truncated
  # (see #mkterm)
  MAX_TERM_LENGTH = 245

  # Xapian can very efficiently sort in ascending docid order. Sup always wants
  # to sort by descending date, so this method maps between them. In order to
  # handle multiple messages per second, we use a logistic curve centered
  # around MIDDLE_DATE so that the slope (docid/s) is greatest in this time
  # period. A docid collision is not an error - the code will pick the next
  # smallest unused one.
  #
  # Returns the chosen docid, or nil if no unused positive docid was found.
  DOCID_SCALE = 2.0**32
  TIME_SCALE = 2.0**27
  MIDDLE_DATE = Time.gm(2011)
  def assign_docid m, truncated_date
    t = (truncated_date.to_i - MIDDLE_DATE.to_i).to_f
    docid = (DOCID_SCALE - DOCID_SCALE/(Math::E**(-(t/TIME_SCALE)) + 1)).to_i
    while docid > 0 and docid_exists? docid
      docid -= 1
    end
    docid > 0 ? docid : nil
  end

  # Returns true if a document with this docid exists. Existence is probed by
  # asking for the document length and treating a "DocNotFoundError" message
  # as absence; any other RuntimeError is re-raised.
  # XXX is there a better way?
  def docid_exists? docid
    @xapian.doclength docid
    true
  rescue RuntimeError => e # Xapian::DocNotFoundError
    raise unless e.message =~ /DocNotFoundError/
    false
  end

  # Docids of all documents indexed by +term+.
  def term_docids term
    @xapian.postlist(term).map { |x| x.docid }
  end

  # Docid for message id +id+, or nil. Fails if more than one document
  # carries the same msgid term.
  def find_docid id
    docids = term_docids(mkterm(:msgid,id))
    fail unless docids.size <= 1
    docids.first
  end

  # Xapian document for message id +id+, or nil if it is not indexed.
  def find_doc id
    return unless docid = find_docid(id)
    @xapian.document docid
  end

  # Message id stored on the document with this docid.
  def get_id docid
    return unless doc = @xapian.document(docid)
    doc.value MSGID_VALUENO
  end

  # Unmarshals the entry hash stored for message id +id+, or returns nil if
  # the message is not indexed. The data is whatever #index_message wrote.
  def get_entry id
    return unless doc = find_doc(id)
    Marshal.load doc.data
  end

  # True if at least one document belongs to +thread_id+ and carries the
  # "killed" label.
  def thread_killed? thread_id
    not run_query(Q.new(Q::OP_AND, mkterm(:thread, thread_id), mkterm(:label, :Killed)), 0, 1).empty?
  end

  # Runs the block while holding the index monitor (re-entrant).
  def synchronize &b
    @index_mutex.synchronize(&b)
  end

  # Runs +xapian_query+ and returns the match set for results in the
  # half-open window offset...limit (so limit is an end position, not a
  # count). +checkatleast+ is passed straight through to Enquire#mset.
  def run_query xapian_query, offset, limit, checkatleast=0
    synchronize do
      @enquire.query = xapian_query
      @enquire.mset(offset, limit-offset, checkatleast)
    end
  end

  # Like #run_query, but returns the message ids of the matches.
  def run_query_ids xapian_query, offset, limit
    matchset = run_query xapian_query, offset, limit
    matchset.matches.map { |r| r.document.value MSGID_VALUENO }
  end

  Q = Xapian::Query

  # Builds the final Xapian query from a query hash. Recognised keys:
  # :label, :labels, :qobj, :source_id, :participants, and :load_spam /
  # :load_deleted / :load_killed. Messages labelled spam, deleted or killed
  # are excluded unless that label was asked for or the matching :load_* key
  # is present.
  def build_xapian_query opts
    labels = ([opts[:label]] + (opts[:labels] || [])).compact
    neglabels = [:spam, :deleted, :killed].reject { |l| (labels.include? l) || opts.member?("load_#{l}".intern) }
    pos_terms, neg_terms = [], []

    pos_terms << mkterm(:type, 'mail')
    pos_terms.concat(labels.map { |l| mkterm(:label,l) })
    pos_terms << opts[:qobj] if opts[:qobj]
    pos_terms << mkterm(:source_id, opts[:source_id]) if opts[:source_id]

    if opts[:participants]
      participant_terms = opts[:participants].map { |p| mkterm(:email,:any, (Redwood::Person === p) ? p.email : p) }
      pos_terms << Q.new(Q::OP_OR, participant_terms)
    end

    neg_terms.concat(neglabels.map { |l| mkterm(:label,l) })

    pos_query = Q.new(Q::OP_AND, pos_terms)
    neg_query = Q.new(Q::OP_OR, neg_terms)

    if neg_query.empty?
      pos_query
    else
      Q.new(Q::OP_AND_NOT, [pos_query, neg_query])
    end
  end

  # Creates or replaces the Xapian document for +m+: boolean terms, full-text
  # postings, the msgid/thread/date values, and +entry+ marshalled into the
  # document data. Must be called with the index monitor held. Returns early
  # (without writing) if no docid can be assigned to a new message.
  def index_message m, entry, opts
    terms = []
    text = []

    subject_text = m.indexable_subject
    body_text = m.indexable_body

    # Person names are indexed with several prefixes
    person_termer = lambda do |d|
      lambda do |person|
        ["#{d}_name", "name", "body"].each do |x|
          text << [person.name, PREFIX[x]]
        end if person.name
        [d, :any].each { |x| terms << mkterm(:email, x, person.email) }
      end
    end

    person_termer[:from][m.from] if m.from
    (m.to+m.cc+m.bcc).each(&(person_termer[:to]))

    terms << mkterm(:date,m.date) if m.date
    m.labels.each { |t| terms << mkterm(:label,t) }
    terms << mkterm(:type, 'mail')
    terms << mkterm(:msgid, m.id)
    terms << mkterm(:source_id, m.source.id)
    m.attachments.each do |a|
      a =~ /\.(\w+)$/ or next
      t = mkterm(:attachment_extension, $1)
      terms << t
    end

    ## Thread membership
    children = term_docids(mkterm(:ref, m.id)).map { |docid| @xapian.document docid }
    parent_ids = m.refs + m.replytos
    parents = parent_ids.map { |id| find_doc id }.compact
    thread_members = SavingHash.new { [] }
    (children + parents).each do |doc2|
      thread_ids = doc2.value(THREAD_VALUENO).split ','
      thread_ids.each { |thread_id| thread_members[thread_id] << doc2 }
    end

    # a message with no indexed relatives starts its own thread, keyed by its
    # own id; otherwise it joins every thread its relatives belong to
    thread_ids = thread_members.empty? ? [m.id] : thread_members.keys

    thread_ids.each { |thread_id| terms << mkterm(:thread, thread_id) }
    parent_ids.each do |ref|
      terms << mkterm(:ref, ref)
    end

    # Full text search content
    text << [subject_text, PREFIX['subject']]
    text << [subject_text, PREFIX['body']]
    text << [body_text, PREFIX['body']]
    m.attachments.each { |a| text << [a, PREFIX['attachment']] }

    truncated_date = if m.date < MIN_DATE
      debug "warning: adjusting too-low date #{m.date} for indexing"
      MIN_DATE
    elsif m.date > MAX_DATE
      debug "warning: adjusting too-high date #{m.date} for indexing"
      MAX_DATE
    else
      m.date
    end

    # Date value for range queries
    date_value = begin
      Xapian.sortable_serialise truncated_date.to_i
    rescue TypeError
      Xapian.sortable_serialise 0
    end

    docid = nil
    unless doc = find_doc(m.id)
      doc = Xapian::Document.new
      if not docid = assign_docid(m, truncated_date)
        # Could be triggered by spam
        # logged via debug, like the other warnings in this method
        debug "warning: docid underflow, dropping #{m.id.inspect}"
        return
      end
    else
      doc.clear_terms
      doc.clear_values
      docid = doc.docid
    end

    @term_generator.document = doc
    # block parameters deliberately do not reuse the name +text+
    text.each { |content, prefix| @term_generator.index_text content, 1, prefix }
    terms.each { |term| doc.add_term term if term.length <= MAX_TERM_LENGTH }
    doc.add_value MSGID_VALUENO, m.id
    doc.add_value THREAD_VALUENO, (thread_ids * ',')
    doc.add_value DATE_VALUENO, date_value
    doc.data = Marshal.dump entry

    @xapian.replace_document docid, doc
  end

  # Construct a Xapian term: the prefix for +type+ followed by the value.
  #   :label, :type, :source_id, :attachment_extension
  #       args[0], stringified and downcased
  #   :date
  #       args[0] (a Time) as a UTC YYYYMMDDHHMMSS stamp
  #   :email
  #       args[0] selects the prefix (:from, :to or :any); args[1] is the
  #       address, stringified and downcased
  #   :msgid, :ref, :thread
  #       args[0] truncated to MAX_TERM_LENGTH-1 characters, case preserved
  # Raises on an unknown +type+ or email selector.
  def mkterm type, *args
    case type
    when :label
      PREFIX['label'] + args[0].to_s.downcase
    when :type
      PREFIX['type'] + args[0].to_s.downcase
    when :date
      PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
    when :email
      case args[0]
      when :from then PREFIX['from_email']
      when :to then PREFIX['to_email']
      when :any then PREFIX['email']
      else raise "Invalid email term type #{args[0]}"
      end + args[1].to_s.downcase
    when :source_id
      PREFIX['source_id'] + args[0].to_s.downcase
    when :attachment_extension
      PREFIX['attachment_extension'] + args[0].to_s.downcase
    when :msgid, :ref, :thread
      PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
    else
      raise "Invalid term type #{type}"
    end
  end

end

end