sup 0.10.2 → 0.11

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sup might be problematic. Click here for more details.

@@ -0,0 +1,42 @@
1
+ require 'thread'
2
+
3
+ module Redwood
4
+
5
+ class IdleManager
6
+ include Singleton
7
+
8
+ IDLE_THRESHOLD = 60
9
+
10
+ def initialize
11
+ @no_activity_since = Time.now
12
+ @idle = false
13
+ @thread = nil
14
+ end
15
+
16
+ def ping
17
+ if @idle
18
+ UpdateManager.relay self, :unidle, Time.at(@no_activity_since)
19
+ @idle = false
20
+ end
21
+ @no_activity_since = Time.now
22
+ end
23
+
24
+ def start
25
+ @thread = Redwood::reporting_thread("checking for idleness") do
26
+ while true
27
+ sleep 1
28
+ if !@idle and Time.now.to_i - @no_activity_since.to_i >= IDLE_THRESHOLD
29
+ UpdateManager.relay self, :idle, Time.at(@no_activity_since)
30
+ @idle = true
31
+ end
32
+ end
33
+ end
34
+ end
35
+
36
+ def stop
37
+ @thread.kill if @thread
38
+ @thread = nil
39
+ end
40
+ end
41
+
42
+ end
@@ -1,5 +1,7 @@
1
- ## Index interface, subclassed by Ferret indexer.
1
+ ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
2
2
 
3
+ require 'xapian'
4
+ require 'set'
3
5
  require 'fileutils'
4
6
 
5
7
  begin
@@ -12,9 +14,28 @@ end
12
14
 
13
15
  module Redwood
14
16
 
15
- class BaseIndex
17
+ # This index implementation uses Xapian for searching and storage. It
18
+ # tends to be slightly faster than Ferret for indexing and significantly faster
19
+ # for searching due to precomputing thread membership.
20
+ class Index
16
21
  include InteractiveLock
17
22
 
23
+ STEM_LANGUAGE = "english"
24
+ INDEX_VERSION = '2'
25
+
26
+ ## dates are converted to integers for xapian, and are used for document ids,
27
+ ## so we must ensure they're reasonably valid. this typically only affects
28
+ ## spam.
29
+ MIN_DATE = Time.at 0
30
+ MAX_DATE = Time.at(2**31-1)
31
+
32
+ HookManager.register "custom-search", <<EOS
33
+ Executes before a string search is applied to the index,
34
+ returning a new search string.
35
+ Variables:
36
+ subs: The string being searched.
37
+ EOS
38
+
18
39
  class LockError < StandardError
19
40
  def initialize h
20
41
  @h = h
@@ -23,8 +44,6 @@ class BaseIndex
23
44
  def method_missing m; @h[m.to_s] end
24
45
  end
25
46
 
26
- def is_a_deprecated_ferret_index?; false end
27
-
28
47
  include Singleton
29
48
 
30
49
  def initialize dir=BASE_DIR
@@ -32,6 +51,7 @@ class BaseIndex
32
51
  @lock = Lockfile.new lockfile, :retries => 0, :max_age => nil
33
52
  @sync_worker = nil
34
53
  @sync_queue = Queue.new
54
+ @index_mutex = Monitor.new
35
55
  end
36
56
 
37
57
  def lockfile; File.join @dir, "lock" end
@@ -79,25 +99,43 @@ class BaseIndex
79
99
  end
80
100
 
81
101
  def load_index
82
- unimplemented
102
+ path = File.join(@dir, 'xapian')
103
+ if File.exists? path
104
+ @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_OPEN)
105
+ db_version = @xapian.get_metadata 'version'
106
+ db_version = '0' if db_version.empty?
107
+ if db_version == '1'
108
+ info "Upgrading index format 1 to 2"
109
+ @xapian.set_metadata 'version', INDEX_VERSION
110
+ elsif db_version != INDEX_VERSION
111
+ fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please downgrade to your previous version and dump your labels before upgrading to this version (then run sup-sync --restore)."
112
+ end
113
+ else
114
+ @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
115
+ @xapian.set_metadata 'version', INDEX_VERSION
116
+ end
117
+ @enquire = Xapian::Enquire.new @xapian
118
+ @enquire.weighting_scheme = Xapian::BoolWeight.new
119
+ @enquire.docid_order = Xapian::Enquire::ASCENDING
83
120
  end
84
121
 
85
- def add_message m; unimplemented end
86
- def update_message m; unimplemented end
87
- def update_message_state m; unimplemented end
122
+ def add_message m; sync_message m, true end
123
+ def update_message m; sync_message m, true end
124
+ def update_message_state m; sync_message m, false end
88
125
 
89
- def save_index fn
90
- unimplemented
126
+ def save_index
127
+ info "Flushing Xapian updates to disk. This may take a while..."
128
+ @xapian.flush
91
129
  end
92
130
 
93
131
  def contains_id? id
94
- unimplemented
132
+ synchronize { find_docid(id) && true }
95
133
  end
96
134
 
97
135
  def contains? m; contains_id? m.id end
98
136
 
99
137
  def size
100
- unimplemented
138
+ synchronize { @xapian.doccount }
101
139
  end
102
140
 
103
141
  def empty?; size == 0 end
@@ -107,12 +145,14 @@ class BaseIndex
107
145
  ## You should probably not call this on a block that doesn't break
108
146
  ## rather quickly because the results can be very large.
109
147
  def each_id_by_date query={}
110
- unimplemented
148
+ each_id(query) { |id| yield id, lambda { build_message id } }
111
149
  end
112
150
 
113
151
  ## Return the number of matches for query in the index
114
152
  def num_results_for query={}
115
- unimplemented
153
+ xapian_query = build_xapian_query query
154
+ matchset = run_query xapian_query, 0, 0, 100
155
+ matchset.matches_estimated
116
156
  end
117
157
 
118
158
  ## yield all messages in the thread containing 'm' by repeatedly
@@ -124,28 +164,82 @@ class BaseIndex
124
164
  ## true, stops loading any thread if a message with a :killed flag
125
165
  ## is found.
126
166
  def each_message_in_thread_for m, opts={}
127
- unimplemented
167
+ # TODO thread by subject
168
+ return unless doc = find_doc(m.id)
169
+ queue = doc.value(THREAD_VALUENO).split(',')
170
+ msgids = [m.id]
171
+ seen_threads = Set.new
172
+ seen_messages = Set.new [m.id]
173
+ while not queue.empty?
174
+ thread_id = queue.pop
175
+ next if seen_threads.member? thread_id
176
+ return false if opts[:skip_killed] && thread_killed?(thread_id)
177
+ seen_threads << thread_id
178
+ docs = term_docids(mkterm(:thread, thread_id)).map { |x| @xapian.document x }
179
+ docs.each do |doc|
180
+ msgid = doc.value MSGID_VALUENO
181
+ next if seen_messages.member? msgid
182
+ msgids << msgid
183
+ seen_messages << msgid
184
+ queue.concat doc.value(THREAD_VALUENO).split(',')
185
+ end
186
+ end
187
+ msgids.each { |id| yield id, lambda { build_message id } }
188
+ true
128
189
  end
129
190
 
130
191
  ## Load message with the given message-id from the index
131
192
  def build_message id
132
- unimplemented
193
+ entry = synchronize { get_entry id }
194
+ return unless entry
195
+
196
+ source = SourceManager[entry[:source_id]]
197
+ raise "invalid source #{entry[:source_id]}" unless source
198
+
199
+ m = Message.new :source => source, :source_info => entry[:source_info],
200
+ :labels => entry[:labels], :snippet => entry[:snippet]
201
+
202
+ mk_person = lambda { |x| Person.new(*x.reverse!) }
203
+ entry[:from] = mk_person[entry[:from]]
204
+ entry[:to].map!(&mk_person)
205
+ entry[:cc].map!(&mk_person)
206
+ entry[:bcc].map!(&mk_person)
207
+
208
+ m.load_from_index! entry
209
+ m
133
210
  end
134
211
 
135
212
  ## Delete message with the given message-id from the index
136
213
  def delete id
137
- unimplemented
214
+ synchronize { @xapian.delete_document mkterm(:msgid, id) }
138
215
  end
139
216
 
140
217
  ## Given an array of email addresses, return an array of Person objects that
141
218
  ## have sent mail to or received mail from any of the given addresses.
142
- def load_contacts email_addresses, h={}
143
- unimplemented
219
+ def load_contacts email_addresses, opts={}
220
+ contacts = Set.new
221
+ num = opts[:num] || 20
222
+ each_id_by_date :participants => email_addresses do |id,b|
223
+ break if contacts.size >= num
224
+ m = b.call
225
+ ([m.from]+m.to+m.cc+m.bcc).compact.each { |p| contacts << [p.name, p.email] }
226
+ end
227
+ contacts.to_a.compact.map { |n,e| Person.new n, e }[0...num]
144
228
  end
145
229
 
146
230
  ## Yield each message-id matching query
231
+ EACH_ID_PAGE = 100
147
232
  def each_id query={}
148
- unimplemented
233
+ offset = 0
234
+ page = EACH_ID_PAGE
235
+
236
+ xapian_query = build_xapian_query query
237
+ while true
238
+ ids = run_query_ids xapian_query, offset, (offset+page)
239
+ ids.each { |id| yield id }
240
+ break if ids.size < page
241
+ offset += page
242
+ end
149
243
  end
150
244
 
151
245
  ## Yield each message matching query
@@ -155,15 +249,15 @@ class BaseIndex
155
249
  end
156
250
  end
157
251
 
158
- ## Implementation-specific optimization step
252
+ ## xapian-compact takes too long, so this is a no-op
253
+ ## until we think of something better
159
254
  def optimize
160
- unimplemented
161
255
  end
162
256
 
163
257
  ## Return the id source of the source the message with the given message-id
164
258
  ## was synced from
165
259
  def source_for_id id
166
- unimplemented
260
+ synchronize { get_entry(id)[:source_id] }
167
261
  end
168
262
 
169
263
  class ParseError < StandardError; end
@@ -174,7 +268,130 @@ class BaseIndex
174
268
  ##
175
269
  ## raises a ParseError if something went wrong.
176
270
  def parse_query s
177
- unimplemented
271
+ query = {}
272
+
273
+ subs = HookManager.run("custom-search", :subs => s) || s
274
+ begin
275
+ subs = SearchManager.expand subs
276
+ rescue SearchManager::ExpansionError => e
277
+ raise ParseError, e.message
278
+ end
279
+ subs = subs.gsub(/\b(to|from):(\S+)\b/) do
280
+ field, value = $1, $2
281
+ email_field, name_field = %w(email name).map { |x| "#{field}_#{x}" }
282
+ if(p = ContactManager.contact_for(value))
283
+ "#{email_field}:#{p.email}"
284
+ elsif value == "me"
285
+ '(' + AccountManager.user_emails.map { |e| "#{email_field}:#{e}" }.join(' OR ') + ')'
286
+ else
287
+ "(#{email_field}:#{value} OR #{name_field}:#{value})"
288
+ end
289
+ end
290
+
291
+ ## if we see a label:deleted or a label:spam term anywhere in the query
292
+ ## string, we set the extra load_spam or load_deleted options to true.
293
+ ## bizarre? well, because the query allows arbitrary parenthesized boolean
294
+ ## expressions, without fully parsing the query, we can't tell whether
295
+ ## the user is explicitly directing us to search spam messages or not.
296
+ ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
297
+ ## search spam messages or not?
298
+ ##
299
+ ## so, we rely on the fact that turning these extra options ON turns OFF
300
+ ## the adding of "-label:deleted" or "-label:spam" terms at the very
301
+ ## final stage of query processing. if the user wants to search spam
302
+ ## messages, not adding that is the right thing; if he doesn't want to
303
+ ## search spam messages, then not adding it won't have any effect.
304
+ query[:load_spam] = true if subs =~ /\blabel:spam\b/
305
+ query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
306
+
307
+ ## gmail style "is" operator
308
+ subs = subs.gsub(/\b(is|has):(\S+)\b/) do
309
+ field, label = $1, $2
310
+ case label
311
+ when "read"
312
+ "-label:unread"
313
+ when "spam"
314
+ query[:load_spam] = true
315
+ "label:spam"
316
+ when "deleted"
317
+ query[:load_deleted] = true
318
+ "label:deleted"
319
+ else
320
+ "label:#{$2}"
321
+ end
322
+ end
323
+
324
+ ## gmail style attachments "filename" and "filetype" searches
325
+ subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
326
+ field, name = $1, ($3 || $4)
327
+ case field
328
+ when "filename"
329
+ debug "filename: translated #{field}:#{name} to attachment:\"#{name.downcase}\""
330
+ "attachment:\"#{name.downcase}\""
331
+ when "filetype"
332
+ debug "filetype: translated #{field}:#{name} to attachment_extension:#{name.downcase}"
333
+ "attachment_extension:#{name.downcase}"
334
+ end
335
+ end
336
+
337
+ if $have_chronic
338
+ lastdate = 2<<32 - 1
339
+ firstdate = 0
340
+ subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do
341
+ field, datestr = $1, ($3 || $4)
342
+ realdate = Chronic.parse datestr, :guess => false, :context => :past
343
+ if realdate
344
+ case field
345
+ when "after"
346
+ debug "chronic: translated #{field}:#{datestr} to #{realdate.end}"
347
+ "date:#{realdate.end.to_i}..#{lastdate}"
348
+ when "before"
349
+ debug "chronic: translated #{field}:#{datestr} to #{realdate.begin}"
350
+ "date:#{firstdate}..#{realdate.end.to_i}"
351
+ else
352
+ debug "chronic: translated #{field}:#{datestr} to #{realdate}"
353
+ "date:#{realdate.begin.to_i}..#{realdate.end.to_i}"
354
+ end
355
+ else
356
+ raise ParseError, "can't understand date #{datestr.inspect}"
357
+ end
358
+ end
359
+ end
360
+
361
+ ## limit:42 restrict the search to 42 results
362
+ subs = subs.gsub(/\blimit:(\S+)\b/) do
363
+ lim = $1
364
+ if lim =~ /^\d+$/
365
+ query[:limit] = lim.to_i
366
+ ''
367
+ else
368
+ raise ParseError, "non-numeric limit #{lim.inspect}"
369
+ end
370
+ end
371
+
372
+ debug "translated query: #{subs.inspect}"
373
+
374
+ qp = Xapian::QueryParser.new
375
+ qp.database = @xapian
376
+ qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
377
+ qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
378
+ qp.default_op = Xapian::Query::OP_AND
379
+ qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
380
+ NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } }
381
+ BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } }
382
+
383
+ begin
384
+ xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD)
385
+ rescue RuntimeError => e
386
+ raise ParseError, "xapian query parser error: #{e}"
387
+ end
388
+
389
+ debug "parsed xapian query: #{xapian_query.description}"
390
+
391
+ raise ParseError if xapian_query.nil? or xapian_query.empty?
392
+ query[:qobj] = xapian_query
393
+ query[:text] = s
394
+ query
178
395
  end
179
396
 
180
397
  def save_thread t
@@ -207,34 +424,332 @@ class BaseIndex
207
424
  sleep 0.03
208
425
  end
209
426
  end
210
- end
211
427
 
212
- ## just to make the backtraces even more insane, here we engage in yet more
213
- ## method_missing metaprogramming so that Index.init(index_type_name) will
214
- ## magically make Index act like the correct Index class.
215
- class Index
216
- def self.init type=nil
217
- ## determine the index type from the many possible ways of setting it
218
- type = (type == "auto" ? nil : type) ||
219
- ENV['SUP_INDEX'] ||
220
- $config[:index] ||
221
- (File.exist?(File.join(BASE_DIR, "xapian")) && "xapian") || ## PRIORITIZE THIS
222
- (File.exist?(File.join(BASE_DIR, "ferret")) && "ferret") || ## deprioritize this
223
- DEFAULT_NEW_INDEX_TYPE
428
+ private
429
+
430
+ # Stemmed
431
+ NORMAL_PREFIX = {
432
+ 'subject' => 'S',
433
+ 'body' => 'B',
434
+ 'from_name' => 'FN',
435
+ 'to_name' => 'TN',
436
+ 'name' => %w(FN TN),
437
+ 'attachment' => 'A',
438
+ 'email_text' => 'E',
439
+ '' => %w(S B FN TN A E),
440
+ }
441
+
442
+ # Unstemmed
443
+ BOOLEAN_PREFIX = {
444
+ 'type' => 'K',
445
+ 'from_email' => 'FE',
446
+ 'to_email' => 'TE',
447
+ 'email' => %w(FE TE),
448
+ 'date' => 'D',
449
+ 'label' => 'L',
450
+ 'source_id' => 'I',
451
+ 'attachment_extension' => 'O',
452
+ 'msgid' => 'Q',
453
+ 'id' => 'Q',
454
+ 'thread' => 'H',
455
+ 'ref' => 'R',
456
+ }
457
+
458
+ PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
459
+
460
+ MSGID_VALUENO = 0
461
+ THREAD_VALUENO = 1
462
+ DATE_VALUENO = 2
463
+
464
+ MAX_TERM_LENGTH = 245
465
+
466
+ # Xapian can very efficiently sort in ascending docid order. Sup always wants
467
+ # to sort by descending date, so this method maps between them. In order to
468
+ # handle multiple messages per second, we use a logistic curve centered
469
+ # around MIDDLE_DATE so that the slope (docid/s) is greatest in this time
470
+ # period. A docid collision is not an error - the code will pick the next
471
+ # smallest unused one.
472
+ DOCID_SCALE = 2.0**32
473
+ TIME_SCALE = 2.0**27
474
+ MIDDLE_DATE = Time.gm(2011)
475
+ def assign_docid m, truncated_date
476
+ t = (truncated_date.to_i - MIDDLE_DATE.to_i).to_f
477
+ docid = (DOCID_SCALE - DOCID_SCALE/(Math::E**(-(t/TIME_SCALE)) + 1)).to_i
478
+ while docid > 0 and docid_exists? docid
479
+ docid -= 1
480
+ end
481
+ docid > 0 ? docid : nil
482
+ end
483
+
484
+ # XXX is there a better way?
485
+ def docid_exists? docid
224
486
  begin
225
- require "sup/#{type}_index"
226
- @klass = Redwood.const_get "#{type.capitalize}Index"
227
- @obj = @klass.init
228
- rescue LoadError, NameError => e
229
- raise "unknown index type #{type.inspect}: #{e.message}"
487
+ @xapian.doclength docid
488
+ true
489
+ rescue RuntimeError #Xapian::DocNotFoundError
490
+ raise unless $!.message =~ /DocNotFoundError/
491
+ false
492
+ end
493
+ end
494
+
495
+ def term_docids term
496
+ @xapian.postlist(term).map { |x| x.docid }
497
+ end
498
+
499
+ def find_docid id
500
+ docids = term_docids(mkterm(:msgid,id))
501
+ fail unless docids.size <= 1
502
+ docids.first
503
+ end
504
+
505
+ def find_doc id
506
+ return unless docid = find_docid(id)
507
+ @xapian.document docid
508
+ end
509
+
510
+ def get_id docid
511
+ return unless doc = @xapian.document(docid)
512
+ doc.value MSGID_VALUENO
513
+ end
514
+
515
+ def get_entry id
516
+ return unless doc = find_doc(id)
517
+ Marshal.load doc.data
518
+ end
519
+
520
+ def thread_killed? thread_id
521
+ not run_query(Q.new(Q::OP_AND, mkterm(:thread, thread_id), mkterm(:label, :Killed)), 0, 1).empty?
522
+ end
523
+
524
+ def synchronize &b
525
+ @index_mutex.synchronize &b
526
+ end
527
+
528
+ def run_query xapian_query, offset, limit, checkatleast=0
529
+ synchronize do
530
+ @enquire.query = xapian_query
531
+ @enquire.mset(offset, limit-offset, checkatleast)
532
+ end
533
+ end
534
+
535
+ def run_query_ids xapian_query, offset, limit
536
+ matchset = run_query xapian_query, offset, limit
537
+ matchset.matches.map { |r| r.document.value MSGID_VALUENO }
538
+ end
539
+
540
+ Q = Xapian::Query
541
+ def build_xapian_query opts
542
+ labels = ([opts[:label]] + (opts[:labels] || [])).compact
543
+ neglabels = [:spam, :deleted, :killed].reject { |l| (labels.include? l) || opts.member?("load_#{l}".intern) }
544
+ pos_terms, neg_terms = [], []
545
+
546
+ pos_terms << mkterm(:type, 'mail')
547
+ pos_terms.concat(labels.map { |l| mkterm(:label,l) })
548
+ pos_terms << opts[:qobj] if opts[:qobj]
549
+ pos_terms << mkterm(:source_id, opts[:source_id]) if opts[:source_id]
550
+
551
+ if opts[:participants]
552
+ participant_terms = opts[:participants].map { |p| [:from,:to].map { |d| mkterm(:email, d, (Redwood::Person === p) ? p.email : p) } }.flatten
553
+ pos_terms << Q.new(Q::OP_OR, participant_terms)
554
+ end
555
+
556
+ neg_terms.concat(neglabels.map { |l| mkterm(:label,l) })
557
+
558
+ pos_query = Q.new(Q::OP_AND, pos_terms)
559
+ neg_query = Q.new(Q::OP_OR, neg_terms)
560
+
561
+ if neg_query.empty?
562
+ pos_query
563
+ else
564
+ Q.new(Q::OP_AND_NOT, [pos_query, neg_query])
565
+ end
566
+ end
567
+
568
+ def sync_message m, overwrite
569
+ doc = synchronize { find_doc(m.id) }
570
+ existed = doc != nil
571
+ doc ||= Xapian::Document.new
572
+ do_index_static = overwrite || !existed
573
+ old_entry = !do_index_static && doc.entry
574
+ snippet = do_index_static ? m.snippet : old_entry[:snippet]
575
+
576
+ entry = {
577
+ :message_id => m.id,
578
+ :source_id => m.source.id,
579
+ :source_info => m.source_info,
580
+ :date => truncate_date(m.date),
581
+ :snippet => snippet,
582
+ :labels => m.labels.to_a,
583
+ :from => [m.from.email, m.from.name],
584
+ :to => m.to.map { |p| [p.email, p.name] },
585
+ :cc => m.cc.map { |p| [p.email, p.name] },
586
+ :bcc => m.bcc.map { |p| [p.email, p.name] },
587
+ :subject => m.subj,
588
+ :refs => m.refs.to_a,
589
+ :replytos => m.replytos.to_a,
590
+ }
591
+
592
+ if do_index_static
593
+ doc.clear_terms
594
+ doc.clear_values
595
+ index_message_static m, doc, entry
596
+ end
597
+
598
+ index_message_threading doc, entry, old_entry
599
+ index_message_labels doc, entry[:labels], (do_index_static ? [] : old_entry[:labels])
600
+ doc.entry = entry
601
+
602
+ synchronize do
603
+ unless docid = existed ? doc.docid : assign_docid(m, truncate_date(m.date))
604
+ # Could be triggered by spam
605
+ warn "docid underflow, dropping #{m.id.inspect}"
606
+ return
607
+ end
608
+ @xapian.replace_document docid, doc
609
+ end
610
+
611
+ m.labels.each { |l| LabelManager << l }
612
+ true
613
+ end
614
+
615
+ ## Index content that can't be changed by the user
616
+ def index_message_static m, doc, entry
617
+ # Person names are indexed with several prefixes
618
+ person_termer = lambda do |d|
619
+ lambda do |p|
620
+ doc.index_text p.name, PREFIX["#{d}_name"] if p.name
621
+ doc.index_text p.email, PREFIX['email_text']
622
+ doc.add_term mkterm(:email, d, p.email)
623
+ end
624
+ end
625
+
626
+ person_termer[:from][m.from] if m.from
627
+ (m.to+m.cc+m.bcc).each(&(person_termer[:to]))
628
+
629
+ # Full text search content
630
+ subject_text = m.indexable_subject
631
+ body_text = m.indexable_body
632
+ doc.index_text subject_text, PREFIX['subject']
633
+ doc.index_text body_text, PREFIX['body']
634
+ m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
635
+
636
+ # Miscellaneous terms
637
+ doc.add_term mkterm(:date, m.date) if m.date
638
+ doc.add_term mkterm(:type, 'mail')
639
+ doc.add_term mkterm(:msgid, m.id)
640
+ doc.add_term mkterm(:source_id, m.source.id)
641
+ m.attachments.each do |a|
642
+ a =~ /\.(\w+)$/ or next
643
+ doc.add_term mkterm(:attachment_extension, $1)
644
+ end
645
+
646
+ # Date value for range queries
647
+ date_value = begin
648
+ Xapian.sortable_serialise m.date.to_i
649
+ rescue TypeError
650
+ Xapian.sortable_serialise 0
651
+ end
652
+
653
+ doc.add_value MSGID_VALUENO, m.id
654
+ doc.add_value DATE_VALUENO, date_value
655
+ end
656
+
657
+ def index_message_labels doc, new_labels, old_labels
658
+ return if new_labels == old_labels
659
+ added = new_labels.to_a - old_labels.to_a
660
+ removed = old_labels.to_a - new_labels.to_a
661
+ added.each { |t| doc.add_term mkterm(:label,t) }
662
+ removed.each { |t| doc.remove_term mkterm(:label,t) }
663
+ end
664
+
665
+ ## Assign a set of thread ids to the document. This is a hybrid of the runtime
666
+ ## search done by the Ferret index and the index-time union done by previous
667
+ ## versions of the Xapian index. We first find the thread ids of all messages
668
+ ## with a reference to or from us. If that set is empty, we use our own
669
+ ## message id. Otherwise, we use all the thread ids we previously found. In
670
+ ## the common case there's only one member in that set, but if we're the
671
+ ## missing link between multiple previously unrelated threads we can have
672
+ ## more. Index#each_message_in_thread_for follows the thread ids when
673
+ ## searching so the user sees a single unified thread.
674
+ def index_message_threading doc, entry, old_entry
675
+ return if old_entry && (entry[:refs] == old_entry[:refs]) && (entry[:replytos] == old_entry[:replytos])
676
+ children = term_docids(mkterm(:ref, entry[:message_id])).map { |docid| @xapian.document docid }
677
+ parent_ids = entry[:refs] + entry[:replytos]
678
+ parents = parent_ids.map { |id| find_doc id }.compact
679
+ thread_members = SavingHash.new { [] }
680
+ (children + parents).each do |doc2|
681
+ thread_ids = doc2.value(THREAD_VALUENO).split ','
682
+ thread_ids.each { |thread_id| thread_members[thread_id] << doc2 }
683
+ end
684
+ thread_ids = thread_members.empty? ? [entry[:message_id]] : thread_members.keys
685
+ thread_ids.each { |thread_id| doc.add_term mkterm(:thread, thread_id) }
686
+ parent_ids.each { |ref| doc.add_term mkterm(:ref, ref) }
687
+ doc.add_value THREAD_VALUENO, (thread_ids * ',')
688
+ end
689
+
690
+ def truncate_date date
691
+ if date < MIN_DATE
692
+ debug "warning: adjusting too-low date #{date} for indexing"
693
+ MIN_DATE
694
+ elsif date > MAX_DATE
695
+ debug "warning: adjusting too-high date #{date} for indexing"
696
+ MAX_DATE
697
+ else
698
+ date
230
699
  end
231
- debug "using #{type} index"
232
- @obj
233
700
  end
234
701
 
235
- def self.instance; @obj end
236
- def self.method_missing m, *a, &b; @obj.send(m, *a, &b) end
237
- def self.const_missing x; @obj.class.const_get(x) end
702
+ # Construct a Xapian term
703
+ def mkterm type, *args
704
+ case type
705
+ when :label
706
+ PREFIX['label'] + args[0].to_s.downcase
707
+ when :type
708
+ PREFIX['type'] + args[0].to_s.downcase
709
+ when :date
710
+ PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
711
+ when :email
712
+ case args[0]
713
+ when :from then PREFIX['from_email']
714
+ when :to then PREFIX['to_email']
715
+ else raise "Invalid email term type #{args[0]}"
716
+ end + args[1].to_s.downcase
717
+ when :source_id
718
+ PREFIX['source_id'] + args[0].to_s.downcase
719
+ when :attachment_extension
720
+ PREFIX['attachment_extension'] + args[0].to_s.downcase
721
+ when :msgid, :ref, :thread
722
+ PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
723
+ else
724
+ raise "Invalid term type #{type}"
725
+ end
726
+ end
238
727
  end
239
728
 
240
729
  end
730
+
731
+ class Xapian::Document
732
+ def entry
733
+ Marshal.load data
734
+ end
735
+
736
+ def entry=(x)
737
+ self.data = Marshal.dump x
738
+ end
739
+
740
+ def index_text text, prefix, weight=1
741
+ term_generator = Xapian::TermGenerator.new
742
+ term_generator.stemmer = Xapian::Stem.new(Redwood::Index::STEM_LANGUAGE)
743
+ term_generator.document = self
744
+ term_generator.index_text text, weight, prefix
745
+ end
746
+
747
+ alias old_add_term add_term
748
+ def add_term term
749
+ if term.length <= Redwood::Index::MAX_TERM_LENGTH
750
+ old_add_term term, 0
751
+ else
752
+ warn "dropping excessively long term #{term}"
753
+ end
754
+ end
755
+ end