sup 0.9.1 → 0.10

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sup might be problematic. Click here for more details.

Files changed (46) hide show
  1. data/CONTRIBUTORS +10 -6
  2. data/History.txt +11 -0
  3. data/ReleaseNotes +10 -0
  4. data/bin/sup +55 -19
  5. data/bin/sup-add +18 -8
  6. data/bin/sup-config +2 -2
  7. data/bin/sup-convert-ferret-index +84 -0
  8. data/bin/sup-dump +4 -3
  9. data/bin/sup-sync +4 -3
  10. data/bin/sup-sync-back +3 -2
  11. data/bin/sup-tweak-labels +3 -3
  12. data/lib/sup.rb +35 -4
  13. data/lib/sup/buffer.rb +12 -6
  14. data/lib/sup/colormap.rb +1 -0
  15. data/lib/sup/crypto.rb +76 -55
  16. data/lib/sup/ferret_index.rb +6 -1
  17. data/lib/sup/index.rb +62 -8
  18. data/lib/sup/logger.rb +2 -1
  19. data/lib/sup/maildir.rb +4 -2
  20. data/lib/sup/mbox/loader.rb +4 -3
  21. data/lib/sup/message-chunks.rb +9 -7
  22. data/lib/sup/message.rb +29 -27
  23. data/lib/sup/mode.rb +11 -4
  24. data/lib/sup/modes/buffer-list-mode.rb +5 -0
  25. data/lib/sup/modes/console-mode.rb +4 -0
  26. data/lib/sup/modes/edit-message-mode.rb +4 -2
  27. data/lib/sup/modes/file-browser-mode.rb +1 -1
  28. data/lib/sup/modes/inbox-mode.rb +18 -1
  29. data/lib/sup/modes/label-list-mode.rb +44 -3
  30. data/lib/sup/modes/text-mode.rb +1 -1
  31. data/lib/sup/modes/thread-index-mode.rb +63 -52
  32. data/lib/sup/modes/thread-view-mode.rb +68 -7
  33. data/lib/sup/poll.rb +20 -5
  34. data/lib/sup/source.rb +1 -0
  35. data/lib/sup/thread.rb +1 -1
  36. data/lib/sup/util.rb +49 -11
  37. data/lib/sup/xapian_index.rb +151 -112
  38. metadata +4 -10
  39. data/lib/sup/hook.rb.BACKUP.8625.rb +0 -158
  40. data/lib/sup/hook.rb.BACKUP.8681.rb +0 -158
  41. data/lib/sup/hook.rb.BASE.8625.rb +0 -155
  42. data/lib/sup/hook.rb.BASE.8681.rb +0 -155
  43. data/lib/sup/hook.rb.LOCAL.8625.rb +0 -142
  44. data/lib/sup/hook.rb.LOCAL.8681.rb +0 -142
  45. data/lib/sup/hook.rb.REMOTE.8625.rb +0 -145
  46. data/lib/sup/hook.rb.REMOTE.8681.rb +0 -145
@@ -28,19 +28,18 @@ num_inbox_total_unread: the total number of unread messages in the inbox
28
28
  only those messages appearing in the inbox
29
29
  EOS
30
30
 
31
- DELAY = 300
31
+ DELAY = $config[:poll_interval] || 300
32
32
 
33
33
  def initialize
34
34
  @mutex = Mutex.new
35
35
  @thread = nil
36
36
  @last_poll = nil
37
37
  @polling = false
38
+ @poll_sources = nil
38
39
  @mode = nil
39
40
  end
40
41
 
41
- def poll
42
- return if @polling
43
- @polling = true
42
+ def poll_with_sources
44
43
  @mode ||= PollMode.new
45
44
  HookManager.run "before-poll"
46
45
 
@@ -54,6 +53,22 @@ EOS
54
53
 
55
54
  HookManager.run "after-poll", :num => num, :num_inbox => numi, :from_and_subj => from_and_subj, :from_and_subj_inbox => from_and_subj_inbox, :num_inbox_total_unread => lambda { Index.num_results_for :labels => [:inbox, :unread] }
56
55
 
56
+ end
57
+
58
+ def poll
59
+ return if @polling
60
+ @polling = true
61
+ @poll_sources = SourceManager.usual_sources
62
+ num, numi = poll_with_sources
63
+ @polling = false
64
+ [num, numi]
65
+ end
66
+
67
+ def poll_unusual
68
+ return if @polling
69
+ @polling = true
70
+ @poll_sources = SourceManager.unusual_sources
71
+ num, numi = poll_with_sources
57
72
  @polling = false
58
73
  [num, numi]
59
74
  end
@@ -79,7 +94,7 @@ EOS
79
94
  loaded_labels = Set.new
80
95
 
81
96
  @mutex.synchronize do
82
- SourceManager.usual_sources.each do |source|
97
+ @poll_sources.each do |source|
83
98
  # yield "source #{source} is done? #{source.done?} (cur_offset #{source.cur_offset} >= #{source.end_offset})"
84
99
  begin
85
100
  yield "Loading from #{source}... " unless source.done? || (source.respond_to?(:has_errors?) && source.has_errors?)
@@ -207,6 +207,7 @@ class SourceManager
207
207
 
208
208
  def source_for uri; sources.find { |s| s.is_source_for? uri }; end
209
209
  def usual_sources; sources.find_all { |s| s.usual? }; end
210
+ def unusual_sources; sources.find_all { |s| !s.usual? }; end
210
211
 
211
212
  def load_sources fn=Redwood::SOURCE_FN
212
213
  source_array = (Redwood::load_yaml_obj(fn) || []).map { |o| Recoverable.new o }
@@ -112,7 +112,7 @@ class Thread
112
112
 
113
113
  def set_labels l; each { |m, *o| m && m.labels = l }; end
114
114
  def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end
115
- def save_state index; each { |m, *o| m && m.save_state(index) }; end
115
+ def each_dirty_message; each { |m, *o| m && m.dirty? && yield(m) }; end
116
116
 
117
117
  def direct_participants
118
118
  map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq
@@ -177,7 +177,7 @@ class String
177
177
  ## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
178
178
  ## the utf8 regex and count those. otherwise, use the byte length.
179
179
  def display_length
180
- if $encoding == "UTF-8" || $encoding == "utf8"
180
+ if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
181
181
  scan(/./u).size
182
182
  else
183
183
  size
@@ -290,12 +290,45 @@ class String
290
290
  end
291
291
  end
292
292
 
293
+ unless method_defined? :each
294
+ def each &b
295
+ each_line &b
296
+ end
297
+ end
298
+
293
299
  ## takes a list of words, and returns an array of symbols. typically used in
294
300
  ## Sup for translating Ferret's representation of a list of labels (a string)
295
301
  ## to an array of label symbols.
296
302
  ##
297
303
  ## split_on will be passed to String#split, so you can leave this nil for space.
298
304
  def to_set_of_symbols split_on=nil; Set.new split(split_on).map { |x| x.strip.intern } end
305
+
306
+ class CheckError < ArgumentError; end
307
+ def check
308
+ begin
309
+ fail "unexpected encoding #{encoding}" if respond_to?(:encoding) && !(encoding == Encoding::UTF_8 || encoding == Encoding::ASCII)
310
+ fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
311
+ rescue
312
+ raise CheckError.new($!.message)
313
+ end
314
+ end
315
+
316
+ def ascii
317
+ out = ""
318
+ each_byte do |b|
319
+ if (b & 128) != 0
320
+ out << "\\x#{b.to_s 16}"
321
+ else
322
+ out << b.chr
323
+ end
324
+ end
325
+ out.force_encoding Encoding::UTF_8 if out.respond_to? :force_encoding
326
+ out
327
+ end
328
+
329
+ def transcode src_encoding=$encoding
330
+ Iconv.easy_decode $encoding, src_encoding, self
331
+ end
299
332
  end
300
333
 
301
334
  class Numeric
@@ -486,9 +519,9 @@ class Time
486
519
  strftime "%b %e"
487
520
  else
488
521
  if is_the_same_day? from
489
- strftime("%l:%M%P")
522
+ strftime("%l:%M%p").downcase # emulate %P (missing on ruby 1.8 darwin)
490
523
  elsif is_the_day_before? from
491
- "Yest." + nearest_hour.strftime("%l%P")
524
+ "Yest." + nearest_hour.strftime("%l%p").downcase # emulate %P
492
525
  else
493
526
  strftime "%b %e"
494
527
  end
@@ -641,21 +674,26 @@ class FinishLine
641
674
  end
642
675
 
643
676
  class Iconv
644
- def self.easy_decode target, charset, text
645
- return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
646
- charset = case charset
677
+ def self.easy_decode target, orig_charset, text
678
+ if text.respond_to? :force_encoding
679
+ text = text.dup
680
+ text.force_encoding Encoding::BINARY
681
+ end
682
+ charset = case orig_charset
647
683
  when /UTF[-_ ]?8/i then "utf-8"
648
684
  when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
649
685
  when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
650
686
  when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
651
- else charset
687
+ when /^euc$/i then 'EUC-JP' # XXX try them all?
688
+ when /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i then 'ASCII'
689
+ else orig_charset
652
690
  end
653
691
 
654
692
  begin
655
- Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
656
- rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e
657
- warn "couldn't transcode text from #{charset} to #{target} (\"#{text[0 ... 20]}\"...) (got #{e.message}); using original as is"
658
- text
693
+ returning(Iconv.iconv(target, charset, text + " ").join[0 .. -2]) { |str| str.check }
694
+ rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence, String::CheckError
695
+ debug "couldn't transcode text from #{orig_charset} (#{charset}) to #{target}) (#{text[0 ... 20].inspect}...) (got #{$!.message} (#{$!.class}))"
696
+ text.ascii
659
697
  end
660
698
  end
661
699
  end
@@ -1,3 +1,5 @@
1
+ ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
2
+
1
3
  require 'xapian'
2
4
  require 'set'
3
5
 
@@ -42,14 +44,14 @@ EOS
42
44
  @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
43
45
  @xapian.set_metadata 'version', INDEX_VERSION
44
46
  end
45
- @term_generator = Xapian::TermGenerator.new()
46
- @term_generator.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
47
47
  @enquire = Xapian::Enquire.new @xapian
48
48
  @enquire.weighting_scheme = Xapian::BoolWeight.new
49
49
  @enquire.docid_order = Xapian::Enquire::ASCENDING
50
50
  end
51
51
 
52
52
  def save_index
53
+ info "Flushing Xapian updates to disk. This may take a while..."
54
+ @xapian.flush
53
55
  end
54
56
 
55
57
  def optimize
@@ -91,41 +93,9 @@ EOS
91
93
  m
92
94
  end
93
95
 
94
- def add_message m; sync_message m end
95
- def update_message m; sync_message m end
96
- def update_message_state m; sync_message m end
97
-
98
- def sync_message m, opts={}
99
- entry = synchronize { get_entry m.id }
100
- snippet = m.snippet
101
- entry ||= {}
102
- labels = m.labels
103
- entry = {} if opts[:force_overwrite]
104
-
105
- d = {
106
- :message_id => m.id,
107
- :source_id => m.source.id,
108
- :source_info => m.source_info,
109
- :date => (entry[:date] || m.date),
110
- :snippet => snippet,
111
- :labels => labels,
112
- :from => (entry[:from] || [m.from.email, m.from.name]),
113
- :to => (entry[:to] || m.to.map { |p| [p.email, p.name] }),
114
- :cc => (entry[:cc] || m.cc.map { |p| [p.email, p.name] }),
115
- :bcc => (entry[:bcc] || m.bcc.map { |p| [p.email, p.name] }),
116
- :subject => m.subj,
117
- :refs => (entry[:refs] || m.refs),
118
- :replytos => (entry[:replytos] || m.replytos),
119
- }
120
-
121
- labels.each { |l| LabelManager << l }
122
-
123
- synchronize do
124
- index_message m, d, opts
125
- end
126
- true
127
- end
128
- private :sync_message
96
+ def add_message m; sync_message m, true end
97
+ def update_message m; sync_message m, true end
98
+ def update_message_state m; sync_message m, false end
129
99
 
130
100
  def num_results_for query={}
131
101
  xapian_query = build_xapian_query query
@@ -153,7 +123,6 @@ EOS
153
123
 
154
124
  def each_message_in_thread_for m, opts={}
155
125
  # TODO thread by subject
156
- # TODO handle killed threads
157
126
  return unless doc = find_doc(m.id)
158
127
  queue = doc.value(THREAD_VALUENO).split(',')
159
128
  msgids = [m.id]
@@ -162,7 +131,7 @@ EOS
162
131
  while not queue.empty?
163
132
  thread_id = queue.pop
164
133
  next if seen_threads.member? thread_id
165
- return false if thread_killed? thread_id
134
+ return false if opts[:skip_killed] && thread_killed?(thread_id)
166
135
  seen_threads << thread_id
167
136
  docs = term_docids(mkterm(:thread, thread_id)).map { |x| @xapian.document x }
168
137
  docs.each do |doc|
@@ -194,14 +163,15 @@ EOS
194
163
 
195
164
  subs = HookManager.run("custom-search", :subs => s) || s
196
165
  subs = subs.gsub(/\b(to|from):(\S+)\b/) do
197
- field, name = $1, $2
198
- if(p = ContactManager.contact_for(name))
199
- [field, p.email]
200
- elsif name == "me"
201
- [field, "(" + AccountManager.user_emails.join("||") + ")"]
166
+ field, value = $1, $2
167
+ email_field, name_field = %w(email name).map { |x| "#{field}_#{x}" }
168
+ if(p = ContactManager.contact_for(value))
169
+ "#{email_field}:#{p.email}"
170
+ elsif value == "me"
171
+ '(' + AccountManager.user_emails.map { |e| "#{email_field}:#{e}" }.join(' OR ') + ')'
202
172
  else
203
- [field, name]
204
- end.join(":")
173
+ "(#{email_field}:#{value} OR #{name_field}:#{value})"
174
+ end
205
175
  end
206
176
 
207
177
  ## if we see a label:deleted or a label:spam term anywhere in the query
@@ -285,6 +255,8 @@ EOS
285
255
  end
286
256
  end
287
257
 
258
+ debug "translated query: #{subs.inspect}"
259
+
288
260
  qp = Xapian::QueryParser.new
289
261
  qp.database = @xapian
290
262
  qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
@@ -295,6 +267,8 @@ EOS
295
267
  BOOLEAN_PREFIX.each { |k,v| qp.add_boolean_prefix k, v }
296
268
  xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD, PREFIX['body'])
297
269
 
270
+ debug "parsed xapian query: #{xapian_query.description}"
271
+
298
272
  raise ParseError if xapian_query.nil? or xapian_query.empty?
299
273
  query[:qobj] = xapian_query
300
274
  query[:text] = s
@@ -438,100 +412,140 @@ EOS
438
412
  end
439
413
  end
440
414
 
441
- def index_message m, entry, opts
442
- terms = []
443
- text = []
415
+ def sync_message m, overwrite
416
+ doc = synchronize { find_doc(m.id) }
417
+ existed = doc != nil
418
+ doc ||= Xapian::Document.new
419
+ do_index_static = overwrite || !existed
420
+ old_entry = !do_index_static && doc.entry
421
+ snippet = do_index_static ? m.snippet : old_entry[:snippet]
444
422
 
445
- subject_text = m.indexable_subject
446
- body_text = m.indexable_body
423
+ entry = {
424
+ :message_id => m.id,
425
+ :source_id => m.source.id,
426
+ :source_info => m.source_info,
427
+ :date => m.date,
428
+ :snippet => snippet,
429
+ :labels => m.labels.to_a,
430
+ :from => [m.from.email, m.from.name],
431
+ :to => m.to.map { |p| [p.email, p.name] },
432
+ :cc => m.cc.map { |p| [p.email, p.name] },
433
+ :bcc => m.bcc.map { |p| [p.email, p.name] },
434
+ :subject => m.subj,
435
+ :refs => m.refs.to_a,
436
+ :replytos => m.replytos.to_a,
437
+ }
438
+
439
+ if do_index_static
440
+ doc.clear_terms
441
+ doc.clear_values
442
+ index_message_static m, doc, entry
443
+ end
444
+
445
+ index_message_threading doc, entry, old_entry
446
+ index_message_labels doc, entry[:labels], (do_index_static ? [] : old_entry[:labels])
447
+ doc.entry = entry
447
448
 
449
+ synchronize do
450
+ unless docid = existed ? doc.docid : assign_docid(m, truncate_date(m.date))
451
+ # Could be triggered by spam
452
+ warn "docid underflow, dropping #{m.id.inspect}"
453
+ return
454
+ end
455
+ @xapian.replace_document docid, doc
456
+ end
457
+
458
+ m.labels.each { |l| LabelManager << l }
459
+ true
460
+ end
461
+
462
+ ## Index content that can't be changed by the user
463
+ def index_message_static m, doc, entry
448
464
  # Person names are indexed with several prefixes
449
465
  person_termer = lambda do |d|
450
466
  lambda do |p|
451
467
  ["#{d}_name", "name", "body"].each do |x|
452
- text << [p.name, PREFIX[x]]
468
+ doc.index_text p.name, PREFIX[x]
453
469
  end if p.name
454
- [d, :any].each { |x| terms << mkterm(:email, x, p.email) }
470
+ [d, :any].each { |x| doc.add_term mkterm(:email, x, p.email) }
455
471
  end
456
472
  end
457
473
 
458
474
  person_termer[:from][m.from] if m.from
459
475
  (m.to+m.cc+m.bcc).each(&(person_termer[:to]))
460
476
 
461
- terms << mkterm(:date,m.date) if m.date
462
- m.labels.each { |t| terms << mkterm(:label,t) }
463
- terms << mkterm(:type, 'mail')
464
- terms << mkterm(:msgid, m.id)
465
- terms << mkterm(:source_id, m.source.id)
477
+ # Full text search content
478
+ subject_text = m.indexable_subject
479
+ body_text = m.indexable_body
480
+ doc.index_text subject_text, PREFIX['subject']
481
+ doc.index_text subject_text, PREFIX['body']
482
+ doc.index_text body_text, PREFIX['body']
483
+ m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
484
+
485
+ # Miscellaneous terms
486
+ doc.add_term mkterm(:date, m.date) if m.date
487
+ doc.add_term mkterm(:type, 'mail')
488
+ doc.add_term mkterm(:msgid, m.id)
489
+ doc.add_term mkterm(:source_id, m.source.id)
466
490
  m.attachments.each do |a|
467
491
  a =~ /\.(\w+)$/ or next
468
- t = mkterm(:attachment_extension, $1)
469
- terms << t
492
+ doc.add_term mkterm(:attachment_extension, $1)
470
493
  end
471
494
 
472
- ## Thread membership
473
- children = term_docids(mkterm(:ref, m.id)).map { |docid| @xapian.document docid }
474
- parent_ids = m.refs + m.replytos
495
+ # Date value for range queries
496
+ date_value = begin
497
+ Xapian.sortable_serialise m.date.to_i
498
+ rescue TypeError
499
+ Xapian.sortable_serialise 0
500
+ end
501
+
502
+ doc.add_value MSGID_VALUENO, m.id
503
+ doc.add_value DATE_VALUENO, date_value
504
+ end
505
+
506
+ def index_message_labels doc, new_labels, old_labels
507
+ return if new_labels == old_labels
508
+ added = new_labels.to_a - old_labels.to_a
509
+ removed = old_labels.to_a - new_labels.to_a
510
+ added.each { |t| doc.add_term mkterm(:label,t) }
511
+ removed.each { |t| doc.remove_term mkterm(:label,t) }
512
+ end
513
+
514
+ ## Assign a set of thread ids to the document. This is a hybrid of the runtime
515
+ ## search done by the Ferret index and the index-time union done by previous
516
+ ## versions of the Xapian index. We first find the thread ids of all messages
517
+ ## with a reference to or from us. If that set is empty, we use our own
518
+ ## message id. Otherwise, we use all the thread ids we previously found. In
519
+ ## the common case there's only one member in that set, but if we're the
520
+ ## missing link between multiple previously unrelated threads we can have
521
+ ## more. XapianIndex#each_message_in_thread_for follows the thread ids when
522
+ ## searching so the user sees a single unified thread.
523
+ def index_message_threading doc, entry, old_entry
524
+ return if old_entry && (entry[:refs] == old_entry[:refs]) && (entry[:replytos] == old_entry[:replytos])
525
+ children = term_docids(mkterm(:ref, entry[:message_id])).map { |docid| @xapian.document docid }
526
+ parent_ids = entry[:refs] + entry[:replytos]
475
527
  parents = parent_ids.map { |id| find_doc id }.compact
476
528
  thread_members = SavingHash.new { [] }
477
529
  (children + parents).each do |doc2|
478
530
  thread_ids = doc2.value(THREAD_VALUENO).split ','
479
531
  thread_ids.each { |thread_id| thread_members[thread_id] << doc2 }
480
532
  end
533
+ thread_ids = thread_members.empty? ? [entry[:message_id]] : thread_members.keys
534
+ thread_ids.each { |thread_id| doc.add_term mkterm(:thread, thread_id) }
535
+ parent_ids.each { |ref| doc.add_term mkterm(:ref, ref) }
536
+ doc.add_value THREAD_VALUENO, (thread_ids * ',')
537
+ end
481
538
 
482
- thread_ids = thread_members.empty? ? [m.id] : thread_members.keys
483
-
484
- thread_ids.each { |thread_id| terms << mkterm(:thread, thread_id) }
485
- parent_ids.each do |ref|
486
- terms << mkterm(:ref, ref)
487
- end
488
-
489
- # Full text search content
490
- text << [subject_text, PREFIX['subject']]
491
- text << [subject_text, PREFIX['body']]
492
- text << [body_text, PREFIX['body']]
493
- m.attachments.each { |a| text << [a, PREFIX['attachment']] }
494
-
495
- truncated_date = if m.date < MIN_DATE
496
- debug "warning: adjusting too-low date #{m.date} for indexing"
539
+ def truncate_date date
540
+ if date < MIN_DATE
541
+ debug "warning: adjusting too-low date #{date} for indexing"
497
542
  MIN_DATE
498
- elsif m.date > MAX_DATE
499
- debug "warning: adjusting too-high date #{m.date} for indexing"
543
+ elsif date > MAX_DATE
544
+ debug "warning: adjusting too-high date #{date} for indexing"
500
545
  MAX_DATE
501
546
  else
502
- m.date
547
+ date
503
548
  end
504
-
505
- # Date value for range queries
506
- date_value = begin
507
- Xapian.sortable_serialise truncated_date.to_i
508
- rescue TypeError
509
- Xapian.sortable_serialise 0
510
- end
511
-
512
- docid = nil
513
- unless doc = find_doc(m.id)
514
- doc = Xapian::Document.new
515
- if not docid = assign_docid(m, truncated_date)
516
- # Could be triggered by spam
517
- Redwood::log "warning: docid underflow, dropping #{m.id.inspect}"
518
- return
519
- end
520
- else
521
- doc.clear_terms
522
- doc.clear_values
523
- docid = doc.docid
524
- end
525
-
526
- @term_generator.document = doc
527
- text.each { |text,prefix| @term_generator.index_text text, 1, prefix }
528
- terms.each { |term| doc.add_term term if term.length <= MAX_TERM_LENGTH }
529
- doc.add_value MSGID_VALUENO, m.id
530
- doc.add_value THREAD_VALUENO, (thread_ids * ',')
531
- doc.add_value DATE_VALUENO, date_value
532
- doc.data = Marshal.dump entry
533
-
534
- @xapian.replace_document docid, doc
535
549
  end
536
550
 
537
551
  # Construct a Xapian term
@@ -560,7 +574,32 @@ EOS
560
574
  raise "Invalid term type #{type}"
561
575
  end
562
576
  end
577
+ end
563
578
 
564
579
  end
565
580
 
581
+ class Xapian::Document
582
+ def entry
583
+ Marshal.load data
584
+ end
585
+
586
+ def entry=(x)
587
+ self.data = Marshal.dump x
588
+ end
589
+
590
+ def index_text text, prefix, weight=1
591
+ term_generator = Xapian::TermGenerator.new
592
+ term_generator.stemmer = Xapian::Stem.new(Redwood::XapianIndex::STEM_LANGUAGE)
593
+ term_generator.document = self
594
+ term_generator.index_text text, weight, prefix
595
+ end
596
+
597
+ alias old_add_term add_term
598
+ def add_term term
599
+ if term.length <= Redwood::XapianIndex::MAX_TERM_LENGTH
600
+ old_add_term term, 0
601
+ else
602
+ warn "dropping excessively long term #{term}"
603
+ end
604
+ end
566
605
  end