sup 0.9.1 → 0.10
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sup might be problematic. Click here for more details.
- data/CONTRIBUTORS +10 -6
- data/History.txt +11 -0
- data/ReleaseNotes +10 -0
- data/bin/sup +55 -19
- data/bin/sup-add +18 -8
- data/bin/sup-config +2 -2
- data/bin/sup-convert-ferret-index +84 -0
- data/bin/sup-dump +4 -3
- data/bin/sup-sync +4 -3
- data/bin/sup-sync-back +3 -2
- data/bin/sup-tweak-labels +3 -3
- data/lib/sup.rb +35 -4
- data/lib/sup/buffer.rb +12 -6
- data/lib/sup/colormap.rb +1 -0
- data/lib/sup/crypto.rb +76 -55
- data/lib/sup/ferret_index.rb +6 -1
- data/lib/sup/index.rb +62 -8
- data/lib/sup/logger.rb +2 -1
- data/lib/sup/maildir.rb +4 -2
- data/lib/sup/mbox/loader.rb +4 -3
- data/lib/sup/message-chunks.rb +9 -7
- data/lib/sup/message.rb +29 -27
- data/lib/sup/mode.rb +11 -4
- data/lib/sup/modes/buffer-list-mode.rb +5 -0
- data/lib/sup/modes/console-mode.rb +4 -0
- data/lib/sup/modes/edit-message-mode.rb +4 -2
- data/lib/sup/modes/file-browser-mode.rb +1 -1
- data/lib/sup/modes/inbox-mode.rb +18 -1
- data/lib/sup/modes/label-list-mode.rb +44 -3
- data/lib/sup/modes/text-mode.rb +1 -1
- data/lib/sup/modes/thread-index-mode.rb +63 -52
- data/lib/sup/modes/thread-view-mode.rb +68 -7
- data/lib/sup/poll.rb +20 -5
- data/lib/sup/source.rb +1 -0
- data/lib/sup/thread.rb +1 -1
- data/lib/sup/util.rb +49 -11
- data/lib/sup/xapian_index.rb +151 -112
- metadata +4 -10
- data/lib/sup/hook.rb.BACKUP.8625.rb +0 -158
- data/lib/sup/hook.rb.BACKUP.8681.rb +0 -158
- data/lib/sup/hook.rb.BASE.8625.rb +0 -155
- data/lib/sup/hook.rb.BASE.8681.rb +0 -155
- data/lib/sup/hook.rb.LOCAL.8625.rb +0 -142
- data/lib/sup/hook.rb.LOCAL.8681.rb +0 -142
- data/lib/sup/hook.rb.REMOTE.8625.rb +0 -145
- data/lib/sup/hook.rb.REMOTE.8681.rb +0 -145
data/lib/sup/poll.rb
CHANGED
@@ -28,19 +28,18 @@ num_inbox_total_unread: the total number of unread messages in the inbox
|
|
28
28
|
only those messages appearing in the inbox
|
29
29
|
EOS
|
30
30
|
|
31
|
-
DELAY = 300
|
31
|
+
DELAY = $config[:poll_interval] || 300
|
32
32
|
|
33
33
|
def initialize
|
34
34
|
@mutex = Mutex.new
|
35
35
|
@thread = nil
|
36
36
|
@last_poll = nil
|
37
37
|
@polling = false
|
38
|
+
@poll_sources = nil
|
38
39
|
@mode = nil
|
39
40
|
end
|
40
41
|
|
41
|
-
def poll
|
42
|
-
return if @polling
|
43
|
-
@polling = true
|
42
|
+
def poll_with_sources
|
44
43
|
@mode ||= PollMode.new
|
45
44
|
HookManager.run "before-poll"
|
46
45
|
|
@@ -54,6 +53,22 @@ EOS
|
|
54
53
|
|
55
54
|
HookManager.run "after-poll", :num => num, :num_inbox => numi, :from_and_subj => from_and_subj, :from_and_subj_inbox => from_and_subj_inbox, :num_inbox_total_unread => lambda { Index.num_results_for :labels => [:inbox, :unread] }
|
56
55
|
|
56
|
+
end
|
57
|
+
|
58
|
+
def poll
|
59
|
+
return if @polling
|
60
|
+
@polling = true
|
61
|
+
@poll_sources = SourceManager.usual_sources
|
62
|
+
num, numi = poll_with_sources
|
63
|
+
@polling = false
|
64
|
+
[num, numi]
|
65
|
+
end
|
66
|
+
|
67
|
+
def poll_unusual
|
68
|
+
return if @polling
|
69
|
+
@polling = true
|
70
|
+
@poll_sources = SourceManager.unusual_sources
|
71
|
+
num, numi = poll_with_sources
|
57
72
|
@polling = false
|
58
73
|
[num, numi]
|
59
74
|
end
|
@@ -79,7 +94,7 @@ EOS
|
|
79
94
|
loaded_labels = Set.new
|
80
95
|
|
81
96
|
@mutex.synchronize do
|
82
|
-
|
97
|
+
@poll_sources.each do |source|
|
83
98
|
# yield "source #{source} is done? #{source.done?} (cur_offset #{source.cur_offset} >= #{source.end_offset})"
|
84
99
|
begin
|
85
100
|
yield "Loading from #{source}... " unless source.done? || (source.respond_to?(:has_errors?) && source.has_errors?)
|
data/lib/sup/source.rb
CHANGED
@@ -207,6 +207,7 @@ class SourceManager
|
|
207
207
|
|
208
208
|
def source_for uri; sources.find { |s| s.is_source_for? uri }; end
|
209
209
|
def usual_sources; sources.find_all { |s| s.usual? }; end
|
210
|
+
def unusual_sources; sources.find_all { |s| !s.usual? }; end
|
210
211
|
|
211
212
|
def load_sources fn=Redwood::SOURCE_FN
|
212
213
|
source_array = (Redwood::load_yaml_obj(fn) || []).map { |o| Recoverable.new o }
|
data/lib/sup/thread.rb
CHANGED
@@ -112,7 +112,7 @@ class Thread
|
|
112
112
|
|
113
113
|
def set_labels l; each { |m, *o| m && m.labels = l }; end
|
114
114
|
def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end
|
115
|
-
def
|
115
|
+
def each_dirty_message; each { |m, *o| m && m.dirty? && yield(m) }; end
|
116
116
|
|
117
117
|
def direct_participants
|
118
118
|
map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq
|
data/lib/sup/util.rb
CHANGED
@@ -177,7 +177,7 @@ class String
|
|
177
177
|
## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
|
178
178
|
## the utf8 regex and count those. otherwise, use the byte length.
|
179
179
|
def display_length
|
180
|
-
if $encoding == "UTF-8" || $encoding == "utf8"
|
180
|
+
if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
|
181
181
|
scan(/./u).size
|
182
182
|
else
|
183
183
|
size
|
@@ -290,12 +290,45 @@ class String
|
|
290
290
|
end
|
291
291
|
end
|
292
292
|
|
293
|
+
unless method_defined? :each
|
294
|
+
def each &b
|
295
|
+
each_line &b
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
293
299
|
## takes a list of words, and returns an array of symbols. typically used in
|
294
300
|
## Sup for translating Ferret's representation of a list of labels (a string)
|
295
301
|
## to an array of label symbols.
|
296
302
|
##
|
297
303
|
## split_on will be passed to String#split, so you can leave this nil for space.
|
298
304
|
def to_set_of_symbols split_on=nil; Set.new split(split_on).map { |x| x.strip.intern } end
|
305
|
+
|
306
|
+
class CheckError < ArgumentError; end
|
307
|
+
def check
|
308
|
+
begin
|
309
|
+
fail "unexpected encoding #{encoding}" if respond_to?(:encoding) && !(encoding == Encoding::UTF_8 || encoding == Encoding::ASCII)
|
310
|
+
fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
|
311
|
+
rescue
|
312
|
+
raise CheckError.new($!.message)
|
313
|
+
end
|
314
|
+
end
|
315
|
+
|
316
|
+
def ascii
|
317
|
+
out = ""
|
318
|
+
each_byte do |b|
|
319
|
+
if (b & 128) != 0
|
320
|
+
out << "\\x#{b.to_s 16}"
|
321
|
+
else
|
322
|
+
out << b.chr
|
323
|
+
end
|
324
|
+
end
|
325
|
+
out.force_encoding Encoding::UTF_8 if out.respond_to? :force_encoding
|
326
|
+
out
|
327
|
+
end
|
328
|
+
|
329
|
+
def transcode src_encoding=$encoding
|
330
|
+
Iconv.easy_decode $encoding, src_encoding, self
|
331
|
+
end
|
299
332
|
end
|
300
333
|
|
301
334
|
class Numeric
|
@@ -486,9 +519,9 @@ class Time
|
|
486
519
|
strftime "%b %e"
|
487
520
|
else
|
488
521
|
if is_the_same_day? from
|
489
|
-
strftime("%l:%M%P")
|
522
|
+
strftime("%l:%M%p").downcase # emulate %P (missing on ruby 1.8 darwin)
|
490
523
|
elsif is_the_day_before? from
|
491
|
-
"Yest." + nearest_hour.strftime("%l%P")
|
524
|
+
"Yest." + nearest_hour.strftime("%l%p").downcase # emulate %P
|
492
525
|
else
|
493
526
|
strftime "%b %e"
|
494
527
|
end
|
@@ -641,21 +674,26 @@ class FinishLine
|
|
641
674
|
end
|
642
675
|
|
643
676
|
class Iconv
|
644
|
-
def self.easy_decode target,
|
645
|
-
|
646
|
-
|
677
|
+
def self.easy_decode target, orig_charset, text
|
678
|
+
if text.respond_to? :force_encoding
|
679
|
+
text = text.dup
|
680
|
+
text.force_encoding Encoding::BINARY
|
681
|
+
end
|
682
|
+
charset = case orig_charset
|
647
683
|
when /UTF[-_ ]?8/i then "utf-8"
|
648
684
|
when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
|
649
685
|
when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
|
650
686
|
when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
|
651
|
-
|
687
|
+
when /^euc$/i then 'EUC-JP' # XXX try them all?
|
688
|
+
when /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i then 'ASCII'
|
689
|
+
else orig_charset
|
652
690
|
end
|
653
691
|
|
654
692
|
begin
|
655
|
-
Iconv.iconv(target
|
656
|
-
rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence
|
657
|
-
|
658
|
-
text
|
693
|
+
returning(Iconv.iconv(target, charset, text + " ").join[0 .. -2]) { |str| str.check }
|
694
|
+
rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence, String::CheckError
|
695
|
+
debug "couldn't transcode text from #{orig_charset} (#{charset}) to #{target}) (#{text[0 ... 20].inspect}...) (got #{$!.message} (#{$!.class}))"
|
696
|
+
text.ascii
|
659
697
|
end
|
660
698
|
end
|
661
699
|
end
|
data/lib/sup/xapian_index.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
|
2
|
+
|
1
3
|
require 'xapian'
|
2
4
|
require 'set'
|
3
5
|
|
@@ -42,14 +44,14 @@ EOS
|
|
42
44
|
@xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
|
43
45
|
@xapian.set_metadata 'version', INDEX_VERSION
|
44
46
|
end
|
45
|
-
@term_generator = Xapian::TermGenerator.new()
|
46
|
-
@term_generator.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
|
47
47
|
@enquire = Xapian::Enquire.new @xapian
|
48
48
|
@enquire.weighting_scheme = Xapian::BoolWeight.new
|
49
49
|
@enquire.docid_order = Xapian::Enquire::ASCENDING
|
50
50
|
end
|
51
51
|
|
52
52
|
def save_index
|
53
|
+
info "Flushing Xapian updates to disk. This may take a while..."
|
54
|
+
@xapian.flush
|
53
55
|
end
|
54
56
|
|
55
57
|
def optimize
|
@@ -91,41 +93,9 @@ EOS
|
|
91
93
|
m
|
92
94
|
end
|
93
95
|
|
94
|
-
def add_message m; sync_message m end
|
95
|
-
def update_message m; sync_message m end
|
96
|
-
def update_message_state m; sync_message m end
|
97
|
-
|
98
|
-
def sync_message m, opts={}
|
99
|
-
entry = synchronize { get_entry m.id }
|
100
|
-
snippet = m.snippet
|
101
|
-
entry ||= {}
|
102
|
-
labels = m.labels
|
103
|
-
entry = {} if opts[:force_overwrite]
|
104
|
-
|
105
|
-
d = {
|
106
|
-
:message_id => m.id,
|
107
|
-
:source_id => m.source.id,
|
108
|
-
:source_info => m.source_info,
|
109
|
-
:date => (entry[:date] || m.date),
|
110
|
-
:snippet => snippet,
|
111
|
-
:labels => labels,
|
112
|
-
:from => (entry[:from] || [m.from.email, m.from.name]),
|
113
|
-
:to => (entry[:to] || m.to.map { |p| [p.email, p.name] }),
|
114
|
-
:cc => (entry[:cc] || m.cc.map { |p| [p.email, p.name] }),
|
115
|
-
:bcc => (entry[:bcc] || m.bcc.map { |p| [p.email, p.name] }),
|
116
|
-
:subject => m.subj,
|
117
|
-
:refs => (entry[:refs] || m.refs),
|
118
|
-
:replytos => (entry[:replytos] || m.replytos),
|
119
|
-
}
|
120
|
-
|
121
|
-
labels.each { |l| LabelManager << l }
|
122
|
-
|
123
|
-
synchronize do
|
124
|
-
index_message m, d, opts
|
125
|
-
end
|
126
|
-
true
|
127
|
-
end
|
128
|
-
private :sync_message
|
96
|
+
def add_message m; sync_message m, true end
|
97
|
+
def update_message m; sync_message m, true end
|
98
|
+
def update_message_state m; sync_message m, false end
|
129
99
|
|
130
100
|
def num_results_for query={}
|
131
101
|
xapian_query = build_xapian_query query
|
@@ -153,7 +123,6 @@ EOS
|
|
153
123
|
|
154
124
|
def each_message_in_thread_for m, opts={}
|
155
125
|
# TODO thread by subject
|
156
|
-
# TODO handle killed threads
|
157
126
|
return unless doc = find_doc(m.id)
|
158
127
|
queue = doc.value(THREAD_VALUENO).split(',')
|
159
128
|
msgids = [m.id]
|
@@ -162,7 +131,7 @@ EOS
|
|
162
131
|
while not queue.empty?
|
163
132
|
thread_id = queue.pop
|
164
133
|
next if seen_threads.member? thread_id
|
165
|
-
return false if thread_killed?
|
134
|
+
return false if opts[:skip_killed] && thread_killed?(thread_id)
|
166
135
|
seen_threads << thread_id
|
167
136
|
docs = term_docids(mkterm(:thread, thread_id)).map { |x| @xapian.document x }
|
168
137
|
docs.each do |doc|
|
@@ -194,14 +163,15 @@ EOS
|
|
194
163
|
|
195
164
|
subs = HookManager.run("custom-search", :subs => s) || s
|
196
165
|
subs = subs.gsub(/\b(to|from):(\S+)\b/) do
|
197
|
-
field,
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
166
|
+
field, value = $1, $2
|
167
|
+
email_field, name_field = %w(email name).map { |x| "#{field}_#{x}" }
|
168
|
+
if(p = ContactManager.contact_for(value))
|
169
|
+
"#{email_field}:#{p.email}"
|
170
|
+
elsif value == "me"
|
171
|
+
'(' + AccountManager.user_emails.map { |e| "#{email_field}:#{e}" }.join(' OR ') + ')'
|
202
172
|
else
|
203
|
-
|
204
|
-
end
|
173
|
+
"(#{email_field}:#{value} OR #{name_field}:#{value})"
|
174
|
+
end
|
205
175
|
end
|
206
176
|
|
207
177
|
## if we see a label:deleted or a label:spam term anywhere in the query
|
@@ -285,6 +255,8 @@ EOS
|
|
285
255
|
end
|
286
256
|
end
|
287
257
|
|
258
|
+
debug "translated query: #{subs.inspect}"
|
259
|
+
|
288
260
|
qp = Xapian::QueryParser.new
|
289
261
|
qp.database = @xapian
|
290
262
|
qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
|
@@ -295,6 +267,8 @@ EOS
|
|
295
267
|
BOOLEAN_PREFIX.each { |k,v| qp.add_boolean_prefix k, v }
|
296
268
|
xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD, PREFIX['body'])
|
297
269
|
|
270
|
+
debug "parsed xapian query: #{xapian_query.description}"
|
271
|
+
|
298
272
|
raise ParseError if xapian_query.nil? or xapian_query.empty?
|
299
273
|
query[:qobj] = xapian_query
|
300
274
|
query[:text] = s
|
@@ -438,100 +412,140 @@ EOS
|
|
438
412
|
end
|
439
413
|
end
|
440
414
|
|
441
|
-
def
|
442
|
-
|
443
|
-
|
415
|
+
def sync_message m, overwrite
|
416
|
+
doc = synchronize { find_doc(m.id) }
|
417
|
+
existed = doc != nil
|
418
|
+
doc ||= Xapian::Document.new
|
419
|
+
do_index_static = overwrite || !existed
|
420
|
+
old_entry = !do_index_static && doc.entry
|
421
|
+
snippet = do_index_static ? m.snippet : old_entry[:snippet]
|
444
422
|
|
445
|
-
|
446
|
-
|
423
|
+
entry = {
|
424
|
+
:message_id => m.id,
|
425
|
+
:source_id => m.source.id,
|
426
|
+
:source_info => m.source_info,
|
427
|
+
:date => m.date,
|
428
|
+
:snippet => snippet,
|
429
|
+
:labels => m.labels.to_a,
|
430
|
+
:from => [m.from.email, m.from.name],
|
431
|
+
:to => m.to.map { |p| [p.email, p.name] },
|
432
|
+
:cc => m.cc.map { |p| [p.email, p.name] },
|
433
|
+
:bcc => m.bcc.map { |p| [p.email, p.name] },
|
434
|
+
:subject => m.subj,
|
435
|
+
:refs => m.refs.to_a,
|
436
|
+
:replytos => m.replytos.to_a,
|
437
|
+
}
|
438
|
+
|
439
|
+
if do_index_static
|
440
|
+
doc.clear_terms
|
441
|
+
doc.clear_values
|
442
|
+
index_message_static m, doc, entry
|
443
|
+
end
|
444
|
+
|
445
|
+
index_message_threading doc, entry, old_entry
|
446
|
+
index_message_labels doc, entry[:labels], (do_index_static ? [] : old_entry[:labels])
|
447
|
+
doc.entry = entry
|
447
448
|
|
449
|
+
synchronize do
|
450
|
+
unless docid = existed ? doc.docid : assign_docid(m, truncate_date(m.date))
|
451
|
+
# Could be triggered by spam
|
452
|
+
warn "docid underflow, dropping #{m.id.inspect}"
|
453
|
+
return
|
454
|
+
end
|
455
|
+
@xapian.replace_document docid, doc
|
456
|
+
end
|
457
|
+
|
458
|
+
m.labels.each { |l| LabelManager << l }
|
459
|
+
true
|
460
|
+
end
|
461
|
+
|
462
|
+
## Index content that can't be changed by the user
|
463
|
+
def index_message_static m, doc, entry
|
448
464
|
# Person names are indexed with several prefixes
|
449
465
|
person_termer = lambda do |d|
|
450
466
|
lambda do |p|
|
451
467
|
["#{d}_name", "name", "body"].each do |x|
|
452
|
-
|
468
|
+
doc.index_text p.name, PREFIX[x]
|
453
469
|
end if p.name
|
454
|
-
[d, :any].each { |x|
|
470
|
+
[d, :any].each { |x| doc.add_term mkterm(:email, x, p.email) }
|
455
471
|
end
|
456
472
|
end
|
457
473
|
|
458
474
|
person_termer[:from][m.from] if m.from
|
459
475
|
(m.to+m.cc+m.bcc).each(&(person_termer[:to]))
|
460
476
|
|
461
|
-
|
462
|
-
m.
|
463
|
-
|
464
|
-
|
465
|
-
|
477
|
+
# Full text search content
|
478
|
+
subject_text = m.indexable_subject
|
479
|
+
body_text = m.indexable_body
|
480
|
+
doc.index_text subject_text, PREFIX['subject']
|
481
|
+
doc.index_text subject_text, PREFIX['body']
|
482
|
+
doc.index_text body_text, PREFIX['body']
|
483
|
+
m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
|
484
|
+
|
485
|
+
# Miscellaneous terms
|
486
|
+
doc.add_term mkterm(:date, m.date) if m.date
|
487
|
+
doc.add_term mkterm(:type, 'mail')
|
488
|
+
doc.add_term mkterm(:msgid, m.id)
|
489
|
+
doc.add_term mkterm(:source_id, m.source.id)
|
466
490
|
m.attachments.each do |a|
|
467
491
|
a =~ /\.(\w+)$/ or next
|
468
|
-
|
469
|
-
terms << t
|
492
|
+
doc.add_term mkterm(:attachment_extension, $1)
|
470
493
|
end
|
471
494
|
|
472
|
-
|
473
|
-
|
474
|
-
|
495
|
+
# Date value for range queries
|
496
|
+
date_value = begin
|
497
|
+
Xapian.sortable_serialise m.date.to_i
|
498
|
+
rescue TypeError
|
499
|
+
Xapian.sortable_serialise 0
|
500
|
+
end
|
501
|
+
|
502
|
+
doc.add_value MSGID_VALUENO, m.id
|
503
|
+
doc.add_value DATE_VALUENO, date_value
|
504
|
+
end
|
505
|
+
|
506
|
+
def index_message_labels doc, new_labels, old_labels
|
507
|
+
return if new_labels == old_labels
|
508
|
+
added = new_labels.to_a - old_labels.to_a
|
509
|
+
removed = old_labels.to_a - new_labels.to_a
|
510
|
+
added.each { |t| doc.add_term mkterm(:label,t) }
|
511
|
+
removed.each { |t| doc.remove_term mkterm(:label,t) }
|
512
|
+
end
|
513
|
+
|
514
|
+
## Assign a set of thread ids to the document. This is a hybrid of the runtime
|
515
|
+
## search done by the Ferret index and the index-time union done by previous
|
516
|
+
## versions of the Xapian index. We first find the thread ids of all messages
|
517
|
+
## with a reference to or from us. If that set is empty, we use our own
|
518
|
+
## message id. Otherwise, we use all the thread ids we previously found. In
|
519
|
+
## the common case there's only one member in that set, but if we're the
|
520
|
+
## missing link between multiple previously unrelated threads we can have
|
521
|
+
## more. XapianIndex#each_message_in_thread_for follows the thread ids when
|
522
|
+
## searching so the user sees a single unified thread.
|
523
|
+
def index_message_threading doc, entry, old_entry
|
524
|
+
return if old_entry && (entry[:refs] == old_entry[:refs]) && (entry[:replytos] == old_entry[:replytos])
|
525
|
+
children = term_docids(mkterm(:ref, entry[:message_id])).map { |docid| @xapian.document docid }
|
526
|
+
parent_ids = entry[:refs] + entry[:replytos]
|
475
527
|
parents = parent_ids.map { |id| find_doc id }.compact
|
476
528
|
thread_members = SavingHash.new { [] }
|
477
529
|
(children + parents).each do |doc2|
|
478
530
|
thread_ids = doc2.value(THREAD_VALUENO).split ','
|
479
531
|
thread_ids.each { |thread_id| thread_members[thread_id] << doc2 }
|
480
532
|
end
|
533
|
+
thread_ids = thread_members.empty? ? [entry[:message_id]] : thread_members.keys
|
534
|
+
thread_ids.each { |thread_id| doc.add_term mkterm(:thread, thread_id) }
|
535
|
+
parent_ids.each { |ref| doc.add_term mkterm(:ref, ref) }
|
536
|
+
doc.add_value THREAD_VALUENO, (thread_ids * ',')
|
537
|
+
end
|
481
538
|
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
parent_ids.each do |ref|
|
486
|
-
terms << mkterm(:ref, ref)
|
487
|
-
end
|
488
|
-
|
489
|
-
# Full text search content
|
490
|
-
text << [subject_text, PREFIX['subject']]
|
491
|
-
text << [subject_text, PREFIX['body']]
|
492
|
-
text << [body_text, PREFIX['body']]
|
493
|
-
m.attachments.each { |a| text << [a, PREFIX['attachment']] }
|
494
|
-
|
495
|
-
truncated_date = if m.date < MIN_DATE
|
496
|
-
debug "warning: adjusting too-low date #{m.date} for indexing"
|
539
|
+
def truncate_date date
|
540
|
+
if date < MIN_DATE
|
541
|
+
debug "warning: adjusting too-low date #{date} for indexing"
|
497
542
|
MIN_DATE
|
498
|
-
elsif m.date > MAX_DATE
|
499
|
-
debug "warning: adjusting too-high date #{m.date} for indexing"
|
543
|
+
elsif date > MAX_DATE
|
544
|
+
debug "warning: adjusting too-high date #{date} for indexing"
|
500
545
|
MAX_DATE
|
501
546
|
else
|
502
|
-
|
547
|
+
date
|
503
548
|
end
|
504
|
-
|
505
|
-
# Date value for range queries
|
506
|
-
date_value = begin
|
507
|
-
Xapian.sortable_serialise truncated_date.to_i
|
508
|
-
rescue TypeError
|
509
|
-
Xapian.sortable_serialise 0
|
510
|
-
end
|
511
|
-
|
512
|
-
docid = nil
|
513
|
-
unless doc = find_doc(m.id)
|
514
|
-
doc = Xapian::Document.new
|
515
|
-
if not docid = assign_docid(m, truncated_date)
|
516
|
-
# Could be triggered by spam
|
517
|
-
Redwood::log "warning: docid underflow, dropping #{m.id.inspect}"
|
518
|
-
return
|
519
|
-
end
|
520
|
-
else
|
521
|
-
doc.clear_terms
|
522
|
-
doc.clear_values
|
523
|
-
docid = doc.docid
|
524
|
-
end
|
525
|
-
|
526
|
-
@term_generator.document = doc
|
527
|
-
text.each { |text,prefix| @term_generator.index_text text, 1, prefix }
|
528
|
-
terms.each { |term| doc.add_term term if term.length <= MAX_TERM_LENGTH }
|
529
|
-
doc.add_value MSGID_VALUENO, m.id
|
530
|
-
doc.add_value THREAD_VALUENO, (thread_ids * ',')
|
531
|
-
doc.add_value DATE_VALUENO, date_value
|
532
|
-
doc.data = Marshal.dump entry
|
533
|
-
|
534
|
-
@xapian.replace_document docid, doc
|
535
549
|
end
|
536
550
|
|
537
551
|
# Construct a Xapian term
|
@@ -560,7 +574,32 @@ EOS
|
|
560
574
|
raise "Invalid term type #{type}"
|
561
575
|
end
|
562
576
|
end
|
577
|
+
end
|
563
578
|
|
564
579
|
end
|
565
580
|
|
581
|
+
class Xapian::Document
|
582
|
+
def entry
|
583
|
+
Marshal.load data
|
584
|
+
end
|
585
|
+
|
586
|
+
def entry=(x)
|
587
|
+
self.data = Marshal.dump x
|
588
|
+
end
|
589
|
+
|
590
|
+
def index_text text, prefix, weight=1
|
591
|
+
term_generator = Xapian::TermGenerator.new
|
592
|
+
term_generator.stemmer = Xapian::Stem.new(Redwood::XapianIndex::STEM_LANGUAGE)
|
593
|
+
term_generator.document = self
|
594
|
+
term_generator.index_text text, weight, prefix
|
595
|
+
end
|
596
|
+
|
597
|
+
alias old_add_term add_term
|
598
|
+
def add_term term
|
599
|
+
if term.length <= Redwood::XapianIndex::MAX_TERM_LENGTH
|
600
|
+
old_add_term term, 0
|
601
|
+
else
|
602
|
+
warn "dropping excessively long term #{term}"
|
603
|
+
end
|
604
|
+
end
|
566
605
|
end
|