sup 0.10.2 → 0.11
Potentially problematic release.
- data/CONTRIBUTORS +11 -9
- data/History.txt +14 -0
- data/README.txt +3 -11
- data/ReleaseNotes +16 -0
- data/bin/sup +67 -42
- data/bin/sup-add +2 -20
- data/bin/sup-config +0 -34
- data/bin/sup-dump +2 -5
- data/bin/sup-sync +2 -3
- data/bin/sup-sync-back +2 -3
- data/bin/sup-tweak-labels +2 -3
- data/lib/sup.rb +12 -4
- data/lib/sup/account.rb +2 -0
- data/lib/sup/buffer.rb +11 -2
- data/lib/sup/colormap.rb +59 -49
- data/lib/sup/connection.rb +63 -0
- data/lib/sup/crypto.rb +12 -0
- data/lib/sup/hook.rb +1 -0
- data/lib/sup/idle.rb +42 -0
- data/lib/sup/index.rb +562 -47
- data/lib/sup/keymap.rb +41 -3
- data/lib/sup/message.rb +1 -1
- data/lib/sup/mode.rb +8 -0
- data/lib/sup/modes/console-mode.rb +2 -3
- data/lib/sup/modes/edit-message-mode.rb +32 -7
- data/lib/sup/modes/inbox-mode.rb +4 -0
- data/lib/sup/modes/search-list-mode.rb +188 -0
- data/lib/sup/modes/search-results-mode.rb +17 -1
- data/lib/sup/modes/thread-index-mode.rb +43 -10
- data/lib/sup/modes/thread-view-mode.rb +29 -4
- data/lib/sup/poll.rb +13 -2
- data/lib/sup/search.rb +73 -0
- data/lib/sup/textfield.rb +17 -12
- data/lib/sup/util.rb +11 -0
- metadata +45 -46
- data/bin/sup-convert-ferret-index +0 -84
- data/lib/ncurses.rb +0 -289
- data/lib/sup/ferret_index.rb +0 -476
- data/lib/sup/xapian_index.rb +0 -605
data/lib/sup/xapian_index.rb
DELETED
```diff
@@ -1,605 +0,0 @@
-ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
-
-require 'xapian'
-require 'set'
-
-module Redwood
-
-# This index implementation uses Xapian for searching and storage. It
-# tends to be slightly faster than Ferret for indexing and significantly faster
-# for searching due to precomputing thread membership.
-class XapianIndex < BaseIndex
-  STEM_LANGUAGE = "english"
-  INDEX_VERSION = '1'
-
-  ## dates are converted to integers for xapian, and are used for document ids,
-  ## so we must ensure they're reasonably valid. this typically only affect
-  ## spam.
-  MIN_DATE = Time.at 0
-  MAX_DATE = Time.at(2**31-1)
-
-  HookManager.register "custom-search", <<EOS
-Executes before a string search is applied to the index,
-returning a new search string.
-Variables:
-  subs: The string being searched.
-EOS
-
-  def initialize dir=BASE_DIR
-    super
-
-    @index_mutex = Monitor.new
-  end
-
-  def load_index
-    path = File.join(@dir, 'xapian')
-    if File.exists? path
-      @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_OPEN)
-      db_version = @xapian.get_metadata 'version'
-      db_version = '0' if db_version.empty?
-      if db_version != INDEX_VERSION
-        fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please downgrade to your previous version and dump your labels before upgrading to this version (then run sup-sync --restore)."
-      end
-    else
-      @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
-      @xapian.set_metadata 'version', INDEX_VERSION
-    end
-    @enquire = Xapian::Enquire.new @xapian
-    @enquire.weighting_scheme = Xapian::BoolWeight.new
-    @enquire.docid_order = Xapian::Enquire::ASCENDING
-  end
-
-  def save_index
-    info "Flushing Xapian updates to disk. This may take a while..."
-    @xapian.flush
-  end
-
-  def optimize
-  end
-
-  def size
-    synchronize { @xapian.doccount }
-  end
-
-  def contains_id? id
-    synchronize { find_docid(id) && true }
-  end
-
-  def source_for_id id
-    synchronize { get_entry(id)[:source_id] }
-  end
-
-  def delete id
-    synchronize { @xapian.delete_document mkterm(:msgid, id) }
-  end
-
-  def build_message id
-    entry = synchronize { get_entry id }
-    return unless entry
-
-    source = SourceManager[entry[:source_id]]
-    raise "invalid source #{entry[:source_id]}" unless source
-
-    m = Message.new :source => source, :source_info => entry[:source_info],
-                    :labels => entry[:labels], :snippet => entry[:snippet]
-
-    mk_person = lambda { |x| Person.new(*x.reverse!) }
-    entry[:from] = mk_person[entry[:from]]
-    entry[:to].map!(&mk_person)
-    entry[:cc].map!(&mk_person)
-    entry[:bcc].map!(&mk_person)
-
-    m.load_from_index! entry
-    m
-  end
-
-  def add_message m; sync_message m, true end
-  def update_message m; sync_message m, true end
-  def update_message_state m; sync_message m, false end
-
-  def num_results_for query={}
-    xapian_query = build_xapian_query query
-    matchset = run_query xapian_query, 0, 0, 100
-    matchset.matches_estimated
-  end
-
-  EACH_ID_PAGE = 100
-  def each_id query={}
-    offset = 0
-    page = EACH_ID_PAGE
-
-    xapian_query = build_xapian_query query
-    while true
-      ids = run_query_ids xapian_query, offset, (offset+page)
-      ids.each { |id| yield id }
-      break if ids.size < page
-      offset += page
-    end
-  end
-
-  def each_id_by_date query={}
-    each_id(query) { |id| yield id, lambda { build_message id } }
-  end
-
-  def each_message_in_thread_for m, opts={}
-    # TODO thread by subject
-    return unless doc = find_doc(m.id)
-    queue = doc.value(THREAD_VALUENO).split(',')
-    msgids = [m.id]
-    seen_threads = Set.new
-    seen_messages = Set.new [m.id]
-    while not queue.empty?
-      thread_id = queue.pop
-      next if seen_threads.member? thread_id
-      return false if opts[:skip_killed] && thread_killed?(thread_id)
-      seen_threads << thread_id
-      docs = term_docids(mkterm(:thread, thread_id)).map { |x| @xapian.document x }
-      docs.each do |doc|
-        msgid = doc.value MSGID_VALUENO
-        next if seen_messages.member? msgid
-        msgids << msgid
-        seen_messages << msgid
-        queue.concat doc.value(THREAD_VALUENO).split(',')
-      end
-    end
-    msgids.each { |id| yield id, lambda { build_message id } }
-    true
-  end
-
-  def load_contacts emails, opts={}
-    contacts = Set.new
-    num = opts[:num] || 20
-    each_id_by_date :participants => emails do |id,b|
-      break if contacts.size >= num
-      m = b.call
-      ([m.from]+m.to+m.cc+m.bcc).compact.each { |p| contacts << [p.name, p.email] }
-    end
-    contacts.to_a.compact.map { |n,e| Person.new n, e }[0...num]
-  end
-
-  # TODO share code with the Ferret index
-  def parse_query s
-    query = {}
-
-    subs = HookManager.run("custom-search", :subs => s) || s
-    subs = subs.gsub(/\b(to|from):(\S+)\b/) do
-      field, value = $1, $2
-      email_field, name_field = %w(email name).map { |x| "#{field}_#{x}" }
-      if(p = ContactManager.contact_for(value))
-        "#{email_field}:#{p.email}"
-      elsif value == "me"
-        '(' + AccountManager.user_emails.map { |e| "#{email_field}:#{e}" }.join(' OR ') + ')'
-      else
-        "(#{email_field}:#{value} OR #{name_field}:#{value})"
-      end
-    end
-
-    ## if we see a label:deleted or a label:spam term anywhere in the query
-    ## string, we set the extra load_spam or load_deleted options to true.
-    ## bizarre? well, because the query allows arbitrary parenthesized boolean
-    ## expressions, without fully parsing the query, we can't tell whether
-    ## the user is explicitly directing us to search spam messages or not.
-    ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
-    ## search spam messages or not?
-    ##
-    ## so, we rely on the fact that turning these extra options ON turns OFF
-    ## the adding of "-label:deleted" or "-label:spam" terms at the very
-    ## final stage of query processing. if the user wants to search spam
-    ## messages, not adding that is the right thing; if he doesn't want to
-    ## search spam messages, then not adding it won't have any effect.
-    query[:load_spam] = true if subs =~ /\blabel:spam\b/
-    query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
-
-    ## gmail style "is" operator
-    subs = subs.gsub(/\b(is|has):(\S+)\b/) do
-      field, label = $1, $2
-      case label
-      when "read"
-        "-label:unread"
-      when "spam"
-        query[:load_spam] = true
-        "label:spam"
-      when "deleted"
-        query[:load_deleted] = true
-        "label:deleted"
-      else
-        "label:#{$2}"
-      end
-    end
-
-    ## gmail style attachments "filename" and "filetype" searches
-    subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
-      field, name = $1, ($3 || $4)
-      case field
-      when "filename"
-        debug "filename: translated #{field}:#{name} to attachment:\"#{name.downcase}\""
-        "attachment:\"#{name.downcase}\""
-      when "filetype"
-        debug "filetype: translated #{field}:#{name} to attachment_extension:#{name.downcase}"
-        "attachment_extension:#{name.downcase}"
-      end
-    end
-
-    if $have_chronic
-      lastdate = 2<<32 - 1
-      firstdate = 0
-      subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do
-        field, datestr = $1, ($3 || $4)
-        realdate = Chronic.parse datestr, :guess => false, :context => :past
-        if realdate
-          case field
-          when "after"
-            debug "chronic: translated #{field}:#{datestr} to #{realdate.end}"
-            "date:#{realdate.end.to_i}..#{lastdate}"
-          when "before"
-            debug "chronic: translated #{field}:#{datestr} to #{realdate.begin}"
-            "date:#{firstdate}..#{realdate.end.to_i}"
-          else
-            debug "chronic: translated #{field}:#{datestr} to #{realdate}"
-            "date:#{realdate.begin.to_i}..#{realdate.end.to_i}"
-          end
-        else
-          raise ParseError, "can't understand date #{datestr.inspect}"
-        end
-      end
-    end
-
-    ## limit:42 restrict the search to 42 results
-    subs = subs.gsub(/\blimit:(\S+)\b/) do
-      lim = $1
-      if lim =~ /^\d+$/
-        query[:limit] = lim.to_i
-        ''
-      else
-        raise ParseError, "non-numeric limit #{lim.inspect}"
-      end
-    end
-
-    debug "translated query: #{subs.inspect}"
-
-    qp = Xapian::QueryParser.new
-    qp.database = @xapian
-    qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
-    qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
-    qp.default_op = Xapian::Query::OP_AND
-    qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
-    NORMAL_PREFIX.each { |k,v| qp.add_prefix k, v }
-    BOOLEAN_PREFIX.each { |k,v| qp.add_boolean_prefix k, v }
-    xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD, PREFIX['body'])
-
-    debug "parsed xapian query: #{xapian_query.description}"
-
-    raise ParseError if xapian_query.nil? or xapian_query.empty?
-    query[:qobj] = xapian_query
-    query[:text] = s
-    query
-  end
-
-  private
-
-  # Stemmed
-  NORMAL_PREFIX = {
-    'subject' => 'S',
-    'body' => 'B',
-    'from_name' => 'FN',
-    'to_name' => 'TN',
-    'name' => 'N',
-    'attachment' => 'A',
-  }
-
-  # Unstemmed
-  BOOLEAN_PREFIX = {
-    'type' => 'K',
-    'from_email' => 'FE',
-    'to_email' => 'TE',
-    'email' => 'E',
-    'date' => 'D',
-    'label' => 'L',
-    'source_id' => 'I',
-    'attachment_extension' => 'O',
-    'msgid' => 'Q',
-    'thread' => 'H',
-    'ref' => 'R',
-  }
-
-  PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
-
-  MSGID_VALUENO = 0
-  THREAD_VALUENO = 1
-  DATE_VALUENO = 2
-
-  MAX_TERM_LENGTH = 245
-
-  # Xapian can very efficiently sort in ascending docid order. Sup always wants
-  # to sort by descending date, so this method maps between them. In order to
-  # handle multiple messages per second, we use a logistic curve centered
-  # around MIDDLE_DATE so that the slope (docid/s) is greatest in this time
-  # period. A docid collision is not an error - the code will pick the next
-  # smallest unused one.
-  DOCID_SCALE = 2.0**32
-  TIME_SCALE = 2.0**27
-  MIDDLE_DATE = Time.gm(2011)
-  def assign_docid m, truncated_date
-    t = (truncated_date.to_i - MIDDLE_DATE.to_i).to_f
-    docid = (DOCID_SCALE - DOCID_SCALE/(Math::E**(-(t/TIME_SCALE)) + 1)).to_i
-    while docid > 0 and docid_exists? docid
-      docid -= 1
-    end
-    docid > 0 ? docid : nil
-  end
-
-  # XXX is there a better way?
-  def docid_exists? docid
-    begin
-      @xapian.doclength docid
-      true
-    rescue RuntimeError #Xapian::DocNotFoundError
-      raise unless $!.message =~ /DocNotFoundError/
-      false
-    end
-  end
-
-  def term_docids term
-    @xapian.postlist(term).map { |x| x.docid }
-  end
-
-  def find_docid id
-    docids = term_docids(mkterm(:msgid,id))
-    fail unless docids.size <= 1
-    docids.first
-  end
-
-  def find_doc id
-    return unless docid = find_docid(id)
-    @xapian.document docid
-  end
-
-  def get_id docid
-    return unless doc = @xapian.document(docid)
-    doc.value MSGID_VALUENO
-  end
-
-  def get_entry id
-    return unless doc = find_doc(id)
-    Marshal.load doc.data
-  end
-
-  def thread_killed? thread_id
-    not run_query(Q.new(Q::OP_AND, mkterm(:thread, thread_id), mkterm(:label, :Killed)), 0, 1).empty?
-  end
-
-  def synchronize &b
-    @index_mutex.synchronize &b
-  end
-
-  def run_query xapian_query, offset, limit, checkatleast=0
-    synchronize do
-      @enquire.query = xapian_query
-      @enquire.mset(offset, limit-offset, checkatleast)
-    end
-  end
-
-  def run_query_ids xapian_query, offset, limit
-    matchset = run_query xapian_query, offset, limit
-    matchset.matches.map { |r| r.document.value MSGID_VALUENO }
-  end
-
-  Q = Xapian::Query
-  def build_xapian_query opts
-    labels = ([opts[:label]] + (opts[:labels] || [])).compact
-    neglabels = [:spam, :deleted, :killed].reject { |l| (labels.include? l) || opts.member?("load_#{l}".intern) }
-    pos_terms, neg_terms = [], []
-
-    pos_terms << mkterm(:type, 'mail')
-    pos_terms.concat(labels.map { |l| mkterm(:label,l) })
-    pos_terms << opts[:qobj] if opts[:qobj]
-    pos_terms << mkterm(:source_id, opts[:source_id]) if opts[:source_id]
-
-    if opts[:participants]
-      participant_terms = opts[:participants].map { |p| mkterm(:email,:any, (Redwood::Person === p) ? p.email : p) }
-      pos_terms << Q.new(Q::OP_OR, participant_terms)
-    end
-
-    neg_terms.concat(neglabels.map { |l| mkterm(:label,l) })
-
-    pos_query = Q.new(Q::OP_AND, pos_terms)
-    neg_query = Q.new(Q::OP_OR, neg_terms)
-
-    if neg_query.empty?
-      pos_query
-    else
-      Q.new(Q::OP_AND_NOT, [pos_query, neg_query])
-    end
-  end
-
-  def sync_message m, overwrite
-    doc = synchronize { find_doc(m.id) }
-    existed = doc != nil
-    doc ||= Xapian::Document.new
-    do_index_static = overwrite || !existed
-    old_entry = !do_index_static && doc.entry
-    snippet = do_index_static ? m.snippet : old_entry[:snippet]
-
-    entry = {
-      :message_id => m.id,
-      :source_id => m.source.id,
-      :source_info => m.source_info,
-      :date => m.date,
-      :snippet => snippet,
-      :labels => m.labels.to_a,
-      :from => [m.from.email, m.from.name],
-      :to => m.to.map { |p| [p.email, p.name] },
-      :cc => m.cc.map { |p| [p.email, p.name] },
-      :bcc => m.bcc.map { |p| [p.email, p.name] },
-      :subject => m.subj,
-      :refs => m.refs.to_a,
-      :replytos => m.replytos.to_a,
-    }
-
-    if do_index_static
-      doc.clear_terms
-      doc.clear_values
-      index_message_static m, doc, entry
-    end
-
-    index_message_threading doc, entry, old_entry
-    index_message_labels doc, entry[:labels], (do_index_static ? [] : old_entry[:labels])
-    doc.entry = entry
-
-    synchronize do
-      unless docid = existed ? doc.docid : assign_docid(m, truncate_date(m.date))
-        # Could be triggered by spam
-        warn "docid underflow, dropping #{m.id.inspect}"
-        return
-      end
-      @xapian.replace_document docid, doc
-    end
-
-    m.labels.each { |l| LabelManager << l }
-    true
-  end
-
-  ## Index content that can't be changed by the user
-  def index_message_static m, doc, entry
-    # Person names are indexed with several prefixes
-    person_termer = lambda do |d|
-      lambda do |p|
-        ["#{d}_name", "name", "body"].each do |x|
-          doc.index_text p.name, PREFIX[x]
-        end if p.name
-        [d, :any].each { |x| doc.add_term mkterm(:email, x, p.email) }
-      end
-    end
-
-    person_termer[:from][m.from] if m.from
-    (m.to+m.cc+m.bcc).each(&(person_termer[:to]))
-
-    # Full text search content
-    subject_text = m.indexable_subject
-    body_text = m.indexable_body
-    doc.index_text subject_text, PREFIX['subject']
-    doc.index_text subject_text, PREFIX['body']
-    doc.index_text body_text, PREFIX['body']
-    m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
-
-    # Miscellaneous terms
-    doc.add_term mkterm(:date, m.date) if m.date
-    doc.add_term mkterm(:type, 'mail')
-    doc.add_term mkterm(:msgid, m.id)
-    doc.add_term mkterm(:source_id, m.source.id)
-    m.attachments.each do |a|
-      a =~ /\.(\w+)$/ or next
-      doc.add_term mkterm(:attachment_extension, $1)
-    end
-
-    # Date value for range queries
-    date_value = begin
-      Xapian.sortable_serialise m.date.to_i
-    rescue TypeError
-      Xapian.sortable_serialise 0
-    end
-
-    doc.add_value MSGID_VALUENO, m.id
-    doc.add_value DATE_VALUENO, date_value
-  end
-
-  def index_message_labels doc, new_labels, old_labels
-    return if new_labels == old_labels
-    added = new_labels.to_a - old_labels.to_a
-    removed = old_labels.to_a - new_labels.to_a
-    added.each { |t| doc.add_term mkterm(:label,t) }
-    removed.each { |t| doc.remove_term mkterm(:label,t) }
-  end
-
-  ## Assign a set of thread ids to the document. This is a hybrid of the runtime
-  ## search done by the Ferret index and the index-time union done by previous
-  ## versions of the Xapian index. We first find the thread ids of all messages
-  ## with a reference to or from us. If that set is empty, we use our own
-  ## message id. Otherwise, we use all the thread ids we previously found. In
-  ## the common case there's only one member in that set, but if we're the
-  ## missing link between multiple previously unrelated threads we can have
-  ## more. XapianIndex#each_message_in_thread_for follows the thread ids when
-  ## searching so the user sees a single unified thread.
-  def index_message_threading doc, entry, old_entry
-    return if old_entry && (entry[:refs] == old_entry[:refs]) && (entry[:replytos] == old_entry[:replytos])
-    children = term_docids(mkterm(:ref, entry[:message_id])).map { |docid| @xapian.document docid }
-    parent_ids = entry[:refs] + entry[:replytos]
-    parents = parent_ids.map { |id| find_doc id }.compact
-    thread_members = SavingHash.new { [] }
-    (children + parents).each do |doc2|
-      thread_ids = doc2.value(THREAD_VALUENO).split ','
-      thread_ids.each { |thread_id| thread_members[thread_id] << doc2 }
-    end
-    thread_ids = thread_members.empty? ? [entry[:message_id]] : thread_members.keys
-    thread_ids.each { |thread_id| doc.add_term mkterm(:thread, thread_id) }
-    parent_ids.each { |ref| doc.add_term mkterm(:ref, ref) }
-    doc.add_value THREAD_VALUENO, (thread_ids * ',')
-  end
-
-  def truncate_date date
-    if date < MIN_DATE
-      debug "warning: adjusting too-low date #{date} for indexing"
-      MIN_DATE
-    elsif date > MAX_DATE
-      debug "warning: adjusting too-high date #{date} for indexing"
-      MAX_DATE
-    else
-      date
-    end
-  end
-
-  # Construct a Xapian term
-  def mkterm type, *args
-    case type
-    when :label
-      PREFIX['label'] + args[0].to_s.downcase
-    when :type
-      PREFIX['type'] + args[0].to_s.downcase
-    when :date
-      PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
-    when :email
-      case args[0]
-      when :from then PREFIX['from_email']
-      when :to then PREFIX['to_email']
-      when :any then PREFIX['email']
-      else raise "Invalid email term type #{args[0]}"
-      end + args[1].to_s.downcase
-    when :source_id
-      PREFIX['source_id'] + args[0].to_s.downcase
-    when :attachment_extension
-      PREFIX['attachment_extension'] + args[0].to_s.downcase
-    when :msgid, :ref, :thread
-      PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
-    else
-      raise "Invalid term type #{type}"
-    end
-  end
-end
-
-end
-
-class Xapian::Document
-  def entry
-    Marshal.load data
-  end
-
-  def entry=(x)
-    self.data = Marshal.dump x
-  end
-
-  def index_text text, prefix, weight=1
-    term_generator = Xapian::TermGenerator.new
-    term_generator.stemmer = Xapian::Stem.new(Redwood::XapianIndex::STEM_LANGUAGE)
-    term_generator.document = self
-    term_generator.index_text text, weight, prefix
-  end
-
-  alias old_add_term add_term
-  def add_term term
-    if term.length <= Redwood::XapianIndex::MAX_TERM_LENGTH
-      old_add_term term, 0
-    else
-      warn "dropping excessively long term #{term}"
-    end
-  end
-end
```
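The least obvious part of the removed indexer is the date-to-docid mapping described in the comment above `assign_docid`. Below is a minimal standalone sketch of just that mapping: the constants and formula are copied from the deleted file, while `date_to_docid` is a hypothetical helper name and the collision handling (decrementing to the next free docid) is omitted.

```ruby
# Constants copied from the deleted XapianIndex above.
DOCID_SCALE = 2.0**32
TIME_SCALE  = 2.0**27
MIDDLE_DATE = Time.gm(2011)

# Hypothetical helper: the logistic date-to-docid mapping from assign_docid,
# without the "pick the next smallest unused docid" collision loop.
def date_to_docid date
  t = (date.to_i - MIDDLE_DATE.to_i).to_f
  (DOCID_SCALE - DOCID_SCALE / (Math::E**(-(t / TIME_SCALE)) + 1)).to_i
end

# Later dates yield smaller docids, so ascending-docid order is newest-first.
puts date_to_docid(Time.gm(2009))  # => a larger docid
puts date_to_docid(Time.gm(2012))  # => a smaller docid
```

Because newer messages get smaller docids, Xapian's cheap ascending-docid ordering (`@enquire.docid_order = Xapian::Enquire::ASCENDING` in `load_index`) comes back in descending-date order, which is what Sup wants for its thread lists.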