sup 0.8.1 → 0.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sup might be problematic. Click here for more details.
- data/CONTRIBUTORS +13 -6
- data/History.txt +19 -0
- data/ReleaseNotes +35 -0
- data/bin/sup +82 -77
- data/bin/sup-add +7 -7
- data/bin/sup-config +104 -85
- data/bin/sup-dump +4 -5
- data/bin/sup-recover-sources +9 -10
- data/bin/sup-sync +121 -100
- data/bin/sup-sync-back +18 -15
- data/bin/sup-tweak-labels +24 -21
- data/lib/sup.rb +53 -33
- data/lib/sup/account.rb +0 -2
- data/lib/sup/buffer.rb +47 -22
- data/lib/sup/colormap.rb +6 -6
- data/lib/sup/contact.rb +0 -2
- data/lib/sup/crypto.rb +34 -23
- data/lib/sup/draft.rb +6 -14
- data/lib/sup/ferret_index.rb +471 -0
- data/lib/sup/hook.rb +30 -43
- data/lib/sup/hook.rb.BACKUP.8625.rb +158 -0
- data/lib/sup/hook.rb.BACKUP.8681.rb +158 -0
- data/lib/sup/hook.rb.BASE.8625.rb +155 -0
- data/lib/sup/hook.rb.BASE.8681.rb +155 -0
- data/lib/sup/hook.rb.LOCAL.8625.rb +142 -0
- data/lib/sup/hook.rb.LOCAL.8681.rb +142 -0
- data/lib/sup/hook.rb.REMOTE.8625.rb +145 -0
- data/lib/sup/hook.rb.REMOTE.8681.rb +145 -0
- data/lib/sup/imap.rb +18 -8
- data/lib/sup/index.rb +70 -528
- data/lib/sup/interactive-lock.rb +74 -0
- data/lib/sup/keymap.rb +26 -26
- data/lib/sup/label.rb +2 -4
- data/lib/sup/logger.rb +54 -35
- data/lib/sup/maildir.rb +41 -6
- data/lib/sup/mbox.rb +1 -1
- data/lib/sup/mbox/loader.rb +18 -6
- data/lib/sup/mbox/ssh-file.rb +1 -7
- data/lib/sup/message-chunks.rb +36 -23
- data/lib/sup/message.rb +126 -46
- data/lib/sup/mode.rb +3 -2
- data/lib/sup/modes/console-mode.rb +108 -0
- data/lib/sup/modes/edit-message-mode.rb +15 -5
- data/lib/sup/modes/inbox-mode.rb +2 -4
- data/lib/sup/modes/label-list-mode.rb +1 -1
- data/lib/sup/modes/line-cursor-mode.rb +18 -18
- data/lib/sup/modes/log-mode.rb +29 -16
- data/lib/sup/modes/poll-mode.rb +7 -9
- data/lib/sup/modes/reply-mode.rb +5 -3
- data/lib/sup/modes/scroll-mode.rb +2 -2
- data/lib/sup/modes/search-results-mode.rb +9 -11
- data/lib/sup/modes/text-mode.rb +2 -2
- data/lib/sup/modes/thread-index-mode.rb +26 -16
- data/lib/sup/modes/thread-view-mode.rb +84 -39
- data/lib/sup/person.rb +6 -8
- data/lib/sup/poll.rb +46 -47
- data/lib/sup/rfc2047.rb +1 -5
- data/lib/sup/sent.rb +27 -20
- data/lib/sup/source.rb +90 -13
- data/lib/sup/textfield.rb +4 -4
- data/lib/sup/thread.rb +15 -13
- data/lib/sup/undo.rb +0 -1
- data/lib/sup/update.rb +0 -1
- data/lib/sup/util.rb +51 -43
- data/lib/sup/xapian_index.rb +566 -0
- metadata +57 -46
- data/lib/sup/suicide.rb +0 -36
@@ -0,0 +1,566 @@
|
|
1
|
+
require 'xapian'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
module Redwood
|
5
|
+
|
6
|
+
# This index implementation uses Xapian for searching and storage. It
|
7
|
+
# tends to be slightly faster than Ferret for indexing and significantly faster
|
8
|
+
# for searching due to precomputing thread membership.
|
9
|
+
class XapianIndex < BaseIndex
|
10
|
+
STEM_LANGUAGE = "english"
|
11
|
+
INDEX_VERSION = '1'
|
12
|
+
|
13
|
+
## dates are converted to integers for xapian, and are used for document ids,
|
14
|
+
## so we must ensure they're reasonably valid. this typically only affects
|
15
|
+
## spam.
|
16
|
+
MIN_DATE = Time.at 0
|
17
|
+
MAX_DATE = Time.at(2**31-1)
|
18
|
+
|
19
|
+
HookManager.register "custom-search", <<EOS
|
20
|
+
Executes before a string search is applied to the index,
|
21
|
+
returning a new search string.
|
22
|
+
Variables:
|
23
|
+
subs: The string being searched.
|
24
|
+
EOS
|
25
|
+
|
26
|
+
# Delegates to the superclass initializer (forwarding +dir+ implicitly) and
# creates the monitor that #synchronize uses to guard index access.
def initialize dir=BASE_DIR
  super
  @index_mutex = Monitor.new
end
|
31
|
+
|
32
|
+
# Opens the Xapian database stored under @dir/xapian, creating it when the
# path does not exist yet, then sets up the term generator and enquire
# objects used by the rest of the class.
#
# An existing database must carry a 'version' metadata value equal to
# INDEX_VERSION (missing metadata counts as version '0'); otherwise this
# fails with an upgrade message.
def load_index
  path = File.join(@dir, 'xapian')
  # File.exist? instead of the deprecated File.exists? alias, which newer
  # Ruby versions no longer provide.
  if File.exist? path
    @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_OPEN)
    db_version = @xapian.get_metadata 'version'
    db_version = '0' if db_version.empty?
    if db_version != INDEX_VERSION
      fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please downgrade to your previous version and dump your labels before upgrading to this version (then run sup-sync --restore)."
    end
  else
    @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
    @xapian.set_metadata 'version', INDEX_VERSION
  end
  @term_generator = Xapian::TermGenerator.new()
  @term_generator.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
  @enquire = Xapian::Enquire.new @xapian
  @enquire.weighting_scheme = Xapian::BoolWeight.new
  @enquire.docid_order = Xapian::Enquire::ASCENDING
end
|
51
|
+
|
52
|
+
# Intentionally empty: this implementation does nothing on save.
def save_index; end
|
54
|
+
|
55
|
+
# Intentionally empty: this implementation does nothing on optimize.
def optimize; end
|
57
|
+
|
58
|
+
# Returns the database's document count, read while holding the index lock.
def size
  synchronize do
    @xapian.doccount
  end
end
|
61
|
+
|
62
|
+
# Returns true when a document is indexed under message id +id+; otherwise
# returns the falsy result of the lookup.
def contains_id? id
  synchronize do
    docid = find_docid(id)
    docid && true
  end
end
|
65
|
+
|
66
|
+
# Returns the :source_id recorded in the index entry for message +id+, or
# nil when the index has no entry for it (get_entry returns nil in that
# case, which previously caused a NoMethodError here).
def source_for_id id
  synchronize do
    entry = get_entry(id)
    entry && entry[:source_id]
  end
end
|
69
|
+
|
70
|
+
# Deletes the document(s) carrying the msgid term for +id+, under the index
# lock.
def delete id
  synchronize do
    msgid_term = mkterm(:msgid, id)
    @xapian.delete_document msgid_term
  end
end
|
73
|
+
|
74
|
+
# Reconstructs a Message from the index entry stored for +id+.
#
# Returns nil when there is no entry. Raises when SourceManager has no
# source for the entry's :source_id. The entry's :from/:to/:cc/:bcc pairs
# are converted (in place) to Person objects before being handed to
# Message#load_from_index!.
def build_message id
  entry = synchronize { get_entry id }
  return unless entry

  source = SourceManager[entry[:source_id]]
  raise "invalid source #{entry[:source_id]}" unless source

  m = Message.new :source => source, :source_info => entry[:source_info],
                  :labels => entry[:labels], :snippet => entry[:snippet]

  # Each stored pair is reversed in place and splatted into Person.new.
  mk_person = lambda { |pair| Person.new(*pair.reverse!) }
  entry[:from] = mk_person.call(entry[:from])
  [:to, :cc, :bcc].each { |field| entry[field].map!(&mk_person) }

  m.load_from_index! entry
  m
end
|
93
|
+
|
94
|
+
# The three public entry points below all funnel into sync_message with
# default options.
def add_message m
  sync_message m
end

def update_message m
  sync_message m
end

def update_message_state m
  sync_message m
end
|
97
|
+
|
98
|
+
# Writes message +m+ to the index and returns true.
#
# Values already present in the stored entry win for :date, :from, :to,
# :cc, :bcc, :refs and :replytos; everything else is taken from +m+. With
# opts[:force_overwrite] the stored entry is ignored entirely. Every label
# of the message is also registered with LabelManager.
def sync_message m, opts={}
  stored = synchronize { get_entry m.id }
  snippet = m.snippet
  labels = m.labels
  stored = {} if !stored || opts[:force_overwrite]

  # Evaluated lazily, only when the stored entry lacks the field.
  as_pairs = lambda { |people| people.map { |p| [p.email, p.name] } }

  record = {
    :message_id => m.id,
    :source_id => m.source.id,
    :source_info => m.source_info,
    :date => (stored[:date] || m.date),
    :snippet => snippet,
    :labels => labels,
    :from => (stored[:from] || [m.from.email, m.from.name]),
    :to => (stored[:to] || as_pairs.call(m.to)),
    :cc => (stored[:cc] || as_pairs.call(m.cc)),
    :bcc => (stored[:bcc] || as_pairs.call(m.bcc)),
    :subject => m.subj,
    :refs => (stored[:refs] || m.refs),
    :replytos => (stored[:replytos] || m.replytos),
  }

  labels.each { |label| LabelManager << label }

  synchronize { index_message m, record, opts }
  true
end
private :sync_message
|
129
|
+
|
130
|
+
# Returns Xapian's estimated match count for +query+. No results are
# fetched (offset 0, limit 0); checkatleast is 100.
def num_results_for query={}
  matchset = run_query build_xapian_query(query), 0, 0, 100
  matchset.matches_estimated
end
|
135
|
+
|
136
|
+
EACH_ID_PAGE = 100
|
137
|
+
# Yields the message id of every result of +query+, fetching results in
# pages of EACH_ID_PAGE and stopping after the first short page.
def each_id query={}
  xapian_query = build_xapian_query query
  page = EACH_ID_PAGE
  offset = 0
  fetched = page

  until fetched < page
    ids = run_query_ids xapian_query, offset, offset + page
    ids.each { |id| yield id }
    fetched = ids.size
    offset += page
  end
end
|
149
|
+
|
150
|
+
# Like each_id, but additionally yields a lambda that builds the Message
# for the id on demand.
def each_id_by_date query={}
  each_id(query) do |id|
    builder = lambda { build_message id }
    yield id, builder
  end
end
|
153
|
+
|
154
|
+
# Yields (message id, builder lambda) for +m+ and for every message reachable
# through the thread ids stored on the indexed documents.
#
# Returns nil when +m+ is not indexed, false as soon as a visited thread is
# reported killed (nothing is yielded in that case), and true otherwise.
def each_message_in_thread_for m, opts={}
  # TODO thread by subject
  # TODO handle killed threads
  root = find_doc(m.id)
  return unless root

  pending = root.value(THREAD_VALUENO).split(',')
  collected = [m.id]
  visited_threads = Set.new
  visited_messages = Set.new [m.id]

  until pending.empty?
    thread_id = pending.pop
    next if visited_threads.member? thread_id
    return false if thread_killed? thread_id
    visited_threads << thread_id

    members = term_docids(mkterm(:thread, thread_id)).map { |docid| @xapian.document docid }
    members.each do |member|
      msgid = member.value MSGID_VALUENO
      next if visited_messages.member? msgid
      collected << msgid
      visited_messages << msgid
      # A member may belong to further threads; follow those as well.
      pending.concat member.value(THREAD_VALUENO).split(',')
    end
  end

  collected.each { |id| yield id, lambda { build_message id } }
  true
end
|
179
|
+
|
180
|
+
# Collects distinct (name, email) participants from messages involving
# +emails+ and returns up to opts[:num] (default 20) of them as Person
# objects. Iteration stops once enough contacts have been gathered.
def load_contacts emails, opts={}
  num = opts[:num] || 20
  contacts = Set.new

  each_id_by_date :participants => emails do |id, builder|
    break if contacts.size >= num
    m = builder.call
    people = ([m.from] + m.to + m.cc + m.bcc).compact
    people.each { |person| contacts << [person.name, person.email] }
  end

  persons = contacts.to_a.compact.map { |name, email| Person.new name, email }
  persons[0...num]
end
|
190
|
+
|
191
|
+
# TODO share code with the Ferret index
|
192
|
+
# TODO share code with the Ferret index
#
# Parses the user search string +s+ into a query hash containing:
#   :qobj          the parsed Xapian query
#   :text          the original string +s+
#   :limit         set when a limit:N term is present
#   :load_spam / :load_deleted   set when the string mentions those labels
#
# The string is first passed through the "custom-search" hook, then the
# to:/from:, is:/has:, filename:/filetype:, date (only if $have_chronic) and
# limit: operators are rewritten before handing the result to
# Xapian::QueryParser.
#
# Raises ParseError for unparseable dates, non-numeric limits, or when the
# resulting Xapian query is nil or empty.
def parse_query s
  query = {}

  subs = HookManager.run("custom-search", :subs => s) || s
  # Operate on subs (not s) so the hook's rewritten string is not discarded.
  subs = subs.gsub(/\b(to|from):(\S+)\b/) do
    field, name = $1, $2
    if(p = ContactManager.contact_for(name))
      [field, p.email]
    elsif name == "me"
      [field, "(" + AccountManager.user_emails.join("||") + ")"]
    else
      [field, name]
    end.join(":")
  end

  ## if we see a label:deleted or a label:spam term anywhere in the query
  ## string, we set the extra load_spam or load_deleted options to true.
  ## bizarre? well, because the query allows arbitrary parenthesized boolean
  ## expressions, without fully parsing the query, we can't tell whether
  ## the user is explicitly directing us to search spam messages or not.
  ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
  ## search spam messages or not?
  ##
  ## so, we rely on the fact that turning these extra options ON turns OFF
  ## the adding of "-label:deleted" or "-label:spam" terms at the very
  ## final stage of query processing. if the user wants to search spam
  ## messages, not adding that is the right thing; if he doesn't want to
  ## search spam messages, then not adding it won't have any effect.
  query[:load_spam] = true if subs =~ /\blabel:spam\b/
  query[:load_deleted] = true if subs =~ /\blabel:deleted\b/

  ## gmail style "is" operator
  subs = subs.gsub(/\b(is|has):(\S+)\b/) do
    field, label = $1, $2
    case label
    when "read"
      "-label:unread"
    when "spam"
      query[:load_spam] = true
      "label:spam"
    when "deleted"
      query[:load_deleted] = true
      "label:deleted"
    else
      "label:#{$2}"
    end
  end

  ## gmail style attachments "filename" and "filetype" searches
  subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
    field, name = $1, ($3 || $4)
    case field
    when "filename"
      debug "filename: translated #{field}:#{name} to attachment:\"#{name.downcase}\""
      "attachment:\"#{name.downcase}\""
    when "filetype"
      debug "filetype: translated #{field}:#{name} to attachment_extension:#{name.downcase}"
      "attachment_extension:#{name.downcase}"
    end
  end

  if $have_chronic
    # NOTE(review): `-` binds tighter than `<<`, so this is 2<<31 (2**32),
    # not (2<<32)-1. It only serves as an open upper bound; left unchanged.
    lastdate = 2<<32 - 1
    firstdate = 0
    subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do
      field, datestr = $1, ($3 || $4)
      realdate = Chronic.parse datestr, :guess => false, :context => :past
      if realdate
        case field
        when "after"
          debug "chronic: translated #{field}:#{datestr} to #{realdate.end}"
          "date:#{realdate.end.to_i}..#{lastdate}"
        when "before"
          debug "chronic: translated #{field}:#{datestr} to #{realdate.begin}"
          # "before X" ends where the span X begins, matching the debug
          # message above (the range previously ended at realdate.end).
          "date:#{firstdate}..#{realdate.begin.to_i}"
        else
          debug "chronic: translated #{field}:#{datestr} to #{realdate}"
          "date:#{realdate.begin.to_i}..#{realdate.end.to_i}"
        end
      else
        raise ParseError, "can't understand date #{datestr.inspect}"
      end
    end
  end

  ## limit:42 restrict the search to 42 results
  subs = subs.gsub(/\blimit:(\S+)\b/) do
    lim = $1
    if lim =~ /^\d+$/
      query[:limit] = lim.to_i
      ''
    else
      raise ParseError, "non-numeric limit #{lim.inspect}"
    end
  end

  qp = Xapian::QueryParser.new
  qp.database = @xapian
  qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
  qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
  qp.default_op = Xapian::Query::OP_AND
  qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
  NORMAL_PREFIX.each { |k,v| qp.add_prefix k, v }
  BOOLEAN_PREFIX.each { |k,v| qp.add_boolean_prefix k, v }
  xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD, PREFIX['body'])

  raise ParseError if xapian_query.nil? or xapian_query.empty?
  query[:qobj] = xapian_query
  query[:text] = s
  query
end
|
303
|
+
|
304
|
+
private
|
305
|
+
|
306
|
+
# Stemmed
|
307
|
+
NORMAL_PREFIX = {
|
308
|
+
'subject' => 'S',
|
309
|
+
'body' => 'B',
|
310
|
+
'from_name' => 'FN',
|
311
|
+
'to_name' => 'TN',
|
312
|
+
'name' => 'N',
|
313
|
+
'attachment' => 'A',
|
314
|
+
}
|
315
|
+
|
316
|
+
# Unstemmed
|
317
|
+
BOOLEAN_PREFIX = {
|
318
|
+
'type' => 'K',
|
319
|
+
'from_email' => 'FE',
|
320
|
+
'to_email' => 'TE',
|
321
|
+
'email' => 'E',
|
322
|
+
'date' => 'D',
|
323
|
+
'label' => 'L',
|
324
|
+
'source_id' => 'I',
|
325
|
+
'attachment_extension' => 'O',
|
326
|
+
'msgid' => 'Q',
|
327
|
+
'thread' => 'H',
|
328
|
+
'ref' => 'R',
|
329
|
+
}
|
330
|
+
|
331
|
+
PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
|
332
|
+
|
333
|
+
MSGID_VALUENO = 0
|
334
|
+
THREAD_VALUENO = 1
|
335
|
+
DATE_VALUENO = 2
|
336
|
+
|
337
|
+
MAX_TERM_LENGTH = 245
|
338
|
+
|
339
|
+
# Xapian can very efficiently sort in ascending docid order. Sup always wants
|
340
|
+
# to sort by descending date, so this method maps between them. In order to
|
341
|
+
# handle multiple messages per second, we use a logistic curve centered
|
342
|
+
# around MIDDLE_DATE so that the slope (docid/s) is greatest in this time
|
343
|
+
# period. A docid collision is not an error - the code will pick the next
|
344
|
+
# smallest unused one.
|
345
|
+
DOCID_SCALE = 2.0**32
|
346
|
+
TIME_SCALE = 2.0**27
|
347
|
+
MIDDLE_DATE = Time.gm(2011)
|
348
|
+
# Maps +truncated_date+ onto a docid using the logistic curve defined by
# DOCID_SCALE, TIME_SCALE and MIDDLE_DATE, then walks downwards past docids
# that are already taken. Returns nil if no positive docid is free.
# (+m+ is not used by the computation.)
def assign_docid m, truncated_date
  t = (truncated_date.to_i - MIDDLE_DATE.to_i).to_f
  candidate = (DOCID_SCALE - DOCID_SCALE/(Math::E**(-(t/TIME_SCALE)) + 1)).to_i
  candidate -= 1 while candidate > 0 && docid_exists?(candidate)
  return nil unless candidate > 0
  candidate
end
|
356
|
+
|
357
|
+
# XXX is there a better way?
|
358
|
+
# XXX is there a better way?
#
# Returns true when @xapian.doclength succeeds for +docid+, false when it
# raises a RuntimeError whose message mentions DocNotFoundError. Any other
# RuntimeError is re-raised.
def docid_exists? docid
  @xapian.doclength docid
  true
rescue RuntimeError => e # Xapian::DocNotFoundError
  raise unless e.message =~ /DocNotFoundError/
  false
end
|
367
|
+
|
368
|
+
# Returns the docids from the posting list of +term+.
def term_docids term
  postings = @xapian.postlist(term)
  postings.map { |posting| posting.docid }
end
|
371
|
+
|
372
|
+
# Returns the docid indexed under message id +id+, or nil when there is
# none. Fails if more than one document carries that msgid term.
def find_docid id
  matches = term_docids(mkterm(:msgid, id))
  fail if matches.size > 1
  matches.first
end
|
377
|
+
|
378
|
+
# Returns the Xapian document for message id +id+, or nil when no docid is
# found for it.
def find_doc id
  docid = find_docid(id)
  return unless docid
  @xapian.document docid
end
|
382
|
+
|
383
|
+
# Returns the message id stored in the MSGID_VALUENO slot of document
# +docid+, or nil when the document lookup returns a falsy value.
def get_id docid
  doc = @xapian.document(docid)
  return unless doc
  doc.value MSGID_VALUENO
end
|
387
|
+
|
388
|
+
# Returns the entry unmarshalled from the data of the document indexed
# under message id +id+, or nil when there is no such document.
def get_entry id
  doc = find_doc(id)
  return unless doc
  Marshal.load doc.data
end
|
392
|
+
|
393
|
+
# True when at least one document matches both the thread term for
# +thread_id+ and the :Killed label term.
def thread_killed? thread_id
  killed_in_thread = Q.new(Q::OP_AND, mkterm(:thread, thread_id), mkterm(:label, :Killed))
  !run_query(killed_in_thread, 0, 1).empty?
end
|
396
|
+
|
397
|
+
# Runs the given block while holding @index_mutex and returns its result.
def synchronize &b
  @index_mutex.synchronize(&b)
end
|
400
|
+
|
401
|
+
# Runs +xapian_query+ through the shared enquire object under the index
# lock and returns the match set for results offset...limit.
def run_query xapian_query, offset, limit, checkatleast=0
  synchronize do
    @enquire.query = xapian_query
    # mset takes a count, not an end position.
    count = limit - offset
    @enquire.mset(offset, count, checkatleast)
  end
end
|
407
|
+
|
408
|
+
# Runs the query and returns the message id (MSGID_VALUENO value) of each
# match.
def run_query_ids xapian_query, offset, limit
  matches = run_query(xapian_query, offset, limit).matches
  matches.map { |match| match.document.value MSGID_VALUENO }
end
|
412
|
+
|
413
|
+
Q = Xapian::Query
|
414
|
+
# Builds the Xapian query for the option hash +opts+.
#
# Required terms: type 'mail', every label in opts[:label]/opts[:labels],
# opts[:qobj], opts[:source_id] and an OR over opts[:participants], each
# only when given. The :spam, :deleted and :killed labels are excluded
# unless requested as a label or enabled through a load_<label> key.
def build_xapian_query opts
  wanted = ([opts[:label]] + (opts[:labels] || [])).compact
  excluded = [:spam, :deleted, :killed].select do |l|
    !wanted.include?(l) && !opts.member?("load_#{l}".intern)
  end

  required = [mkterm(:type, 'mail')]
  required.concat(wanted.map { |l| mkterm(:label, l) })
  required << opts[:qobj] if opts[:qobj]
  required << mkterm(:source_id, opts[:source_id]) if opts[:source_id]

  if opts[:participants]
    people = opts[:participants].map do |p|
      mkterm(:email, :any, (Redwood::Person === p) ? p.email : p)
    end
    required << Q.new(Q::OP_OR, people)
  end

  forbidden = excluded.map { |l| mkterm(:label, l) }

  pos_query = Q.new(Q::OP_AND, required)
  neg_query = Q.new(Q::OP_OR, forbidden)

  return pos_query if neg_query.empty?
  Q.new(Q::OP_AND_NOT, [pos_query, neg_query])
end
|
440
|
+
|
441
|
+
# Builds (or rebuilds) the Xapian document for message +m+ and stores it.
#
# Boolean terms cover participant emails, date, labels, type, msgid, source
# id, attachment extensions, thread ids and references. Free text (names,
# subject, body, attachment names) is indexed through the term generator.
# +entry+ is marshalled into the document data. A new message gets a docid
# from assign_docid; if none is available the message is dropped with a
# warning. (+opts+ is not used here.)
def index_message m, entry, opts
  terms = []
  text = []

  subject_text = m.indexable_subject
  body_text = m.indexable_body

  # Person names are indexed with several prefixes
  person_termer = lambda do |role|
    lambda do |person|
      if person.name
        ["#{role}_name", "name", "body"].each do |key|
          text << [person.name, PREFIX[key]]
        end
      end
      [role, :any].each { |kind| terms << mkterm(:email, kind, person.email) }
    end
  end

  person_termer.call(:from).call(m.from) if m.from
  (m.to + m.cc + m.bcc).each(&person_termer.call(:to))

  terms << mkterm(:date, m.date) if m.date
  m.labels.each { |label| terms << mkterm(:label, label) }
  terms << mkterm(:type, 'mail')
  terms << mkterm(:msgid, m.id)
  terms << mkterm(:source_id, m.source.id)
  m.attachments.each do |a|
    next unless a =~ /\.(\w+)$/
    terms << mkterm(:attachment_extension, $1)
  end

  ## Thread membership
  children = term_docids(mkterm(:ref, m.id)).map { |docid| @xapian.document docid }
  parent_ids = m.refs + m.replytos
  parents = parent_ids.map { |id| find_doc id }.compact
  thread_members = SavingHash.new { [] }
  (children + parents).each do |relative|
    relative.value(THREAD_VALUENO).split(',').each do |thread_id|
      thread_members[thread_id] << relative
    end
  end

  # A message with no indexed relatives starts a thread named after itself.
  thread_ids = thread_members.empty? ? [m.id] : thread_members.keys

  thread_ids.each { |thread_id| terms << mkterm(:thread, thread_id) }
  parent_ids.each { |ref| terms << mkterm(:ref, ref) }

  # Full text search content
  text << [subject_text, PREFIX['subject']]
  text << [subject_text, PREFIX['body']]
  text << [body_text, PREFIX['body']]
  m.attachments.each { |a| text << [a, PREFIX['attachment']] }

  # Clamp the date into [MIN_DATE, MAX_DATE].
  truncated_date = m.date
  if m.date < MIN_DATE
    debug "warning: adjusting too-low date #{m.date} for indexing"
    truncated_date = MIN_DATE
  elsif m.date > MAX_DATE
    debug "warning: adjusting too-high date #{m.date} for indexing"
    truncated_date = MAX_DATE
  end

  # Date value for range queries
  date_value = begin
    Xapian.sortable_serialise truncated_date.to_i
  rescue TypeError
    Xapian.sortable_serialise 0
  end

  doc = find_doc(m.id)
  if doc
    # Existing document: wipe terms/values and reuse its docid.
    doc.clear_terms
    doc.clear_values
    docid = doc.docid
  else
    doc = Xapian::Document.new
    docid = assign_docid(m, truncated_date)
    unless docid
      # Could be triggered by spam
      Redwood::log "warning: docid underflow, dropping #{m.id.inspect}"
      return
    end
  end

  @term_generator.document = doc
  text.each { |chunk, prefix| @term_generator.index_text chunk, 1, prefix }
  terms.each { |term| doc.add_term term if term.length <= MAX_TERM_LENGTH }
  doc.add_value MSGID_VALUENO, m.id
  doc.add_value THREAD_VALUENO, (thread_ids * ',')
  doc.add_value DATE_VALUENO, date_value
  doc.data = Marshal.dump entry

  @xapian.replace_document docid, doc
end
|
536
|
+
|
537
|
+
# Construct a Xapian term
|
538
|
+
# Construct a Xapian term: the prefix registered for +type+ followed by a
# value derived from +args+.
#
#   :label, :type, :source_id, :attachment_extension -> downcased args[0]
#   :date   -> args[0] as UTC "%Y%m%d%H%M%S"
#   :email  -> args[0] picks the prefix (:from, :to, :any), args[1] is the
#              address (downcased)
#   :msgid, :ref, :thread -> args[0] cut to MAX_TERM_LENGTH-1 characters
#
# Raises on an unknown +type+ or email sub-type.
def mkterm type, *args
  case type
  when :label, :type, :source_id, :attachment_extension
    PREFIX[type.to_s] + args[0].to_s.downcase
  when :date
    PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
  when :email
    email_prefix =
      case args[0]
      when :from then PREFIX['from_email']
      when :to then PREFIX['to_email']
      when :any then PREFIX['email']
      else raise "Invalid email term type #{args[0]}"
      end
    email_prefix + args[1].to_s.downcase
  when :msgid, :ref, :thread
    PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
  else
    raise "Invalid term type #{type}"
  end
end
|
563
|
+
|
564
|
+
end
|
565
|
+
|
566
|
+
end
|