sup 0.10.2 → 0.11
Potentially problematic release.
This version of sup might be problematic.
- data/CONTRIBUTORS +11 -9
- data/History.txt +14 -0
- data/README.txt +3 -11
- data/ReleaseNotes +16 -0
- data/bin/sup +67 -42
- data/bin/sup-add +2 -20
- data/bin/sup-config +0 -34
- data/bin/sup-dump +2 -5
- data/bin/sup-sync +2 -3
- data/bin/sup-sync-back +2 -3
- data/bin/sup-tweak-labels +2 -3
- data/lib/sup.rb +12 -4
- data/lib/sup/account.rb +2 -0
- data/lib/sup/buffer.rb +11 -2
- data/lib/sup/colormap.rb +59 -49
- data/lib/sup/connection.rb +63 -0
- data/lib/sup/crypto.rb +12 -0
- data/lib/sup/hook.rb +1 -0
- data/lib/sup/idle.rb +42 -0
- data/lib/sup/index.rb +562 -47
- data/lib/sup/keymap.rb +41 -3
- data/lib/sup/message.rb +1 -1
- data/lib/sup/mode.rb +8 -0
- data/lib/sup/modes/console-mode.rb +2 -3
- data/lib/sup/modes/edit-message-mode.rb +32 -7
- data/lib/sup/modes/inbox-mode.rb +4 -0
- data/lib/sup/modes/search-list-mode.rb +188 -0
- data/lib/sup/modes/search-results-mode.rb +17 -1
- data/lib/sup/modes/thread-index-mode.rb +43 -10
- data/lib/sup/modes/thread-view-mode.rb +29 -4
- data/lib/sup/poll.rb +13 -2
- data/lib/sup/search.rb +73 -0
- data/lib/sup/textfield.rb +17 -12
- data/lib/sup/util.rb +11 -0
- metadata +45 -46
- data/bin/sup-convert-ferret-index +0 -84
- data/lib/ncurses.rb +0 -289
- data/lib/sup/ferret_index.rb +0 -476
- data/lib/sup/xapian_index.rb +0 -605
data/lib/sup/idle.rb
ADDED
@@ -0,0 +1,42 @@
+require 'thread'
+
+module Redwood
+
+class IdleManager
+  include Singleton
+
+  IDLE_THRESHOLD = 60
+
+  def initialize
+    @no_activity_since = Time.now
+    @idle = false
+    @thread = nil
+  end
+
+  def ping
+    if @idle
+      UpdateManager.relay self, :unidle, Time.at(@no_activity_since)
+      @idle = false
+    end
+    @no_activity_since = Time.now
+  end
+
+  def start
+    @thread = Redwood::reporting_thread("checking for idleness") do
+      while true
+        sleep 1
+        if !@idle and Time.now.to_i - @no_activity_since.to_i >= IDLE_THRESHOLD
+          UpdateManager.relay self, :idle, Time.at(@no_activity_since)
+          @idle = true
+        end
+      end
+    end
+  end
+
+  def stop
+    @thread.kill if @thread
+    @thread = nil
+  end
+end
+
+end
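IdleManager only tracks the time of the last ping and relays :idle and :unidle events through sup's UpdateManager; the front end is expected to call ping on user activity. To see the threshold logic in isolation, here is a minimal standalone sketch, not sup code, with a made-up 2-second threshold and a plain callback standing in for UpdateManager.relay:

# Standalone sketch (not part of sup): same idle-detection logic as
# IdleManager, but with a plain callback so it runs on its own.
require 'thread'

class ToyIdleWatcher
  def initialize threshold, &on_change
    @threshold = threshold          # seconds of inactivity before :idle
    @on_change = on_change          # called with :idle or :unidle
    @no_activity_since = Time.now
    @idle = false
    @thread = nil
  end

  ## Call on every user event, as sup's input loop would call IdleManager.ping.
  def ping
    @on_change.call :unidle if @idle
    @idle = false
    @no_activity_since = Time.now
  end

  def start
    @thread = Thread.new do
      loop do
        sleep 0.1
        if !@idle && (Time.now - @no_activity_since) >= @threshold
          @idle = true
          @on_change.call :idle
        end
      end
    end
  end

  def stop
    @thread.kill if @thread
    @thread = nil
  end
end

watcher = ToyIdleWatcher.new(2) { |state| puts "#{Time.now}: #{state}" }
watcher.start
sleep 3        # no pings, so :idle fires after roughly 2 seconds
watcher.ping   # fires :unidle and resets the timer
watcher.stop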
data/lib/sup/index.rb
CHANGED
@@ -1,5 +1,7 @@
-
+ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
 
+require 'xapian'
+require 'set'
 require 'fileutils'
 
 begin
@@ -12,9 +14,28 @@ end
 
 module Redwood
 
-
+# This index implementation uses Xapian for searching and storage. It
+# tends to be slightly faster than Ferret for indexing and significantly faster
+# for searching due to precomputing thread membership.
+class Index
   include InteractiveLock
 
+  STEM_LANGUAGE = "english"
+  INDEX_VERSION = '2'
+
+  ## dates are converted to integers for xapian, and are used for document ids,
+  ## so we must ensure they're reasonably valid. this typically only affect
+  ## spam.
+  MIN_DATE = Time.at 0
+  MAX_DATE = Time.at(2**31-1)
+
+  HookManager.register "custom-search", <<EOS
+Executes before a string search is applied to the index,
+returning a new search string.
+Variables:
+  subs: The string being searched.
+EOS
+
   class LockError < StandardError
     def initialize h
       @h = h
@@ -23,8 +44,6 @@ class BaseIndex
     def method_missing m; @h[m.to_s] end
   end
 
-  def is_a_deprecated_ferret_index?; false end
-
   include Singleton
 
   def initialize dir=BASE_DIR
@@ -32,6 +51,7 @@ class BaseIndex
     @lock = Lockfile.new lockfile, :retries => 0, :max_age => nil
     @sync_worker = nil
     @sync_queue = Queue.new
+    @index_mutex = Monitor.new
   end
 
   def lockfile; File.join @dir, "lock" end
@@ -79,25 +99,43 @@ class BaseIndex
   end
 
   def load_index
-
+    path = File.join(@dir, 'xapian')
+    if File.exists? path
+      @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_OPEN)
+      db_version = @xapian.get_metadata 'version'
+      db_version = '0' if db_version.empty?
+      if db_version == '1'
+        info "Upgrading index format 1 to 2"
+        @xapian.set_metadata 'version', INDEX_VERSION
+      elsif db_version != INDEX_VERSION
+        fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please downgrade to your previous version and dump your labels before upgrading to this version (then run sup-sync --restore)."
+      end
+    else
+      @xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
+      @xapian.set_metadata 'version', INDEX_VERSION
+    end
+    @enquire = Xapian::Enquire.new @xapian
+    @enquire.weighting_scheme = Xapian::BoolWeight.new
+    @enquire.docid_order = Xapian::Enquire::ASCENDING
   end
 
-  def add_message m;
-  def update_message m;
-  def update_message_state m;
+  def add_message m; sync_message m, true end
+  def update_message m; sync_message m, true end
+  def update_message_state m; sync_message m, false end
 
-  def save_index
-
+  def save_index
+    info "Flushing Xapian updates to disk. This may take a while..."
+    @xapian.flush
   end
 
   def contains_id? id
-
+    synchronize { find_docid(id) && true }
   end
 
   def contains? m; contains_id? m.id end
 
   def size
-
+    synchronize { @xapian.doccount }
   end
 
   def empty?; size == 0 end
@@ -107,12 +145,14 @@ class BaseIndex
   ## You should probably not call this on a block that doesn't break
   ## rather quickly because the results can be very large.
   def each_id_by_date query={}
-
+    each_id(query) { |id| yield id, lambda { build_message id } }
   end
 
   ## Return the number of matches for query in the index
   def num_results_for query={}
-
+    xapian_query = build_xapian_query query
+    matchset = run_query xapian_query, 0, 0, 100
+    matchset.matches_estimated
   end
 
   ## yield all messages in the thread containing 'm' by repeatedly
@@ -124,28 +164,82 @@ class BaseIndex
   ## true, stops loading any thread if a message with a :killed flag
   ## is found.
   def each_message_in_thread_for m, opts={}
-
+    # TODO thread by subject
+    return unless doc = find_doc(m.id)
+    queue = doc.value(THREAD_VALUENO).split(',')
+    msgids = [m.id]
+    seen_threads = Set.new
+    seen_messages = Set.new [m.id]
+    while not queue.empty?
+      thread_id = queue.pop
+      next if seen_threads.member? thread_id
+      return false if opts[:skip_killed] && thread_killed?(thread_id)
+      seen_threads << thread_id
+      docs = term_docids(mkterm(:thread, thread_id)).map { |x| @xapian.document x }
+      docs.each do |doc|
+        msgid = doc.value MSGID_VALUENO
+        next if seen_messages.member? msgid
+        msgids << msgid
+        seen_messages << msgid
+        queue.concat doc.value(THREAD_VALUENO).split(',')
+      end
+    end
+    msgids.each { |id| yield id, lambda { build_message id } }
+    true
   end
 
   ## Load message with the given message-id from the index
   def build_message id
-
+    entry = synchronize { get_entry id }
+    return unless entry
+
+    source = SourceManager[entry[:source_id]]
+    raise "invalid source #{entry[:source_id]}" unless source
+
+    m = Message.new :source => source, :source_info => entry[:source_info],
+                    :labels => entry[:labels], :snippet => entry[:snippet]
+
+    mk_person = lambda { |x| Person.new(*x.reverse!) }
+    entry[:from] = mk_person[entry[:from]]
+    entry[:to].map!(&mk_person)
+    entry[:cc].map!(&mk_person)
+    entry[:bcc].map!(&mk_person)
+
+    m.load_from_index! entry
+    m
   end
 
   ## Delete message with the given message-id from the index
   def delete id
-
+    synchronize { @xapian.delete_document mkterm(:msgid, id) }
   end
 
   ## Given an array of email addresses, return an array of Person objects that
   ## have sent mail to or received mail from any of the given addresses.
-  def load_contacts email_addresses,
-
+  def load_contacts email_addresses, opts={}
+    contacts = Set.new
+    num = opts[:num] || 20
+    each_id_by_date :participants => email_addresses do |id,b|
+      break if contacts.size >= num
+      m = b.call
+      ([m.from]+m.to+m.cc+m.bcc).compact.each { |p| contacts << [p.name, p.email] }
+    end
+    contacts.to_a.compact.map { |n,e| Person.new n, e }[0...num]
   end
 
   ## Yield each message-id matching query
+  EACH_ID_PAGE = 100
   def each_id query={}
-
+    offset = 0
+    page = EACH_ID_PAGE
+
+    xapian_query = build_xapian_query query
+    while true
+      ids = run_query_ids xapian_query, offset, (offset+page)
+      ids.each { |id| yield id }
+      break if ids.size < page
+      offset += page
+    end
   end
 
   ## Yield each message matching query
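In the hunk above, each document stores its thread ids as a comma-separated list in THREAD_VALUENO, and each_message_in_thread_for walks those ids transitively through the thread terms. The following standalone sketch, not sup code, uses toy in-memory tables in place of the Xapian document values and term postings to show how two overlapping threads get pulled into one result set:

# Standalone sketch: the thread-membership walk from each_message_in_thread_for,
# with hashes standing in for Xapian document values and thread-term postings.
require 'set'

THREADS_OF = {  # message id => thread ids (what THREAD_VALUENO would hold)
  'a@example' => %w(t1),
  'b@example' => %w(t1 t2),
  'c@example' => %w(t2),
}
MEMBERS_OF = {  # thread id => message ids (what the thread term postlist would give)
  't1' => %w(a@example b@example),
  't2' => %w(b@example c@example),
}

def messages_in_thread_for id
  queue = THREADS_OF[id].dup
  msgids = [id]
  seen_threads = Set.new
  seen_messages = Set.new [id]
  until queue.empty?
    thread_id = queue.pop
    next if seen_threads.member? thread_id
    seen_threads << thread_id
    MEMBERS_OF[thread_id].each do |msgid|
      next if seen_messages.member? msgid
      msgids << msgid
      seen_messages << msgid
      queue.concat THREADS_OF[msgid]
    end
  end
  msgids
end

p messages_in_thread_for('a@example')  # => ["a@example", "b@example", "c@example"]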
@@ -155,15 +249,15 @@ class BaseIndex
     end
   end
 
-  ##
+  ## xapian-compact takes too long, so this is a no-op
+  ## until we think of something better
   def optimize
-    unimplemented
   end
 
   ## Return the id source of the source the message with the given message-id
   ## was synced from
   def source_for_id id
-
+    synchronize { get_entry(id)[:source_id] }
   end
 
   class ParseError < StandardError; end
@@ -174,7 +268,130 @@ class BaseIndex
   ##
   ## raises a ParseError if something went wrong.
   def parse_query s
-
+    query = {}
+
+    subs = HookManager.run("custom-search", :subs => s) || s
+    begin
+      subs = SearchManager.expand subs
+    rescue SearchManager::ExpansionError => e
+      raise ParseError, e.message
+    end
+    subs = subs.gsub(/\b(to|from):(\S+)\b/) do
+      field, value = $1, $2
+      email_field, name_field = %w(email name).map { |x| "#{field}_#{x}" }
+      if(p = ContactManager.contact_for(value))
+        "#{email_field}:#{p.email}"
+      elsif value == "me"
+        '(' + AccountManager.user_emails.map { |e| "#{email_field}:#{e}" }.join(' OR ') + ')'
+      else
+        "(#{email_field}:#{value} OR #{name_field}:#{value})"
+      end
+    end
+
+    ## if we see a label:deleted or a label:spam term anywhere in the query
+    ## string, we set the extra load_spam or load_deleted options to true.
+    ## bizarre? well, because the query allows arbitrary parenthesized boolean
+    ## expressions, without fully parsing the query, we can't tell whether
+    ## the user is explicitly directing us to search spam messages or not.
+    ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
+    ## search spam messages or not?
+    ##
+    ## so, we rely on the fact that turning these extra options ON turns OFF
+    ## the adding of "-label:deleted" or "-label:spam" terms at the very
+    ## final stage of query processing. if the user wants to search spam
+    ## messages, not adding that is the right thing; if he doesn't want to
+    ## search spam messages, then not adding it won't have any effect.
+    query[:load_spam] = true if subs =~ /\blabel:spam\b/
+    query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
+
+    ## gmail style "is" operator
+    subs = subs.gsub(/\b(is|has):(\S+)\b/) do
+      field, label = $1, $2
+      case label
+      when "read"
+        "-label:unread"
+      when "spam"
+        query[:load_spam] = true
+        "label:spam"
+      when "deleted"
+        query[:load_deleted] = true
+        "label:deleted"
+      else
+        "label:#{$2}"
+      end
+    end
+
+    ## gmail style attachments "filename" and "filetype" searches
+    subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
+      field, name = $1, ($3 || $4)
+      case field
+      when "filename"
+        debug "filename: translated #{field}:#{name} to attachment:\"#{name.downcase}\""
+        "attachment:\"#{name.downcase}\""
+      when "filetype"
+        debug "filetype: translated #{field}:#{name} to attachment_extension:#{name.downcase}"
+        "attachment_extension:#{name.downcase}"
+      end
+    end
+
+    if $have_chronic
+      lastdate = 2<<32 - 1
+      firstdate = 0
+      subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do
+        field, datestr = $1, ($3 || $4)
+        realdate = Chronic.parse datestr, :guess => false, :context => :past
+        if realdate
+          case field
+          when "after"
+            debug "chronic: translated #{field}:#{datestr} to #{realdate.end}"
+            "date:#{realdate.end.to_i}..#{lastdate}"
+          when "before"
+            debug "chronic: translated #{field}:#{datestr} to #{realdate.begin}"
+            "date:#{firstdate}..#{realdate.end.to_i}"
+          else
+            debug "chronic: translated #{field}:#{datestr} to #{realdate}"
+            "date:#{realdate.begin.to_i}..#{realdate.end.to_i}"
+          end
+        else
+          raise ParseError, "can't understand date #{datestr.inspect}"
+        end
+      end
+    end
+
+    ## limit:42 restrict the search to 42 results
+    subs = subs.gsub(/\blimit:(\S+)\b/) do
+      lim = $1
+      if lim =~ /^\d+$/
+        query[:limit] = lim.to_i
+        ''
+      else
+        raise ParseError, "non-numeric limit #{lim.inspect}"
+      end
+    end
+
+    debug "translated query: #{subs.inspect}"
+
+    qp = Xapian::QueryParser.new
+    qp.database = @xapian
+    qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
+    qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
+    qp.default_op = Xapian::Query::OP_AND
+    qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
+    NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } }
+    BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } }
+
+    begin
+      xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD)
+    rescue RuntimeError => e
+      raise ParseError, "xapian query parser error: #{e}"
+    end
+
+    debug "parsed xapian query: #{xapian_query.description}"
+
+    raise ParseError if xapian_query.nil? or xapian_query.empty?
+    query[:qobj] = xapian_query
+    query[:text] = s
+    query
   end
 
   def save_thread t
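parse_query above rewrites the gmail-style operators into plain prefixed terms before handing the string to Xapian's QueryParser. As a rough illustration, this standalone sketch, not sup code, applies just the is:/filetype: substitutions from the hunk to a sample query; the ContactManager, AccountManager and Chronic date handling are omitted:

# Standalone sketch: the gmail-style rewrites from parse_query applied to a
# sample string, without contact, account or date handling.
subs = 'is:read filetype:pdf label:spam report'

subs = subs.gsub(/\b(is|has):(\S+)\b/) do
  case $2
  when "read" then "-label:unread"
  else "label:#{$2}"    # "spam"/"deleted" also set load_spam/load_deleted in sup
  end
end

subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
  field, name = $1, ($3 || $4)
  field == "filename" ? "attachment:\"#{name.downcase}\"" : "attachment_extension:#{name.downcase}"
end

puts subs  # => -label:unread attachment_extension:pdf label:spam report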
@@ -207,34 +424,332 @@ class BaseIndex
       sleep 0.03
     end
   end
-end
 
-
-
-
-
-
-
-
-
-
-
-
-
+  private
+
+  # Stemmed
+  NORMAL_PREFIX = {
+    'subject' => 'S',
+    'body' => 'B',
+    'from_name' => 'FN',
+    'to_name' => 'TN',
+    'name' => %w(FN TN),
+    'attachment' => 'A',
+    'email_text' => 'E',
+    '' => %w(S B FN TN A E),
+  }
+
+  # Unstemmed
+  BOOLEAN_PREFIX = {
+    'type' => 'K',
+    'from_email' => 'FE',
+    'to_email' => 'TE',
+    'email' => %w(FE TE),
+    'date' => 'D',
+    'label' => 'L',
+    'source_id' => 'I',
+    'attachment_extension' => 'O',
+    'msgid' => 'Q',
+    'id' => 'Q',
+    'thread' => 'H',
+    'ref' => 'R',
+  }
+
+  PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
+
+  MSGID_VALUENO = 0
+  THREAD_VALUENO = 1
+  DATE_VALUENO = 2
+
+  MAX_TERM_LENGTH = 245
+
+  # Xapian can very efficiently sort in ascending docid order. Sup always wants
+  # to sort by descending date, so this method maps between them. In order to
+  # handle multiple messages per second, we use a logistic curve centered
+  # around MIDDLE_DATE so that the slope (docid/s) is greatest in this time
+  # period. A docid collision is not an error - the code will pick the next
+  # smallest unused one.
+  DOCID_SCALE = 2.0**32
+  TIME_SCALE = 2.0**27
+  MIDDLE_DATE = Time.gm(2011)
+  def assign_docid m, truncated_date
+    t = (truncated_date.to_i - MIDDLE_DATE.to_i).to_f
+    docid = (DOCID_SCALE - DOCID_SCALE/(Math::E**(-(t/TIME_SCALE)) + 1)).to_i
+    while docid > 0 and docid_exists? docid
+      docid -= 1
+    end
+    docid > 0 ? docid : nil
+  end
+
+  # XXX is there a better way?
+  def docid_exists? docid
    begin
-
-
-
-
-
+      @xapian.doclength docid
+      true
+    rescue RuntimeError #Xapian::DocNotFoundError
+      raise unless $!.message =~ /DocNotFoundError/
+      false
+    end
+  end
+
+  def term_docids term
+    @xapian.postlist(term).map { |x| x.docid }
+  end
+
+  def find_docid id
+    docids = term_docids(mkterm(:msgid,id))
+    fail unless docids.size <= 1
+    docids.first
+  end
+
+  def find_doc id
+    return unless docid = find_docid(id)
+    @xapian.document docid
+  end
+
+  def get_id docid
+    return unless doc = @xapian.document(docid)
+    doc.value MSGID_VALUENO
+  end
+
+  def get_entry id
+    return unless doc = find_doc(id)
+    Marshal.load doc.data
+  end
+
+  def thread_killed? thread_id
+    not run_query(Q.new(Q::OP_AND, mkterm(:thread, thread_id), mkterm(:label, :Killed)), 0, 1).empty?
+  end
+
+  def synchronize &b
+    @index_mutex.synchronize &b
+  end
+
+  def run_query xapian_query, offset, limit, checkatleast=0
+    synchronize do
+      @enquire.query = xapian_query
+      @enquire.mset(offset, limit-offset, checkatleast)
+    end
+  end
+
+  def run_query_ids xapian_query, offset, limit
+    matchset = run_query xapian_query, offset, limit
+    matchset.matches.map { |r| r.document.value MSGID_VALUENO }
+  end
+
+  Q = Xapian::Query
+  def build_xapian_query opts
+    labels = ([opts[:label]] + (opts[:labels] || [])).compact
+    neglabels = [:spam, :deleted, :killed].reject { |l| (labels.include? l) || opts.member?("load_#{l}".intern) }
+    pos_terms, neg_terms = [], []
+
+    pos_terms << mkterm(:type, 'mail')
+    pos_terms.concat(labels.map { |l| mkterm(:label,l) })
+    pos_terms << opts[:qobj] if opts[:qobj]
+    pos_terms << mkterm(:source_id, opts[:source_id]) if opts[:source_id]
+
+    if opts[:participants]
+      participant_terms = opts[:participants].map { |p| [:from,:to].map { |d| mkterm(:email, d, (Redwood::Person === p) ? p.email : p) } }.flatten
+      pos_terms << Q.new(Q::OP_OR, participant_terms)
+    end
+
+    neg_terms.concat(neglabels.map { |l| mkterm(:label,l) })
+
+    pos_query = Q.new(Q::OP_AND, pos_terms)
+    neg_query = Q.new(Q::OP_OR, neg_terms)
+
+    if neg_query.empty?
+      pos_query
+    else
+      Q.new(Q::OP_AND_NOT, [pos_query, neg_query])
+    end
+  end
+
+  def sync_message m, overwrite
+    doc = synchronize { find_doc(m.id) }
+    existed = doc != nil
+    doc ||= Xapian::Document.new
+    do_index_static = overwrite || !existed
+    old_entry = !do_index_static && doc.entry
+    snippet = do_index_static ? m.snippet : old_entry[:snippet]
+
+    entry = {
+      :message_id => m.id,
+      :source_id => m.source.id,
+      :source_info => m.source_info,
+      :date => truncate_date(m.date),
+      :snippet => snippet,
+      :labels => m.labels.to_a,
+      :from => [m.from.email, m.from.name],
+      :to => m.to.map { |p| [p.email, p.name] },
+      :cc => m.cc.map { |p| [p.email, p.name] },
+      :bcc => m.bcc.map { |p| [p.email, p.name] },
+      :subject => m.subj,
+      :refs => m.refs.to_a,
+      :replytos => m.replytos.to_a,
+    }
+
+    if do_index_static
+      doc.clear_terms
+      doc.clear_values
+      index_message_static m, doc, entry
+    end
+
+    index_message_threading doc, entry, old_entry
+    index_message_labels doc, entry[:labels], (do_index_static ? [] : old_entry[:labels])
+    doc.entry = entry
+
+    synchronize do
+      unless docid = existed ? doc.docid : assign_docid(m, truncate_date(m.date))
+        # Could be triggered by spam
+        warn "docid underflow, dropping #{m.id.inspect}"
+        return
+      end
+      @xapian.replace_document docid, doc
+    end
+
+    m.labels.each { |l| LabelManager << l }
+    true
+  end
+
+  ## Index content that can't be changed by the user
+  def index_message_static m, doc, entry
+    # Person names are indexed with several prefixes
+    person_termer = lambda do |d|
+      lambda do |p|
+        doc.index_text p.name, PREFIX["#{d}_name"] if p.name
+        doc.index_text p.email, PREFIX['email_text']
+        doc.add_term mkterm(:email, d, p.email)
+      end
+    end
+
+    person_termer[:from][m.from] if m.from
+    (m.to+m.cc+m.bcc).each(&(person_termer[:to]))
+
+    # Full text search content
+    subject_text = m.indexable_subject
+    body_text = m.indexable_body
+    doc.index_text subject_text, PREFIX['subject']
+    doc.index_text body_text, PREFIX['body']
+    m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
+
+    # Miscellaneous terms
+    doc.add_term mkterm(:date, m.date) if m.date
+    doc.add_term mkterm(:type, 'mail')
+    doc.add_term mkterm(:msgid, m.id)
+    doc.add_term mkterm(:source_id, m.source.id)
+    m.attachments.each do |a|
+      a =~ /\.(\w+)$/ or next
+      doc.add_term mkterm(:attachment_extension, $1)
+    end
+
+    # Date value for range queries
+    date_value = begin
+      Xapian.sortable_serialise m.date.to_i
+    rescue TypeError
+      Xapian.sortable_serialise 0
+    end
+
+    doc.add_value MSGID_VALUENO, m.id
+    doc.add_value DATE_VALUENO, date_value
+  end
+
+  def index_message_labels doc, new_labels, old_labels
+    return if new_labels == old_labels
+    added = new_labels.to_a - old_labels.to_a
+    removed = old_labels.to_a - new_labels.to_a
+    added.each { |t| doc.add_term mkterm(:label,t) }
+    removed.each { |t| doc.remove_term mkterm(:label,t) }
+  end
+
+  ## Assign a set of thread ids to the document. This is a hybrid of the runtime
+  ## search done by the Ferret index and the index-time union done by previous
+  ## versions of the Xapian index. We first find the thread ids of all messages
+  ## with a reference to or from us. If that set is empty, we use our own
+  ## message id. Otherwise, we use all the thread ids we previously found. In
+  ## the common case there's only one member in that set, but if we're the
+  ## missing link between multiple previously unrelated threads we can have
+  ## more. XapianIndex#each_message_in_thread_for follows the thread ids when
+  ## searching so the user sees a single unified thread.
+  def index_message_threading doc, entry, old_entry
+    return if old_entry && (entry[:refs] == old_entry[:refs]) && (entry[:replytos] == old_entry[:replytos])
+    children = term_docids(mkterm(:ref, entry[:message_id])).map { |docid| @xapian.document docid }
+    parent_ids = entry[:refs] + entry[:replytos]
+    parents = parent_ids.map { |id| find_doc id }.compact
+    thread_members = SavingHash.new { [] }
+    (children + parents).each do |doc2|
+      thread_ids = doc2.value(THREAD_VALUENO).split ','
+      thread_ids.each { |thread_id| thread_members[thread_id] << doc2 }
+    end
+    thread_ids = thread_members.empty? ? [entry[:message_id]] : thread_members.keys
+    thread_ids.each { |thread_id| doc.add_term mkterm(:thread, thread_id) }
+    parent_ids.each { |ref| doc.add_term mkterm(:ref, ref) }
+    doc.add_value THREAD_VALUENO, (thread_ids * ',')
+  end
+
+  def truncate_date date
+    if date < MIN_DATE
+      debug "warning: adjusting too-low date #{date} for indexing"
+      MIN_DATE
+    elsif date > MAX_DATE
+      debug "warning: adjusting too-high date #{date} for indexing"
+      MAX_DATE
+    else
+      date
    end
-    debug "using #{type} index"
-    @obj
  end
 
-
-  def
-
+  # Construct a Xapian term
+  def mkterm type, *args
+    case type
+    when :label
+      PREFIX['label'] + args[0].to_s.downcase
+    when :type
+      PREFIX['type'] + args[0].to_s.downcase
+    when :date
+      PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
+    when :email
+      case args[0]
+      when :from then PREFIX['from_email']
+      when :to then PREFIX['to_email']
+      else raise "Invalid email term type #{args[0]}"
+      end + args[1].to_s.downcase
+    when :source_id
+      PREFIX['source_id'] + args[0].to_s.downcase
+    when :attachment_extension
+      PREFIX['attachment_extension'] + args[0].to_s.downcase
+    when :msgid, :ref, :thread
+      PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
+    else
+      raise "Invalid term type #{type}"
+    end
+  end
 end
 
 end
+
+class Xapian::Document
+  def entry
+    Marshal.load data
+  end
+
+  def entry=(x)
+    self.data = Marshal.dump x
+  end
+
+  def index_text text, prefix, weight=1
+    term_generator = Xapian::TermGenerator.new
+    term_generator.stemmer = Xapian::Stem.new(Redwood::Index::STEM_LANGUAGE)
+    term_generator.document = self
+    term_generator.index_text text, weight, prefix
+  end
+
+  alias old_add_term add_term
+  def add_term term
+    if term.length <= Redwood::Index::MAX_TERM_LENGTH
+      old_add_term term, 0
+    else
+      warn "dropping excessively long term #{term}"
+    end
+  end
+end
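The assign_docid mapping in the last hunk is what makes @enquire.docid_order = ASCENDING come back newest-first: later dates get smaller docids, and the logistic curve concentrates the docid space around MIDDLE_DATE so nearby messages still get distinct ids. A standalone sketch of just that mapping, not sup code, with arbitrary sample years:

# Standalone sketch: the date-to-docid curve used by Index#assign_docid.
DOCID_SCALE = 2.0**32
TIME_SCALE  = 2.0**27
MIDDLE_DATE = Time.gm(2011)

def date_to_docid date
  t = (date.to_i - MIDDLE_DATE.to_i).to_f
  (DOCID_SCALE - DOCID_SCALE / (Math::E**(-(t / TIME_SCALE)) + 1)).to_i
end

[Time.gm(2005), Time.gm(2010), Time.gm(2011), Time.gm(2012)].each do |d|
  printf "%d -> %d\n", d.year, date_to_docid(d)
end
# Each later year prints a strictly smaller docid, so walking docids in
# ascending order visits messages newest-first.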