mailmate 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,23 +35,34 @@
35
35
  # or [] if the message has no flags / isn't indexed.
36
36
  #
37
37
  # IndexReader instances cache both files in memory and build a hash from
38
- # id → [[start,end], …] for O(1) lookup. Construction cost 5–20 ms for
39
- # 50–200k records; memory a few MB. For a CLI invocation that's fine; the
40
- # evaluator instantiates one lazily when first needed.
38
+ # id → [packed_range, …] (start << 32 | end, one Integer per record) for
39
+ # O(1) lookup. Construction cost 5–20 ms for 50–200k records (one bulk
40
+ # unpack("V*") pass); memory a few MB. For a CLI invocation that's fine;
41
+ # the evaluator instantiates one lazily when first needed.
41
42
 
42
43
  module Mailmate
43
44
  # @api public
44
45
  class IndexReader
45
46
  RECORD_SIZE = 12
46
47
 
48
+ # Re-stat the underlying files at most this often per reader (seconds).
49
+ # Short-lived CLI processes never hit the recheck; the persistent MCP
50
+ # server picks up MailMate's continuous index rewrites within this window
51
+ # instead of serving a snapshot from its first request forever.
52
+ FRESHNESS_INTERVAL = 1.0
53
+
47
54
  class << self
48
55
  # Per-process cache of readers keyed by [name, db_headers]. Including
49
56
  # db_headers means a Mailmate.config swap (e.g. a test pointing at a
50
57
  # different tmpdir) doesn't return stale readers built from the old
51
- # path.
58
+ # path. Cached readers are re-validated against the on-disk files'
59
+ # mtime+size (throttled; see FRESHNESS_INTERVAL) so long-lived
60
+ # processes don't serve stale data after MailMate rewrites an index.
52
61
  def for(name)
53
62
  @cache ||= {}
54
- @cache[cache_key(name)] ||= new(name)
63
+ key = cache_key(name)
64
+ @cache.delete(key) if @cache[key]&.stale?
65
+ @cache[key] ||= new(name)
55
66
  end
56
67
 
57
68
  # Invalidate cached readers. With no argument, drops the entire cache
@@ -78,13 +89,33 @@ module Mailmate
78
89
 
79
90
  def initialize(name)
80
91
  @name = name
81
- base = "#{Mailmate.config.db_headers}/#{name}"
82
- raise ArgumentError, "Index not found: #{name} (looked at #{base}.{cache,offsets})" \
83
- unless File.exist?("#{base}.cache") && File.exist?("#{base}.offsets")
92
+ @base = "#{Mailmate.config.db_headers}/#{name}"
93
+ raise ArgumentError, "Index not found: #{name} (looked at #{@base}.{cache,offsets})" \
94
+ unless File.exist?("#{@base}.cache") && File.exist?("#{@base}.offsets")
95
+
96
+ @cache_bytes = File.binread("#{@base}.cache")
97
+ @offsets_bytes = File.binread("#{@base}.offsets")
98
+ @cache_sig = file_sig("#{@base}.cache")
99
+ @offsets_sig = file_sig("#{@base}.offsets")
100
+ @checked_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
101
+ # The id→ranges hash builds lazily (see index): ids_matching-only
102
+ # consumers (the inverted body search) never need it, and skipping it
103
+ # saves ~250 ms of construction on the big body indexes.
104
+ @index = nil
105
+ end
84
106
 
85
- @cache_bytes = File.binread("#{base}.cache")
86
- @offsets_bytes = File.binread("#{base}.offsets")
87
- build_index!
107
+ # True when the on-disk files no longer match what this reader was built
108
+ # from. Throttled to one stat-pair per FRESHNESS_INTERVAL; a vanished
109
+ # file (mid-swap while MailMate rewrites) counts as not-stale so we keep
110
+ # serving the last good snapshot rather than racing the writer.
111
+ def stale?
112
+ now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
113
+ return false if now - @checked_at < FRESHNESS_INTERVAL
114
+ @checked_at = now
115
+ cache_sig = file_sig("#{@base}.cache")
116
+ offsets_sig = file_sig("#{@base}.offsets")
117
+ return false if cache_sig.nil? || offsets_sig.nil?
118
+ cache_sig != @cache_sig || offsets_sig != @offsets_sig
88
119
  end
89
120
 
90
121
  # Returns the raw cached value for a given .eml body-part ID, or nil if
@@ -94,19 +125,19 @@ module Mailmate
94
125
  # body indexes (`#unquoted#lc`, `#quoted#lc`) last-alone is meaningless
95
126
  # — use values_for to read every segment.
96
127
  def value_for(eml_id)
97
- pairs = @index[eml_id.to_i]
98
- return nil if pairs.nil? || pairs.empty?
99
- s, e = pairs[-1]
100
- @cache_bytes[s...e]
128
+ packs = index[eml_id.to_i]
129
+ return nil if packs.nil? || packs.empty?
130
+ v = packs[-1]
131
+ @cache_bytes[(v >> 32)...(v & 0xFFFFFFFF)]
101
132
  end
102
133
 
103
134
  # Returns every recorded value for an id, in offsets-file order. Returns
104
135
  # [] if the id isn't in the index. Use this for body indexes
105
136
  # (#unquoted#lc, #quoted#lc), which store one record per text segment.
106
137
  def values_for(eml_id)
107
- pairs = @index[eml_id.to_i]
108
- return [] if pairs.nil?
109
- pairs.map { |(s, e)| @cache_bytes[s...e] }
138
+ packs = index[eml_id.to_i]
139
+ return [] if packs.nil?
140
+ packs.map { |v| @cache_bytes[(v >> 32)...(v & 0xFFFFFFFF)] }
110
141
  end
111
142
 
112
143
  # `#flags.flag` semantics: the cache stores a space-separated list of IMAP
@@ -117,15 +148,45 @@ module Mailmate
117
148
  v.split(/\s+/).reject(&:empty?)
118
149
  end
119
150
 
151
+ # True when the index has at least one record for this id. Cheaper than
152
+ # values_for(id).empty? — no substring slicing.
153
+ def key?(eml_id)
154
+ index.key?(eml_id.to_i)
155
+ end
156
+
157
+ # Inverted substring search: returns a Hash whose keys are every id with
158
+ # at least one record containing `needle` (byte-wise; pass pre-downcased
159
+ # bytes when querying an #lc index). One memchr-fast String#index scan
160
+ # of the whole cache instead of one substring test per record — for a
161
+ # 77 MB body cache that's ~75 ms versus seconds of per-message lookups.
162
+ #
163
+ # A raw cache hit can span two adjacent records' ranges; interval
164
+ # stabbing keeps only hits that fall entirely inside a single record
165
+ # (per-segment semantics, matching MailMate's own body search). Records
166
+ # sharing a byte range (deduped values) all report their ids.
167
+ def ids_matching(needle)
168
+ needle = needle.b
169
+ found = {}
170
+ return found if needle.empty? || @cache_bytes.empty?
171
+ ensure_stab_table!
172
+ nlen = needle.bytesize
173
+ pos = 0
174
+ while (pos = @cache_bytes.index(needle, pos))
175
+ stab(pos, pos + nlen) { |id| found[id] = true }
176
+ pos += 1
177
+ end
178
+ found
179
+ end
180
+
120
181
  # Number of distinct ids in the index. For multi-record indexes this is
121
182
  # smaller than the on-disk record count (use record_count for that).
122
183
  def size
123
- @index.size
184
+ index.size
124
185
  end
125
186
 
126
187
  # Total number of on-disk records (sum across all ids). Diagnostics.
127
188
  def record_count
128
- @index.values.sum(&:size)
189
+ index.values.sum(&:size)
129
190
  end
130
191
 
131
192
  # Iterate every recorded eml-id. Yields just the id; callers that also
@@ -133,7 +194,7 @@ module Mailmate
133
194
  # modules don't have to reach into `@index` directly.
134
195
  def each_eml_id(&block)
135
196
  return enum_for(:each_eml_id) unless block
136
- @index.each_key(&block)
197
+ index.each_key(&block)
137
198
  end
138
199
 
139
200
  # Iterate every (eml_id, raw_value) pair, once per on-disk record.
@@ -142,22 +203,92 @@ module Mailmate
142
203
  # should massage it themselves.
143
204
  def each_record
144
205
  return enum_for(:each_record) unless block_given?
145
- @index.each do |eml_id, pairs|
146
- pairs.each { |(s, e)| yield eml_id, @cache_bytes[s...e] }
206
+ index.each do |eml_id, packs|
207
+ packs.each { |v| yield eml_id, @cache_bytes[(v >> 32)...(v & 0xFFFFFFFF)] }
147
208
  end
148
209
  end
149
210
 
150
211
  private
151
212
 
152
- def build_index!
153
- @index = Hash.new { |h, k| h[k] = [] }
154
- n = @offsets_bytes.bytesize / RECORD_SIZE
213
+ # Lazy tables for ids_matching's interval stabbing, built once per
214
+ # reader snapshot (a rebuilt reader starts fresh, so staleness handling
215
+ # comes for free). @stab_flat is the raw [id, start, end, …] triple
216
+ # stream; @stab_order holds record numbers sorted by start;
217
+ # @stab_prefix_max_end[i] is the max end among @stab_order[0..i], which
218
+ # lets stab() stop walking left as soon as no earlier-starting record
219
+ # could still reach the queried range. Integers only — no per-record
220
+ # object allocations.
221
+ def ensure_stab_table!
222
+ return if @stab_order
223
+ flat = @offsets_bytes.unpack("V*")
224
+ recs = (flat.size - (flat.size % 3)) / 3
225
+ # Pack (start, recnum) into one Integer and sort! with native compare —
226
+ # a sort_by block is ~3× slower at this record count. start sorts as
227
+ # the high bits; recnum keeps the low bits unique.
228
+ packed = Array.new(recs) { |k| (flat[k * 3 + 1] << 32) | k }
229
+ packed.sort!
230
+ order = packed
231
+ order.map! { |p| p & 0xFFFFFFFF }
232
+ prefix = Array.new(recs)
233
+ max_end = -1
234
+ order.each_with_index do |k, i|
235
+ e = flat[k * 3 + 2]
236
+ max_end = e if e > max_end
237
+ prefix[i] = max_end
238
+ end
239
+ @stab_flat = flat
240
+ @stab_order = order
241
+ @stab_prefix_max_end = prefix
242
+ end
243
+
244
+ # Yields the id of every record whose [start, end) range fully contains
245
+ # [lo, hi). Classic stabbing query over ranges sorted by start: binary
246
+ # search to the last range starting at or before lo, then walk left
247
+ # while the prefix-max end says a covering range is still possible —
248
+ # O(log n + overlap depth), and body-index ranges rarely overlap.
249
+ def stab(lo, hi)
250
+ order = @stab_order
251
+ flat = @stab_flat
252
+ i = order.bsearch_index { |k| flat[k * 3 + 1] > lo }
253
+ i = i.nil? ? order.size - 1 : i - 1
254
+ while i >= 0 && @stab_prefix_max_end[i] >= hi
255
+ k = order[i]
256
+ yield flat[k * 3] if flat[k * 3 + 1] <= lo && flat[k * 3 + 2] >= hi
257
+ i -= 1
258
+ end
259
+ end
260
+
261
+ def file_sig(path)
262
+ st = File.stat(path)
263
+ [st.mtime, st.size]
264
+ rescue SystemCallError
265
+ nil
266
+ end
267
+
268
+ # Decode the offsets file in one bulk unpack — one C call instead of one
269
+ # String#[] + unpack per record (2× faster on the 730k-record body
270
+ # indexes). Each (start, end) range is packed into a single Integer
271
+ # (start << 32 | end) so a record costs an immediate value, not a
272
+ # two-element Array; accessors decode with shift/mask. Caches are tens
273
+ # of MB, so both halves fit 32 bits with room to spare. unpack("V*")
274
+ # silently drops trailing bytes that don't fill a uint32; the % 3 guard
275
+ # drops a trailing partial record.
276
+ def build_index
277
+ h = {}
278
+ flat = @offsets_bytes.unpack("V*")
279
+ n = flat.size - (flat.size % 3)
155
280
  i = 0
156
281
  while i < n
157
- rec = @offsets_bytes[i * RECORD_SIZE, RECORD_SIZE].unpack("V3")
158
- @index[rec[0]] << [rec[1], rec[2]]
159
- i += 1
282
+ (h[flat[i]] ||= []) << ((flat[i + 1] << 32) | flat[i + 2])
283
+ i += 3
160
284
  end
285
+ h
286
+ end
287
+
288
+ # Lazy id→ranges hash: built on first keyed access, skipped entirely by
289
+ # ids_matching-only consumers (the inverted body search).
290
+ def index
291
+ @index ||= build_index
161
292
  end
162
293
  end
163
294
  end
data/lib/mailmate/mcp.rb CHANGED
@@ -7,6 +7,7 @@ require "mailmate"
7
7
  require "mailmate/cli/search"
8
8
  require "mailmate/cli/message"
9
9
  require "mailmate/cli/modify"
10
+ require "mailmate/cli/verify"
10
11
  require "mailmate/cli/send"
11
12
  require "mailmate/cli/draft"
12
13
  require "mailmate/cli/open"
@@ -127,13 +128,14 @@ module Mailmate
127
128
  },
128
129
  {
129
130
  name: "message",
130
- description: "Read one MailMate message. Accepts either local eml-id (digits) or RFC Message-ID (with or without angle brackets). Default output: headers block + plain-text body.",
131
+ description: "Read one MailMate message. Accepts either local eml-id (digits) or RFC Message-ID (with or without angle brackets). Default output: headers block (incl. any user tags) + plain-text body. For HTML-only mail (most newsletters), pass markdown:true to get clean readable markdown instead of raw HTML — strongly preferred for reading and far more token-efficient; it's a no-op on plain-text messages.",
131
132
  inputSchema: {
132
133
  type: "object",
133
134
  properties: {
134
135
  id: { type: "string", description: "eml-id (e.g. '183715') or RFC Message-ID (e.g. '<abc@example.com>')." },
135
136
  raw: { type: "boolean", description: "Return raw .eml bytes." },
136
137
  text_only: { type: "boolean", description: "Body only, no headers block." },
138
+ markdown: { type: "boolean", description: "Render an HTML-only body as clean markdown (drops <style>/<script>, strips newsletter spacer chars). No-op for plain-text messages." },
137
139
  },
138
140
  required: ["id"],
139
141
  additionalProperties: false,
@@ -155,6 +157,20 @@ module Mailmate
155
157
 
156
158
  Valid actions: read unread flag unflag tag untag clear-tags archive
157
159
  junk not-junk mute delete move
160
+
161
+ Verifying the action landed (it can't be read back from MailMate, so
162
+ this re-reads the target eml-id's #flags index — the only way to catch
163
+ a Message-ID that resolved to a different duplicate copy):
164
+ check:"inline" confirm now, before returning (failed → isError).
165
+ Costs a few seconds — MailMate flushes #flags ~5s
166
+ after acting. Use for a high-stakes single mutation.
167
+ check:"defer" don't wait; return a JSON check-ticket instead.
168
+ Collect tickets across a batch, then pass them to the
169
+ `verify` tool to confirm them ALL with one flush-wait
170
+ (the efficient choice for bulk work — 50 modifies pay
171
+ the ~5s latency once, not 50 times).
172
+ check:"none" (default) fire-and-forget.
173
+ Location-changing chains (move/archive/delete) aren't flag-verifiable.
158
174
  DESC
159
175
  inputSchema: {
160
176
  type: "object",
@@ -166,13 +182,40 @@ module Mailmate
166
182
  description: "Flat list of action tokens; arg-taking actions consume the following item.",
167
183
  },
168
184
  dry_run: { type: "boolean", description: "Print plan, don't execute." },
169
- verify: { type: "boolean", description: "Re-read flags after acting to confirm." },
185
+ verify: { type: "boolean", description: "Print the message's current flags after acting (raw probe)." },
186
+ check: { type: "string", enum: %w[none inline defer], description: "Effect-verification mode (default none). 'inline' confirms now (slow, returns isError on failure); 'defer' returns a JSON ticket for batched verification via the `verify` tool." },
170
187
  keep_window: { type: "boolean", description: "Skip the close-window keystroke at the end." },
171
188
  },
172
189
  required: %w[id actions],
173
190
  additionalProperties: false,
174
191
  },
175
192
  },
193
+ {
194
+ name: "verify",
195
+ description: <<~DESC.strip,
196
+ Batch-confirm `modify` check-tickets (from check:"defer") against the
197
+ #flags index in ONE flush-wait. Pass the tickets you collected from a
198
+ run of deferred modifies; this polls the index until every ticket's
199
+ expected flag/tag/read state holds (or check_timeout elapses) and
200
+ returns a JSON summary {checked, passed, failed, waited_seconds,
201
+ results:[{eml_id, ok, flags, unmet}]}. isError if any ticket failed —
202
+ a failure means that action didn't land on that eml-id (wrong
203
+ duplicate copy, or it never registered).
204
+ DESC
205
+ inputSchema: {
206
+ type: "object",
207
+ properties: {
208
+ tickets: {
209
+ type: "array",
210
+ items: { type: "object" },
211
+ description: "The check-ticket objects returned by modify calls made with check:\"defer\".",
212
+ },
213
+ check_timeout: { type: "number", description: "Max seconds to wait for #flags to reflect the batch (default 8)." },
214
+ },
215
+ required: ["tickets"],
216
+ additionalProperties: false,
217
+ },
218
+ },
176
219
  {
177
220
  name: "send",
178
221
  description: "Send mail via MailMate's `emate` (markdown body). Recipients and subject via fields; body is the markdown source. For replies, set `in_reply_to` and `references` so recipients' clients thread the message — without them a `Re:` subject alone is not enough. MailMate generates the outgoing Message-ID automatically.",
@@ -326,6 +369,7 @@ module Mailmate
326
369
  when "search" then call_search(args)
327
370
  when "message" then call_message(args)
328
371
  when "modify" then call_modify(args)
372
+ when "verify" then call_verify(args)
329
373
  when "send" then call_send(args)
330
374
  when "draft" then call_draft(args)
331
375
  when "open" then call_open(args)
@@ -336,6 +380,12 @@ module Mailmate
336
380
  end
337
381
  rescue StandardError => e
338
382
  text_error("#{e.class}: #{e.message}\n#{e.backtrace.first(8).join("\n")}")
383
+ rescue SystemExit => e
384
+ # A CLI path that calls exit/abort (e.g. a missing optional gem) must
385
+ # not take down the persistent server — surface it as a tool error and
386
+ # keep the loop alive. SystemExit isn't a StandardError, so it needs
387
+ # its own clause.
388
+ text_error("Tool '#{name}' called exit(#{e.status}) — treated as failure, server still running.")
339
389
  end
340
390
 
341
391
  # ---- tool handlers ----------------------------------------------------
@@ -359,6 +409,7 @@ module Mailmate
359
409
  argv = [args["id"].to_s]
360
410
  argv << "--raw" if args["raw"]
361
411
  argv << "--text-only" if args["text_only"]
412
+ argv << "--markdown" if args["markdown"]
362
413
  run_cli(Mailmate::CLI::Message, argv)
363
414
  end
364
415
 
@@ -366,10 +417,24 @@ module Mailmate
366
417
  argv = [args["id"].to_s] + Array(args["actions"]).map(&:to_s)
367
418
  argv << "--dry-run" if args["dry_run"]
368
419
  argv << "--verify" if args["verify"]
420
+ case args["check"]
421
+ when "inline" then argv << "--check"
422
+ when "defer" then argv << "--emit-check"
423
+ end
369
424
  argv << "--keep-window" if args["keep_window"]
370
425
  run_cli(Mailmate::CLI::Modify, argv)
371
426
  end
372
427
 
428
+ # Batch-verify deferred check-tickets. Tickets arrive as JSON objects;
429
+ # pipe them to mm-verify on stdin (the same array-or-NDJSON it reads
430
+ # from the CLI).
431
+ def call_verify(args)
432
+ argv = []
433
+ argv.push("--check-timeout", args["check_timeout"].to_s) if args["check_timeout"]
434
+ payload = JSON.generate(Array(args["tickets"]))
435
+ with_stdin(payload) { run_cli(Mailmate::CLI::Verify, argv) }
436
+ end
437
+
373
438
  def call_send(args)
374
439
  argv = compose_argv(args)
375
440
  argv << "--send-now" if args["send_now"]
@@ -39,14 +39,23 @@ module Mailmate
39
39
 
40
40
  private
41
41
 
42
+ # The cached inversion is keyed by db_headers AND pinned to the exact
43
+ # IndexReader object it was built from — when IndexReader.for returns a
44
+ # rebuilt reader (file changed on disk; see IndexReader#stale?), the
45
+ # identity check fails and the inversion rebuilds with it. Keeps the
46
+ # persistent MCP server's part map in sync without explicit resets.
42
47
  def inversion
48
+ reader = Mailmate::IndexReader.for("#root-body-part")
43
49
  @inversions ||= {}
44
- @inversions[Mailmate.config.db_headers] ||= build_inversion
50
+ entry = @inversions[Mailmate.config.db_headers]
51
+ return entry[:inv] if entry && entry[:reader].equal?(reader)
52
+ inv = build_inversion(reader)
53
+ @inversions[Mailmate.config.db_headers] = { reader: reader, inv: inv }
54
+ inv
45
55
  end
46
56
 
47
- def build_inversion
57
+ def build_inversion(reader)
48
58
  inv = Hash.new { |h, k| h[k] = [] }
49
- reader = Mailmate::IndexReader.for("#root-body-part")
50
59
  reader.each_eml_id do |part_id|
51
60
  root_str = reader.value_for(part_id)
52
61
  # Skip deleted parts: MailMate appends an empty trailing record to
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Mailmate
4
- VERSION = "1.4.0"
4
+ VERSION = "1.5.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mailmate
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Murphy-Dye
@@ -98,6 +98,7 @@ executables:
98
98
  - mm-mailboxes
99
99
  - mm-modify
100
100
  - mm-send
101
+ - mm-verify
101
102
  - mmdiscover
102
103
  - mmmessage
103
104
  - mmopen
@@ -114,6 +115,7 @@ files:
114
115
  - exe/mm-mailboxes
115
116
  - exe/mm-modify
116
117
  - exe/mm-send
118
+ - exe/mm-verify
117
119
  - exe/mmdiscover
118
120
  - exe/mmmessage
119
121
  - exe/mmopen
@@ -132,11 +134,13 @@ files:
132
134
  - lib/mailmate/cli/search.rb
133
135
  - lib/mailmate/cli/send.rb
134
136
  - lib/mailmate/cli/tags.rb
137
+ - lib/mailmate/cli/verify.rb
135
138
  - lib/mailmate/config.rb
136
139
  - lib/mailmate/duplicate_scanner.rb
137
140
  - lib/mailmate/eml_lookup.rb
138
141
  - lib/mailmate/evaluator.rb
139
142
  - lib/mailmate/filter_classifier.rb
143
+ - lib/mailmate/flag_check.rb
140
144
  - lib/mailmate/header_reader.rb
141
145
  - lib/mailmate/identity.rb
142
146
  - lib/mailmate/index_reader.rb