mailmate 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -102,7 +102,7 @@ module Mailmate
102
102
  return 1
103
103
  end
104
104
 
105
- specs = parse_search(search_string)
105
+ specs = order_specs(parse_search(search_string))
106
106
 
107
107
  # Compose + parse the smart-mailbox filter exactly once. The same AST
108
108
  # feeds the evaluator, the tier classifier, and the literals extractor.
@@ -157,7 +157,7 @@ module Mailmate
157
157
  epoch = Time.at(0)
158
158
  rows.sort_by! do |r|
159
159
  s = reader && (reader.value_for(r[0].to_i) rescue nil)
160
- (s && !s.empty? && (Time.parse(s) rescue nil)) || epoch
160
+ (s && !s.empty? && (fast_time(s) || (Time.parse(s) rescue nil))) || epoch
161
161
  end
162
162
  rows.reverse! if mode == :desc
163
163
  rows
@@ -349,82 +349,196 @@ module Mailmate
349
349
  specs
350
350
  end
351
351
 
352
- # ---- date matching ------------------------------------------------------
352
+ # Static cost rank per spec field for AND evaluation order: compiled
353
+ # date compare < header/tag index lookup < body matching (resolves
354
+ # part-ids and walks every body segment). Used by order_specs.
355
+ SPEC_COST = {
356
+ date: 0,
357
+ from: 1, recipients: 1, cc: 1, subject: 1, address_any: 1, any: 1,
358
+ tag: 1, keyword: 1,
359
+ body: 2, message_or_body: 2,
360
+ }.freeze
353
361
 
354
- def date_matches?(mail, eml_id, term)
355
- d = nil
356
- if eml_id
357
- s = (Mailmate::IndexReader.for("#date").value_for(eml_id.to_i) rescue nil)
358
- if s && !s.empty?
359
- d = (Time.parse(s) rescue nil)
360
- end
361
- end
362
- if d.nil? && mail
363
- raw = mail.date
364
- d = raw.respond_to?(:to_time) ? raw.to_time : raw
362
+ # Evaluate cheap, selective specs before expensive ones. specs combine
363
+ # with AND (order-independent), and matches? short-circuits on the
364
+ # first miss — so `b invoice d 7d` should date-reject 47k messages
365
+ # before body matching ever runs, not after. Stable within a cost rank
366
+ # to keep the user's order deterministic.
367
+ def order_specs(specs)
368
+ specs.sort_by.with_index { |(field, _term, _negate), i| [SPEC_COST.fetch(field, 1), i] }
369
+ end
370
+
371
+ # ---- date matching ------------------------------------------------------
372
+ #
373
+ # The `#date` index stores fixed-format strings ("2026-03-19 18:55:19
374
+ # -0600", sender-local time with varying UTC offsets — NOT lexically
375
+ # comparable). date_matches? runs once per candidate message, so the
376
+ # hot path avoids Time.parse (~10× slower than slicing) and per-call
377
+ # cutoff arithmetic: terms compile once to an inclusive [lo, hi] range
378
+ # of YYYYMMDD integers, and the indexed value slices straight to the
379
+ # same integer form. Calendar-date comparison semantics are unchanged.
380
+
381
+ # Compiled day-range for a date term, memoized per term. nil = term
382
+ # can't match anything. The memo resets when the calendar day rolls
383
+ # over so relative terms ("1d") stay correct in long-lived processes
384
+ # (the MCP server).
385
+ def date_range_for(term)
386
+ today = Date.today
387
+ if @date_ranges_day != today
388
+ @date_ranges_day = today
389
+ @date_ranges = {}
365
390
  end
366
- return false unless d
391
+ return @date_ranges[term] if @date_ranges.key?(term)
392
+ @date_ranges[term] = compile_date_range(term, today)
393
+ end
367
394
 
395
+ def compile_date_range(term, today)
368
396
  if term =~ /\A(\d+)([dwmy])\z/
369
397
  n, u = Regexp.last_match(1).to_i, Regexp.last_match(2)
370
398
  cutoff = case u
371
- when "d" then Date.today - n
372
- when "w" then Date.today - (n * 7)
373
- when "m" then Date.today << n
374
- when "y" then Date.today << (n * 12)
399
+ when "d" then today - n
400
+ when "w" then today - (n * 7)
401
+ when "m" then today << n
402
+ when "y" then today << (n * 12)
375
403
  end
376
- return d.to_date >= cutoff
404
+ return [ymd_int(cutoff), 9999_12_31]
377
405
  end
378
406
 
379
- norm = term.tr("/.", "-")
380
- parts = norm.split("-")
407
+ parts = term.tr("/.", "-").split("-")
408
+ y = parts[0].to_i
409
+ return nil if y.zero?
381
410
  case parts.size
382
- when 1 then d.year.to_s == parts[0]
383
- when 2 then d.year.to_s == parts[0] && d.month == parts[1].to_i
384
- when 3 then d.to_date == Date.new(parts[0].to_i, parts[1].to_i, parts[2].to_i)
385
- else false
411
+ when 1 then [y * 10_000 + 101, y * 10_000 + 1231]
412
+ when 2 then [y * 10_000 + parts[1].to_i * 100 + 1, y * 10_000 + parts[1].to_i * 100 + 31]
413
+ when 3 then [ymd = y * 10_000 + parts[1].to_i * 100 + parts[2].to_i, ymd]
414
+ end
415
+ end
416
+
417
+ def ymd_int(d)
418
+ d.year * 10_000 + d.month * 100 + d.day
419
+ end
420
+
421
+ # "2026-03-19 …" → 20260319 without Time.parse. nil when the value
422
+ # isn't in the indexed shape (caller falls back to the slow path).
423
+ def fast_ymd(s)
424
+ return nil unless s && s.length >= 10 && s.getbyte(4) == 0x2D && s.getbyte(7) == 0x2D
425
+ y = s[0, 4].to_i
426
+ m = s[5, 2].to_i
427
+ d = s[8, 2].to_i
428
+ return nil if y.zero? || m.zero? || d.zero?
429
+ y * 10_000 + m * 100 + d
430
+ end
431
+
432
+ def date_matches?(mail, eml_id, term)
433
+ range = date_range_for(term)
434
+ return false unless range
435
+
436
+ ymd = nil
437
+ if eml_id
438
+ s = (reader_for("#date")&.value_for(eml_id.to_i) rescue nil)
439
+ if s && !s.empty?
440
+ ymd = fast_ymd(s)
441
+ if ymd.nil?
442
+ t = (Time.parse(s) rescue nil)
443
+ ymd = t && ymd_int(t.to_date)
444
+ end
445
+ end
446
+ end
447
+ if ymd.nil? && mail
448
+ raw = mail.date
449
+ d = raw.respond_to?(:to_time) ? raw.to_time : raw
450
+ ymd = d && ymd_int(d.to_date)
386
451
  end
452
+ return false unless ymd
453
+
454
+ ymd >= range[0] && ymd <= range[1]
387
455
  rescue StandardError
388
456
  false
389
457
  end
390
458
 
391
459
  # ---- field-value matching -----------------------------------------------
460
+ #
461
+ # Match haystacks are RAW BYTES (ASCII-8BIT), not scrubbed UTF-8: the
462
+ # index values come straight out of the cache slice and the needle is
463
+ # `term.b`, so substring matching is byte-wise. That's exact for valid
464
+ # UTF-8 (lead bytes can't alias continuation bytes) and saves the
465
+ # dup + force_encoding + scrub allocations per header per message —
466
+ # scrubbing only matters when a value is *emitted*, which extract()
467
+ # still does via header_index_value.
468
+
469
+ # Memoized "<name>#lc" strings — interpolating per lookup costs an
470
+ # allocation per header per message.
471
+ LC_NAMES = Hash.new { |h, n| h[n] = "#{n}#lc" }
472
+
473
+ # Per-name reader memo for the match loop. IndexReader.for is cached
474
+ # but not free (cache-key allocation + staleness throttle check per
475
+ # call), and the loop calls it several times per message. The memo is
476
+ # keyed to the active db_headers (config swaps in tests) and reset at
477
+ # the top of collect_rows, so one search run sees one consistent index
478
+ # snapshot; staleness is re-checked between runs, which is the same
479
+ # granularity the MCP server needs.
480
+ def reader_for(name)
481
+ dbh = Mailmate.config.db_headers
482
+ if !defined?(@hdr_readers) || @hdr_readers.nil? || @hdr_readers_dbh != dbh
483
+ @hdr_readers = {}
484
+ @hdr_readers_dbh = dbh
485
+ end
486
+ return @hdr_readers[name] if @hdr_readers.key?(name)
487
+ @hdr_readers[name] =
488
+ begin
489
+ Mailmate::IndexReader.for(name)
490
+ rescue ArgumentError
491
+ nil
492
+ end
493
+ end
494
+
495
+ def reset_run_caches!
496
+ @hdr_readers = nil
497
+ end
392
498
 
393
- # Lowercased index value for a header — tries `<name>#lc` (MailMate's
394
- # pre-downcased index) first, falls back to `<name>` + downcase. Returns
395
- # nil if neither index has a record for this eml-id.
499
+ # Lowercased raw index value for a header — tries `<name>#lc`
500
+ # (MailMate's pre-downcased index) first, falls back to `<name>` +
501
+ # downcase (byte-wise, i.e. ASCII-only, which is fine: the #lc index exists
502
+ # for every header MailMate matches on, so the fallback is for tests
503
+ # and fresh installs). Returns nil if neither index has a record.
396
504
  def header_index_value_lc(eml_id, name)
397
- v = header_index_value(eml_id, "#{name}#lc")
505
+ v = header_index_value_raw(eml_id, LC_NAMES[name])
398
506
  return v unless v.nil?
399
- raw = header_index_value(eml_id, name)
400
- raw&.downcase
507
+ header_index_value_raw(eml_id, name)&.downcase
401
508
  end
402
509
 
403
- # Substring-match haystack for a filter modifier. Index-first; mail
404
- # fallback only kicks in for the no-index case (tests, fresh installs,
405
- # messages MailMate hasn't indexed yet).
510
+ # Unscrubbed twin of header_index_value, for match paths only.
511
+ def header_index_value_raw(eml_id, name)
512
+ return nil if eml_id.nil?
513
+ reader_for(name)&.value_for(eml_id.to_i)
514
+ end
515
+
516
+ # Substring-match haystack for a filter modifier, as raw bytes (mail
517
+ # fallbacks are downcased then `.b`'d so every return path has the
518
+ # same encoding). Index-first; mail fallback only kicks in for the
519
+ # no-index case (tests, fresh installs, unindexed messages).
406
520
  def field_value(eml_id, mail, field)
407
521
  case field
408
522
  when :from
409
523
  idx = header_index_value_lc(eml_id, "from")
410
524
  return idx if idx && !idx.empty?
411
- mail ? [Array(mail.from), mail[:from]&.value.to_s].flatten.join(" ").downcase : ""
525
+ mail ? [Array(mail.from), mail[:from]&.value.to_s].flatten.join(" ").downcase.b : "".b
412
526
  when :recipients
413
527
  parts = %w[to cc].map { |n| header_index_value_lc(eml_id, n) }.compact.reject(&:empty?)
414
528
  return parts.join(" ") unless parts.empty?
415
- mail ? [Array(mail.to), Array(mail.cc), mail[:to]&.value.to_s, mail[:cc]&.value.to_s].flatten.join(" ").downcase : ""
529
+ mail ? [Array(mail.to), Array(mail.cc), mail[:to]&.value.to_s, mail[:cc]&.value.to_s].flatten.join(" ").downcase.b : "".b
416
530
  when :cc
417
531
  idx = header_index_value_lc(eml_id, "cc")
418
532
  return idx if idx && !idx.empty?
419
- mail ? [Array(mail.cc), mail[:cc]&.value.to_s].flatten.join(" ").downcase : ""
533
+ mail ? [Array(mail.cc), mail[:cc]&.value.to_s].flatten.join(" ").downcase.b : "".b
420
534
  when :subject
421
535
  idx = header_index_value_lc(eml_id, "subject")
422
536
  return idx if idx && !idx.empty?
423
- mail ? mail.subject.to_s.downcase : ""
537
+ mail ? mail.subject.to_s.downcase.b : "".b
424
538
  when :address_any
425
539
  parts = %w[from to cc reply-to sender].map { |n| header_index_value_lc(eml_id, n) }.compact.reject(&:empty?)
426
540
  return parts.join(" ") unless parts.empty?
427
- mail ? [mail[:from], mail[:to], mail[:cc], mail[:reply_to], mail[:sender]].compact.map { |h| h.value.to_s }.join(" ").downcase : ""
541
+ mail ? [mail[:from], mail[:to], mail[:cc], mail[:reply_to], mail[:sender]].compact.map { |h| h.value.to_s }.join(" ").downcase.b : "".b
428
542
  end
429
543
  end
430
544
 
@@ -434,7 +548,7 @@ module Mailmate
434
548
  # (Thunderbird/Apple) system flags so substring matches only hit user tags.
435
549
  def tag_value(eml_id)
436
550
  return "" unless eml_id
437
- flags = (Mailmate::IndexReader.for("#flags").flags_for(eml_id.to_i) rescue [])
551
+ flags = (reader_for("#flags")&.flags_for(eml_id.to_i) || [])
438
552
  flags.reject { |f| f.start_with?("\\", "$") }.join(" ").downcase
439
553
  end
440
554
 
@@ -515,26 +629,112 @@ module Mailmate
515
629
 
516
630
  def matches?(mail, eml_id, specs, headers_only, path = nil, index_only: false, exclude_quoted: false)
517
631
  specs.all? do |field, term, negate|
632
+ term_b = term.b
518
633
  hit =
519
634
  case field
520
635
  when :from, :recipients, :cc, :subject, :address_any
521
- field_value(eml_id, mail, field).include?(term)
636
+ field_value(eml_id, mail, field).include?(term_b)
522
637
  when :tag, :keyword
523
- tag_value(eml_id).include?(term)
638
+ tag_value(eml_id).include?(term_b)
524
639
  when :body
525
- headers_only ? false : body_value(eml_id, mail, path, index_only: index_only, exclude_quoted: exclude_quoted).include?(term)
640
+ headers_only ? false : body_matches?(eml_id, mail, path, term, term_b, index_only: index_only, exclude_quoted: exclude_quoted)
526
641
  when :message_or_body
527
- common = %i[from recipients subject].any? { |f| field_value(eml_id, mail, f).include?(term) }
528
- common || (!headers_only && body_value(eml_id, mail, path, index_only: index_only, exclude_quoted: exclude_quoted).include?(term))
642
+ common = %i[from recipients subject].any? { |f| field_value(eml_id, mail, f).include?(term_b) }
643
+ common || (!headers_only && body_matches?(eml_id, mail, path, term, term_b, index_only: index_only, exclude_quoted: exclude_quoted))
529
644
  when :date
530
645
  date_matches?(mail, eml_id, term)
531
646
  when :any
532
- %i[from recipients subject].any? { |f| field_value(eml_id, mail, f).include?(term) }
647
+ %i[from recipients subject].any? { |f| field_value(eml_id, mail, f).include?(term_b) }
533
648
  end
534
649
  negate ? !hit : hit
535
650
  end
536
651
  end
537
652
 
653
+ # ---- body matching --------------------------------------------------
654
+ #
655
+ # Body matching is inverted: instead of fetching and testing every
656
+ # body segment of every candidate message (which reallocates most of
657
+ # the body cache per search), one ids_matching scan per body index
658
+ # finds every part-id containing the term, mapped once to a set of
659
+ # envelope ids. Per message the test is then a hash lookup. The
660
+ # per-message segment walk (body_index_records / body_value) survives
661
+ # as the fallback when the body indexes aren't on disk at all (tests,
662
+ # fresh installs), and the Mail.read fallback for unindexed messages
663
+ # under --all is unchanged.
664
+
665
+ def body_matches?(eml_id, mail, path, term, term_b, index_only: false, exclude_quoted: false)
666
+ env = eml_id&.to_i
667
+ cands = env && body_candidates(term_b, exclude_quoted: exclude_quoted)
668
+ if cands
669
+ return true if cands.key?(env)
670
+ return false if index_only
671
+ # Indexed but not a candidate = a real non-match; only unindexed
672
+ # messages get the --all read-the-eml fallback below.
673
+ return false if body_indexed?(env, exclude_quoted: exclude_quoted)
674
+ else
675
+ segs = body_index_records(eml_id, exclude_quoted: exclude_quoted)
676
+ return segs.any? { |s| s.b.include?(term_b) } unless segs.empty?
677
+ return false if index_only
678
+ end
679
+ return text_body(mail).include?(term) if mail
680
+ return false if path.nil?
681
+ begin
682
+ text_body(Mail.read(path)).include?(term)
683
+ rescue StandardError
684
+ false
685
+ end
686
+ end
687
+
688
+ # Envelope-id candidate set for a body term: every message with at
689
+ # least one body segment containing the bytes. Returns nil when the
690
+ # body indexes are unavailable (callers fall back to the per-message
691
+ # walk). Memoized per (term, exclude_quoted) and pinned to the reader
692
+ # objects it was built from, so an index rebuild (staleness, reset!)
693
+ # invalidates naturally; the size cap stops distinct-term buildup in
694
+ # the long-lived MCP server.
695
+ def body_candidates(term_b, exclude_quoted: false)
696
+ names = exclude_quoted ? ["#unquoted#lc"] : ["#unquoted#lc", "#quoted#lc"]
697
+ readers = names.map { |n| (Mailmate::IndexReader.for(n) rescue nil) }.compact
698
+ return nil if readers.empty?
699
+
700
+ @body_cands ||= {}
701
+ key = [term_b, exclude_quoted]
702
+ entry = @body_cands[key]
703
+ if entry && entry[:readers].size == readers.size &&
704
+ entry[:readers].zip(readers).all? { |a, b| a.equal?(b) }
705
+ return entry[:set]
706
+ end
707
+
708
+ @body_cands.clear if @body_cands.size > 32
709
+ set = {}
710
+ readers.each do |r|
711
+ r.ids_matching(term_b).each_key { |pid| set[envelope_of(pid)] = true }
712
+ end
713
+ @body_cands[key] = { readers: readers, set: set }
714
+ set
715
+ end
716
+
717
+ # Map a body-part-id back to its envelope (.eml) id via
718
+ # #root-body-part; single-part messages have no entry there (the
719
+ # envelope IS the body part), so fall through to the part-id itself.
720
+ def envelope_of(part_id)
721
+ root = (Mailmate::IndexReader.for("#root-body-part").value_for(part_id) rescue nil)
722
+ root && !root.empty? ? root.to_i : part_id
723
+ end
724
+
725
+ # Does this envelope have any body-index records at all? Distinguishes
726
+ # "indexed, doesn't contain the term" (no match) from "MailMate hasn't
727
+ # body-indexed it" (eligible for the --all Mail.read fallback).
728
+ def body_indexed?(env, exclude_quoted: false)
729
+ part_ids = Mailmate::PartLookup.body_parts_of(env)
730
+ part_ids = [env] if part_ids.empty?
731
+ names = exclude_quoted ? ["#unquoted#lc"] : ["#unquoted#lc", "#quoted#lc"]
732
+ names.any? do |n|
733
+ r = (Mailmate::IndexReader.for(n) rescue nil)
734
+ r && part_ids.any? { |pid| r.key?(pid) }
735
+ end
736
+ end
737
+
538
738
  # ---- pre-filter ---------------------------------------------------------
539
739
  #
540
740
  # Filter modifiers (f/t/s/c/a) now match through MailMate's per-header
@@ -567,12 +767,29 @@ module Mailmate
567
767
 
568
768
  # ---- timestamp ----------------------------------------------------------
569
769
 
770
+ # Slice-parse a `#date` index value ("2026-03-19 18:55:19 -0600") into a
771
+ # Time, preserving the embedded UTC offset. ~10× faster than Time.parse.
772
+ # Returns nil when the value isn't exactly that shape (caller falls back
773
+ # to Time.parse).
774
+ def fast_time(s)
775
+ return nil unless s && s.length >= 25 &&
776
+ s.getbyte(4) == 0x2D && s.getbyte(7) == 0x2D &&
777
+ s.getbyte(13) == 0x3A && s.getbyte(16) == 0x3A
778
+ off = s[20, 5]
779
+ return nil unless off.match?(/\A[+-]\d{4}\z/)
780
+ Time.new(s[0, 4].to_i, s[5, 2].to_i, s[8, 2].to_i,
781
+ s[11, 2].to_i, s[14, 2].to_i, s[17, 2].to_i,
782
+ "#{off[0, 3]}:#{off[3, 2]}")
783
+ rescue ArgumentError
784
+ nil
785
+ end
786
+
570
787
  # Absolute send time for an eml_id, preferring the MailMate `#date` index
571
788
  # (cheap, no .eml read). Falls back to the parsed mail's Date header.
572
789
  def message_time(eml_id, mail)
573
790
  s = (Mailmate::IndexReader.for("#date").value_for(eml_id.to_i) rescue nil)
574
791
  if s && !s.empty?
575
- t = (Time.parse(s) rescue nil)
792
+ t = fast_time(s) || (Time.parse(s) rescue nil)
576
793
  return t if t
577
794
  end
578
795
  raw = mail&.date
@@ -717,6 +934,7 @@ module Mailmate
717
934
  end
718
935
 
719
936
  def collect_rows(dirs:, specs:, fields:, smart_evaluator:, smart_literals:, filter_only_tier:, load_tier:, opts:)
937
+ reset_run_caches!
720
938
  rows = []
721
939
  catch(:done) do
722
940
  dirs.each do |dir|
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "optparse"
4
+ require "json"
5
+ require_relative "../flag_check"
6
+
7
+ module Mailmate
8
+ module CLI
9
+ # `mm-verify` — batch-confirm `mm-modify --emit-check` tickets against the
10
+ # `#flags` index in ONE index-flush wait.
11
+ #
12
+ # MailMate flushes `#flags` to disk a few seconds after an AppleScript
13
+ # action, and it's a single global file. So a batch of N modifies can be
14
+ # confirmed by waiting once for that flush and reading the index once —
15
+ # not by polling per message (which would pay the latency N times). Feed
16
+ # this the tickets `mm-modify --emit-check` printed (a JSON array, or
17
+ # newline-delimited JSON objects); it polls the index until every ticket's
18
+ # expectations hold or --check-timeout elapses, then prints a JSON summary.
19
+ #
20
+ # Exit: 0 all confirmed, 3 one or more failed, 2 bad input.
21
+ # @api private
22
+ module Verify
23
+ extend self
24
+
25
+ def run(argv)
26
+ opts = { check_timeout: 8.0, poll: 0.25, pretty: true, file: nil }
27
+ parser = build_parser(opts)
28
+ parser.parse!(argv)
29
+
30
+ raw = read_input(opts, argv)
31
+ return usage_error(parser, "no ticket input (pass a file, JSON arg, or pipe on stdin)") if raw.nil? || raw.strip.empty?
32
+
33
+ tickets =
34
+ begin
35
+ parse_tickets(raw)
36
+ rescue JSON::ParserError => e
37
+ warn "mm-verify: could not parse tickets as JSON array or NDJSON: #{e.message}"
38
+ return 2
39
+ end
40
+ return usage_error(parser, "no tickets found in input") if tickets.empty?
41
+
42
+ summary = verify(tickets, timeout: opts[:check_timeout], poll: opts[:poll])
43
+ $stdout.puts(opts[:pretty] ? JSON.pretty_generate(summary) : JSON.generate(summary))
44
+ summary["failed"].zero? ? 0 : 3
45
+ end
46
+
47
+ # Poll the #flags index until every ticket's expectations hold or the
48
+ # timeout elapses; one index read per poll iteration covers the whole
49
+ # batch. Returns the summary Hash.
50
+ def verify(tickets, timeout:, poll:)
51
+ deadline = Time.now + timeout
52
+ started = Time.now
53
+ results = nil
54
+ loop do
55
+ results = check_all(tickets)
56
+ break if results.all? { |r| r["ok"] }
57
+ break if Time.now >= deadline
58
+ sleep(poll)
59
+ end
60
+ passed = results.count { |r| r["ok"] }
61
+ {
62
+ "checked" => results.size,
63
+ "passed" => passed,
64
+ "failed" => results.size - passed,
65
+ "waited_seconds" => (Time.now - started).round(2),
66
+ "results" => results,
67
+ }
68
+ end
69
+
70
+ # One pass: read #flags fresh, then evaluate every ticket against it.
71
+ def check_all(tickets)
72
+ reader = fresh_flags_reader
73
+ tickets.map do |t|
74
+ eml_id = t["eml_id"].to_i
75
+ exps = Array(t["expectations"])
76
+ flags = reader ? reader.flags_for(eml_id) : []
77
+ unmet = exps.reject { |kind, arg| Mailmate::FlagCheck.met?(flags, kind, arg) }
78
+ {
79
+ "eml_id" => eml_id,
80
+ "message_id" => t["message_id"],
81
+ "ok" => unmet.empty?,
82
+ "flags" => flags,
83
+ "unmet" => unmet.map { |kind, arg| Mailmate::FlagCheck.label(kind, arg) },
84
+ }
85
+ end
86
+ end
87
+
88
+ # Force a fresh read of #flags (bypass the staleness throttle so each
89
+ # poll sees the latest on-disk state). nil if the index is absent.
90
+ def fresh_flags_reader
91
+ Mailmate::IndexReader.reset!("#flags")
92
+ Mailmate::IndexReader.for("#flags")
93
+ rescue ArgumentError
94
+ nil
95
+ end
96
+
97
+ # Accepts a JSON array of ticket objects, or newline-delimited JSON
98
+ # objects (what `mm-modify --emit-check` prints, one per line). A bare
99
+ # single object is wrapped, but only when it sits on one line.
100
+ def parse_tickets(raw)
101
+ s = raw.strip
102
+ if s.start_with?("[")
103
+ Array(JSON.parse(s))
104
+ elsif s.start_with?("{") && !s.include?("\n")
105
+ [JSON.parse(s)]
106
+ else
107
+ s.each_line.map(&:strip).reject(&:empty?).map { |line| JSON.parse(line) }
108
+ end
109
+ end
110
+
111
+ def read_input(opts, argv)
112
+ return File.read(opts[:file]) if opts[:file]
113
+ return argv.join("\n") unless argv.empty?
114
+ return nil if $stdin.tty?
115
+ $stdin.read
116
+ end
117
+
118
+ def build_parser(opts)
119
+ OptionParser.new do |o|
120
+ o.banner = <<~BANNER
121
+ Usage: mm-verify [tickets.json] [options]
122
+ mm-modify <id> <action> --emit-check | ... | mm-verify
123
+
124
+ Confirm a batch of `mm-modify --emit-check` tickets against the #flags
125
+ index, paying the index-flush wait ONCE for the whole batch. Input is a
126
+ JSON array of tickets, or newline-delimited JSON objects, read from a
127
+ file (positional or --file), a JSON argument, or stdin.
128
+
129
+ Output: a JSON summary {checked, passed, failed, waited_seconds, results}.
130
+ Exit 0 if all confirmed, 3 if any failed, 2 on bad input.
131
+ BANNER
132
+ o.on("--file PATH", "Read tickets from PATH instead of stdin") { |p| opts[:file] = p }
133
+ o.on("--check-timeout SECONDS", Float, "Max seconds to wait for #flags to reflect the batch (default 8.0)") { |s| opts[:check_timeout] = s }
134
+ o.on("--poll SECONDS", Float, "Index re-read interval while waiting (default 0.25)") { |s| opts[:poll] = s }
135
+ o.on("--compact", "Compact JSON output (default pretty)") { opts[:pretty] = false }
136
+ end
137
+ end
138
+
139
+ def usage_error(parser, msg)
140
+ warn "mm-verify: #{msg}"
141
+ warn parser.help
142
+ 2
143
+ end
144
+ end
145
+ end
146
+ end
@@ -25,9 +25,11 @@ module Mailmate
25
25
  end
26
26
 
27
27
  # Reverse-lookup: given an RFC Message-ID (with or without angle brackets),
28
- # return the local eml-id (integer) or nil. O(n) scan of the message-id
29
- # index fine for one-shot CLI lookups; cache the result if you need it
30
- # repeatedly.
28
+ # return the local eml-id (integer) or nil. Backed by a value→id map built
29
+ # once per index snapshot (one O(n) pass, then O(1) lookups); the map is
30
+ # pinned to the IndexReader object it was built from, so when the index
31
+ # rebuilds (file changed on disk) the map rebuilds with it. Matters for
32
+ # the persistent MCP server, where resolve_id is called repeatedly.
31
33
  def self.eml_id_for_message_id(message_id)
32
34
  needle = message_id.to_s.strip
33
35
  return nil if needle.empty?
@@ -36,14 +38,29 @@ module Mailmate
36
38
  [needle, needle[1..-2]] :
37
39
  [needle, "<#{needle}>"]
38
40
 
39
- Mailmate::IndexReader.for("message-id").each_record do |eml_id, value|
40
- return eml_id if candidates.include?(value)
41
+ map = mid_map
42
+ candidates.each do |c|
43
+ id = map[c]
44
+ return id if id
41
45
  end
42
46
  nil
43
47
  rescue ArgumentError
44
48
  nil
45
49
  end
46
50
 
51
+ # value→eml-id map over the message-id index. `||=` keeps the FIRST id
52
+ # recorded for a duplicated Message-ID, matching the old scan's
53
+ # first-match-wins semantics (duplicates are real: Sent + Received
54
+ # copies, Gmail label copies).
55
+ def self.mid_map
56
+ reader = Mailmate::IndexReader.for("message-id")
57
+ return @mid_map[:map] if @mid_map && @mid_map[:reader].equal?(reader)
58
+ map = {}
59
+ reader.each_record { |eml_id, value| map[value] ||= eml_id }
60
+ @mid_map = { reader: reader, map: map }
61
+ map
62
+ end
63
+
47
64
  # Resolve an identifier to a local eml-id. Accepts:
48
65
  # - eml-id (all digits) e.g. "183715"
49
66
  # - RFC Message-ID, brackets optional e.g. "<abc@example.com>"
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mailmate
4
+ # Shared #flags expectation predicate, used by both the inline check in
5
+ # `mm-modify` and the batch `mm-verify`. An "expectation" is a [kind, arg]
6
+ # pair describing the post-action state a single eml-id's flag list should
7
+ # satisfy; `kind` may be a Symbol (internal) or String (round-tripped
8
+ # through a JSON check-ticket) — both resolve the same.
9
+ #
10
+ # Kinds:
11
+ # [:seen, true|false] \Seen present / absent
12
+ # [:flagged, true|false] \Flagged present / absent
13
+ # [:tag_present, "name"] keyword present
14
+ # [:tag_absent, "name"] keyword absent
15
+ # [:no_user_tags, nil] no non-system keywords (only \… / $… remain)
16
+ module FlagCheck
17
+ module_function
18
+
19
+ def met?(flags, kind, arg)
20
+ case kind.to_sym
21
+ when :seen then flags.include?("\\Seen") == arg
22
+ when :flagged then flags.include?("\\Flagged") == arg
23
+ when :tag_present then flags.include?(arg)
24
+ when :tag_absent then !flags.include?(arg)
25
+ when :no_user_tags then flags.none? { |f| !system_flag?(f) }
26
+ else raise ArgumentError, "unknown flag-check kind: #{kind.inspect}"
27
+ end
28
+ end
29
+
30
+ # All expectations satisfied by `flags`? `expectations` is an array of
31
+ # [kind, arg] pairs.
32
+ def all_met?(flags, expectations)
33
+ expectations.all? { |kind, arg| met?(flags, kind, arg) }
34
+ end
35
+
36
+ def system_flag?(flag)
37
+ flag.start_with?("\\", "$")
38
+ end
39
+
40
+ # Human label for an expectation, for verification messages.
41
+ def label(kind, arg)
42
+ case kind.to_sym
43
+ when :seen then arg ? "read" : "unread"
44
+ when :flagged then arg ? "flagged" : "not flagged"
45
+ when :tag_present then "tag #{arg.inspect}"
46
+ when :tag_absent then "no tag #{arg.inspect}"
47
+ when :no_user_tags then "no user tags"
48
+ end
49
+ end
50
+ end
51
+ end