mailwise 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ """Email Issue Indexer - Parse, index, and search email issue threads."""
2
+ __version__ = "0.1.0"
@@ -0,0 +1,544 @@
1
+ """CLI interface for MailWise."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ import click
7
+ import yaml
8
+
9
+ from .embeddings import EmbeddingEngine
10
+ from .indexer import index_directory
11
+ from .search import find_similar, format_results
12
+ from .store import Store
13
+
14
# Default config location: "config.yaml" two directories above the directory
# that contains this module.
# NOTE(review): this presumably targets a source checkout; for an installed
# wheel the path would land outside the package directory - confirm.
+ DEFAULT_CONFIG = Path(__file__).parent.parent.parent / "config.yaml"
15
+
16
# ASCII-art banner; the `init` and `stats` commands print it via BANNER.strip().
+ BANNER = """
17
+ __ __ _ ___ ___
18
+ | \\/ | __ _(_) \\ \\ / (_)___ ___
19
+ | |\\/| |/ _` | | |\\ \\ /\\ / /| / __|/ _ \\
20
+ | | | | (_| | | | \\ V V / | \\__ \\ __/
21
+ |_| |_|\\__,_|_|_| \\_/\\_/ |_|___/\\___|
22
+ """
23
+
24
+
25
def load_config(config_path: Path) -> dict:
    """Load the MailWise YAML configuration.

    Reads ``config_path`` when it exists; otherwise falls back to a
    ``config.local.yaml`` file in the same directory.

    Args:
        config_path: Location of the primary config file.

    Returns:
        The parsed YAML document, or an empty dict when neither file exists
        or the file that was read is empty.
    """
    candidates = (config_path, config_path.parent / "config.local.yaml")
    for candidate in candidates:
        if candidate.exists():
            # Read as UTF-8 explicitly instead of relying on the platform's
            # locale encoding, which breaks non-ASCII values on some systems.
            with open(candidate, encoding="utf-8") as f:
                return yaml.safe_load(f) or {}
    return {}
34
+
35
+
36
def get_store(config: dict) -> Store:
    """Open the index store named by the ``database`` config key.

    Falls back to ``data/index.db`` when the key is absent.
    """
    return Store(Path(config.get("database", "data/index.db")))
39
+
40
+
41
def get_engine(config: dict) -> EmbeddingEngine:
    """Create an embedding engine for the configured model.

    Uses the ``embedding_model`` config key, defaulting to
    ``all-MiniLM-L6-v2``.
    """
    model_name = config.get("embedding_model", "all-MiniLM-L6-v2")
    engine = EmbeddingEngine(model_name)
    return engine
44
+
45
+
46
@click.group()
@click.option("--config", "config_path", type=click.Path(exists=False),
              default=str(DEFAULT_CONFIG), help="Path to config.yaml")
@click.pass_context
def cli(ctx, config_path):
    """MailWise - Turn email threads into a searchable knowledge base.

    Parse EML files, index with embeddings, and use RAG to learn how
    your best engineers analyze issues.

    \b
    Quick start:
    1. Put .eml files in the emails/ directory
    2. Run: mailwise index
    3. Run: mailwise search "your issue description"
    4. Run: mailwise analyze "your issue description" (deep RAG analysis)

    \b
    Tip: For best results with 'analyze', paste the full bug report
    content rather than just a short title. More context = better matches.
    """
    # Share the parsed config and the raw path with every subcommand.
    ctx.ensure_object(dict)
    requested = Path(config_path)
    loaded = load_config(requested)
    ctx.obj["config"] = loaded
    ctx.obj["config_path"] = config_path

    # Only warn when nothing was loaded and the requested file is missing.
    if loaded or requested.exists():
        return
    click.echo("Warning: No config.yaml found. Copy config.example.yaml "
               "to config.yaml and edit it.", err=True)
    click.echo(" cp config.example.yaml config.yaml\n", err=True)
76
+
77
+
78
@cli.command()
@click.pass_context
def init(ctx):
    """Set up MailWise interactively.

    \b
    Creates a config.yaml file, sets up directories, and optionally
    runs a test index to verify everything works.

    \b
    Example:
    mailwise init
    """
    # The data/ and markdown/ directories are created next to the config file.
    config_path = Path(ctx.obj["config_path"]).resolve()
    base_dir = config_path.parent

    # Step 1: Never overwrite an existing config without asking.
    if config_path.exists() and not click.confirm(
        "config.yaml already exists. Overwrite?", default=False
    ):
        click.echo("Aborted.")
        return

    click.echo(BANNER.strip())
    click.echo("\nLet's set up MailWise!\n")

    # Step 2: EML directory. Loop until the user settles on a directory.
    eml_count = 0
    while True:
        eml_dir = click.prompt("Path to your EML files directory",
                               type=str, default="emails")
        eml_path = Path(eml_dir).expanduser().resolve()
        if not eml_path.exists():
            click.echo(f" Directory '{eml_path}' does not exist.")
            if click.confirm(" Create it?", default=True):
                eml_path.mkdir(parents=True, exist_ok=True)
                click.echo(f" Created {eml_path}")
                click.echo(" Add your .eml files there and run 'mailwise index' later.\n")
                break
            continue
        eml_count = sum(1 for _ in eml_path.rglob("*.eml"))
        if eml_count == 0:
            click.echo(f" No .eml files found in '{eml_path}'.")
            if click.confirm(" Use this directory anyway?", default=True):
                break
            continue
        click.echo(f" Found {eml_count} .eml files.\n")
        break

    # Step 3: Experts (optional)
    experts = []
    click.echo("Expert engineers get boosted search rankings and [Expert] tags.")
    if click.confirm("Add expert engineers now?", default=False):
        while True:
            email_addr = click.prompt(" Expert email", type=str)
            if "@" not in email_addr:
                click.echo(" That doesn't look like an email address. Try again.")
                continue
            name = click.prompt(" Display name", type=str, default="")
            experts.append({"email": email_addr, "name": name})
            click.echo(f" Added: {name or email_addr} <{email_addr}>")
            if not click.confirm(" Add another expert?", default=False):
                break
    click.echo("")

    # Step 4: Create directories relative to config location.
    # parents=True so a --config path inside a not-yet-existing directory
    # does not fail here (and the config file can be written below).
    (base_dir / "data").mkdir(parents=True, exist_ok=True)
    (base_dir / "markdown").mkdir(parents=True, exist_ok=True)

    # Step 5: Write config
    config_data = {
        "eml_directory": str(eml_path),
        "database": "data/index.db",
        "markdown_directory": "markdown",
        "embedding_model": "all-MiniLM-L6-v2",
        "expert_boost": 1.5,
        "experts": experts,
    }
    # Explicit UTF-8 so the file does not depend on the locale encoding.
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
    click.echo(f"Config written to {config_path}")

    # Step 6: Offer test index
    if eml_count > 0 and click.confirm(
        f"\nRun a quick test index ({min(5, eml_count)} files) to verify setup?",
        default=True,
    ):
        # NOTE(review): get_store() resolves "data/index.db" against the
        # current working directory, while the directories above are created
        # under base_dir - confirm this is intended when the two differ.
        store = get_store(config_data)
        engine = get_engine(config_data)
        try:
            # Sync experts
            for expert in experts:
                store.add_expert(expert["email"], expert.get("name", ""))

            # Index a handful of files through a throwaway directory so the
            # real EML tree is not walked in full.
            import shutil
            import tempfile
            test_files = sorted(eml_path.rglob("*.eml"))[:5]
            md_dir = base_dir / "markdown"
            with tempfile.TemporaryDirectory() as tmpdir:
                tmp = Path(tmpdir)
                for i, eml_file in enumerate(test_files):
                    # Index prefix avoids name collisions from subdirs.
                    target = tmp / f"{i}_{eml_file.name}"
                    try:
                        target.symlink_to(eml_file)
                    except (OSError, NotImplementedError):
                        # Symlinks may be unavailable (e.g. Windows without
                        # the privilege); fall back to copying the file.
                        shutil.copy2(eml_file, target)
                test_stats = index_directory(
                    tmp, store, engine,
                    md_dir=md_dir, batch_size=5, max_workers=1,
                )
            click.echo(f"\nTest index complete: {test_stats['processed']} emails indexed, "
                       f"{test_stats['errors']} errors.")
            if test_stats['errors'] == 0:
                click.echo("Everything looks good!")
        except Exception as e:
            # Best effort: a failed smoke test must not abort the setup.
            click.echo(f"\nTest index failed: {e}", err=True)
            click.echo("You can debug and run 'mailwise index' manually.", err=True)
        finally:
            store.close()

    # Step 7: Summary
    click.echo("\nYou're all set! Next steps:")
    if eml_count == 0:
        click.echo(f" 1. Add .eml files to {eml_path}")
        click.echo(" 2. Run: mailwise index")
        click.echo(" 3. Run: mailwise search \"your issue\"")
        click.echo(" 4. Run: mailwise analyze \"full bug report\"")
    else:
        click.echo(f" 1. Run: mailwise index (index all {eml_count} emails)")
        click.echo(" 2. Run: mailwise search \"your issue\"")
        click.echo(" 3. Run: mailwise analyze \"full bug report\"")
205
+
206
+
207
@cli.command()
@click.option("--dir", "eml_dir", type=click.Path(exists=True),
              help="Directory containing EML files (overrides config)")
@click.option("--batch-size", default=200, help="Emails per batch")
@click.pass_context
def index(ctx, eml_dir, batch_size):
    """Index EML files into the searchable database.

    \b
    Scans the configured eml_directory for .eml files, parses email
    threads, generates embeddings, and stores everything in SQLite.
    Also writes structured markdown files to the markdown/ directory.

    \b
    Features:
    - Incremental: only processes new or changed files
    - Parallel: parses EML files using multiple CPU cores
    - Resumable: safe to interrupt and re-run

    \b
    Examples:
    mailwise index # Index from configured directory
    mailwise index --dir ~/emails # Index from a specific directory
    """
    config = ctx.obj["config"]
    # --dir wins over the configured directory; "." is the last resort.
    eml_path = Path(eml_dir) if eml_dir else Path(config.get("eml_directory", "."))

    if not eml_path.exists():
        click.echo(f"Error: directory '{eml_path}' does not exist.", err=True)
        click.echo("Set 'eml_directory' in config.yaml or use --dir.", err=True)
        raise SystemExit(1)

    md_dir = Path(config.get("markdown_directory",
                             str(Path(__file__).parent.parent.parent / "markdown")))

    store = get_store(config)
    try:
        engine = get_engine(config)

        # Sync experts from config. `or []` tolerates an empty `experts:`
        # key, which YAML parses as None. Done inside the try so a failure
        # here still closes the store.
        for expert in config.get("experts") or []:
            if isinstance(expert, dict) and "email" in expert:
                store.add_expert(expert["email"], expert.get("name", ""))

        stats = index_directory(eml_path, store, engine, md_dir=md_dir,
                                batch_size=batch_size)

        click.echo(f"\nIndex complete: {stats['processed']} new, "
                   f"{stats['skipped']} unchanged, {stats['errors']} errors")

        if stats['processed'] > 0:
            # One stats query serves both the summary and the expert tip.
            s = store.get_stats()
            click.echo("\nDatabase summary:")
            click.echo(f" Total emails: {s['emails']}")
            click.echo(f" Thread messages: {s['thread_messages']}")
            click.echo(f" Expert messages: {s['expert_messages']}")
            click.echo(f" Experts tracked: {s['experts']}")
            click.echo("\nNext steps:")
            click.echo(" mailwise search \"describe your issue here\"")
            click.echo(" mailwise analyze \"paste full bug report here\"")

            if s['experts'] == 0:
                click.echo("\nTip: No expert engineers configured yet. "
                           "Add your best engineers to boost their replies:")
                click.echo(" mailwise experts add engineer@company.com --name \"Jane Doe\"")
    finally:
        store.close()
276
+
277
+
278
@cli.command()
@click.argument("query")
@click.option("-k", "--top-k", default=10, help="Number of results to show")
@click.option("--expert-only", is_flag=True,
              help="Only show replies from expert engineers")
@click.option("--show-body", is_flag=True,
              help="Show a preview of each matching message")
@click.pass_context
def search(ctx, query, top_k, expert_only, show_body):
    """Find similar past issues using semantic search.

    \b
    Uses embeddings to find issues with similar meaning, not just
    keyword matching. Expert engineers' replies are boosted in results.

    \b
    Tips:
    - Use natural language: "email disappears after sync"
    - Be specific for better results: include error codes, API names,
    platform details (Mac/Windows/iOS)
    - Use --show-body to preview matching messages
    - Use --expert-only to see only what your best engineers said
    - Use 'mailwise show <ID>' to read the full thread

    \b
    Examples:
    mailwise search "calendar sync failure"
    mailwise search "attachment crashes on iOS" --show-body
    mailwise search "deleted items reappear" --expert-only -k 5
    """
    cfg = ctx.obj["config"]
    store = get_store(cfg)
    engine = get_engine(cfg)
    boost = cfg.get("expert_boost", 1.5)

    try:
        # Nothing to search until at least one email has been indexed.
        if store.get_stats()['emails'] == 0:
            click.echo("No emails indexed yet. Run 'mailwise index' first.")
            return

        matches = find_similar(query, store, engine, top_k=top_k,
                               expert_boost=boost, expert_only=expert_only)
        click.echo(format_results(matches, show_body=show_body))

        # Follow-up hints: one for hits without previews, one for an
        # expert-only search that came back empty.
        if matches:
            if not show_body:
                click.echo("Tip: Add --show-body to preview matching messages, "
                           "or run 'mailwise show <Email ID>' for the full thread.")
        elif expert_only:
            click.echo("Tip: No expert matches found. Try without --expert-only "
                       "to search all messages.")
    finally:
        store.close()
333
+
334
+
335
@cli.command()
@click.argument("query")
@click.option("-k", "--top-k", default=5,
              help="Number of similar issues to feed to Claude")
@click.pass_context
def analyze(ctx, query, top_k):
    """Deep analysis of an issue using RAG with expert knowledge.

    \b
    Finds similar past issues, then asks Claude to analyze patterns
    in how your expert engineers investigated and resolved them.
    Claude will suggest root causes, debugging approaches, and next steps.

    \b
    This command requires Claude Code to be installed and authenticated.
    It uses your existing Claude Code auth — no separate API key needed.

    \b
    Tips:
    - Paste the FULL bug report for best results, not just a title.
    More context (error codes, logs, environment) = better matches.
    - Increase -k for broader analysis across more past issues.
    - Expert engineers' replies are highlighted and weighted heavily.

    \b
    Examples:
    mailwise analyze "user reports calendar not syncing on Mac"
    mailwise analyze "$(cat bug_report.txt)"
    mailwise analyze "emails moved to local folder reappear after 30 min" -k 10
    """
    cfg = ctx.obj["config"]
    store = get_store(cfg)
    engine = get_engine(cfg)
    boost = cfg.get("expert_boost", 2.0)

    # Imported here rather than at module level, so it is only loaded
    # when this command runs.
    from .rag import analyze as rag_analyze

    # Message that suppresses the trailing tip when returned by rag_analyze.
    no_match_message = "No similar issues found in the index. Try indexing more emails first."

    try:
        indexed = store.get_stats()['emails']
        if indexed == 0:
            click.echo("No emails indexed yet. Run 'mailwise index' first.")
            return

        # Progress messages go to stderr.
        click.echo(f"Searching {indexed} indexed emails for similar issues...",
                   err=True)
        click.echo(f"Feeding top {top_k} matches to Claude for analysis...\n",
                   err=True)

        # NOTE(review): the returned text is not echoed here; presumably
        # rag_analyze emits its own output - confirm.
        answer = rag_analyze(query, store, engine, top_k=top_k,
                             expert_boost=boost,
                             system_prompt=cfg.get("system_prompt"))

        if answer and answer != no_match_message:
            click.echo("\n---")
            click.echo("Tip: Run 'mailwise search \"same query\" --show-body' "
                       "to see the raw source threads.", err=True)
    finally:
        store.close()
393
+
394
+
395
@cli.command()
@click.argument("email_id", type=int)
@click.pass_context
def show(ctx, email_id):
    """Display the full markdown for an indexed email thread.

    \b
    Shows the complete parsed thread with all replies, timestamps,
    and [Expert] tags. Use this to read the full context after finding
    an issue via 'mailwise search'.

    \b
    Example:
    mailwise show 42
    """
    store = get_store(ctx.obj["config"])
    try:
        record = store.get_email(email_id)
        if not record:
            # Unknown ID: report on stderr and point at the search command.
            click.echo(f"Email ID {email_id} not found.", err=True)
            click.echo("Run 'mailwise search' to find valid email IDs.", err=True)
            return
        click.echo(record.markdown)
    finally:
        store.close()
421
+
422
+
423
@cli.group()
def experts():
    """Manage the expert engineers list.

    \b
    Expert engineers get special treatment:
    - Their replies are tagged with [Expert] in markdown output
    - Their messages get a score boost in search results
    - Claude pays extra attention to their analysis in 'analyze' mode

    \b
    You can also configure experts in config.yaml under the 'experts' key.
    """
    # Pure command group: the list/add/remove subcommands do the work.
437
+
438
+
439
@experts.command("list")
@click.pass_context
def experts_list(ctx):
    """List all configured expert engineers."""
    store = get_store(ctx.obj["config"])
    try:
        roster = store.get_experts()
        if roster:
            click.echo(f"Expert engineers ({len(roster)}):\n")
            for address, display_name in roster:
                label = display_name if display_name else '(no name)'
                click.echo(f" {label} <{address}>")
            click.echo("\nTheir replies get a score boost in search "
                       "and [Expert] tags in markdown output.")
        else:
            # Empty list: explain how to add experts instead.
            click.echo("No experts configured yet.\n")
            click.echo("Add your team's best engineers so their replies get "
                       "boosted in search and highlighted in output:")
            click.echo(" mailwise experts add engineer@company.com --name \"Jane Doe\"\n")
            click.echo("Or add them in config.yaml under the 'experts' key.")
    finally:
        store.close()
461
+
462
+
463
@experts.command("add")
@click.argument("email_addr")
@click.option("--name", default="", help="Engineer's display name")
@click.pass_context
def experts_add(ctx, email_addr, name):
    """Add an expert engineer by email address.

    \b
    Examples:
    mailwise experts add senior.dev@company.com --name "Jane Doe"
    mailwise experts add tech.lead@company.com
    """
    store = get_store(ctx.obj["config"])
    try:
        store.add_expert(email_addr, name)
        # Show the display name when given, otherwise the address itself.
        label = name or email_addr
        click.echo(f"Added expert: {label} <{email_addr}>")
        click.echo("\nTip: Re-run 'mailwise index' to re-tag existing "
                   "messages from this expert.")
    finally:
        store.close()
484
+
485
+
486
@experts.command("remove")
@click.argument("email_addr")
@click.pass_context
def experts_remove(ctx, email_addr):
    """Remove an expert engineer by email address."""
    store = get_store(ctx.obj["config"])
    try:
        store.remove_expert(email_addr)
        # The confirmation is printed whatever remove_expert returns.
        click.echo(f"Removed expert: {email_addr}")
    finally:
        store.close()
498
+
499
+
500
@cli.command()
@click.pass_context
def stats(ctx):
    """Show index statistics and health summary.

    \b
    Displays the current state of your MailWise index including
    email count, message count, and expert coverage.
    """
    store = get_store(ctx.obj["config"])
    try:
        totals = store.get_stats()

        click.echo(BANNER.strip())
        click.echo("")

        emails = totals['emails']
        messages = totals['thread_messages']
        expert_messages = totals['expert_messages']
        expert_count = totals['experts']

        click.echo(f" Indexed emails: {emails:,}")
        click.echo(f" Thread messages: {messages:,}")
        click.echo(f" Expert messages: {expert_messages:,}")
        click.echo(f" Configured experts: {expert_count}")

        # Ratios are only shown when both denominators are positive.
        if emails > 0 and messages > 0:
            avg = messages / emails
            coverage = expert_messages / messages * 100
            click.echo(f"\n Avg replies/thread: {avg:.1f}")
            click.echo(f" Expert coverage: {coverage:.1f}% of messages")

        # Closing hint depends on how far along the setup is.
        if emails == 0:
            click.echo("\n No emails indexed yet. Get started:")
            click.echo(" 1. Put .eml files in the emails/ directory")
            click.echo(" 2. Run: mailwise index")
        elif expert_count == 0:
            click.echo("\n Tip: Add expert engineers to boost their replies:")
            click.echo(" mailwise experts add engineer@company.com --name \"Name\"")
        else:
            click.echo("\n Ready to use:")
            click.echo(" mailwise search \"describe your issue\"")
            click.echo(" mailwise analyze \"paste full bug report\"")
    finally:
        store.close()
541
+
542
+
543
# Invoke the click command group when this module is executed as a script.
+ if __name__ == "__main__":
544
+ cli()
@@ -0,0 +1,53 @@
1
+ """Embedding generation and vector similarity search."""
2
+ from __future__ import annotations
3
+
4
+ import numpy as np
5
+
6
+
7
class EmbeddingEngine:
    """Sentence-embedding wrapper plus a brute-force similarity search.

    The underlying ``SentenceTransformer`` is created on first access of
    ``model`` so that constructing an engine stays cheap.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self._model_name = model_name
        self._model = None  # populated lazily by the ``model`` property

    @property
    def model(self):
        """Lazy-load the model to avoid import overhead when not needed."""
        if self._model is None:
            from sentence_transformers import SentenceTransformer
            self._model = SentenceTransformer(self._model_name)
        return self._model

    def embed(self, text: str) -> np.ndarray:
        """Embed a single text string (normalized embeddings requested)."""
        return self.model.encode(text, normalize_embeddings=True)

    def embed_batch(self, texts: list[str], show_progress: bool = True) -> np.ndarray:
        """Embed a batch of texts.

        Args:
            texts: Strings to embed.
            show_progress: Forwarded to the model as ``show_progress_bar``.

        Returns:
            The model's encoding of ``texts``. For an empty list the model is
            not touched and an empty 1-D array (shape ``(0,)``) is returned.
        """
        if not texts:
            return np.array([])
        return self.model.encode(
            texts, normalize_embeddings=True,
            batch_size=64, show_progress_bar=show_progress,
        )

    @staticmethod
    def search(
        query_vec: np.ndarray,
        corpus_vecs: np.ndarray,
        expert_mask: np.ndarray | None = None,
        expert_boost: float = 1.5,
        top_k: int = 10,
    ) -> list[tuple[int, float]]:
        """Find the top-k corpus rows by dot product with ``query_vec``.

        The dot product equals cosine similarity when both sides are
        unit-normalized, as ``embed``/``embed_batch`` request.

        Args:
            query_vec: Query embedding.
            corpus_vecs: Corpus embeddings, one row per item.
            expert_mask: Optional boolean mask selecting rows to boost.
            expert_boost: Multiplier applied to masked rows when > 1.0.
            top_k: Maximum number of results.

        Returns:
            List of (index, score) tuples, highest score first. Empty when
            the corpus is empty or ``top_k`` is not positive.
        """
        corpus = np.asarray(corpus_vecs)
        # Guard the degenerate cases: an empty corpus cannot be multiplied
        # or partitioned, and k == 0 would make the ``[-k:]`` slice below
        # return every row instead of none.
        if top_k <= 0 or corpus.size == 0:
            return []

        # The matmul result is a fresh array, so it is safe to boost in place.
        scores = corpus @ query_vec
        if expert_mask is not None and expert_boost > 1.0:
            # NOTE: the boost is multiplicative, so a negative score on a
            # masked row becomes more negative rather than higher.
            scores[expert_mask] *= expert_boost

        k = min(top_k, len(scores))
        # Partial selection of the k best, then order just those k.
        top_indices = np.argpartition(scores, -k)[-k:]
        top_indices = top_indices[np.argsort(scores[top_indices])[::-1]]
        return [(int(i), float(scores[i])) for i in top_indices]