PyPI - mailsense - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mailsense 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

mailsense/__init__.py +9 -0
mailsense/cli.py +86 -0
mailsense/commands/__init__.py +1 -0
mailsense/commands/analyze.py +395 -0
mailsense/commands/config_cmd.py +92 -0
mailsense/commands/download.py +360 -0
mailsense/commands/extract.py +365 -0
mailsense/commands/pipeline.py +186 -0
mailsense/config.py +266 -0
mailsense-0.1.0.dist-info/METADATA +459 -0
mailsense-0.1.0.dist-info/RECORD +14 -0
mailsense-0.1.0.dist-info/WHEEL +5 -0
mailsense-0.1.0.dist-info/entry_points.txt +2 -0
mailsense-0.1.0.dist-info/top_level.txt +1 -0

mailsense/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+# Copyright 2026 Samapriya Roy
+# Apache 2.0 License
+"""
+mailsense — Automated mail intelligence pipeline.
+Gmail → mbox → image extraction → Gemini AI analysis.
+"""
# Package metadata; ``__version__`` is what ``mailsense --version`` reports.
__version__ = "0.1.0"
__author__ = "Samapriya Roy"

mailsense/cli.py ADDED Viewed

@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# Copyright 2026 Samapriya Roy
+# Apache 2.0 License
+"""
+mailsense — Automated mail intelligence pipeline.
+Turns Gmail USPS Informed Delivery emails into structured JSON
+using Gemini AI across three stages:
+  config    — Store credentials and defaults in ~/.mailsense
+  download  — Gmail label → .mbox files
+  extract   — .mbox files → images + metadata
+  analyze   — images → structured JSON via Gemini AI
+  pipeline  — Run all three stages end-to-end
+"""
+from __future__ import annotations
+import argparse
+import sys
+from mailsense import __version__
+from mailsense import config as _config
+from mailsense.commands import (
+    config_cmd,
+    download,
+    extract,
+    analyze,
+    pipeline,
+)
def build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level ``mailsense`` argument parser.

    The parser carries a ``--version`` / ``-V`` flag and one mandatory
    sub-command, stored on the parsed namespace as ``command``.  Each
    command module registers its own sub-parser.
    """
    top = argparse.ArgumentParser(
        prog="mailsense",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    top.add_argument(
        "--version", "-V",
        action="version",
        version=f"mailsense {__version__}",
    )
    commands = top.add_subparsers(
        dest="command",
        metavar="COMMAND",
        required=True,
    )
    # Every command module contributes its own sub-command, in this order.
    for module in (config_cmd, download, extract, analyze, pipeline):
        module.add_subparser(commands)
    return top
def main() -> None:
    """Entry point: parse argv, load the stored config, run the chosen command.

    Exits with status 1 when the command has no handler or its handler
    raises, and with status 130 when interrupted from the keyboard.
    """
    parser = build_parser()
    args = parser.parse_args()
    cfg = _config.load()

    handlers = {
        "config": config_cmd.run,
        "download": download.run,
        "extract": extract.run,
        "analyze": analyze.run,
        "pipeline": pipeline.run,
    }
    if args.command not in handlers:
        parser.print_help()
        sys.exit(1)

    try:
        handlers[args.command](args, cfg)
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(130)
    except Exception as exc:
        # Top-level boundary: report the message only, without a traceback.
        print(f"\nError: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

mailsense/commands/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Copyright 2026 Samapriya Roy

mailsense/commands/analyze.py ADDED Viewed

@@ -0,0 +1,395 @@
+#!/usr/bin/env python3
+# Copyright 2026 Samapriya Roy
+# Apache 2.0 License
+"""
+Analyze mail images with Gemini AI.
+Reads images + metadata.json produced by the extract step and uses
+the Gemini API to extract structured information from each mail image.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+try:
+    import PIL.Image
+    HAS_PIL = True
+except ImportError:
+    HAS_PIL = False
+try:
+    from google import genai
+    from google.genai import types as genai_types
+    HAS_GENAI = True
+except ImportError:
+    HAS_GENAI = False
+try:
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.progress import (BarColumn, Progress, SpinnerColumn,
+                               TaskProgressColumn, TextColumn,
+                               TimeRemainingColumn)
+    from rich.table import Table
+    HAS_RICH = True
+except ImportError:
+    HAS_RICH = False
+# ── Constants ──────────────────────────────────────────────────────────────────
# Image suffixes that are analyzed; compared against the lower-cased file name.
VALID_EXTENSIONS = (".jpg", ".jpeg", ".png", ".webp")
# Lower-cased file-name prefixes that are never analyzed.
IGNORE_PREFIXES = ("content", "mailer", "ra")
# Model used when neither the command line nor the config names one.
DEFAULT_MODEL = "gemini-2.0-flash"
# Instruction text sent along with every image; it asks for a bare JSON object.
EXTRACTION_PROMPT = """
Analyze this image of mail and extract all relevant information.
1. Identify if it is a marketing flyer (look for 'PRSRT STD', headshots,
   promotional language, or 'Current Resident').
2. If it IS a flyer: Set 'status' to 'Ignored' and 'is_marketing' to true.
3. If it is OFFICIAL mail (bills, tax forms, First Class, invoices, statements):
   Set 'status' to 'Processed' and 'is_marketing' to false.
Extract:
- sender: name, organization, address (street, city, state, zip_code)
- recipient: name/names, address (street, city, state, zip_code)
- postage_details: type/service_class, status, amount, date, permit info
- document_info: document type, visible form fields, reference numbers, tracking codes
- content_summary: 1-2 sentence summary of what this mail is about and its purpose
  - If marketing: what is being promoted and the call to action
  - If official: type of document and purpose (e.g. "Tax form 1099-MISC for payment reporting")
Return ONLY a valid JSON object. No markdown, no code blocks, no explanatory text.
"""
+# ── Helpers ────────────────────────────────────────────────────────────────────
def _is_wanted(filename: str) -> bool:
    """Return True when *filename* names an image that should be analyzed.

    The check is case-insensitive: the name must not begin with any of
    IGNORE_PREFIXES and its suffix must be one of VALID_EXTENSIONS.
    """
    name = filename.lower()
    if name.startswith(IGNORE_PREFIXES):
        return False
    return Path(name).suffix in VALID_EXTENSIONS
def _load_metadata(path: Path) -> dict:
    """Read *path* as UTF-8 text and return the parsed JSON document."""
    return json.loads(Path(path).read_text(encoding="utf-8"))
def _find_email_meta(filename: str, metadata: dict) -> Optional[dict]:
    """Return the e-mail fields of the message that carried *filename*.

    Scans ``metadata["emails"]`` in order.  For the first entry whose
    ``images`` list holds a record with a matching ``filename``, returns a
    dict with that entry's ``date``, ``date_iso``, ``subject``, ``from``,
    ``message_id`` and ``index`` (fields absent from the entry come back
    as ``None``).  Returns ``None`` when no entry matches.

    Image records without a ``filename`` key, and ``emails`` / ``images``
    values that are missing or null, are skipped instead of raising, so a
    single malformed record cannot break the lookup for every other file.
    """
    wanted_fields = ("date", "date_iso", "subject", "from", "message_id", "index")
    for entry in metadata.get("emails") or []:
        for image in entry.get("images") or []:
            # .get(): a record lacking "filename" simply never matches.
            if image.get("filename") == filename:
                return {field: entry.get(field) for field in wanted_fields}
    return None
def _clean_json(text: str) -> str:
    """Strip a surrounding Markdown code fence from a model reply.

    Removes one leading fence (three backticks, optionally followed by a
    ``json`` language tag in any letter case) and one trailing fence, then
    trims the surrounding whitespace.  Text without fences is returned
    stripped but otherwise unchanged.
    """
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        # The tag directly after the fence may be written json, JSON, Json, ...
        if text[:4].lower() == "json":
            text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()
def _collect_work_units(input_dir: Path) -> list[tuple[Path, Path]]:
    """Find the (image directory, metadata.json) pairs to analyze.

    When *input_dir* itself contains ``metadata.json`` it is the only work
    unit.  Otherwise every direct sub-directory that contains a
    ``metadata.json`` becomes one unit, in sorted order.  When nothing is
    found an error is printed to stderr and the process exits with status 1.
    """
    top_level_meta = input_dir / "metadata.json"
    if top_level_meta.exists():
        return [(input_dir, top_level_meta)]

    found = [
        (child, child / "metadata.json")
        for child in sorted(input_dir.iterdir())
        if child.is_dir() and (child / "metadata.json").exists()
    ]
    if found:
        return found

    print(f"Error: no metadata.json found in '{input_dir}' or its subdirectories.",
          file=sys.stderr)
    sys.exit(1)
def _call_gemini(client, model_name: str, image_path: Path) -> str:
    """Send one mail image together with EXTRACTION_PROMPT to Gemini.

    Args:
        client: object exposing ``models.generate_content`` (created with
            ``genai.Client`` by the caller).
        model_name: name of the Gemini model to query.
        image_path: image file to analyze.

    Returns:
        The ``text`` attribute of the API response.  The request asks for
        ``application/json`` output at temperature 0.1.

    The PIL image is passed to the SDK as-is and is closed again once the
    request has completed.
    """
    request_config = genai_types.GenerateContentConfig(
        response_mime_type="application/json",
        temperature=0.1,
    )
    # Pillow opens files lazily and keeps the handle until the image is
    # closed; the context manager releases it instead of leaking one
    # handle per processed image.
    with PIL.Image.open(image_path) as img:
        response = client.models.generate_content(
            model=model_name,
            contents=[EXTRACTION_PROMPT, img],
            config=request_config,
        )
    return response.text
+# ── Core processor ─────────────────────────────────────────────────────────────
def _process_folder(
    image_dir: Path,
    metadata_path: Path,
    output_dir: Path,
    client,
    model_name: str,
    delay: float,
    dry_run: bool,
    progress=None,
    outer_task=None,
) -> dict:
    """Analyze every wanted image in *image_dir*, writing one JSON file each.

    Args:
        image_dir: directory whose files are filtered with ``_is_wanted``.
        metadata_path: ``metadata.json`` describing the e-mails the images
            came from.
        output_dir: created if needed; receives ``<image stem>.json`` files.
        client: Gemini client handed to ``_call_gemini`` (unused on dry runs).
        model_name: Gemini model name.
        delay: seconds slept after every analysis attempt, successful or not.
        dry_run: when true, only print what would be processed.
        progress: optional progress display; a task is added for this folder.
        outer_task: optional task on *progress*, advanced by one at the end.

    Returns:
        A summary dict with the source/output directories, a timestamp,
        the total/processed/skipped/error counts and the error details.

    Images whose output file already exists are skipped.  Failures are
    recorded in the summary and do not stop the remaining images.
    """
    metadata = _load_metadata(metadata_path)
    output_dir.mkdir(parents=True, exist_ok=True)
    image_files = sorted(filter(_is_wanted, os.listdir(image_dir)))
    total = len(image_files)
    done = skipped = failed = 0
    failures: list[dict] = []

    inner_task = None
    if progress:
        inner_task = progress.add_task(f"[cyan]{image_dir.name}", total=total)

    def _tick(**changes) -> None:
        # Does nothing when no progress display was supplied.
        if progress and inner_task is not None:
            progress.update(inner_task, **changes)

    for position, filename in enumerate(image_files, start=1):
        _tick(description=f"[cyan]{image_dir.name} — {filename[:35]}")
        target = output_dir / f"{Path(filename).stem}.json"

        if target.exists():
            print(f"  ⏭  Skipping {filename} (already processed)")
            skipped += 1
            _tick(advance=1)
            continue

        if dry_run:
            print(f"  [dry-run] Would process: {filename}")
            done += 1
            _tick(advance=1)
            continue

        # Kept outside the try so the JSON error handler can quote it.
        cleaned = ""
        try:
            email_meta = _find_email_meta(filename, metadata)
            raw_text = _call_gemini(client, model_name, image_dir / filename)
            cleaned = _clean_json(raw_text)
            data = json.loads(cleaned)
            data["filename"] = filename
            if email_meta:
                data["mail_metadata"] = email_meta
            target.write_text(json.dumps(data, indent=2), encoding="utf-8")
            done += 1
            print(f"  ✓ [{position}/{total}] {filename}")
        except json.JSONDecodeError as exc:
            failed += 1
            failures.append({"filename": filename,
                             "error": f"JSON parse: {exc}",
                             "raw": cleaned[:400]})
            print(f"  ✗ JSON error — {filename}: {exc}", file=sys.stderr)
        except Exception as exc:
            failed += 1
            failures.append({"filename": filename, "error": str(exc)})
            print(f"  ✗ Error — {filename}: {exc}", file=sys.stderr)
        finally:
            _tick(advance=1)
            # Pause after every attempt, whether it succeeded or failed.
            time.sleep(delay)

    # NOTE: the summary is only returned; nothing is written to disk here.
    summary = {
        "source_dir":      str(image_dir),
        "output_dir":      str(output_dir),
        "processing_date": time.strftime("%Y-%m-%d %H:%M:%S"),
        "total_files":     total,
        "processed":       done,
        "skipped":         skipped,
        "errors":          failed,
        "error_details":   failures,
    }
    if progress and outer_task is not None:
        progress.update(outer_task, advance=1)
    return summary
def _display_summary_plain(summaries: list[dict]) -> None:
    """Print the per-folder counts as a fixed-width text table.

    One row is printed per summary, labelled with the last component of
    its ``source_dir``.  A TOTAL row follows when there is more than one
    summary.
    """
    count_keys = ("total_files", "processed", "skipped", "errors")
    rule = "  " + "─" * 60

    def _row(label: str, counts: dict) -> str:
        return (f"  {label:<30}  {counts['total_files']:>6}  {counts['processed']:>6}  "
                f"{counts['skipped']:>6}  {counts['errors']:>5}")

    print("\n── Analysis Summary ──────────────────────────────────")
    print(f"  {'Folder':<30}  {'Total':>6}  {'Done':>6}  {'Skip':>6}  {'Err':>5}")
    print(rule)
    for entry in summaries:
        print(_row(Path(entry["source_dir"]).name, entry))
    if len(summaries) > 1:
        grand = {key: sum(entry[key] for entry in summaries) for key in count_keys}
        print(rule)
        print(_row("TOTAL", grand))
def analyze(
    input_dir: Path,
    output_dir: Path,
    api_key: str,
    model_name: str = DEFAULT_MODEL,
    delay: float = 4.0,
    dry_run: bool = False,
) -> list[dict]:
    """Public API: analyze all images under *input_dir* using Gemini.

    Args:
        input_dir: output of the extract step; either a single folder with
            ``metadata.json`` or a folder of such sub-folders.
        output_dir: where the JSON results go.  With several work units each
            one gets a sub-directory mirroring its path below *input_dir*.
        api_key: Gemini API key (not used on a dry run).
        model_name: Gemini model name.
        delay: seconds to wait after each API attempt.
        dry_run: list what would be processed without calling the API.

    Returns:
        One summary dict per processed folder.

    Raises:
        ImportError: when ``google-genai`` or Pillow is missing and
            *dry_run* is false.
    """
    client = None
    if not dry_run:
        if not HAS_GENAI:
            raise ImportError(
                "google-genai not installed. pip install google-genai"
            )
        if not HAS_PIL:
            raise ImportError("Pillow not installed. pip install Pillow")
        client = genai.Client(api_key=api_key)

    work_units = _collect_work_units(input_dir)
    batch = len(work_units) > 1
    summaries: list[dict] = []

    def _run_folders(progress=None, outer_task=None) -> None:
        # Shared by the rich and the plain code path.
        for image_dir, meta_path in work_units:
            rel = image_dir.relative_to(input_dir) if batch else Path(".")
            summaries.append(_process_folder(
                image_dir, meta_path, output_dir / rel,
                client, model_name, delay, dry_run,
                progress=progress, outer_task=outer_task,
            ))

    # Plain output: used for dry runs and whenever rich is unavailable.
    if dry_run or not HAS_RICH:
        _run_folders()
        _display_summary_plain(summaries)
        return summaries

    console = Console()
    console.print(Panel.fit(
        f"[bold cyan]Mail Analysis[/bold cyan]\n"
        f"Input:   {input_dir}\n"
        f"Output:  {output_dir}\n"
        f"Model:   {model_name}\n"
        f"Delay:   {delay}s  |  Folders: {len(work_units)}",
        border_style="cyan",
    ))
    with Progress(
        SpinnerColumn(), TextColumn("[progress.description]{task.description}"),
        BarColumn(), TextColumn("{task.completed}/{task.total}"),
        TaskProgressColumn(), TimeRemainingColumn(),
        console=console, transient=False,
    ) as progress:
        # The overall bar only exists when there are several folders.
        outer_task = None
        if batch:
            outer_task = progress.add_task("[bold]Overall", total=len(work_units))
        _run_folders(progress, outer_task)

    table = Table(title="Analysis Summary", header_style="bold cyan", show_lines=True)
    column_specs = (
        ("Folder",    "cyan",   {"no_wrap": True}),
        ("Total",     "white",  {"justify": "right"}),
        ("Processed", "green",  {"justify": "right"}),
        ("Skipped",   "yellow", {"justify": "right"}),
        ("Errors",    "red",    {"justify": "right"}),
    )
    for header, colour, extra in column_specs:
        table.add_column(header, style=colour, **extra)
    for entry in summaries:
        table.add_row(
            Path(entry["source_dir"]).name,
            *(str(entry[key])
              for key in ("total_files", "processed", "skipped", "errors")),
        )
    console.print(table)
    return summaries
+# ── CLI entry ──────────────────────────────────────────────────────────────────
def add_subparser(subparsers) -> None:
    """Register the ``analyze`` command and its options on *subparsers*."""
    cmd = subparsers.add_parser(
        "analyze",
        help="Analyze mail images with Gemini AI",
        description=__doc__,
    )
    # (flags, add_argument keyword arguments), in the order they are added.
    option_specs = (
        (("--input-dir", "-i"),
         {"required": True, "metavar": "DIR",
          "help": "Output directory from the extract step"}),
        (("--output-dir", "-o"),
         {"required": True, "metavar": "DIR",
          "help": "Directory to save extracted JSON files"}),
        (("--api-key", "-k"),
         {"default": None, "metavar": "KEY",
          "help": "Gemini API key (overrides config / GEMINI_API_KEY env var)"}),
        (("--model", "-m"),
         {"default": None, "metavar": "MODEL",
          "help": f"Gemini model name (default: {DEFAULT_MODEL})"}),
        (("--delay", "-d"),
         {"type": float, "default": None, "metavar": "SEC",
          "help": "Seconds between API requests (default: 4 for free tier)"}),
        (("--dry-run", "-n"),
         {"action": "store_true",
          "help": "Show what would be processed without calling the API"}),
    )
    for flags, settings in option_specs:
        cmd.add_argument(*flags, **settings)
def run(args, cfg: dict) -> None:
    """CLI handler for ``mailsense analyze``.

    Resolves the settings and hands them to ``analyze``:

    * API key: ``--api-key``, then the ``gemini_api_key`` config value,
      then the ``GEMINI_API_KEY`` environment variable.  A key is only
      required when this is not a dry run.
    * Model: ``--model``, then the ``gemini_model`` config value, then
      ``DEFAULT_MODEL``.  An empty or null config value counts as unset.
    * Delay: ``--delay``, then the ``api_delay`` config value, then 4.0
      seconds.  An empty or null config value counts as unset.

    Prints an error to stderr and exits with status 1 when the API key is
    missing, the input directory does not exist, or the delay is negative.
    """
    api_key = (
        args.api_key
        or cfg.get("gemini_api_key")
        or os.environ.get("GEMINI_API_KEY")
    )
    if not api_key and not args.dry_run:
        print(
            "Error: no Gemini API key. Use --api-key, set GEMINI_API_KEY, "
            "or run: mailsense config set gemini_api_key YOUR_KEY",
            file=sys.stderr,
        )
        sys.exit(1)
    input_dir  = Path(args.input_dir)
    output_dir = Path(args.output_dir)
    if not input_dir.exists():
        print(f"Error: input directory not found: {input_dir}", file=sys.stderr)
        sys.exit(1)
    # `or` rather than a .get() default: a stored "" / null must not win
    # over the built-in default.
    model_name = args.model or cfg.get("gemini_model") or DEFAULT_MODEL
    if args.delay is not None:
        delay = args.delay
    else:
        stored_delay = cfg.get("api_delay")
        delay = 4.0 if stored_delay in (None, "") else float(stored_delay)
    if delay < 0:
        # time.sleep() rejects negative values; fail before any API call
        # is made rather than after the first image has been sent.
        print(f"Error: delay must not be negative: {delay}", file=sys.stderr)
        sys.exit(1)
    analyze(
        input_dir=input_dir,
        output_dir=output_dir,
        api_key=api_key or "",
        model_name=model_name,
        delay=delay,
        dry_run=args.dry_run,
    )

mailsense/commands/config_cmd.py ADDED Viewed

@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# Copyright 2026 Samapriya Roy
+# Apache 2.0 License
+"""
+Config — Manage mailsense credentials and defaults.
+Settings are stored in ~/.mailsense (JSON, mode 0600).
+"""
+from __future__ import annotations
+import sys
def add_subparser(subparsers) -> None:
    """Register the ``config`` command and its actions on *subparsers*.

    The chosen action (``show``, ``set``, ``unset``, ``keys`` or
    ``configure``) is stored on the namespace as ``config_action``; it is
    ``None`` when no action is given.
    """
    config_parser = subparsers.add_parser(
        "config",
        help="Manage credentials and defaults stored in ~/.mailsense",
        description=__doc__,
    )
    actions = config_parser.add_subparsers(dest="config_action", metavar="ACTION")

    # config show
    actions.add_parser("show", help="Print current configuration (passwords masked)")

    # config set KEY VALUE
    set_parser = actions.add_parser("set", help="Set a configuration key")
    set_parser.add_argument(
        "key", help="Configuration key (see 'show' for available keys)")
    set_parser.add_argument("value", help="Value to store")

    # config unset KEY
    unset_parser = actions.add_parser("unset", help="Remove a configuration key")
    unset_parser.add_argument("key", help="Key to remove")

    # config keys
    actions.add_parser(
        "keys",
        help="List all recognised configuration keys and their descriptions",
    )

    # config configure [KEY ...]
    configure_parser = actions.add_parser(
        "configure",
        help="Interactive wizard to set all (or specific) config values",
    )
    configure_parser.add_argument(
        "keys",
        nargs="*",
        metavar="KEY",
        help="Optional: specific keys to configure (default: all)",
    )
def _mask_secret(key: str, value) -> str:
    """Return *value* as text, replaced by bullets when *key* names a secret.

    A key counts as secret when its name contains ``password`` or ``key``.
    """
    text = str(value)
    if "password" in key or "key" in key:
        return "•" * len(text)
    return text


def run(args, cfg: dict) -> None:
    """CLI handler for ``mailsense config``.

    Dispatches on ``args.config_action``:

    * ``show`` (also the default when no action is given): print every
      stored value, masking secrets, with its description if there is one.
    * ``set``: store ``args.value`` under ``args.key``.
    * ``unset``: remove ``args.key`` and report whether it was set.
    * ``keys``: list the recognised keys and their descriptions.
    * ``configure``: run the interactive wizard, optionally limited to
      ``args.keys``.

    Any other action prints an error to stderr and exits with status 1.
    The *cfg* argument is not used; the configuration is read through
    ``mailsense.config``.
    """
    from mailsense import config
    action = getattr(args, "config_action", None)
    if action is None or action == "show":
        data = config.load()
        if not data:
            print("No configuration stored yet. Run:  mailsense config set <key> <value>")
            return
        descriptions = config.describe_keys()
        print("Current configuration  (~/.mailsense)")
        print("─" * 48)
        for key, val in data.items():
            print(f"  {key:<22}  {_mask_secret(key, val)}")
            desc = descriptions.get(key, "")
            if desc:
                print(f"  {'':22}  ({desc})")
    elif action == "set":
        config.set_value(args.key, args.value)
        print(f"  Set {args.key!r} = {_mask_secret(args.key, args.value)}")
    elif action == "unset":
        if config.unset(args.key):
            print(f"  Removed {args.key!r}")
        else:
            print(f"  Key {args.key!r} was not set.")
    elif action == "keys":
        descriptions = config.describe_keys()
        print("Available configuration keys:")
        print("─" * 56)
        for key, desc in descriptions.items():
            print(f"  {key:<22}  {desc}")
    elif action == "configure":
        # An empty KEY list means "configure everything".
        config.configure(args.keys if args.keys else None)
    else:
        # The message lists every supported action, including `configure`.
        print("Unknown action. Use: show | set | unset | keys | configure",
              file=sys.stderr)
        sys.exit(1)