PyPI - applypilot - Versions diffs - 0.2.0__py3-none-any.whl - Mend

applypilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

applypilot/__init__.py +3 -0
applypilot/__main__.py +5 -0
applypilot/apply/__init__.py +1 -0
applypilot/apply/chrome.py +321 -0
applypilot/apply/dashboard.py +203 -0
applypilot/apply/launcher.py +785 -0
applypilot/apply/prompt.py +624 -0
applypilot/cli.py +316 -0
applypilot/config/employers.yaml +305 -0
applypilot/config/searches.example.yaml +112 -0
applypilot/config/sites.yaml +181 -0
applypilot/config.py +260 -0
applypilot/database.py +424 -0
applypilot/discovery/__init__.py +0 -0
applypilot/discovery/jobspy.py +478 -0
applypilot/discovery/smartextract.py +1118 -0
applypilot/discovery/workday.py +543 -0
applypilot/enrichment/__init__.py +0 -0
applypilot/enrichment/detail.py +894 -0
applypilot/llm.py +158 -0
applypilot/pipeline.py +531 -0
applypilot/scoring/__init__.py +1 -0
applypilot/scoring/cover_letter.py +286 -0
applypilot/scoring/pdf.py +440 -0
applypilot/scoring/scorer.py +180 -0
applypilot/scoring/tailor.py +562 -0
applypilot/scoring/validator.py +315 -0
applypilot/view.py +406 -0
applypilot/wizard/__init__.py +0 -0
applypilot/wizard/init.py +381 -0
applypilot-0.2.0.dist-info/METADATA +219 -0
applypilot-0.2.0.dist-info/RECORD +35 -0
applypilot-0.2.0.dist-info/WHEEL +4 -0
applypilot-0.2.0.dist-info/entry_points.txt +2 -0
applypilot-0.2.0.dist-info/licenses/LICENSE +661 -0

applypilot/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""ApplyPilot — AI-powered end-to-end job application pipeline."""
+# Package version string; matches the 0.2.0 wheel this module ships in.
+__version__ = "0.2.0"

applypilot/__main__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Enable `python -m applypilot`."""
+from applypilot.cli import app
+# Invoke the CLI application object as soon as this module is executed.
+app()

applypilot/apply/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Apply pipeline: Chrome management, prompt building, orchestration, and dashboard."""

applypilot/apply/chrome.py ADDED Viewed

@@ -0,0 +1,321 @@
+"""Chrome lifecycle management for apply workers.
+Handles launching an isolated Chrome instance with remote debugging,
+worker profile setup/cloning, and cross-platform process cleanup.
+"""
+import json
+import logging
+import platform
+import shutil
+import subprocess
+import threading
+import time
+from pathlib import Path
+from applypilot import config
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# CDP port base — each worker uses BASE_CDP_PORT + worker_id
+BASE_CDP_PORT = 9222
+# Track Chrome processes per worker for cleanup (worker_id -> Popen handle)
+_chrome_procs: dict[int, subprocess.Popen] = {}
+# Held around every read/write of _chrome_procs in this module.
+_chrome_lock = threading.Lock()
+# ---------------------------------------------------------------------------
+# Cross-platform process helpers
+# ---------------------------------------------------------------------------
+def _kill_process_tree(pid: int) -> None:
+    """Kill a process and all its children.
+    On Windows, Chrome spawns 10+ child processes (GPU, renderer, etc.),
+    so taskkill /T is needed to kill the entire tree. On Unix, os.killpg
+    handles the process group.
+    """
+    import signal as _signal
+    try:
+        if platform.system() == "Windows":
+            subprocess.run(
+                ["taskkill", "/F", "/T", "/PID", str(pid)],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                timeout=10,
+            )
+        else:
+            # Unix: kill entire process group
+            import os
+            try:
+                os.killpg(os.getpgid(pid), _signal.SIGKILL)
+            except (ProcessLookupError, PermissionError):
+                # Process already gone or owned by another user
+                try:
+                    os.kill(pid, _signal.SIGKILL)
+                except (ProcessLookupError, PermissionError):
+                    pass
+    except Exception:
+        logger.debug("Failed to kill process tree for PID %d", pid, exc_info=True)
+def _kill_on_port(port: int) -> None:
+    """Kill any process listening on a specific port (zombie cleanup).
+    Uses netstat on Windows, lsof on macOS/Linux.
+    """
+    try:
+        if platform.system() == "Windows":
+            result = subprocess.run(
+                ["netstat", "-ano", "-p", "TCP"],
+                capture_output=True, text=True, timeout=10,
+            )
+            for line in result.stdout.splitlines():
+                if f":{port}" in line and "LISTENING" in line:
+                    pid = line.strip().split()[-1]
+                    if pid.isdigit():
+                        _kill_process_tree(int(pid))
+        else:
+            # macOS / Linux
+            result = subprocess.run(
+                ["lsof", "-ti", f":{port}"],
+                capture_output=True, text=True, timeout=10,
+            )
+            for pid_str in result.stdout.strip().splitlines():
+                pid_str = pid_str.strip()
+                if pid_str.isdigit():
+                    _kill_process_tree(int(pid_str))
+    except FileNotFoundError:
+        logger.debug("Port-kill tool not found (netstat/lsof) for port %d", port)
+    except Exception:
+        logger.debug("Failed to kill process on port %d", port, exc_info=True)
+# ---------------------------------------------------------------------------
+# Worker profile management
+# ---------------------------------------------------------------------------
+def setup_worker_profile(worker_id: int) -> Path:
+    """Create an isolated Chrome profile for a worker.
+    On first run, clones from an existing worker profile (preferred, since
+    it already has session cookies) or from the user's real Chrome profile.
+    Subsequent runs reuse the existing worker profile.
+    Args:
+        worker_id: Numeric worker identifier.
+    Returns:
+        Path to the worker's Chrome user-data directory.
+    """
+    profile_dir = config.CHROME_WORKER_DIR / f"worker-{worker_id}"
+    if (profile_dir / "Default").exists():
+        return profile_dir  # Already initialized
+    # Find a source: prefer existing worker (has session cookies), else user profile
+    source: Path | None = None
+    for wid in range(10):
+        if wid == worker_id:
+            continue
+        candidate = config.CHROME_WORKER_DIR / f"worker-{wid}"
+        if (candidate / "Default").exists():
+            source = candidate
+            break
+    if source is None:
+        source = config.get_chrome_user_data()
+    logger.info("[worker-%d] Copying Chrome profile from %s (first time setup)...",
+                worker_id, source.name)
+    profile_dir.mkdir(parents=True, exist_ok=True)
+    # Copy essential profile dirs -- skip caches and heavy transient data
+    skip = {
+        "ShaderCache", "GrShaderCache", "Service Worker", "Cache",
+        "Code Cache", "GPUCache", "CacheStorage", "Crashpad",
+        "BrowserMetrics", "SafeBrowsing", "Crowd Deny",
+        "MEIPreload", "SSLErrorAssistant", "recovery", "Temp",
+        "SingletonLock", "SingletonSocket", "SingletonCookie",
+    }
+    for item in source.iterdir():
+        if item.name in skip:
+            continue
+        dst = profile_dir / item.name
+        try:
+            if item.is_dir():
+                shutil.copytree(
+                    str(item), str(dst), dirs_exist_ok=True,
+                    ignore=shutil.ignore_patterns(
+                        "Cache", "Code Cache", "GPUCache", "Service Worker",
+                    ),
+                )
+            else:
+                shutil.copy2(str(item), str(dst))
+        except (PermissionError, OSError):
+            pass  # skip locked files
+    return profile_dir
+def _suppress_restore_nag(profile_dir: Path) -> None:
+    """Clear Chrome's 'restore pages' nag by fixing Preferences.
+    Chrome writes exit_type=Crashed when killed, which triggers a
+    'Restore pages?' prompt on next launch. This patches it out.
+    """
+    prefs_file = profile_dir / "Default" / "Preferences"
+    if not prefs_file.exists():
+        return
+    try:
+        prefs = json.loads(prefs_file.read_text(encoding="utf-8"))
+        prefs.setdefault("profile", {})["exit_type"] = "Normal"
+        prefs.setdefault("session", {})["restore_on_startup"] = 4  # 4 = open blank
+        prefs.setdefault("session", {}).pop("startup_urls", None)
+        prefs["credentials_enable_service"] = False
+        prefs.setdefault("password_manager", {})["saving_enabled"] = False
+        prefs.setdefault("autofill", {})["profile_enabled"] = False
+        prefs_file.write_text(json.dumps(prefs), encoding="utf-8")
+    except Exception:
+        logger.debug("Could not patch Chrome preferences", exc_info=True)
+# ---------------------------------------------------------------------------
+# Chrome launch / kill
+# ---------------------------------------------------------------------------
+def launch_chrome(worker_id: int, port: int | None = None,
+                  headless: bool = False) -> subprocess.Popen:
+    """Launch a Chrome instance with remote debugging for a worker.
+    Args:
+        worker_id: Numeric worker identifier.
+        port: CDP port. Defaults to BASE_CDP_PORT + worker_id.
+        headless: Run Chrome in headless mode (no visible window).
+    Returns:
+        subprocess.Popen handle for the Chrome process.
+    """
+    if port is None:
+        port = BASE_CDP_PORT + worker_id
+    profile_dir = setup_worker_profile(worker_id)
+    # Kill any zombie Chrome from a previous run on this port
+    _kill_on_port(port)
+    # Patch preferences to suppress restore nag
+    _suppress_restore_nag(profile_dir)
+    chrome_exe = config.get_chrome_path()
+    cmd = [
+        chrome_exe,
+        f"--remote-debugging-port={port}",
+        f"--user-data-dir={profile_dir}",
+        "--profile-directory=Default",
+        "--no-first-run",
+        "--no-default-browser-check",
+        "--window-size=1024,768",
+        "--disable-session-crashed-bubble",
+        "--disable-features=InfiniteSessionRestore,PasswordManagerOnboarding",
+        "--hide-crash-restore-bubble",
+        "--noerrdialogs",
+        "--password-store=basic",
+        "--disable-save-password-bubble",
+        "--disable-popup-blocking",
+        # Block dangerous permissions at browser level
+        "--use-fake-device-for-media-stream",
+        "--use-fake-ui-for-media-stream",
+        "--deny-permission-prompts",
+        "--disable-notifications",
+    ]
+    if headless:
+        cmd.append("--headless=new")
+    # On Unix, start in a new process group so we can kill the whole tree
+    kwargs: dict = dict(stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    if platform.system() != "Windows":
+        import os
+        kwargs["preexec_fn"] = os.setsid
+    proc = subprocess.Popen(cmd, **kwargs)
+    with _chrome_lock:
+        _chrome_procs[worker_id] = proc
+    # Give Chrome time to start and open the debug port
+    time.sleep(3)
+    logger.info("[worker-%d] Chrome started on port %d (pid %d)",
+                worker_id, port, proc.pid)
+    return proc
+def cleanup_worker(worker_id: int, process: subprocess.Popen | None) -> None:
+    """Kill a worker's Chrome instance and remove it from tracking.
+    Args:
+        worker_id: Numeric worker identifier.
+        process: The Popen handle returned by launch_chrome.
+    """
+    if process and process.poll() is None:
+        _kill_process_tree(process.pid)
+    with _chrome_lock:
+        _chrome_procs.pop(worker_id, None)
+    logger.info("[worker-%d] Chrome cleaned up", worker_id)
+def kill_all_chrome() -> None:
+    """Kill all Chrome instances and any port zombies.
+    Called during graceful shutdown to ensure no orphan Chrome processes.
+    """
+    with _chrome_lock:
+        procs = dict(_chrome_procs)
+        _chrome_procs.clear()
+    for wid, proc in procs.items():
+        if proc.poll() is None:
+            _kill_process_tree(proc.pid)
+        _kill_on_port(BASE_CDP_PORT + wid)
+    # Sweep base port in case of zombies
+    _kill_on_port(BASE_CDP_PORT)
+def reset_worker_dir(worker_id: int) -> Path:
+    """Wipe and recreate a worker's isolated working directory.
+    Each job gets a fresh working directory so that file conflicts
+    (resume PDFs, MCP configs) don't bleed between jobs.
+    Args:
+        worker_id: Numeric worker identifier.
+    Returns:
+        Path to the clean worker directory.
+    """
+    worker_dir = config.APPLY_WORKER_DIR / f"worker-{worker_id}"
+    if worker_dir.exists():
+        shutil.rmtree(str(worker_dir), ignore_errors=True)
+    worker_dir.mkdir(parents=True, exist_ok=True)
+    return worker_dir
+def cleanup_on_exit() -> None:
+    """Atexit handler: kill all Chrome processes and sweep CDP ports.
+    Register this with atexit.register() at application startup.
+    """
+    with _chrome_lock:
+        procs = dict(_chrome_procs)
+        _chrome_procs.clear()
+    for wid, proc in procs.items():
+        if proc.poll() is None:
+            _kill_process_tree(proc.pid)
+        _kill_on_port(BASE_CDP_PORT + wid)
+    # Sweep base port for any orphan
+    _kill_on_port(BASE_CDP_PORT)

applypilot/apply/dashboard.py ADDED Viewed

@@ -0,0 +1,203 @@
+"""Rich live dashboard for the apply pipeline.
+Displays real-time worker status, job progress, and recent events
+in a terminal dashboard using the Rich library.
+"""
+import logging
+import threading
+import time
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from rich.console import Group
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+logger = logging.getLogger(__name__)
+@dataclass
+class WorkerState:
+    """Tracks the current state of a single apply worker.
+    One instance per worker is kept in the module-level state table and is
+    rendered as one row of the dashboard.
+    """
+    # Identifier of the worker; also the sort key for dashboard rows.
+    worker_id: int = 0
+    # Known values (see _STATUS_STYLES): starting, applying, applied, failed,
+    # expired, captcha, login_issue, idle, done
+    status: str = "starting"  # starting, applying, applied, failed, expired, captcha, idle, done
+    # Title and employer of the job currently being handled (truncated on display).
+    job_title: str = ""
+    company: str = ""
+    # NOTE(review): not rendered by the dashboard; presumably the job's fit score — confirm.
+    score: int = 0
+    # Compared against time.time() to show elapsed seconds while "applying".
+    start_time: float = 0.0
+    # Shown in the "Acts" and "Last Action" columns.
+    actions: int = 0
+    last_action: str = ""
+    # Per-worker counters shown in the "OK" and "Fail" columns.
+    jobs_applied: int = 0
+    jobs_failed: int = 0
+    # NOTE(review): not rendered by the dashboard; presumably total jobs processed — confirm.
+    jobs_done: int = 0
+    # Displayed with a "$" prefix in the "Cost" column.
+    total_cost: float = 0.0
+    # NOTE(review): not used in this module; presumably the worker's log path — confirm.
+    log_file: Path | None = None
+# Module-level state (thread-safe via _lock)
+# worker_id -> WorkerState, one entry per registered worker.
+_worker_states: dict[int, WorkerState] = {}
+# Pre-formatted Rich-markup event lines, oldest first.
+_events: list[str] = []
+# Guards both _worker_states and _events.
+_lock = threading.Lock()
+# Upper bound on len(_events); add_event() drops the oldest entry beyond it.
+MAX_EVENTS = 8
+# ---------------------------------------------------------------------------
+# State mutation helpers
+# ---------------------------------------------------------------------------
+def init_worker(worker_id: int = 0) -> None:
+    """Register the worker in the dashboard state."""
+    with _lock:
+        _worker_states[worker_id] = WorkerState(worker_id=worker_id)
+def update_state(worker_id: int = 0, **kwargs) -> None:
+    """Update the worker's state fields.
+    Args:
+        worker_id: Which worker to update.
+        **kwargs: Field names and values to set on WorkerState.
+    """
+    with _lock:
+        state = _worker_states.get(worker_id)
+        if state is not None:
+            for key, value in kwargs.items():
+                setattr(state, key, value)
+def get_state(worker_id: int = 0) -> WorkerState | None:
+    """Read the worker's current state."""
+    with _lock:
+        return _worker_states.get(worker_id)
+def add_event(msg: str) -> None:
+    """Add a timestamped event to the scrolling event log.
+    Args:
+        msg: Rich markup string describing the event.
+    """
+    ts = datetime.now().strftime("%H:%M:%S")
+    with _lock:
+        _events.append(f"[dim]{ts}[/dim] {msg}")
+        if len(_events) > MAX_EVENTS:
+            _events.pop(0)
+# ---------------------------------------------------------------------------
+# Rendering
+# ---------------------------------------------------------------------------
+# Status -> Rich style mapping
+# Looked up by render_dashboard(); a status missing from this table is
+# rendered with an empty (default) style.
+_STATUS_STYLES: dict[str, str] = {
+    "starting": "dim",
+    "idle": "dim",
+    "applying": "yellow",
+    "applied": "bold green",
+    "failed": "red",
+    "expired": "dim red",
+    "captcha": "magenta",
+    "login_issue": "red",
+    "done": "bold",
+}
+def render_dashboard() -> Table:
+    """Build the Rich table showing all worker statuses.
+    Returns:
+        A Rich Table object ready for display.
+    """
+    table = Table(title="ApplyPilot Dashboard", expand=True, show_lines=False)
+    table.add_column("W", style="bold", width=3, justify="center")
+    table.add_column("Job", min_width=30, max_width=50, no_wrap=True)
+    table.add_column("Status", width=12, justify="center")
+    table.add_column("Time", width=6, justify="right")
+    table.add_column("Acts", width=5, justify="right")
+    table.add_column("Last Action", min_width=20, max_width=35, no_wrap=True)
+    table.add_column("OK", width=4, justify="right", style="green")
+    table.add_column("Fail", width=4, justify="right", style="red")
+    table.add_column("Cost", width=8, justify="right")
+    with _lock:
+        states = sorted(_worker_states.values(), key=lambda s: s.worker_id)
+    total_applied = 0
+    total_failed = 0
+    total_cost = 0.0
+    for s in states:
+        elapsed = ""
+        if s.start_time and s.status == "applying":
+            elapsed = f"{int(time.time() - s.start_time)}s"
+        style = _STATUS_STYLES.get(s.status, "")
+        status_text = Text(s.status.upper(), style=style)
+        job_text = f"{s.job_title[:28]} @ {s.company[:16]}" if s.job_title else ""
+        table.add_row(
+            str(s.worker_id),
+            job_text,
+            status_text,
+            elapsed,
+            str(s.actions) if s.actions else "",
+            s.last_action[:35] if s.last_action else "",
+            str(s.jobs_applied),
+            str(s.jobs_failed),
+            f"${s.total_cost:.3f}" if s.total_cost else "",
+        )
+        total_applied += s.jobs_applied
+        total_failed += s.jobs_failed
+        total_cost += s.total_cost
+    # Totals row
+    table.add_section()
+    table.add_row(
+        "", "", "", "", "", "TOTAL",
+        str(total_applied), str(total_failed), f"${total_cost:.3f}",
+        style="bold",
+    )
+    return table
+def render_full() -> Table | Group:
+    """Render the dashboard table plus the recent events panel.
+    Returns:
+        A Rich Group (table + events panel) or just the table if no events.
+    """
+    table = render_dashboard()
+    with _lock:
+        event_lines = list(_events)
+    if event_lines:
+        event_text = Text.from_markup("\n".join(event_lines))
+        events_panel = Panel(
+            event_text,
+            title="Recent Events",
+            border_style="dim",
+            height=min(MAX_EVENTS + 2, len(event_lines) + 2),
+        )
+        return Group(table, events_panel)
+    return table
+def get_totals() -> dict[str, int | float]:
+    """Compute aggregate totals across all workers.
+    Returns:
+        Dict with keys: applied, failed, cost.
+    """
+    with _lock:
+        applied = sum(s.jobs_applied for s in _worker_states.values())
+        failed = sum(s.jobs_failed for s in _worker_states.values())
+        cost = sum(s.total_cost for s in _worker_states.values())
+    return {"applied": applied, "failed": failed, "cost": cost}