applypilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
applypilot/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """ApplyPilot — AI-powered end-to-end job application pipeline."""
2
+
3
# Package version string; matches the 0.2.0 wheel this file ships in.
__version__ = "0.2.0"
applypilot/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Enable `python -m applypilot`."""
2
+
3
from applypilot.cli import app

# Running the package as a module (`python -m applypilot`) simply invokes
# the CLI application object imported from applypilot.cli.
app()
@@ -0,0 +1 @@
1
+ """Apply pipeline: Chrome management, prompt building, orchestration, and dashboard."""
@@ -0,0 +1,321 @@
1
+ """Chrome lifecycle management for apply workers.
2
+
3
+ Handles launching an isolated Chrome instance with remote debugging,
4
+ worker profile setup/cloning, and cross-platform process cleanup.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import platform
10
+ import shutil
11
+ import subprocess
12
+ import threading
13
+ import time
14
+ from pathlib import Path
15
+
16
+ from applypilot import config
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
# CDP port base — each worker uses BASE_CDP_PORT + worker_id
# (launch_chrome applies this default when no explicit port is given).
BASE_CDP_PORT = 9222

# Track Chrome processes per worker for cleanup.
# Maps worker_id -> Popen handle; written by launch_chrome and drained by
# cleanup_worker / kill_all_chrome / cleanup_on_exit.
_chrome_procs: dict[int, subprocess.Popen] = {}
# Guards every read and write of _chrome_procs.
_chrome_lock = threading.Lock()
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Cross-platform process helpers
30
+ # ---------------------------------------------------------------------------
31
+
32
def _kill_process_tree(pid: int) -> None:
    """Kill a process and all of its children (best effort).

    On Windows, Chrome spawns 10+ child processes (GPU, renderer, etc.),
    so ``taskkill /T`` is used to take down the entire tree. On Unix the
    target's process group is sent SIGKILL; if the group cannot be
    signalled, the single PID is killed instead.

    The caller's own process group is never signalled: when the target
    shares it (for example a process that was not started in its own
    session), only the target PID itself is killed, so this function cannot
    take the calling application down with it.

    Args:
        pid: Process ID of the root process to kill.

    Returns:
        None. Failures are logged at debug level and never raised.
    """
    import os
    import signal as _signal

    try:
        if platform.system() == "Windows":
            subprocess.run(
                ["taskkill", "/F", "/T", "/PID", str(pid)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=10,
            )
            return

        # Unix: kill the entire process group, unless it is our own group.
        try:
            pgid = os.getpgid(pid)
            if pgid == os.getpgrp():
                # Signalling this group would kill the caller as well.
                os.kill(pid, _signal.SIGKILL)
            else:
                os.killpg(pgid, _signal.SIGKILL)
        except (ProcessLookupError, PermissionError):
            # Process already gone or owned by another user; try the PID alone.
            try:
                os.kill(pid, _signal.SIGKILL)
            except (ProcessLookupError, PermissionError):
                pass
    except Exception:
        logger.debug("Failed to kill process tree for PID %d", pid, exc_info=True)
62
+
63
+
64
def _listening_pids_from_netstat(output: str, port: int) -> set[int]:
    """Extract PIDs that are LISTENING on exactly ``port`` from netstat -ano text."""
    wanted = str(port)
    pids: set[int] = set()
    for line in output.splitlines():
        cols = line.split()
        # Expected columns: Proto, Local Address, Foreign Address, State, PID
        if len(cols) < 5 or cols[3] != "LISTENING":
            continue
        # Compare the local port exactly so 9222 does not match 92220, and
        # so the foreign-address column is never considered.
        if cols[1].rpartition(":")[2] != wanted:
            continue
        if cols[-1].isdigit():
            pids.add(int(cols[-1]))
    return pids


def _kill_on_port(port: int) -> None:
    """Kill any process listening on a specific TCP port (zombie cleanup).

    Uses netstat on Windows and lsof on macOS/Linux. Only sockets in the
    listening state whose local port equals ``port`` are considered, so
    processes that are merely connected to that port (CDP clients, this
    application itself) are left alone. The current process is never killed.

    Args:
        port: TCP port whose listener(s) should be terminated.

    Returns:
        None. A missing netstat/lsof binary or any other failure is logged
        at debug level and otherwise ignored.
    """
    import os

    own_pid = os.getpid()
    try:
        if platform.system() == "Windows":
            result = subprocess.run(
                ["netstat", "-ano", "-p", "TCP"],
                capture_output=True, text=True, timeout=10,
            )
            targets = _listening_pids_from_netstat(result.stdout, port)
        else:
            # macOS / Linux: -t prints bare PIDs, -sTCP:LISTEN restricts the
            # match to listening sockets (plain `-i :port` also returns
            # clients connected to that port).
            result = subprocess.run(
                ["lsof", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
                capture_output=True, text=True, timeout=10,
            )
            targets = {int(tok) for tok in result.stdout.split() if tok.isdigit()}

        for target in sorted(targets):
            if target != own_pid:
                _kill_process_tree(target)
    except FileNotFoundError:
        logger.debug("Port-kill tool not found (netstat/lsof) for port %d", port)
    except Exception:
        logger.debug("Failed to kill process on port %d", port, exc_info=True)
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Worker profile management
98
+ # ---------------------------------------------------------------------------
99
+
100
def setup_worker_profile(worker_id: int) -> Path:
    """Create (or reuse) an isolated Chrome profile for a worker.

    On first run, clones from an existing worker profile (preferred, since
    it already has session cookies) or from the user's real Chrome profile.
    Subsequent runs reuse the existing worker profile, detected by the
    presence of its ``Default`` sub-directory.

    Copying is best effort: entries that cannot be copied (for example
    files locked by a running Chrome) are skipped and reported at debug
    level rather than aborting the setup.

    Args:
        worker_id: Numeric worker identifier.

    Returns:
        Path to the worker's Chrome user-data directory.
    """
    profile_dir = config.CHROME_WORKER_DIR / f"worker-{worker_id}"
    if (profile_dir / "Default").exists():
        return profile_dir  # Already initialized

    # Find a source: prefer existing worker (has session cookies), else user profile
    source: Path | None = None
    for wid in range(10):
        if wid == worker_id:
            continue
        candidate = config.CHROME_WORKER_DIR / f"worker-{wid}"
        if (candidate / "Default").exists():
            source = candidate
            break
    if source is None:
        source = config.get_chrome_user_data()

    logger.info("[worker-%d] Copying Chrome profile from %s (first time setup)...",
                worker_id, source.name)
    profile_dir.mkdir(parents=True, exist_ok=True)

    # Copy essential profile dirs -- skip caches and heavy transient data
    skip = {
        "ShaderCache", "GrShaderCache", "Service Worker", "Cache",
        "Code Cache", "GPUCache", "CacheStorage", "Crashpad",
        "BrowserMetrics", "SafeBrowsing", "Crowd Deny",
        "MEIPreload", "SSLErrorAssistant", "recovery", "Temp",
        "SingletonLock", "SingletonSocket", "SingletonCookie",
    }
    # The same ignore callable is reused for every copied directory tree.
    nested_ignore = shutil.ignore_patterns(
        "Cache", "Code Cache", "GPUCache", "Service Worker",
    )

    for item in source.iterdir():
        if item.name in skip:
            continue
        dst = profile_dir / item.name
        try:
            if item.is_dir():
                shutil.copytree(
                    str(item), str(dst), dirs_exist_ok=True,
                    ignore=nested_ignore,
                )
            else:
                shutil.copy2(str(item), str(dst))
        except OSError as exc:
            # PermissionError and shutil.Error are both OSError subclasses.
            # Locked or unreadable entries are skipped, but leave a trace.
            logger.debug("[worker-%d] Skipped %s while copying profile: %s",
                         worker_id, item.name, exc)

    return profile_dir
160
+
161
+
162
def _suppress_restore_nag(profile_dir: Path) -> None:
    """Rewrite the profile's Preferences so Chrome does not offer to restore pages.

    A killed Chrome records exit_type=Crashed, which makes the next launch
    show a 'Restore pages?' prompt. This marks the exit as normal, selects
    startup mode 4, drops any startup URLs, and turns off the
    credential-saving and autofill-profile settings.

    Args:
        profile_dir: Chrome user-data directory containing ``Default``.

    Returns:
        None. Does nothing when the Preferences file is absent; any failure
        while reading, patching, or writing is logged at debug level.
    """
    prefs_path = profile_dir / "Default" / "Preferences"
    if prefs_path.exists():
        try:
            raw = prefs_path.read_text(encoding="utf-8")
            data = json.loads(raw)

            data.setdefault("profile", {})["exit_type"] = "Normal"

            session_prefs = data.setdefault("session", {})
            session_prefs["restore_on_startup"] = 4  # 4 = open blank
            session_prefs.pop("startup_urls", None)

            data["credentials_enable_service"] = False
            data.setdefault("password_manager", {})["saving_enabled"] = False
            data.setdefault("autofill", {})["profile_enabled"] = False

            prefs_path.write_text(json.dumps(data), encoding="utf-8")
        except Exception:
            logger.debug("Could not patch Chrome preferences", exc_info=True)
183
+
184
+
185
+ # ---------------------------------------------------------------------------
186
+ # Chrome launch / kill
187
+ # ---------------------------------------------------------------------------
188
+
189
def launch_chrome(worker_id: int, port: int | None = None,
                  headless: bool = False) -> subprocess.Popen:
    """Launch a Chrome instance with remote debugging for a worker.

    Prepares the worker profile, kills any listener left on the CDP port,
    patches the profile preferences, then starts Chrome and records the
    handle in the module's process table.

    Args:
        worker_id: Numeric worker identifier.
        port: CDP port. Defaults to BASE_CDP_PORT + worker_id.
        headless: Run Chrome in headless mode (no visible window).

    Returns:
        subprocess.Popen handle for the Chrome process. The handle is
        returned even if Chrome has already exited by the end of the
        start-up wait; that case is logged as a warning.
    """
    if port is None:
        port = BASE_CDP_PORT + worker_id

    profile_dir = setup_worker_profile(worker_id)

    # Kill any zombie Chrome from a previous run on this port
    _kill_on_port(port)

    # Patch preferences to suppress restore nag
    _suppress_restore_nag(profile_dir)

    chrome_exe = config.get_chrome_path()

    cmd = [
        chrome_exe,
        f"--remote-debugging-port={port}",
        f"--user-data-dir={profile_dir}",
        "--profile-directory=Default",
        "--no-first-run",
        "--no-default-browser-check",
        "--window-size=1024,768",
        "--disable-session-crashed-bubble",
        "--disable-features=InfiniteSessionRestore,PasswordManagerOnboarding",
        "--hide-crash-restore-bubble",
        "--noerrdialogs",
        "--password-store=basic",
        "--disable-save-password-bubble",
        "--disable-popup-blocking",
        # Block dangerous permissions at browser level
        "--use-fake-device-for-media-stream",
        "--use-fake-ui-for-media-stream",
        "--deny-permission-prompts",
        "--disable-notifications",
    ]
    if headless:
        cmd.append("--headless=new")

    # On Unix, start Chrome in a new session (and therefore a new process
    # group) so the whole tree can be killed later. start_new_session makes
    # the child call setsid() without preexec_fn, which the subprocess docs
    # flag as unsafe when the parent application uses threads.
    popen_kwargs: dict = {
        "stdout": subprocess.DEVNULL,
        "stderr": subprocess.DEVNULL,
    }
    if platform.system() != "Windows":
        popen_kwargs["start_new_session"] = True

    proc = subprocess.Popen(cmd, **popen_kwargs)
    with _chrome_lock:
        _chrome_procs[worker_id] = proc

    # Give Chrome time to start and open the debug port
    time.sleep(3)
    if proc.poll() is not None:
        # Do not claim success when the browser has already gone away.
        logger.warning("[worker-%d] Chrome exited during startup (port %d, exit code %s)",
                       worker_id, port, proc.returncode)
    else:
        logger.info("[worker-%d] Chrome started on port %d (pid %d)",
                    worker_id, port, proc.pid)
    return proc
253
+
254
+
255
def cleanup_worker(worker_id: int, process: subprocess.Popen | None) -> None:
    """Stop one worker's Chrome and forget its tracked handle.

    Args:
        worker_id: Numeric worker identifier.
        process: The Popen handle returned by launch_chrome, or None.

    Returns:
        None. The process tree is killed only if the handle is still
        running; the tracking entry is removed either way.
    """
    if process:
        still_alive = process.poll() is None
        if still_alive:
            _kill_process_tree(process.pid)

    with _chrome_lock:
        _chrome_procs.pop(worker_id, None)

    logger.info("[worker-%d] Chrome cleaned up", worker_id)
267
+
268
+
269
def kill_all_chrome() -> None:
    """Terminate every tracked Chrome instance and sweep their CDP ports.

    Intended for graceful shutdown so no orphan Chrome processes remain.
    The tracking table is emptied under the lock first; the actual killing
    happens afterwards, outside the lock.
    """
    with _chrome_lock:
        tracked = list(_chrome_procs.items())
        _chrome_procs.clear()

    for worker_id, handle in tracked:
        running = handle.poll() is None
        if running:
            _kill_process_tree(handle.pid)
        # Sweep the worker's default port whether or not the handle was alive.
        _kill_on_port(BASE_CDP_PORT + worker_id)

    # Sweep base port in case of zombies
    _kill_on_port(BASE_CDP_PORT)
285
+
286
+
287
def reset_worker_dir(worker_id: int) -> Path:
    """Remove a worker's working directory and create it again.

    Giving each job a fresh directory keeps files from one job (resume
    PDFs, MCP configs) from leaking into the next.

    Args:
        worker_id: Numeric worker identifier.

    Returns:
        Path to the recreated worker directory. Removal errors are
        ignored, so leftovers that could not be deleted may remain.
    """
    target = config.APPLY_WORKER_DIR / f"worker-{worker_id}"

    already_there = target.exists()
    if already_there:
        shutil.rmtree(str(target), ignore_errors=True)

    target.mkdir(parents=True, exist_ok=True)
    return target
304
+
305
+
306
def cleanup_on_exit() -> None:
    """Atexit handler: kill all Chrome processes and sweep CDP ports.

    Register this with atexit.register() at application startup.

    The shutdown sequence is the same one kill_all_chrome performs (drain
    the tracking table, kill live process trees, sweep each worker port and
    the base port), so this handler delegates to it rather than keeping a
    second copy of that logic.
    """
    kill_all_chrome()
@@ -0,0 +1,203 @@
1
+ """Rich live dashboard for the apply pipeline.
2
+
3
+ Displays real-time worker status, job progress, and recent events
4
+ in a terminal dashboard using the Rich library.
5
+ """
6
+
7
+ import logging
8
+ import threading
9
+ import time
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+
14
+ from rich.console import Group
15
+ from rich.panel import Panel
16
+ from rich.table import Table
17
+ from rich.text import Text
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class WorkerState:
    """Tracks the current state of the apply worker.

    One instance is stored per worker id in the module-level state table;
    update_state() mutates its fields and render_dashboard() reads them.
    """

    # Identifier used as the table key and as the dashboard sort key.
    worker_id: int = 0
    # Looked up in _STATUS_STYLES for colouring and shown upper-cased.
    status: str = "starting"  # starting, applying, applied, failed, expired, captcha, idle, done
    # Dashboard shows at most the first 28 characters of the title ...
    job_title: str = ""
    # ... and the first 16 characters of the company.
    company: str = ""
    # Not read by the rendering code in this module.
    score: int = 0
    # Compared against time.time() to show elapsed seconds while "applying".
    start_time: float = 0.0
    # Shown in the "Acts" column when non-zero.
    actions: int = 0
    # Shown truncated to 35 characters in the "Last Action" column.
    last_action: str = ""
    # Per-worker counters; applied/failed are summed into the totals row.
    jobs_applied: int = 0
    jobs_failed: int = 0
    # Not read by the rendering code in this module.
    jobs_done: int = 0
    # Rendered with a "$" prefix and three decimals.
    total_cost: float = 0.0
    # Not read by the rendering code in this module.
    log_file: Path | None = None
39
+
40
+
41
# Module-level state (thread-safe via _lock)
# worker_id -> WorkerState, populated by init_worker().
_worker_states: dict[int, WorkerState] = {}
# Pre-formatted event lines, oldest first; trimmed by add_event().
_events: list[str] = []
# Held around every access to _worker_states and _events.
_lock = threading.Lock()
# Maximum number of event lines kept in _events.
MAX_EVENTS = 8
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # State mutation helpers
50
+ # ---------------------------------------------------------------------------
51
+
52
def init_worker(worker_id: int = 0) -> None:
    """Create a fresh WorkerState for ``worker_id`` and store it in the dashboard table.

    Any state previously registered under the same id is replaced.
    """
    fresh_state = WorkerState(worker_id=worker_id)
    with _lock:
        _worker_states[worker_id] = fresh_state
56
+
57
+
58
def update_state(worker_id: int = 0, **kwargs) -> None:
    """Assign new values to fields of a registered worker's state.

    Args:
        worker_id: Which worker to update.
        **kwargs: Attribute names and the values to set on its WorkerState.

    Returns:
        None. Unknown worker ids are ignored.
    """
    with _lock:
        target = _worker_states.get(worker_id)
        if target is None:
            return
        for attr_name, attr_value in kwargs.items():
            setattr(target, attr_name, attr_value)
70
+
71
+
72
def get_state(worker_id: int = 0) -> WorkerState | None:
    """Return the WorkerState registered for ``worker_id``, or None if there is none.

    The lookup happens under the module lock; the returned object is the
    stored instance itself, not a copy.
    """
    with _lock:
        current = _worker_states.get(worker_id)
    return current
76
+
77
+
78
def add_event(msg: str) -> None:
    """Append a time-stamped line to the recent-events log.

    The line is prefixed with the current local time (HH:MM:SS) in dim
    style. Once the log holds more than MAX_EVENTS lines, the oldest one
    is dropped.

    Args:
        msg: Rich markup string describing the event.
    """
    stamp = datetime.now().strftime("%H:%M:%S")
    entry = f"[dim]{stamp}[/dim] {msg}"
    with _lock:
        _events.append(entry)
        if len(_events) > MAX_EVENTS:
            del _events[0]
89
+
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # Rendering
93
+ # ---------------------------------------------------------------------------
94
+
95
# Status -> Rich style mapping
# render_dashboard() looks up WorkerState.status here; a status missing
# from this table is rendered with an empty (default) style.
_STATUS_STYLES: dict[str, str] = {
    "starting": "dim",
    "idle": "dim",
    "applying": "yellow",
    "applied": "bold green",
    "failed": "red",
    "expired": "dim red",
    "captcha": "magenta",
    "login_issue": "red",
    "done": "bold",
}
107
+
108
+
109
def render_dashboard() -> Table:
    """Assemble the per-worker status table.

    One row is produced per registered worker, ordered by worker id,
    followed by a separate section holding the summed applied/failed
    counts and total cost.

    Returns:
        A Rich Table object ready for display.
    """
    board = Table(title="ApplyPilot Dashboard", expand=True, show_lines=False)

    # (header, add_column keyword arguments) in display order.
    column_specs = (
        ("W", {"style": "bold", "width": 3, "justify": "center"}),
        ("Job", {"min_width": 30, "max_width": 50, "no_wrap": True}),
        ("Status", {"width": 12, "justify": "center"}),
        ("Time", {"width": 6, "justify": "right"}),
        ("Acts", {"width": 5, "justify": "right"}),
        ("Last Action", {"min_width": 20, "max_width": 35, "no_wrap": True}),
        ("OK", {"width": 4, "justify": "right", "style": "green"}),
        ("Fail", {"width": 4, "justify": "right", "style": "red"}),
        ("Cost", {"width": 8, "justify": "right"}),
    )
    for header, options in column_specs:
        board.add_column(header, **options)

    # Snapshot the worker list under the lock; rows are built afterwards.
    with _lock:
        workers = sorted(_worker_states.values(), key=lambda s: s.worker_id)

    applied_sum = 0
    failed_sum = 0
    cost_sum = 0.0

    for worker in workers:
        # Elapsed time is only shown while a job is actively being applied to.
        if worker.start_time and worker.status == "applying":
            elapsed_cell = f"{int(time.time() - worker.start_time)}s"
        else:
            elapsed_cell = ""

        status_cell = Text(
            worker.status.upper(),
            style=_STATUS_STYLES.get(worker.status, ""),
        )

        if worker.job_title:
            job_cell = f"{worker.job_title[:28]} @ {worker.company[:16]}"
        else:
            job_cell = ""

        actions_cell = str(worker.actions) if worker.actions else ""
        last_action_cell = worker.last_action[:35] if worker.last_action else ""
        cost_cell = f"${worker.total_cost:.3f}" if worker.total_cost else ""

        board.add_row(
            str(worker.worker_id),
            job_cell,
            status_cell,
            elapsed_cell,
            actions_cell,
            last_action_cell,
            str(worker.jobs_applied),
            str(worker.jobs_failed),
            cost_cell,
        )

        applied_sum += worker.jobs_applied
        failed_sum += worker.jobs_failed
        cost_sum += worker.total_cost

    # Totals row
    board.add_section()
    board.add_row(
        "", "", "", "", "", "TOTAL",
        str(applied_sum), str(failed_sum), f"${cost_sum:.3f}",
        style="bold",
    )

    return board
167
+
168
+
169
def render_full() -> Table | Group:
    """Build the dashboard table and, when events exist, an events panel beneath it.

    Returns:
        A Rich Group (table + events panel) or just the table if no events.
    """
    board = render_dashboard()

    # Copy the event lines under the lock so rendering works on a snapshot.
    with _lock:
        recent = _events[:]

    if not recent:
        return board

    panel_height = min(MAX_EVENTS + 2, len(recent) + 2)
    panel = Panel(
        Text.from_markup("\n".join(recent)),
        title="Recent Events",
        border_style="dim",
        height=panel_height,
    )
    return Group(board, panel)
191
+
192
+
193
def get_totals() -> dict[str, int | float]:
    """Sum the applied, failed, and cost figures of every registered worker.

    Returns:
        Dict with keys: applied, failed, cost.
    """
    applied_sum = 0
    failed_sum = 0
    cost_sum = 0
    with _lock:
        for worker in _worker_states.values():
            applied_sum += worker.jobs_applied
            failed_sum += worker.jobs_failed
            cost_sum += worker.total_cost
    return {"applied": applied_sum, "failed": failed_sum, "cost": cost_sum}