pytest-fast 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2748 @@
1
+ """pytest-fast — resident forkserver-based test accelerator (single-file, xdist alt).
2
+
3
+ Why not xdist: xdist cold-spawns N workers, EACH re-imports the app graph
4
+ (~4.5s × N CPU/run). Why not bare fork(): on macOS fork-without-exec segfaults
5
+ inside CoreFoundation/SystemConfiguration (psycopg2→getaddrinfo, httpx→getproxies).
6
+ The solution — **forkserver** (the default start method on Linux since Python 3.14; available on macOS, where spawn remains the default): one clean SINGLE-THREADED
7
+ server process preloads the app AND COLLECTS TESTS ONCE, forks workers off itself →
8
+ warm imports + a pre-built ITEMS list, no thread/framework fork crashes.
9
+
10
+ Socket address and TTL are passed by the CALLER, not baked in.
11
+ Modes (CLI `pytest-fast` or `python -m pytest_fast`):
12
+ * `--address X` (ensure+run): connect to a daemon at X (spawn one with `--ttl`
13
+ if absent) → run, print summary. Warmup on reruns ≈ fork(). With `--with-watcher`
14
+ also spawns a background watcher (pre-warm on source changes).
15
+ * `--serve --address X --ttl N`: be the resident daemon: collect ONCE, hold a warm
16
+ forkserver. src/tests changed OR relevant env (addopts + any prefix listed in
17
+ `PYTEST_FAST_ENV_PREFIXES`, see `_env_fingerprint`) changed → daemon replies
18
+ {'stale'} and exits (the client will spawn a fresh one). idle>N seconds → exit.
19
+ Control protocol: run / status / shutdown / promote (see `serve`).
20
+ * `--watch --address X --ttl N`: (internal) resident watcher: poll mtime →
21
+ debounce → staging-promote the daemon (boot a successor on a staging socket,
22
+ verify collect, soft-shutdown the old one, promote to canonical). Exits once
23
+ the daemon is gone via its own idle-ttl. Single-instance via flock.
24
+ * `--runs N` / `--dump PATH`: local in-process run.
25
+
26
+ Also a pytest plugin (auto-loaded via the `pytest11` entry point):
27
+ * `pytest --fast`: run the suite through the resident daemon while staying a
28
+ real pytest session — the daemon streams full per-phase reports, which we republish
29
+ through the controller's hooks → fully NATIVE reporting (terminalreporter, --durations,
30
+ -v/-s, junit, plugins, exit code) on top of warm fork-server execution. Forwards the
31
+ collected selection (-k/-m). `--fast-address/-workers/-ttl/-watch` tune it. Inert
32
+ unless `--fast` is passed (so a plain `pytest` run is unaffected).
33
+ * `OUTCOME_DUMP=PATH pytest -p pytest_fast`: writes {nodeid: outcome} — a reference dump for
34
+ outcome-diff comparison against xdist.
35
+
36
+ Behaviorally identical to xdist (same test set; marks/skip/xfail/reruns 1-to-1 —
37
+ runs go through the FULL pytest protocol `pytest_runtest_protocol`); reports are lossy.
38
+
39
+ ⚠ macOS fork safety: code that resolves `localhost` via `getaddrinfo` inside a fork
40
+ will segfault (mDNS/CoreFoundation init). If your app code does this, pre-resolve to
41
+ a numeric IP (e.g. `127.0.0.1`) in your config — pytest-fast doesn't auto-rewrite.
42
+ """
43
+
44
+ from __future__ import annotations
45
+
46
+ import os
47
+
48
# macOS fork-safety (no-op on Linux): no_proxy=* routes getproxies through the env path,
# bypassing SystemConfiguration; the OBJC guard suppresses ObjC initialize.
# `setdefault` only fills in values the caller has not already exported, and these lines
# sit ahead of the remaining imports so the variables exist before anything else loads.
os.environ.setdefault("no_proxy", "*")
os.environ.setdefault("NO_PROXY", "*")
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
53
+
54
+ import argparse
55
+ import fcntl
56
+ import hashlib
57
+ import json
58
+ import math
59
+ import multiprocessing as mp
60
+ import pickle
61
+ import selectors
62
+ import socket
63
+ import struct
64
+ import subprocess
65
+ import sys
66
+ import tempfile
67
+ import threading
68
+ import time
69
+ from contextlib import contextmanager
70
+ from pathlib import Path
71
+ from typing import TYPE_CHECKING, NamedTuple, NotRequired, TypedDict, cast
72
+
73
+ if TYPE_CHECKING:
74
+ import cProfile
75
+ from collections.abc import Callable, Iterator
76
+ from multiprocessing.context import DefaultContext
77
+
78
+ from _pytest.config import Config
79
+ from _pytest.config.argparsing import Parser
80
+ from _pytest.main import Session
81
+ from _pytest.nodes import Item
82
+ from _pytest.reports import TestReport
83
+
84
+
85
# Public API. Everything with a `_` prefix is an implementation detail (NOT covered by
# this package's semver promises). Tests/self-test code uses `_*`-names intentionally,
# but downstream consumers should rely on this list.
# NOTE(review): only `RunResult`, `WorkerStats` and `categorize` are defined in the part of
# the file reviewed here; the remaining names are expected further down — confirm they exist.
__all__ = [
    "Daemon",
    "RunResult",
    "WorkerStats",
    "categorize",
    "default_workers",  # public worker-count API (auto-detect, ignores overrides)
    "main",
    "main_cli",
    "request_run",  # client-side, module-level
    "request_run_streamed",  # client-side streaming (used by the --fast plugin controller)
    "resolve_workers",  # public worker-count API (full precedence; stable for external tooling)
]
100
+
101
+
102
class RunResult(TypedDict):
    """A single test outcome, shipped over the worker→master bus (pickle-serialized).

    `nodeid`, `outcome` and `duration` are always present; each `NotRequired` key is only
    attached in the situation described next to it. At runtime this is a plain `dict`,
    which is what lets it pass the builtin-only unpickling whitelist."""

    nodeid: str
    outcome: str  # a category name such as the keys of `_OUTCOME_PRIORITY`
    duration: float
    cpu: NotRequired[float]  # per-test process CPU time (duration − cpu ≈ I/O wait); for `bench`
    # cProfile rows (qualname, ncalls, tottime, cumtime) for the test, top-by-cumtime — only on the
    # targeted profiling pass `bench` runs over its top bottleneck tests. Deterministic call counts.
    profile: NotRequired[list[tuple[str, int, float, float]]]
    longrepr: NotRequired[str]  # failure text — only for failed/error
    # Every phase report in pytest's serializable wire form (plain builtins, whitelist-safe),
    # present only in full-report mode — lets the master/controller replay them through a real
    # terminalreporter (--durations, junit, -v/-s, plugins). See `_run_one_item(full_report=...)`.
    reports: NotRequired[list[dict[str, object]]]
117
+
118
+
119
class WorkerStats(TypedDict):
    """Worker summary emitted at the end of a run (drives the par. metric + `--detailed` block).

    `busy` is the WALL time spent inside the runtest protocol; `cpu` is this worker process's
    CPU time over the same span (busy − cpu ≈ time blocked on I/O, e.g. a DB round-trip — that's
    what makes par look full while cores idle). `bus_wait` is time the worker sat idle between
    tests waiting for the master to hand out the next index. `run_wall` ≈ busy + bus_wait +
    bookkeeping, so the rectangle N×run decomposes into Σbusy (useful) + Σbus_wait (bus overhead)
    + Σ(run−run_wall) (tail/straggler drain)."""

    wid: int  # worker identifier — presumably the worker's index in the pool; confirm at the emit site
    ran: int  # how many tests this worker executed (feeds `ran_min` / `ran_max` / `ran_ratio`)
    busy: float  # seconds of wall time inside the runtest protocol
    cpu: float  # seconds of process CPU time over the same span
    bus_wait: float  # seconds spent idle waiting for the next index
    run_wall: float  # seconds; ≈ busy + bus_wait + bookkeeping
135
+
136
+
137
class _RunOutcome(NamedTuple):
    """Raw output of one fork→serve→collect cycle (`Daemon._execute_run`), before rendering —
    shared by the single-run summary path and the N-run `--bench` aggregation.

    Being a NamedTuple, field ORDER is part of the interface: positional construction and
    unpacking elsewhere depend on it."""

    results: list[RunResult]
    worker_stats: list[WorkerStats]
    bus: dict[str, float]  # NOTE(review): keys are not visible here — presumably bus byte/time counters; confirm
    warmup: float  # fork+spawn time (t_ready − t0)
    run_wall: float  # execution wall (t_done − t_ready)
    # NOTE(review): `total` / `idx` are undocumented in this chunk — presumably the number of
    # items scheduled and the hand-out cursor reached; confirm against `Daemon._execute_run`.
    total: int
    idx: int
    exitcodes: list[int | None]  # presumably one entry per worker process, None while still unknown — confirm
149
+
150
+
151
class ParMetrics(TypedDict):
    """Derived parallelism metrics behind the `--detailed` block. Ratios are in '×' units
    (worker-equivalents): a worker-seconds quantity divided by the run wall. See `WorkerStats`
    for the N×run decomposition these come from.

    Produced by `_parallelism_metrics`, which substitutes a neutral value (0.0, or 1.0 for
    `ran_ratio`) whenever a denominator is zero."""

    par: float  # Σbusy / run — effective parallel speedup (≤ num_workers)
    eff: float  # par / num_workers — parallel efficiency in 0..1
    cpu_par: float  # Σcpu / run — cores' worth of CPU actually burned
    cpu_sat: float  # Σcpu / Σbusy — fraction of test-wall spent on-CPU (low → I/O-bound)
    bus_lost: float  # Σbus_wait / run — parallelism lost to inter-test bus round-trips
    tail_lost: float  # num_workers − Σrun_wall/run — lost to end-of-run straggler drain
    ideal_wall: float  # Σbusy / num_workers — best wall a work-conserving scheduler could reach
    busy_s: float  # Σbusy — total test-execution worker-seconds (serial-equivalent time)
    cpu_s: float  # Σcpu — total CPU-seconds
    bus_wait_s: float  # Σbus_wait — worker-seconds idle on the bus between tests
    drain_s: float  # N·run − Σrun_wall — worker-seconds idle while stragglers finish
    idle_cores: float  # num_workers − cpu_par — core-equivalents NOT computing (I/O wait + idle)
    io_cores: float  # par − cpu_par — workers in a test but blocked on I/O (not on CPU)
    run_wall_min: float  # smallest per-worker `run_wall`
    run_wall_max: float  # largest per-worker `run_wall`
    wall_spread: float  # (run_wall_max − run_wall_min) / run — load imbalance, 0 = perfect
    ran_min: int  # fewest tests executed by any one worker
    ran_max: int  # most tests executed by any one worker
    ran_ratio: float  # ran_max / ran_min — test-COUNT spread (high + low wall_spread = healthy)
    floor: float  # longest single test — a hard lower bound on wall at ANY worker count
    floor_nodeid: str  # nodeid of that longest test ("" when there are no results)
    n_slow: int  # tests ≥ 1s — the heavy tail
    p99: float  # 99th-percentile single-test duration
179
+
180
+
181
+ def _parallelism_metrics(
182
+ worker_stats: list[WorkerStats], run: float, num_workers: int, results: list[RunResult]
183
+ ) -> ParMetrics:
184
+ """Pure aggregation of worker stats → the `--detailed` parallelism metrics. Kept separate from
185
+ rendering so it's unit-testable without a live daemon. Every division is guarded (run / Σbusy /
186
+ num_workers / ran_min can be 0 on an empty or instant run)."""
187
+ sum_busy = sum(s["busy"] for s in worker_stats)
188
+ sum_cpu = sum(s["cpu"] for s in worker_stats)
189
+ sum_bus = sum(s["bus_wait"] for s in worker_stats)
190
+ sum_run_wall = sum(s["run_wall"] for s in worker_stats)
191
+ rans = [s["ran"] for s in worker_stats]
192
+ run_walls = [s["run_wall"] for s in worker_stats]
193
+ durs = sorted(r["duration"] for r in results)
194
+ floor, floor_nodeid = max(((r["duration"], r["nodeid"]) for r in results), default=(0.0, ""))
195
+ par = sum_busy / run if run else 0.0
196
+ cpu_par = sum_cpu / run if run else 0.0
197
+ ran_min, ran_max = (min(rans), max(rans)) if rans else (0, 0)
198
+ rw_min, rw_max = (min(run_walls), max(run_walls)) if run_walls else (0.0, 0.0)
199
+ p99 = durs[min(len(durs) - 1, round(0.99 * (len(durs) - 1)))] if durs else 0.0
200
+ return {
201
+ "par": par,
202
+ "eff": (par / num_workers) if num_workers else 0.0,
203
+ "cpu_par": cpu_par,
204
+ "cpu_sat": sum_cpu / sum_busy if sum_busy else 0.0,
205
+ "bus_lost": sum_bus / run if run else 0.0,
206
+ "tail_lost": max(0.0, num_workers - sum_run_wall / run) if run else 0.0,
207
+ "ideal_wall": sum_busy / num_workers if num_workers else 0.0,
208
+ "busy_s": sum_busy,
209
+ "cpu_s": sum_cpu,
210
+ "bus_wait_s": sum_bus,
211
+ "drain_s": max(0.0, num_workers * run - sum_run_wall),
212
+ "idle_cores": max(0.0, num_workers - cpu_par),
213
+ "io_cores": max(0.0, par - cpu_par),
214
+ "run_wall_min": rw_min,
215
+ "run_wall_max": rw_max,
216
+ "wall_spread": (rw_max - rw_min) / run if run else 0.0,
217
+ "ran_min": ran_min,
218
+ "ran_max": ran_max,
219
+ "ran_ratio": ran_max / ran_min if ran_min else 1.0,
220
+ "floor": floor,
221
+ "floor_nodeid": floor_nodeid,
222
+ "n_slow": sum(1 for d in durs if d >= 1.0),
223
+ "p99": p99,
224
+ }
225
+
226
+
227
+ def _suggest_workers(cpu_sat: float, cores: int, logical: int) -> int:
228
+ """Deterministic pool size (NOT a heuristic): if each worker is CPU-busy only `cpu_sat` of its
229
+ wall (the rest blocked on I/O), keeping `cores` cores saturated needs ≈`cores / cpu_sat` workers —
230
+ so that at any instant ~`cores` are in their CPU phase while the others overlap I/O (Little's-law
231
+ pool sizing).
232
+
233
+ But the cap is NOT the raw logical-core count. On big.LITTLE the workers past `cores` (the perf
234
+ cores) land on slower E-cores: empirically they add CPU-seconds (the work runs at ~half speed)
235
+ and only pay off by hiding the I/O-WAIT fraction `(1 − cpu_sat)`, not by adding CPU throughput.
236
+ So the ceiling is `cores + (logical − cores)·(1 − cpu_sat)` — the E-cores discounted by how
237
+ I/O-bound the suite is. A CPU-bound suite (cpu_sat→1) caps at `cores`; an I/O-bound one
238
+ (cpu_sat→0) can reach toward `logical`. On a uniform machine `cores == logical`, so it collapses
239
+ to a plain `cores` cap (no oversubscription suggestion — the right answer there too).
240
+ Verified on PRM2: raw `cores/cpu_sat` said 10 and 10w REGRESSED on per-db (E-core tax); the
241
+ discounted cap lands at ~8."""
242
+ if cpu_sat <= 0:
243
+ return cores
244
+ pool = math.ceil(cores / cpu_sat) # ideal if extra workers had free, full-speed cores
245
+ e_core_cap = cores + (logical - cores) * (1.0 - cpu_sat) # discount E-cores by the I/O fraction
246
+ return max(cores, min(pool, round(e_core_cap)))
247
+
248
+
249
+ def _par_verdict(m: ParMetrics, num_workers: int, cores: int, logical: int) -> str:
250
+ """One-line synthesis → what to actually do. DESCRIPTIVE, not over-claiming: the worker-count
251
+ suggestion is a deterministic formula (`_suggest_workers`), shown ONLY in the clean regime
252
+ (`num_workers ≤ cores`, where cpu_sat isn't depressed by oversubscription) and always with the
253
+ shared-resource caveat (the I/O overlap only pays off if the resource scales). Priority: a
254
+ straggler tail is the loudest problem; then overhead; then the CPU-vs-I/O ceiling."""
255
+ if m["wall_spread"] > 0.15 and m["floor_nodeid"]:
256
+ return (
257
+ f"straggler — walls spread {m['wall_spread']:.0%}; tail likely "
258
+ f"{m['floor_nodeid']} ({m['floor']:.1f}s). Split/redistribute it."
259
+ )
260
+ if m["eff"] < 0.90:
261
+ return "low efficiency — bus chatter (tests too short?) or oversubscribed past useful work."
262
+ cpu_sat = m["cpu_sat"]
263
+ if cpu_sat >= 0.85:
264
+ return f"CPU-saturated ({cpu_sat:.0%} on-CPU) — bound by {cores} cores; more workers won't help."
265
+ if num_workers > cores:
266
+ # Already above perf cores → cpu_sat is contention-depressed; don't trust it for a number.
267
+ return (
268
+ f"running {num_workers}w above {cores} perf cores — CPU can't speed past {cores}; "
269
+ f"extra workers only overlap I/O (watch contention / E-core stragglers)."
270
+ )
271
+ w_opt = _suggest_workers(cpu_sat, cores, logical)
272
+ if w_opt > num_workers:
273
+ return (
274
+ f"{cpu_sat:.0%} CPU/test → your {cores} cores sit ~{1 - cpu_sat:.0%} idle on I/O; "
275
+ f"≈{w_opt} workers may overlap that (pool size cores÷CPU-frac, E-cores past {cores} "
276
+ f"discounted by the I/O fraction). Try --workers {w_opt} — measure: a shared DB or the "
277
+ f"E-core tax can still cancel the gain."
278
+ )
279
+ return f"near-optimal at {num_workers}w — wall ≈ work/cores, little headroom."
280
+
281
+
282
def _detailed_par_lines(m: ParMetrics, run: float, num_workers: int, cores: int, logical: int) -> list[str]:
    """Format the `--detailed` parallelism block as a list of text lines.

    Besides the raw ratios the block shows: idle time in absolute worker-seconds, idle
    core-equivalents, a load-balance read (uneven test COUNTS are fine as long as the walls
    agree), the duration tail (floor, count of tests ≥1s, p99) and the `_par_verdict` line.

    Args:
        m: metrics from `_parallelism_metrics`.
        run: execution wall time in seconds.
        num_workers: workers used for the run.
        cores: performance cores.
        logical: logical cores (the hard cap passed on to the verdict).

    Returns:
        The lines in display order; the caller is responsible for printing them.
    """
    sat = m["cpu_sat"]
    bound = "compute-bound" if sat >= 0.75 else ("I/O-bound" if sat <= 0.40 else "mixed")

    spread = m["wall_spread"]
    balance = (
        f"by time — counts vary {m['ran_ratio']:.1f}x, walls within {spread:.0%} (healthy)"
        if spread <= 0.10
        else f"UNEVEN — walls spread {spread:.0%} (straggler — see verdict)"
    )

    bus_idle = m["bus_wait_s"]
    tail_idle = m["drain_s"]

    eff_line = f" eff : {m['eff']:.0%} (ideal wall {m['ideal_wall']:.2f}s vs {run:.2f}s actual)"
    cpu_line = (
        f" cpu : {m['cpu_par']:.2f}x of {num_workers} · {sat:.0%} CPU / {1 - sat:.0%} I/O "
        f"({bound}) · ~{m['idle_cores']:.1f} cores idle ({m['io_cores']:.1f} on I/O)"
    )
    lost_line = (
        f" lost : {bus_idle + tail_idle:.2f} worker-s idle = "
        f"bus {bus_idle:.2f}s + tail {tail_idle:.2f}s"
    )
    balance_line = f" balance : {balance} (ran {m['ran_min']}–{m['ran_max']}/w)"
    floor_line = (
        f" floor : {m['floor']:.2f}s {m['floor_nodeid']} · {m['n_slow']} tests ≥1s, p99 {m['p99']:.2f}s"
    )
    verdict_line = f" verdict : {_par_verdict(m, num_workers, cores, logical)}"

    return [" detail —", eff_line, cpu_line, lost_line, balance_line, floor_line, verdict_line]
309
+
310
+
311
+ # ── logging helper ───────────────────────────────────────────────────────────
312
+
313
+
314
+ def _log(tag: str, msg: str) -> None:
315
+ """Timestamped log line — for daemon/watcher lifecycle messages. We avoid the
316
+ `logging` module on purpose: extra overhead and another init point in
317
+ forkserver-preload. `flush=True` is mandatory — otherwise with
318
+ `subprocess.Popen` stdout→file the lines may get stuck in the buffer until the
319
+ process exits."""
320
+ print(f"[{time.strftime('%H:%M:%S')}] [{tag}] {msg}", flush=True)
321
+
322
+
323
+ # ── AF_UNIX path-too-long workaround ─────────────────────────────────────────
324
+ #
325
+ # On macOS `sockaddr_un.sun_path` is only 104 bytes (108 on Linux). A long `address`
326
+ # (e.g. pytest's `tmp_path` under /private/var/folders/…) blows the limit → Python
327
+ # proactively raises `OSError: AF_UNIX path too long` BEFORE the syscall. Classic
328
+ # Unix trick: chdir into the dirname → bind/connect with the relative basename
329
+ # (10–20 bytes). The socket file physically sits at the same absolute path; the
330
+ # path in the kernel fits the limit. The context manager restores cwd.
331
+ #
332
+ # ⚠ chdir is process-wide. From multithreaded code do NOT call this from several
333
+ # threads at once. Our bind/connect AF_UNIX calls are synchronous (main thread of
334
+ # daemon, client, worker), so it's safe.
335
+
336
# Compared against the UTF-8 encoded byte length of the address in `_short_unix_path`.
_AF_UNIX_SOFT_LIMIT = 100  # macOS hard limit is 104; leave headroom for padding/null-terminator/etc.

# Process-wide chdir is inherently race-prone (it affects the WHOLE process). The lock
# guarantees that two threads simultaneously bind/connecting to long paths won't
# chdir concurrently and trip each other's cwd. On single-threaded callsites
# (daemon, watcher, tests) the overhead of one uncontended lock.acquire is nanos.
# NOTE(review): this is a plain (non-reentrant) Lock held for the whole `with` body of
# `_short_unix_path`, so nesting two long-path contexts in one thread would deadlock.
_CHDIR_LOCK = threading.Lock()
343
+
344
+
345
@contextmanager
def _short_unix_path(address: str) -> Iterator[str]:
    """Context manager yielding a path that is short enough for AF_UNIX bind/connect.

    * An address whose UTF-8 encoding fits `_AF_UNIX_SOFT_LIMIT` is yielded unchanged and the
      process state is not touched.
    * A longer address triggers the chdir trick: the process changes into the socket's
      directory and the bare file name is yielded. The previous working directory is
      restored when the block exits, including on exceptions.

    The chdir section runs under `_CHDIR_LOCK`, which stays held for the whole duration of
    the caller's `with` body, so concurrent threads cannot disturb each other's cwd.

    Args:
        address: filesystem path of the Unix socket.

    Yields:
        Either `address` itself or its final path component.
    """
    needs_chdir = len(address.encode("utf-8")) > _AF_UNIX_SOFT_LIMIT
    if needs_chdir:
        target = Path(address)
        with _CHDIR_LOCK:
            previous_cwd = os.getcwd()
            os.chdir(target.parent)
            try:
                yield target.name
            finally:
                # Always undo the process-wide side effect before releasing the lock.
                os.chdir(previous_cwd)
        return
    yield address
362
+
363
+
364
+ # ── thin bus: length-prefixed pickle ─────────────────────────────────────────
365
+
366
+ # Hard cap on a single frame (header is uint32, uncapped that's up to 4GB). A corrupted
367
+ # or malicious frame with a huge `length` → an attempt to allocate gigabytes inside
368
+ # `_recvn`. In full-report mode whole serialized TestReports ride the bus (longrepr +
369
+ # captured stdout/stderr/log sections), so a single test that prints a lot can produce a
370
+ # chunky frame; 256MB is generous headroom while still bounding a corrupt/hostile header.
371
_MAX_FRAME_BYTES = 256 * 1024 * 1024  # 256 MiB; `_recv` drops any frame whose header announces more
372
+
373
+ # Cap on the number of pickle opcodes `_loads` will accept (decode-amplification guard).
374
+ # `_MAX_FRAME_BYTES` bounds the WIRE size but NOT the decoded object size: pickle memo
375
+ # back-references let a small frame fan out into a large object graph, and a bogus
376
+ # length-prefixed opcode makes the C unpickler pre-allocate gigabytes from a ~5-byte frame.
377
+ # Every constructed node costs ≥1 opcode, so bounding the opcode count bounds both the
378
+ # decoded node count and the cost of `_is_plain_builtins`. 1M is 10×+ above any real frame
379
+ # (a forwarded selection of N tests is ~N opcodes; no suite has a million tests). Found by
380
+ # the Atheris harness.
381
_MAX_PICKLE_OPS = 1_000_000  # enforced opcode-by-opcode during the `_loads` pre-scan
382
+
383
+ # Opcodes whose integer arg drives a C-unpickler PRE-ALLOCATION (the memo array resize for
384
+ # PUT/GET indices, the read-ahead buffer for a FRAME length) rather than being plain data.
385
+ # The C unpickler allocates from these BEFORE bounds-checking, so a ~5-byte LONG_BINPUT with
386
+ # a 2-billion index grows the memo to ~18 GB. `_loads` rejects any whose arg exceeds the
387
+ # frame size — a valid pickle never references or declares more than its own bytes. (1-byte
388
+ # BINPUT/BINGET are capped at 255 → harmless; string/bytes lengths are already caught by
389
+ # genops, which reads the data and fails 'truncated' on a bogus length.) Found by Atheris.
390
# Entries are `pickletools` opcode NAMES; `_loads` compares them with `opcode.name`.
_ALLOC_ARG_OPCODES = frozenset({"FRAME", "LONG_BINPUT", "PUT", "LONG_BINGET", "GET"})
391
+
392
+
393
+ # Whitelist for `_SafeUnpickler.find_class`. Our wire protocol carries:
394
+ # - control messages: tuple/dict/str/int/float/bool/None/bytes
395
+ # - test results: `RunResult`/`WorkerStats` — these are TypedDicts, plain `dict` at runtime
396
+ # No user-defined classes traverse the bus — so the whitelist is pure builtins.
397
+ # Any attempt to deserialize a non-builtin → `UnpicklingError`.
398
+ #
399
+ # Why: pickle = arbitrary code execution. The Unix socket under /tmp is connectable
400
+ # by any process owned by the current user. On a single-user dev box the surface
401
+ # is small, but on a shared CI runner (or if pytest-fast runs in a sandbox with
402
+ # elevated privileges) — a malicious local pickle → RCE. The whitelist closes this.
403
+ _PICKLE_ALLOWED_BUILTINS = frozenset(
404
+ {
405
+ "builtins.tuple",
406
+ "builtins.dict",
407
+ "builtins.list",
408
+ "builtins.set",
409
+ "builtins.frozenset",
410
+ "builtins.str",
411
+ "builtins.int",
412
+ "builtins.float",
413
+ "builtins.bool",
414
+ "builtins.NoneType",
415
+ "builtins.bytes",
416
+ "builtins.bytearray",
417
+ "builtins.complex",
418
+ }
419
+ )
420
+
421
+
422
+ class _SafeUnpickler(pickle.Unpickler):
423
+ """`pickle.Unpickler` with a `find_class` whitelist — only builtin types pass.
424
+ Defense against malicious pickles on our bus (see `_PICKLE_ALLOWED_BUILTINS`)."""
425
+
426
+ def find_class(self, module: str, name: str) -> object:
427
+ qualname = f"{module}.{name}"
428
+ if qualname in _PICKLE_ALLOWED_BUILTINS:
429
+ return super().find_class(module, name)
430
+ msg = f"forbidden class in pickle stream: {qualname}"
431
+ raise pickle.UnpicklingError(msg)
432
+
433
+
434
+ # Concrete builtin VALUE types the bus legitimately carries. `_loads` rejects a decoded
435
+ # result containing anything else — notably a builtin *class* object. `find_class` must
436
+ # hand back builtin classes (`complex`, `frozenset`, …) so REDUCE can reconstruct their
437
+ # INSTANCES, but a frame that returns the class ITSELF as a value (`cbuiltins\ncomplex\n.`)
438
+ # is never legitimate protocol data. Not an RCE — every whitelisted class is a safe
439
+ # constructor — but it tightens the bus to plain data only (found by the Atheris harness).
440
# Checked with an exact `type(o) in …` test in `_is_plain_builtins`, so subclasses of these
# types do not qualify.
_BUS_VALUE_TYPES = (type(None), bool, int, float, complex, str, bytes, bytearray, tuple, list, dict, set, frozenset)
441
+
442
+
443
def _is_plain_builtins(obj: object) -> bool:
    """Report whether `obj` consists exclusively of plain builtin VALUES.

    Every reachable object must have exactly one of the types in `_BUS_VALUE_TYPES`; a class
    object, a callable or an instance of any other type makes the whole value fail.

    The walk uses an explicit stack rather than recursion, so deep nesting cannot raise
    RecursionError. It also remembers the identity of each object already visited: a
    shared-reference DAG (the pickle-memo "billion laughs" shape, `m=[m,m]` repeated) is
    small in memory but has exponentially many paths, and de-duplicating by `id` reduces the
    work to the number of DISTINCT objects.

    Args:
        obj: the decoded value to inspect.

    Returns:
        True when only plain builtin values are reachable, False otherwise.
    """
    pending = [obj]
    visited: set[int] = set()
    while pending:
        current = pending.pop()
        marker = id(current)
        if marker in visited:
            continue
        visited.add(marker)
        if type(current) not in _BUS_VALUE_TYPES:
            return False
        if isinstance(current, dict):
            # Keys are values too: both sides of every entry get checked.
            pending.extend(current.keys())
            pending.extend(current.values())
        elif isinstance(current, (list, tuple, set, frozenset)):
            pending.extend(current)
    return True
465
+
466
+
467
def _loads(data: bytes) -> object:
    """Decode one bus frame — a hardened replacement for `pickle.loads`.

    Three independent layers, each aimed at a different threat:

    * pre-scan with `pickletools.genops` — stops decode amplification. genops parses from an
      in-memory stream, so a length-prefixed opcode that claims more bytes than exist simply
      fails instead of triggering a pre-allocation; on top of that the opcode count is capped
      by `_MAX_PICKLE_OPS`, and any `_ALLOC_ARG_OPCODES` opcode whose integer argument exceeds
      the frame size is refused;
    * whitelist (`_SafeUnpickler.find_class`) — only builtins deserialize, so no code runs;
    * post-check (`_is_plain_builtins`) — the decoded result must be plain data; a frame that
      evaluates to a builtin class object is refused.

    Both extra checks were driven by the Atheris harness (fuzz/fuzz_wire.py).

    Args:
        data: the raw pickle payload of one frame.

    Returns:
        The decoded value.

    Raises:
        pickle.UnpicklingError: a guard tripped or the pre-scan could not parse the payload
            (the original parser error is chained as `__cause__`).
        Exception: whatever the unpickler itself raises while decoding (e.g. EOFError).
    """
    import io
    import pickletools

    frame_size = len(data)
    try:
        for seen_ops, (op, arg, _offset) in enumerate(pickletools.genops(data), start=1):
            if seen_ops > _MAX_PICKLE_OPS:
                msg = f"pickle exceeds the opcode budget ({_MAX_PICKLE_OPS})"
                raise pickle.UnpicklingError(msg)
            oversized = isinstance(arg, int) and arg > frame_size
            if oversized and op.name in _ALLOC_ARG_OPCODES:
                msg = f"{op.name} arg {arg} exceeds the {frame_size}-byte frame (pre-allocation guard)"
                raise pickle.UnpicklingError(msg)
    except pickle.UnpicklingError:
        # Our own guard errors pass through untouched.
        raise
    except Exception as exc:
        # The scanner choked on a malformed or truncated stream: refuse it here, before the
        # C unpickler ever sees it, and normalize the error type so callers handle a single
        # "corrupt frame" exception.
        msg = f"malformed pickle rejected before decode: {exc!r}"
        raise pickle.UnpicklingError(msg) from exc

    decoded = _SafeUnpickler(io.BytesIO(data)).load()
    if _is_plain_builtins(decoded):
        return decoded
    msg = "decoded a non-data object (a builtin class/callable); the bus carries plain values only"
    raise pickle.UnpicklingError(msg)
508
+
509
+
510
+ def _send(sock: socket.socket, obj: object) -> int:
511
+ data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
512
+ sock.sendall(struct.pack("!I", len(data)) + data)
513
+ return len(data) + 4
514
+
515
+
516
def _try_send(sock: socket.socket, obj: object) -> bool:
    """Send a control reply without letting a vanished peer take the daemon down.

    A client may disconnect before reading its reply (for instance a `status` client that
    hit its own receive timeout while the daemon was briefly busy). `sendall` would then
    raise BrokenPipe/ConnectionReset and crash the resident process. The state change the
    reply accompanies (shutdown, promote, stale-exit) has to go ahead regardless, so the
    socket error is absorbed and reported through the return value.

    Args:
        sock: the peer connection.
        obj: the reply to serialize and send.

    Returns:
        True when the frame was handed to the socket, False when an `OSError` showed the
        peer was already gone. Only `OSError` is absorbed; other errors propagate.
    """
    try:
        _send(sock, obj)
    except OSError:
        delivered = False
    else:
        delivered = True
    return delivered
528
+
529
+
530
+ def _recvn(sock: socket.socket, n: int) -> bytes | None:
531
+ buf = bytearray()
532
+ while len(buf) < n:
533
+ chunk = sock.recv(n - len(buf))
534
+ if not chunk:
535
+ return None
536
+ buf.extend(chunk)
537
+ return bytes(buf)
538
+
539
+
540
def _recv(sock: socket.socket) -> tuple[object, int]:
    """Read and decode one length-prefixed frame.

    Args:
        sock: a connected socket.

    Returns:
        `(value, consumed)`: the decoded value together with the byte count attributed to
        the frame. `value` is None — the "corrupt frame / peer gone" sentinel — when:

        * the stream ended before a full header arrived (`consumed` is 0);
        * the header announces more than `_MAX_FRAME_BYTES`, or the payload was cut short
          (`consumed` is 4, the header only);
        * the payload failed to decode (`consumed` covers header and payload).

        NOTE(review): a frame that legitimately encodes `None` looks the same as the
        sentinel to the caller.
    """
    prefix = _recvn(sock, 4)
    if prefix is None:
        return None, 0
    size = struct.unpack("!I", prefix)[0]
    # Check the announced size BEFORE reading: a corrupt or hostile header could otherwise
    # make `_recvn` accumulate up to 4 GB.
    if size > _MAX_FRAME_BYTES:
        return None, 4
    body = _recvn(sock, size)
    if body is None:
        return None, 4
    consumed = size + 4
    try:
        message = _loads(body)
    except Exception:
        # An empty, corrupt or hostile payload (EOFError, UnpicklingError, …) is a bad
        # frame, not a fatal condition: report it with the usual sentinel.
        return None, consumed
    return message, consumed
562
+
563
+
564
+ # ── pytest-faithful test outcome categorization ──────────────────────────────
565
+
566
# Rank of each report category; when one test yields several, `categorize` keeps the highest.
_OUTCOME_PRIORITY = {"error": 5, "failed": 4, "xpassed": 3, "xfailed": 2, "skipped": 1, "passed": 0}
567
+
568
+
569
def categorize(config: Config, reports: list[TestReport]) -> str:
    """Derive a test's final category from its phase reports.

    Each report is classified through the `pytest_report_teststatus` hook, i.e. by the same
    mechanism pytest and its plugins (skipping, rerunfailures) use. The highest-ranked
    category in `_OUTCOME_PRIORITY` wins.

    Ignored: empty categories, `"rerun"` (intermediate retries), and categories missing from
    `_OUTCOME_PRIORITY` (an unknown third-party plugin must not outrank a pass).

    Args:
        config: the pytest config whose hook relay is queried.
        reports: the reports recorded for one test item.

    Returns:
        The winning category, or `"passed"` when no report contributed one.
    """
    winner, winner_rank = "passed", -1
    for report in reports:
        status = config.hook.pytest_report_teststatus(report=report, config=config)
        category = status[0]
        if category and category != "rerun":
            rank = _OUTCOME_PRIORITY.get(category)
            if rank is not None and rank > winner_rank:
                winner, winner_rank = category, rank
    return winner
584
+
585
+
586
class _ReportCollector:
    """Worker plugin: accumulates TestReports for the current item (pytest_runtest_logreport)."""

    def __init__(self) -> None:
        """Start with an empty report list."""
        super().__init__()
        # Reports in arrival order. Nothing in this class clears the list; the owner is
        # expected to reset or replace it between items — confirm at the call site.
        self.reports: list[TestReport] = []

    def pytest_runtest_logreport(self, report: TestReport) -> None:
        """pytest hook: record `report` (the parameter name is fixed by the hook spec)."""
        self.reports.append(report)
595
+
596
+
597
+ # ── collection-once (runs at import time as the preload module "pytest_fast") ─
598
+ #
599
+ # forkserver calls `set_forkserver_preload(["pytest_fast"])` → imports THIS file
600
+ # as module "pytest_fast" and runs collection ONCE; forked workers inherit the
601
+ # ready-made items. The `__name__ == "pytest_fast"` guard matters: when launched as
602
+ # a script the module is named "__main__"/"__mp_main__" (mp target resolution) —
603
+ # collection is NOT needed there (otherwise it would run twice). Workers read the
604
+ # items as module globals (the fork inherits the heap).
605
+
606
+ # Public (no underscore, not ALL_CAPS) module globals are intentional: they are set
607
+ # by `_collect()` in the PRELOADED "pytest_fast" module (forkserver) or at import
608
+ # (spawn), and workers read them via `import pytest_fast` (their own globals are
609
+ # __main__/__mp_main__, where collect did NOT run). Underscore would trip pyright's
610
+ # cross-module private-access; ALL_CAPS — reportConstantRedefinition on reassign.
611
# Filled in by `_collect()` (it declares both names `global`); until then they keep these
# empty defaults.
collected_config: Config | None = None
collected_items: list[Item] = []
613
+
614
+
615
+ # Seconds: after this long inside `_collect()` the watchdog thread prints all-threads
616
+ # stack traces to stderr. Goal — diagnosing a "hanging conftest" / `pytest_configure`
617
+ # hook that loops forever. Normal collect is sub-second on small repos and a few
618
+ # seconds on large ones; 30s is a generous bound that catches real hangs without
619
+ # spamming on slow CI.
620
+ _COLLECT_WATCHDOG_TIMEOUT = 30.0
621
+
622
+
623
def _collect() -> None:
    """Run pytest collection once and publish the result in the module globals.

    Builds a pytest config from `-m $PYTEST_FAST_MARK -q` (plus `-n0` when xdist is
    importable), fires configure / sessionstart / collection, then stores the config
    and `session.items` in `collected_config` / `collected_items`. Afterwards it runs
    a full `gc.collect()` followed by `gc.freeze()`.

    A daemon watchdog thread dumps all-threads stack traces to stderr if this
    function is still running after `_COLLECT_WATCHDOG_TIMEOUT` seconds; it is
    released in `finally`, so neither success nor an exception triggers a dump.
    Exceptions raised by pytest propagate to the caller.
    """
    global collected_config, collected_items
    import faulthandler
    import gc
    import importlib.util

    import pytest
    from _pytest.config import get_config

    # Watchdog: if collect hangs, after `_COLLECT_WATCHDOG_TIMEOUT` seconds we dump
    # stack traces for all threads (including the current one — where pytest import/
    # configure is wedged). This log lands in daemon.log → the hang site becomes
    # obvious post-mortem. Thread daemon=True → if the process dies before the watchdog
    # fires, the thread dies with it.
    collect_done = threading.Event()

    def _watchdog() -> None:
        # `Event.wait` returns True when the event was set before the timeout elapsed.
        if collect_done.wait(timeout=_COLLECT_WATCHDOG_TIMEOUT):
            return  # collect finished in time — exit silently
        print(
            f"[pytest-fast] WARNING: _collect() taking >{_COLLECT_WATCHDOG_TIMEOUT}s; dumping all-threads stack:",
            file=sys.stderr,
            flush=True,
        )
        faulthandler.dump_traceback(file=sys.stderr, all_threads=True)
        sys.stderr.flush()

    threading.Thread(target=_watchdog, daemon=True, name="pytest-fast-collect-watchdog").start()

    try:
        # The mark expression comes from the environment; an unset variable yields `-m ""`.
        args = ["-m", os.environ.get("PYTEST_FAST_MARK", ""), "-q"]
        # `-n0` neutralizes ambient `-n auto` (from PYTEST_ADDOPTS / pytest.ini), but the
        # option is owned by pytest-xdist: without it pytest fails with `UsageError:
        # unrecognized arguments: -n0`. Append only when xdist is actually installed.
        if importlib.util.find_spec("xdist") is not None:
            args.append("-n0")
        config = get_config(args)
        config.parse(args)
        # public counterpart of the private config._do_configure(): historic call of pytest_configure
        config.hook.pytest_configure.call_historic(kwargs={"config": config})
        session = pytest.Session.from_config(config)
        config.hook.pytest_sessionstart(session=session)
        config.hook.pytest_collection(session=session)
        # Publish only after collection succeeded; on an exception the globals keep their defaults.
        collected_config, collected_items = config, session.items
        # Reap collect-time cyclic garbage BEFORE freezing. Importing test modules leaves
        # transient garbage that only cyclic GC reclaims — notably stale pre-slots classes
        # from `@attrs.define`/`@dataclass(slots=True)`, which linger as DUPLICATES in their
        # base's `__subclasses__()` until collected. `gc.freeze()` pins whatever is live into
        # the permanent generation, so without this collect first, those duplicates are
        # frozen forever and every forked worker inherits a polluted `__subclasses__()` —
        # breaking libraries that walk it (e.g. cattrs `include_subclasses`). Plain pytest
        # avoids this because natural GC runs between collect and the test; the forkserver
        # forks immediately, so we must reap explicitly here.
        gc.collect()
        gc.freeze()  # heap (app+items) into the permanent generation → GC won't scan shared COW pages
    finally:
        collect_done.set()  # watchdog thread exits quietly (success or exception — no stack dump)
680
+
681
+
682
+ # `_collect()` trigger is INTENTIONALLY at the bottom of the file (see block at end
683
+ # of __init__.py). It's NOT here: when pytest collects, it imports test files that do
684
+ # `from pytest_fast import <symbol>`. If the trigger fires now (while the module is
685
+ # still mid-load), the test-file import lands in a cache hit on the partially-loaded
686
+ # module — symbols declared later in this file are not yet available → silent ImportError
687
+ # → pytest skips the file entirely. So we collect only AFTER the whole module is initialized.
688
+
689
+
690
+ # ── worker (forkserver-child) ─────────────────────────────────────────────────
691
+
692
+
693
+ def _noop() -> None:
694
+ """Trivial target: starting it boots the forkserver + runs preload (collect)."""
695
+
696
+
697
+ def _failure_text(reports: list[TestReport]) -> str:
698
+ """Failure text to print: longrepr (traceback / assert diff / exception) for failed
699
+ phases plus their captured sections (stdout/stderr/log). We use `longreprtext` (str)
700
+ — it pickles trivially across the bus, unlike the longrepr object itself."""
701
+ parts: list[str] = []
702
+ for rep in reports:
703
+ if not rep.failed:
704
+ continue
705
+ prefix = "" if rep.when == "call" else f"[{rep.when}] "
706
+ if rep.longreprtext:
707
+ parts.append(prefix + rep.longreprtext)
708
+ parts.extend(f"----- {title} -----\n{content}" for title, content in rep.sections)
709
+ return "\n".join(parts)
710
+
711
+
712
+ def _durations_lines(results: list[RunResult], limit: int = 15, min_dur: float = 0.005) -> list[str]:
713
+ """A pytest `--durations`-style table from the full per-phase reports (full-report mode
714
+ only). Flattens every (duration, when, nodeid) phase across all results, slowest first.
715
+ Empty when no result carries serialized reports (lean mode)."""
716
+ phases: list[tuple[float, str, str]] = []
717
+ for r in results:
718
+ for rep in r.get("reports", []):
719
+ dur, when, nodeid = rep.get("duration"), rep.get("when"), rep.get("nodeid")
720
+ # `dur == dur` filters NaN (nan != nan): a NaN duration from a malformed/
721
+ # hostile serialized report poisons `list.sort()` below (NaN comparisons are
722
+ # all False → the sort silently leaves the table mis-ordered).
723
+ if isinstance(dur, int | float) and dur == dur and isinstance(when, str) and isinstance(nodeid, str):
724
+ phases.append((float(dur), when, nodeid))
725
+ phases.sort(reverse=True)
726
+ shown = [p for p in phases if p[0] >= min_dur][:limit]
727
+ if not shown:
728
+ return []
729
+ out = [f" DURATIONS (top {len(shown)}, ≥{min_dur * 1000:.0f}ms — per phase):"]
730
+ out.extend(f" {dur:8.3f}s {when:<9}{nodeid}" for dur, when, nodeid in shown)
731
+ return out
732
+
733
+
734
# `bench` thresholds — fixed constants so every finding is a deterministic function of the
# measured numbers, never a tuned/learned guess.
_BENCH_CLUSTER_MIN = 5  # ≥ this many tests sharing a heavy setup → a scope-widening cluster
_BENCH_SETUP_MIN_S = 0.05  # a setup phase this long counts as "heavy"
_BENCH_LEVER_MIN_S = 0.5  # don't report a lever that reclaims less than this
_BENCH_TOP_CALLS = 20  # how many slowest CALL phases to classify
_BENCH_MAX_LEVERS = 12  # at most this many ranked levers are printed in the report
_BENCH_IO_FRAC = 0.20  # cpu/total below this → I/O-bound
_BENCH_CPU_FRAC = 0.80  # cpu/total above this → CPU-bound
_BENCH_CV = 0.40  # per-test coefficient of variation above this (≥2 runs) → unstable timing
_BENCH_PROFILE_NODES = 12  # how many top bottleneck tests the targeted cProfile pass re-runs
745
+
746
+
747
+ def _phase_split(result: RunResult) -> tuple[float, float, float]:
748
+ """(setup, call, teardown) wall seconds from a result's per-phase reports (full-report mode)."""
749
+ setup = call = teardown = 0.0
750
+ for rep in result.get("reports", []):
751
+ when, dur = rep.get("when"), rep.get("duration")
752
+ if isinstance(dur, int | float) and dur == dur: # dur == dur drops NaN
753
+ if when == "setup":
754
+ setup += float(dur)
755
+ elif when == "call":
756
+ call += float(dur)
757
+ elif when == "teardown":
758
+ teardown += float(dur)
759
+ return setup, call, teardown
760
+
761
+
762
def _bench_report(
    result_runs: list[list[RunResult]],
    run: float,
    cores: int,
    warmup_dropped: bool = False,
    profiles: dict[str, list[tuple[str, int, float, float]]] | None = None,
) -> str:
    """Deterministic bottleneck report for `pytest-fast --bench[=N]`. Every lever is (measured number →
    fixed rule → reclaimable worker-seconds), ranked by impact — NO heuristics. Needs full-report
    mode (per-phase setup/call/teardown) + per-test `cpu`. Two lever families:

    • SHARED SETUP — K tests in one file each paying ~S setup is K·S worker-seconds; a
      session/module-scoped fixture pays it once → reclaim ≈ (K−1)·S. Marked [potential] because
      whether the fixture is scope-widenable can't be read from timings (only the upper bound can).
    • per-test CALL hot-spots — the slowest call phases, classified by `cpu/total`: I/O-bound
      (waiting on DB/network), CPU-bound (algorithmic), or setup-heavy.

    `result_runs` is one or more runs (the caller drops the warmup); per-test timings are AVERAGED
    across them so the ranking isn't ruled by one noisy sample. The header states the deterministic
    ceiling: best wall ≈ max(Σbusy/cores, longest-test).

    Args:
        result_runs: one list of results per measured run.
        run: wall seconds shown in the header.
        cores: worker count; used for the ideal-wall and per-lever wall estimates. A value of 0
            yields 0.0 estimates instead of a division error.
        warmup_dropped: only changes the header note.
        profiles: optional nodeid → rows of (name, ncalls, self time, cumulative time), appended
            to the matching hot-spot lever.

    Returns:
        The whole report as one newline-joined string.

    A test whose results carry no usable `cpu` value (missing, non-numeric or negative) keeps
    CPU "unknown" (-1.0): it gets no CPU percentage and is never classified I/O- or CPU-bound
    from a fabricated 0.0s.
    """
    from collections import defaultdict

    line = "═" * 66
    # Sum each test's timings across the runs it appeared in:
    # [appearances, total, cpu, setup, call, teardown, cpu samples].
    acc: dict[str, list[float]] = defaultdict(lambda: [0.0] * 7)
    samples: dict[str, list[float]] = defaultdict(list)  # per-test total durations across runs → variance
    for results in result_runs:
        for r in results:
            s, c, t = _phase_split(r)
            a = acc[r["nodeid"]]
            a[0] += 1
            a[1] += r["duration"]
            # Count CPU only when it was really measured. Folding a missing/negative value in
            # as 0.0 would make the test look "0% CPU" and mis-file it as I/O-bound.
            cpu_sample = r.get("cpu")
            if isinstance(cpu_sample, int | float) and cpu_sample >= 0:
                a[2] += cpu_sample
                a[6] += 1
            a[3] += s
            a[4] += c
            a[5] += t
            samples[r["nodeid"]].append(r["duration"])
    # (nodeid, file, mean total, mean cpu or -1.0 when unknown, mean setup, mean call, mean teardown)
    recs = [
        (
            nid,
            nid.split("::", 1)[0],
            a[1] / a[0],
            a[2] / a[6] if a[6] else -1.0,
            a[3] / a[0],
            a[4] / a[0],
            a[5] / a[0],
        )
        for nid, a in acc.items()
        if a[0]
    ]
    n_tests = len(recs)
    n_runs = len(result_runs)
    sum_total = sum(x[2] for x in recs)
    sum_setup = sum(x[4] for x in recs)
    sum_call = sum(x[5] for x in recs)
    sum_teardown = sum(x[6] for x in recs)
    floor, floor_id = max(((x[2], x[0]) for x in recs), default=(0.0, ""))
    ideal = sum_total / cores if cores else 0.0
    best = max(ideal, floor)
    means = sorted(x[2] for x in recs)

    def _pct(q: float) -> float:
        # Nearest-rank percentile over the sorted per-test means; 0.0 when there are none.
        return means[min(len(means) - 1, round(q * (len(means) - 1)))] if means else 0.0

    avg_note = f"avg of {n_runs} run{'s' if n_runs != 1 else ''}" + (" + warmup dropped" if warmup_dropped else "")
    out = [
        f"\n{line}",
        f" pytest-fast bench — {n_tests} tests, {run:.2f}s wall @ {cores}w ({avg_note})",
        line,
        f" best @ {cores} cores ≈ {best:.2f}s · floor (longest test) {floor:.2f}s {floor_id}",
    ]
    if sum_total > 0:
        out.append(
            f" where time goes: setup {sum_setup / sum_total:.0%} · call {sum_call / sum_total:.0%} · "
            f"teardown {sum_teardown / sum_total:.0%} (of {sum_total:.0f}s test-wall)"
        )
    out.append(
        f" per-test wall : p50 {_pct(0.5):.3f}s · p90 {_pct(0.9):.3f}s · p99 {_pct(0.99):.3f}s · max {floor:.2f}s"
    )

    levers: list[tuple[float, str, list[str]]] = []

    # 1. SHARED-SETUP clusters — by file.
    by_file: dict[str, list[float]] = defaultdict(list)
    for _nodeid, file, _total, _cpu, setup, _call, _teardown in recs:
        if setup >= _BENCH_SETUP_MIN_S:
            by_file[file].append(setup)
    for file, setups in by_file.items():
        if len(setups) < _BENCH_CLUSTER_MIN:
            continue
        tot = sum(setups)
        one = tot / len(setups)
        saving = tot - one  # session-scope pays one setup instead of len(setups)
        if saving >= _BENCH_LEVER_MIN_S:
            # Same zero-cores guard as `ideal` above, so the report never dies on a division.
            wall_saving = saving / cores if cores else 0.0
            levers.append(
                (
                    saving,
                    "SHARED SETUP",
                    [
                        f"{file} — {len(setups)} tests × ~{one:.2f}s setup = {tot:.1f}s total",
                        f"→ session/module-scope the fixture (if scope-widenable): ~{one:.2f}s once → "
                        f"reclaim ~{saving:.1f} worker-s (~{wall_saving:.1f}s wall@{cores}w) [potential]",
                    ],
                )
            )

    # 2. per-test CALL hot-spots — slowest call phases, classified.
    for nodeid, _file, total, cpu, setup, call, teardown in sorted(recs, key=lambda x: x[5], reverse=True)[
        :_BENCH_TOP_CALLS
    ]:
        if call < _BENCH_LEVER_MIN_S:
            break  # sorted by call time, so nothing further down can qualify
        # -1.0 marks "no CPU fraction available" (CPU unknown or zero total).
        cpu_frac = (cpu / total) if (cpu >= 0 and total > 0) else -1.0
        off_cpu = max(0.0, total - cpu) if cpu >= 0 else -1.0
        # Tips state only what the timings DETERMINE (where the cost is), never a guessed cause/fix
        # (whether it's a query, a sleep, a subprocess, an algorithm — timings can't tell).
        if setup > call and setup >= _BENCH_SETUP_MIN_S:
            cat, tip = "SETUP-HEAVY", f"cost is fixture setup ({setup:.2f}s > call {call:.2f}s), not the test body"
        elif 0 <= cpu_frac < _BENCH_IO_FRAC:
            cat, tip = (
                "I/O-BOUND",
                f"{off_cpu:.2f}s off-CPU (I/O wait) — cost is outside Python; CPU/more-workers won't cut it",
            )
        elif cpu_frac > _BENCH_CPU_FRAC:
            cat, tip = "CPU-BOUND", f"{cpu:.2f}s on-CPU — real compute; bounded by core speed"
        else:
            cat, tip = "MIXED", "cost split across CPU and I/O — see the phase breakdown"
        cpu_note = f", {cpu_frac:.0%} CPU" if cpu_frac >= 0 else ""
        body = [
            f"{nodeid} ({total:.2f}s: setup {setup:.2f}/call {call:.2f}/teardown {teardown:.2f}{cpu_note})",
            f"→ {tip}",
        ]
        rows = (profiles or {}).get(nodeid)
        if rows:
            body.append("profile (top by SELF wall — where it's actually burned; ncalls exact):")
            body.extend(f" {self_t:6.3f}s self {nc:>8,}× {name}" for name, nc, self_t, _cum in rows)
        levers.append((call, cat, body))

    levers.sort(key=lambda x: x[0], reverse=True)
    if levers:
        out.append(" ── levers (ranked by reclaimable worker-seconds) ─────────────────")
        for i, (saving, cat, body) in enumerate(levers[:_BENCH_MAX_LEVERS], 1):
            out.append(f" {i:>2}. {cat:<12} ~{saving:5.1f} w-s")
            out.extend(f" {ln}" for ln in body)
    else:
        out.append(" no levers above the reporting threshold — the suite is already lean.")

    # Unstable timing — needs ≥2 measured runs (so --bench=3+). cv = stdev/mean per test; a high cv
    # is a measured fact (flaky perf / ordering-sensitive / contended), not a heuristic. Ranked by
    # cv·mean (the wall actually at stake), only for tests big enough to matter.
    unstable: list[tuple[float, str, float, float]] = []
    for nid, xs in samples.items():
        if len(xs) >= 2:
            m = sum(xs) / len(xs)
            if m >= _BENCH_LEVER_MIN_S:
                sd = math.sqrt(sum((x - m) ** 2 for x in xs) / len(xs))  # population stdev
                if m and sd / m >= _BENCH_CV:
                    unstable.append((sd / m, nid, m, sd))
    if unstable:
        unstable.sort(key=lambda u: u[0] * u[2], reverse=True)
        out.append(f" ── unstable timing (cv ≥ {_BENCH_CV:.0%} across {n_runs} runs) ──────────────────────")
        out.extend(f" {nid} {m:.2f}s ±{sd:.2f}s (cv {cv:.0%})" for cv, nid, m, sd in unstable[:8])
    elif n_runs < 2:
        out.append(" (timing-stability needs ≥2 measured runs — try --bench=3+)")
    out.append(line)
    return "\n".join(out)
920
+
921
+
922
+ def _top_profile_rows(pr: cProfile.Profile, limit: int = 8) -> list[tuple[str, int, float, float]]:
923
+ """Top functions of a finished `cProfile.Profile`, by SELF time (`inlinetime`) — time spent IN
924
+ the function, excluding subcalls. Self time (not cumulative) surfaces the actual leaves where the
925
+ wall is burned — a blocking syscall, a hot compute loop, a repeated query — instead of the
926
+ pytest/pluggy wrapper chain (whose cumulative time is ~the whole test but tells you nothing).
927
+ Each row is (qualname, ncalls, selftime, cumtime); ncalls are EXACT, so a leaf called 47× in one
928
+ test is the measured (not guessed) N+1 / hot-call signal. Plain builtins → whitelist-safe."""
929
+ rows: list[tuple[float, float, int, str]] = []
930
+ for e in pr.getstats():
931
+ code = e.code
932
+ if isinstance(code, str):
933
+ label = code # a built-in, e.g. "<built-in method ... read>" / "<method 'recv' ...>"
934
+ else:
935
+ short = code.co_filename.rsplit("/", 1)[-1]
936
+ label = f"{code.co_name} ({short}:{code.co_firstlineno})"
937
+ rows.append((e.inlinetime, e.totaltime, e.callcount, label[:70]))
938
+ rows.sort(reverse=True) # by self time
939
+ return [(lbl, nc, round(self_t, 4), round(cum_t, 4)) for self_t, cum_t, nc, lbl in rows[:limit]]
940
+
941
+
942
def _run_one_item(
    item: Item, nextitem: Item | None, collector: _ReportCollector, *, full_report: bool = False, profile: bool = False
) -> RunResult:
    """Run a test via the FULL pytest protocol (hook, not function): setup/call/
    teardown, capture, rerunfailures, makereport — behavior 1-to-1 with regular pytest.

    `full_report=True` also attaches every phase report in pytest's serializable wire form
    (`pytest_report_to_serializable` → plain builtins, whitelist-safe) so the master/controller
    can replay them through a real terminalreporter (--durations, junit, -v/-s, plugins). Off by
    default — the lean path ships only the outcome summary.

    `profile=True` (the `bench` targeted pass over its top bottleneck tests) runs the protocol under
    `cProfile` and attaches the rows produced by `_top_profile_rows` (top functions ranked by SELF
    time) under "profile" → deterministic where-in-the-code attribution.

    The returned dict always has "nodeid", "outcome" and "duration" (the sum of the collected
    reports' durations); "longrepr" is added only for the outcomes "failed" and "error".
    `collector` must be the plugin instance that receives this item's reports; its buffer is
    cleared on entry.
    """
    collector.reports.clear()
    pr = None
    if profile:
        import cProfile

        pr = cProfile.Profile()
        pr.enable()
    try:
        item.ihook.pytest_runtest_protocol(item=item, nextitem=nextitem)
    finally:
        # Stop profiling even when the protocol itself raises.
        if pr is not None:
            pr.disable()
    duration = sum(r.duration for r in collector.reports)
    outcome = categorize(item.config, collector.reports)
    result: RunResult = {"nodeid": item.nodeid, "outcome": outcome, "duration": duration}
    if pr is not None:
        result["profile"] = _top_profile_rows(pr)
    if outcome in {"failed", "error"}:
        result["longrepr"] = _failure_text(collector.reports)  # traceback only for reds
    if full_report:
        config = item.config
        result["reports"] = [
            cast("dict[str, object]", config.hook.pytest_report_to_serializable(config=config, report=rep))
            for rep in collector.reports
        ]
    return result
981
+
982
+
983
+ def _worker_hang_timeout() -> float:
984
+ """Seconds after which a worker still running a single test dumps all-threads stack
985
+ traces to stderr (which lands in the daemon log). Diagnoses runaway tests / GIL
986
+ deadlocks / blocked I/O inside `pytest_runtest_protocol`. 0 = disabled (default,
987
+ so legitimately-slow tests don't dump on every run); typical opt-in is 60–120s.
988
+
989
+ Env var: `PYTEST_FAST_WORKER_HANG_TIMEOUT=<seconds>`."""
990
+ try:
991
+ return max(0.0, float(os.environ.get("PYTEST_FAST_WORKER_HANG_TIMEOUT", "0")))
992
+ except ValueError:
993
+ return 0.0
994
+
995
+
996
def _worker_main(
    wid: int, sock_path: str, full_report: bool = False, send_nodeids: bool = False, profile: bool = False
) -> None:
    """Worker process entry point: pull item indices from the master and run them.

    Bus protocol as spoken here, over a UNIX stream socket at `sock_path`:
      * worker → ('ready', wid, item count, collect wall, nodeids-or-None), once;
      * worker → ('req', wid, previous result-or-None); master → a tuple whose second
        element is the next index (anything that is not a valid index means "no more work");
      * worker → ('fin', wid, last result-or-None, stats) and stop.

    The loop runs one item behind the requests: the next index is fetched BEFORE the
    previous item is run, so that item can be passed to pytest as `nextitem`.

    Never returns normally: the process ends with `os._exit(0)`, or with the exception
    re-raised from `_run_one_item`.
    """
    # IMPORTANT: read globals via `import pytest_fast`, NOT as bare names. `_collect()`
    # set them on the PRELOADED "pytest_fast" module (forkserver) / at import (spawn),
    # whereas `_worker_main`'s own globals are __main__/__mp_main__ (collect did NOT run there).
    t_start = time.perf_counter()
    import pytest_fast  # forkserver: cache hit (preloaded+collected); spawn: imports+collects here

    config = pytest_fast.collected_config
    assert config is not None, "forkserver/spawn must have collected tests before worker start"
    items = pytest_fast.collected_items
    collector = _ReportCollector()
    config.pluginmanager.register(collector)
    collect_wall = time.perf_counter() - t_start

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(sock_path)
    # In selection mode the master needs nodeid→index to run a subset; ship the nodeid list
    # once per worker in 'ready' (None otherwise → lean).
    ready_nodeids = [it.nodeid for it in items] if send_nodeids else None
    _send(sock, ("ready", wid, len(items), collect_wall, ready_nodeids))

    # Per-test hang watchdog: when `PYTEST_FAST_WORKER_HANG_TIMEOUT` > 0, arm a
    # `faulthandler` timer before each `_run_one_item` and cancel it after the test
    # returns. If a test exceeds the timeout, faulthandler dumps all-threads tracebacks
    # to stderr (→ daemon log in resident mode); with `repeat=True` the dump recurs every
    # `hang_timeout` seconds until cancelled. The nodeid itself is printed only on the
    # crash path below (when the protocol raises), not by the hang dump.
    hang_timeout = _worker_hang_timeout()
    faulthandler_mod = None
    if hang_timeout > 0:
        import faulthandler

        faulthandler_mod = faulthandler
        if not faulthandler.is_enabled():
            faulthandler.enable()

    run_start = time.perf_counter()
    busy = 0.0  # wall inside the runtest protocol
    cpu = 0.0  # this worker's CPU time over that same span (busy − cpu ≈ I/O wait)
    bus_wait = 0.0  # idle between tests, waiting for the master to hand out the next index
    ran = 0
    prev: Item | None = None  # item fetched on the previous iteration, not yet run
    pending: RunResult | None = None  # result of the last run item, not yet sent
    while True:
        t_bus = time.perf_counter()
        _send(sock, ("req", wid, pending))
        reply, _ = _recv(sock)
        bus_wait += time.perf_counter() - t_bus  # send result + wait for next idx = non-busy gap
        # Master gone / malformed reply → break out and exit cleanly (os._exit below). The
        # master sees EOF and the run is flagged untrusted via the result undercount.
        if not isinstance(reply, tuple) or len(reply) < 2:
            break
        idx_msg = cast("tuple[object, object]", reply)  # master → ('idx', pick)
        idx = idx_msg[1]
        # Any out-of-range or non-int pick maps to None, which ends the loop after `prev` is run.
        cur = items[idx] if isinstance(idx, int) and 0 <= idx < len(items) else None
        if prev is not None:
            t0 = time.perf_counter()
            c0 = time.process_time()
            if faulthandler_mod is not None:
                faulthandler_mod.dump_traceback_later(hang_timeout, repeat=True, exit=False)
            try:
                pending = _run_one_item(prev, cur, collector, full_report=full_report, profile=profile)
            except BaseException:
                if faulthandler_mod is not None:
                    faulthandler_mod.cancel_dump_traceback_later()
                # Worker died mid-test (runtime error in the protocol itself, NOT a test
                # failure — those are captured as reports). Print the offending nodeid
                # so the daemon log shows which test we were on, then re-raise so the
                # process exits with non-zero and the master flags the run untrusted.
                print(
                    f"[pytest-fast] worker {wid} crashed while running {prev.nodeid!r}",
                    file=sys.stderr,
                    flush=True,
                )
                raise
            if faulthandler_mod is not None:
                faulthandler_mod.cancel_dump_traceback_later()
            busy += time.perf_counter() - t0
            cpu_this = time.process_time() - c0
            cpu += cpu_this
            pending["cpu"] = cpu_this  # per-test CPU → `bench` I/O-vs-CPU classification
            ran += 1
        else:
            pending = None
        if cur is None:
            stats: WorkerStats = {
                "wid": wid,
                "ran": ran,
                "busy": busy,
                "cpu": cpu,
                "bus_wait": bus_wait,
                "run_wall": time.perf_counter() - run_start,
            }
            _send(sock, ("fin", wid, pending, stats))
            break
        prev = cur
    sock.close()
    # Worker exit: `os._exit(0)` — skip atexit hooks AND non-daemon thread joins.
    # Returning normally would let interpreter shutdown join() every alive non-daemon
    # thread; tests that spawn worker threads (intentionally — `test_run_given_concurrently`
    # — or unintentionally — pytest's threadexception plugin warning on an orphan thread)
    # leave those threads alive, and the worker would never exit → `procs[wid].join()` in
    # master hangs forever, presenting as a silent post-`F` deadlock. On the normal path we
    # have already sent `fin` and closed the bus socket, so a hard exit is correct (the
    # master got every report; nothing else legitimate is pending). Mirrors stdlib
    # multiprocessing's own advice for worker children whose application code may leave
    # threads running.
    os._exit(0)
1104
+
1105
+
1106
+ # ── reference outcome-dump (when loaded as `-p pytest_fast` with OUTCOME_DUMP) ─
1107
+ #
1108
+ # Under xdist the controller re-publishes worker reports → its hook sees ALL tests.
1109
+ # On sessionfinish (not an xdist worker) we write {nodeid: outcome} for outcome-diff.
1110
+
1111
+ _DUMP_REPORTS: dict[str, list[TestReport]] = {}
1112
+
1113
+
1114
+ def pytest_runtest_logreport(report: TestReport) -> None:
1115
+ if os.environ.get("OUTCOME_DUMP"):
1116
+ _DUMP_REPORTS.setdefault(report.nodeid, []).append(report)
1117
+
1118
+
1119
def pytest_sessionfinish(session: object) -> None:
    """pytest hook: write `{nodeid: outcome}` as JSON to the path named by OUTCOME_DUMP.

    Does nothing when OUTCOME_DUMP is unset/empty, when `session` has no `config`, or
    when that config has a `workerinput` attribute (an xdist worker — the controller
    aggregates instead).
    """
    target = os.environ.get("OUTCOME_DUMP")
    if not target:
        return
    config = getattr(session, "config", None)
    if config is None or hasattr(config, "workerinput"):
        return
    outcomes = {}
    for nodeid, reps in _DUMP_REPORTS.items():
        outcomes[nodeid] = categorize(config, reps)
    with Path(target).open("w") as fh:
        json.dump(outcomes, fh, indent=0, sort_keys=True)
1127
+
1128
+
1129
+ # ── master ───────────────────────────────────────────────────────────────────
1130
+
1131
+
1132
+ class Daemon:
1133
+ def __init__(self, num_workers: int, start_method: str, dump_path: str | None = None) -> None:
1134
+ super().__init__()
1135
+ # Real raise, not `assert` — this is a safety invariant (0 workers → nothing runs →
1136
+ # silent green), and `assert` is stripped under `python -O`. Entry points reject `< 1`
1137
+ # earlier with a friendlier message; this is the last-line guard for direct callers.
1138
+ if num_workers < 1:
1139
+ msg = f"num_workers must be >= 1, got {num_workers}"
1140
+ raise ValueError(msg)
1141
+ self.num_workers = num_workers
1142
+ self.start_method = start_method
1143
+ self.dump_path = dump_path
1144
+ # Context + preload are created ONCE; the forkserver lazy-spawns on the first
1145
+ # Process.start() and collects tests there, subsequent forks reuse the ready items.
1146
+ # get_context(str) in typeshed → BaseContext (no .Process); at runtime the context
1147
+ # is always concrete (Default/Spawn/Fork/ForkServer) and .Process exists — the cast
1148
+ # to DefaultContext gives the correct .Process(...) signature. set_forkserver_preload
1149
+ # is declared on BaseContext directly, so it's accessible too.
1150
+ self.ctx = cast("DefaultContext", mp.get_context(start_method))
1151
+ if start_method == "forkserver":
1152
+ self.ctx.set_forkserver_preload(["pytest_fast"])
1153
+ self._run_counter = 0
1154
+ # The `_PYTEST_FAST_COLLECT` flag is NOT set here on purpose — it's a global
1155
+ # side effect that would leak into env even if the object is built but not used.
1156
+ # We set it immediately before the first `Process.start()` (see `_arm_collect_flag`),
1157
+ # which is where it semantically belongs.
1158
+
1159
+ def _arm_collect_flag(self) -> None:
1160
+ """Set the env flag for the forkserver preload — right before the first `Process.start`.
1161
+
1162
+ The forkserver lazy-spawns on the first `.start()`; the flag must be in its env
1163
+ snapshot, otherwise the preload import of `pytest_fast` won't run `_collect()`.
1164
+ Idempotent: repeated calls are safe (same string reassigned)."""
1165
+ os.environ["_PYTEST_FAST_COLLECT"] = "1"
1166
+
1167
+ # ── public modes ─────────────────────────────────────────────────────────
1168
+
1169
+ def run(self, runs: int, *, full_report: bool = False, detailed: bool = False, bench: int = 0) -> int:
1170
+ """Local mode: single-shot (runs=1) or N runs in one process. `bench=N` is its own N-run
1171
+ loop (warmup dropped) → one bottleneck report; it ignores `runs`."""
1172
+ if bench > 0:
1173
+ rc, summary = self._run_bench(bench)
1174
+ print(summary)
1175
+ return rc
1176
+ rc = 0
1177
+ for _ in range(runs):
1178
+ rc, summary = self._run_once(full_report=full_report, detailed=detailed)
1179
+ print(summary)
1180
+ return rc
1181
+
1182
+ def serve(self, address: str, ttl: float) -> int:
1183
+ """Resident daemon. Collect once; idle>ttl → exit; sources changed → stale-exit.
1184
+
1185
+ The forkserver holds the code AND env loaded AT BOOT TIME. If src/tests were
1186
+ edited afterwards — forks would run STALE code; if relevant env (any var
1187
+ whose prefix is in `PYTEST_FAST_ENV_PREFIXES`, plus addopts) changed — they
1188
+ would run with the stale collect/patches. So on every `run`/`status` request
1189
+ we compare max(mtime) of sources AND the caller's env fingerprint against the
1190
+ boot snapshot (see `_stale_reason`): on mismatch we reply {'stale': True} and
1191
+ exit, the client spawns a fresh daemon (fresh collect).
1192
+
1193
+ Control protocol (one message per connect, serialized by the accept loop —
1194
+ which is why it never tears an active run apart):
1195
+ * ('run', fp) → stale check (mtime+env fp), then run + stream + summary;
1196
+ * ('status', fp) → {'ready': True, 'stale': bool} (cheap, for watcher/client);
1197
+ * ('shutdown',) → {'bye': True} and exit (watcher shuts the old one AFTER its run);
1198
+ * ('promote', new_addr) → rebind to new_addr (staging→canonical on promote).
1199
+ """
1200
+ boot_mtime = _max_source_mtime() # baseline BEFORE boot: an edit mid-build → stale
1201
+ boot_fp = _env_fingerprint() # env snapshot at boot: change to relevant env → stale-respawn
1202
+ _log("daemon", f"booting — collect once ({self.start_method}, {self.num_workers}w)…")
1203
+ t0 = time.perf_counter()
1204
+ self._arm_collect_flag() # arm the env flag right before the first Process.start()
1205
+ boot = self.ctx.Process(target=_noop)
1206
+ boot.start()
1207
+ boot.join() # forks the forkserver → it imports preload (collect) → warm
1208
+ _log("daemon", f"ready in {time.perf_counter() - t0:.2f}s, listening {address}, ttl={ttl}s")
1209
+
1210
+ cur = address # current listening address — may change via ('promote', …)
1211
+ ctl = _bind_ctl(cur, ttl)
1212
+ try:
1213
+ while True:
1214
+ try:
1215
+ conn, _addr = ctl.accept()
1216
+ except TimeoutError:
1217
+ _log("daemon", f"idle > {ttl}s — shutting down")
1218
+ return 0
1219
+ with conn:
1220
+ # Bound the command read against a "slowloris" peer: the accept loop is
1221
+ # serial, so a same-user process that connects and sends nothing (or a
1222
+ # partial header) would otherwise block this blocking `_recv` forever and
1223
+ # wedge the daemon for everyone. Legitimate clients send the whole command
1224
+ # frame up front (it's already in the socket buffer by the time we accept),
1225
+ # so a short read deadline never trips them. TimeoutError ⊂ OSError.
1226
+ conn.settimeout(_CONTROL_CMD_TIMEOUT)
1227
+ try:
1228
+ msg, _ = _recv(conn)
1229
+ except OSError:
1230
+ continue # stalled / reset peer — drop it, keep serving
1231
+ # A run streams progress/reports for as long as the suite takes; clear the
1232
+ # deadline so a long but healthy run isn't aborted mid-stream.
1233
+ conn.settimeout(None)
1234
+ if not isinstance(msg, tuple) or not msg:
1235
+ continue # empty/garbled connect (ping/probe) or empty tuple
1236
+ parts = cast("tuple[object, ...]", msg) # control: (cmd, *args)
1237
+ cmd = parts[0]
1238
+ # Slice for fp, NOT `parts[1] if len(parts) > 1`: the len() guard makes
1239
+ # pyright narrow tuple arity and breaks `parts[1]` in the promote branch.
1240
+ fp_args = parts[1:]
1241
+ client_fp = str(fp_args[0]) if fp_args else None # caller env fingerprint
1242
+ if cmd == "status":
1243
+ # _try_send (not _send): a status client that already hit its own recv
1244
+ # timeout and disconnected must not crash us with a BrokenPipe on reply.
1245
+ _try_send(
1246
+ conn, {"ready": True, "stale": _stale_reason(boot_mtime, boot_fp, client_fp) is not None}
1247
+ )
1248
+ continue
1249
+ if cmd == "shutdown":
1250
+ _try_send(conn, {"bye": True}) # reply best-effort; shut down regardless
1251
+ _log("daemon", "shutdown requested — exiting")
1252
+ return 0 # finally releases socket+pid
1253
+ if cmd == "promote":
1254
+ # Derive new_addr from the fp_args slice (not parts[1]) to keep pyright's
1255
+ # tuple-arity narrowing happy (see the fp_args comment above).
1256
+ if not fp_args:
1257
+ continue # malformed promote (no address)
1258
+ new_addr = str(fp_args[0])
1259
+ # A promote may only retarget within the SAME directory as the current
1260
+ # address (staging→canonical are siblings). The control socket is
1261
+ # connectable by any same-user process and new_addr flows into
1262
+ # _redirect_stdio's log path — an arbitrary path would let a stray/hostile
1263
+ # peer redirect the daemon's stdio. Reject anything else.
1264
+ if Path(new_addr).parent != Path(cur).parent or new_addr == cur:
1265
+ _try_send(conn, {"promoted": False})
1266
+ _log("daemon", f"refused promote to unexpected address {new_addr!r}")
1267
+ continue
1268
+ ctl.close()
1269
+ _remove_pid(cur)
1270
+ Path(cur).unlink(missing_ok=True)
1271
+ cur = new_addr
1272
+ ctl = _bind_ctl(cur, ttl)
1273
+ _redirect_stdio(_daemon_log_path(cur)) # lifecycle logs → log of the new address
1274
+ _try_send(conn, {"promoted": True})
1275
+ _log("daemon", f"promoted → listening {cur}")
1276
+ continue
1277
+ if cmd != "run":
1278
+ # Unknown/garbage command from a same-user peer. Must be ignored, NOT
1279
+ # treated as a run: an arbitrary tuple like ('x', 'y') used to fall into
1280
+ # the run branch, where a non-matching fingerprint made the daemon reply
1281
+ # {stale} and EXIT — i.e. any stray frame could shut the resident daemon
1282
+ # down (a same-user DoS). Real clients always send the verb "run".
1283
+ continue
1284
+ # run request: ('run', fp[, full_report[, stream[, nodeids[, detailed]]]])
1285
+ reason = _stale_reason(boot_mtime, boot_fp, client_fp)
1286
+ if reason is not None:
1287
+ _log("daemon", f"{reason} — exiting stale")
1288
+ _try_send(conn, {"stale": True}) # best-effort; exit stale regardless
1289
+ return 0 # finally releases the socket → client spawns a fresh daemon
1290
+ # Optional args: ('run', fp, full_report[, stream[, nodeids]]). Old clients send
1291
+ # only fp → lean. `stream` (the plugin controller) asks the daemon to stream the
1292
+ # serialized per-phase reports back; `nodeids` (the controller's collected set)
1293
+ # restricts the run to that selection.
1294
+ full_report = bool(fp_args[1]) if len(fp_args) > 1 else False
1295
+ stream = len(fp_args) > 2 and bool(fp_args[2])
1296
+ selection = (
1297
+ cast("list[str]", fp_args[3]) if len(fp_args) > 3 and isinstance(fp_args[3], list) else None
1298
+ )
1299
+ # `detailed` (CLI `--detailed`) adds the extended parallelism block; `bench`
1300
+ # (CLI `--bench=N`, an int run-count) renders the deterministic bottleneck report
1301
+ # instead (N runs, warmup dropped, full reports forced internally). Both are
1302
+ # irrelevant in stream mode (the plugin controller renders natively).
1303
+ detailed = len(fp_args) > 4 and bool(fp_args[4])
1304
+ bench = int(fp_args[5]) if len(fp_args) > 5 and isinstance(fp_args[5], int) else 0
1305
+ if stream:
1306
+ # Controller renders natively from the streamed reports → no daemon-side
1307
+ # progress frames (progress_conn=None), full reports required.
1308
+ rc, summary = self._run_once(full_report=True, report_conn=conn, selection=selection)
1309
+ elif bench > 0:
1310
+ rc, summary = self._run_bench(bench, progress_conn=conn)
1311
+ else:
1312
+ # progress_conn=conn: workers write dots into the DAEMON log, not the
1313
+ # client's terminal — so we stream progress over this same socket
1314
+ # (otherwise the client sits silent the whole run and looks frozen).
1315
+ rc, summary = self._run_once(progress_conn=conn, full_report=full_report, detailed=detailed)
1316
+ try:
1317
+ _send(conn, {"rc": rc, "summary": summary})
1318
+ except OSError:
1319
+ # client gone (Ctrl-C) before the final frame — the run is already done,
1320
+ # the daemon does NOT crash (used to crash on BrokenPipe here), stays warm
1321
+ _log("daemon", "client gone before summary; run completed, staying warm")
1322
+ finally:
1323
+ ctl.close()
1324
+ _remove_pid(cur)
1325
+ Path(cur).unlink(missing_ok=True)
1326
+
1327
+ # ── one run (fork workers + work-stealing dispatch) ──────────────────────
1328
+
1329
def _execute_run(
    self,
    progress_conn: socket.socket | None,
    *,
    full_report: bool,
    report_conn: socket.socket | None,
    selection: list[str] | None,
    profile: bool = False,
) -> _RunOutcome:
    """Run a single fork → serve → collect cycle and return its raw outcome.

    Nothing is rendered here: results, per-worker stats, bus counters, timings and
    worker exit codes are packed into a `_RunOutcome`, which both the single-run
    path (`_run_once`) and the N-run bench path (`_run_bench`) consume.

    Args:
        progress_conn: client socket that receives progress frames, or None.
        full_report: forwarded to the workers (full per-phase report mode).
        report_conn: client socket that receives streamed reports, or None.
        selection: nodeids to restrict the run to; None runs the whole suite.
        profile: forwarded to the workers; the bench's targeted pass sets it so
            each test runs under cProfile.
    """
    run_idx = self._run_counter
    self._run_counter = run_idx + 1
    started = time.perf_counter()

    # One worker socket per run, placed in the temp dir so the path stays short
    # (AF_UNIX paths are length-limited); pid + run index keep the name unique.
    # `tempfile.gettempdir()` follows $TMPDIR, which matters in sandboxes / tmpfs
    # setups where `/tmp` may be missing or read-only.
    sock_path = f"{tempfile.gettempdir()}/pytest_fast_{os.getpid()}_{run_idx}.sock"
    bus_file = Path(sock_path)
    bus_file.unlink(missing_ok=True)
    server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    server.bind(sock_path)
    server.listen(self.num_workers)

    # Workers are asked to send their nodeid list only when a selection is in play.
    shared_args = (sock_path, full_report, selection is not None, profile)
    workers = []
    for wid in range(self.num_workers):
        workers.append(self.ctx.Process(target=_worker_main, args=(wid, *shared_args), daemon=True))
    self._arm_collect_flag()  # local-run mode; serve() calls it too and re-arming is idempotent
    for worker in workers:
        worker.start()

    try:
        results, worker_stats, bus, t_ready, total = self._serve_bus(server, progress_conn, report_conn, selection)
    finally:
        # Reap with a deadline. A healthy worker exits right after its `fin`; one
        # that is still alive after the budget is considered wedged and is killed
        # instead of being waited on forever. The bus is already closed by now, so
        # nothing further can arrive from such a worker.
        for worker in workers:
            worker.join(timeout=_WORKER_JOIN_TIMEOUT)
            if not worker.is_alive():
                continue
            print(
                f"[pytest-fast] worker pid={worker.pid} did not exit within "
                f"{_WORKER_JOIN_TIMEOUT}s after fin — killing",
                file=sys.stderr,
                flush=True,
            )
            worker.kill()
            worker.join(timeout=1.0)
        server.close()
        bus_file.unlink(missing_ok=True)
    finished = time.perf_counter()

    # Optional nodeid → outcome dump, written once the run is over.
    if self.dump_path is not None:
        with Path(self.dump_path).open("w") as dump_file:
            json.dump({r["nodeid"]: r["outcome"] for r in results}, dump_file, indent=0, sort_keys=True)

    return _RunOutcome(
        results=results,
        worker_stats=worker_stats,
        bus=bus,
        warmup=t_ready - started,
        run_wall=finished - t_ready,
        total=total,
        idx=run_idx,
        exitcodes=[worker.exitcode for worker in workers],
    )
1401
+
1402
+ @staticmethod
1403
+ def _run_untrusted(o: _RunOutcome) -> bool:
1404
+ """A worker may die BEFORE sending results (import/assert in `_worker_main`) → results are
1405
+ partial and rc would be a false green (possibly n=0/0). A non-zero worker exitcode OR a
1406
+ result undercount (< collected total) → the run is NOT trusted."""
1407
+ crashed = any(code not in (0, None) for code in o.exitcodes)
1408
+ incomplete = o.total > 0 and len(o.results) < o.total
1409
+ return crashed or incomplete
1410
+
1411
+ def _run_once(
1412
+ self,
1413
+ progress_conn: socket.socket | None = None,
1414
+ *,
1415
+ full_report: bool = False,
1416
+ report_conn: socket.socket | None = None,
1417
+ selection: list[str] | None = None,
1418
+ detailed: bool = False,
1419
+ ) -> tuple[int, str]:
1420
+ o = self._execute_run(progress_conn, full_report=full_report, report_conn=report_conn, selection=selection)
1421
+ label = "BOOT (collect once)" if o.idx == 0 else f"run #{o.idx} (warm)"
1422
+ summary = self._report(
1423
+ o.results,
1424
+ o.worker_stats,
1425
+ o.bus,
1426
+ o.total,
1427
+ warmup=o.warmup,
1428
+ run=o.run_wall,
1429
+ label=label,
1430
+ full_report=full_report,
1431
+ detailed=detailed,
1432
+ )
1433
+ rc = 1 if any(r["outcome"] in {"failed", "error"} for r in o.results) else 0
1434
+ if self._run_untrusted(o):
1435
+ rc = 1
1436
+ summary += (
1437
+ f"\n ⚠ UNTRUSTED RUN — worker crashed / result undercount: "
1438
+ f"results={len(o.results)}/{o.total}, worker exitcodes={o.exitcodes} (see daemon log)"
1439
+ )
1440
+ return rc, summary
1441
+
1442
def _run_bench(self, n_runs: int, progress_conn: socket.socket | None = None) -> tuple[int, str]:
    """`--bench=N`: run the suite N times and render the bottleneck report.

    All runs use full reports. When more than one run was made, the FIRST is
    dropped as warmup (its fork + first-touch costs are unrepresentative) and
    the report is built over the remainder; a single run is kept as-is. A
    targeted cProfile pass over the top bottleneck tests (`_profile_top_tests`)
    then adds function-level attribution, so profiler overhead is only paid on
    the few tests that dominate the wall.

    Returns `(rc, summary)`; rc is 1 if ANY run (warmup included) is untrusted
    or contains a `failed`/`error` result.
    """
    failing = {"failed", "error"}
    runs = []
    for _ in range(max(1, n_runs)):
        runs.append(self._execute_run(progress_conn, full_report=True, report_conn=None, selection=None))

    warmup_dropped = len(runs) > 1
    measured = runs[1:] if warmup_dropped else runs
    avg_wall = sum(o.run_wall for o in measured) / len(measured)
    result_runs = [o.results for o in measured]
    profiles = self._profile_top_tests(result_runs)
    summary = _bench_report(
        result_runs,
        run=avg_wall,
        cores=self.num_workers,  # the parallelism this run actually used, not the machine default
        warmup_dropped=warmup_dropped,
        profiles=profiles,
    )

    rc = 0
    for o in runs:
        if self._run_untrusted(o) or any(r["outcome"] in failing for r in o.results):
            rc = 1
            break
    return rc, summary
1469
+
1470
def _profile_top_tests(self, result_runs: list[list[RunResult]]) -> dict[str, list[tuple[str, int, float, float]]]:
    """Targeted cProfile pass over the slowest tests.

    Tests are ranked by their average `duration` across `result_runs`; only the
    top `_BENCH_PROFILE_NODES` are re-run (one extra run, with `profile=True`).
    Returns `{nodeid: profile rows}` for every result that carries a `profile`
    entry. Best-effort: if the extra run raises, the failure is logged and an
    empty dict is returned, so the bench report simply renders without profiles.
    """
    # nodeid → number of samples / summed duration, in first-seen order.
    samples: dict[str, int] = {}
    summed: dict[str, float] = {}
    for run_results in result_runs:
        for r in run_results:
            nid = r["nodeid"]
            samples[nid] = samples.get(nid, 0) + 1
            summed[nid] = summed.get(nid, 0.0) + r["duration"]

    # Slowest first; the (avg, nodeid) tuple ordering also settles ties.
    by_avg_wall = sorted(((summed[nid] / n, nid) for nid, n in samples.items()), reverse=True)
    top = [nid for _avg, nid in by_avg_wall[:_BENCH_PROFILE_NODES]]
    if not top:
        return {}
    try:
        profiled = self._execute_run(None, full_report=False, report_conn=None, selection=top, profile=True)
    except Exception as exc:
        _log("daemon", f"bench profiling pass failed ({exc!r}) — report renders without profiles")
        return {}
    return {r["nodeid"]: r["profile"] for r in profiled.results if "profile" in r}
1493
+
1494
def _serve_bus(
    self,
    server: socket.socket,
    progress_conn: socket.socket | None = None,
    report_conn: socket.socket | None = None,
    selection: list[str] | None = None,
) -> tuple[list[RunResult], list[WorkerStats], dict[str, float], float, int]:
    """Accept the forked workers on `server` and drive the work-stealing dispatch loop.

    Worker → daemon messages are tuples whose first element is the kind:
      * `'ready'` — element 2 is the collected total; element 4 (optional) is the
        worker's nodeid list, used to resolve `selection` into item indices.
      * `'req'`   — element 2 is the previous result (or None). Answered with
        `('idx', pick)`, where `pick` is the next item index, or None once the
        queue is exhausted.
      * `'fin'`   — element 2 is the last result (or None), element 3 the worker stats.
    Anything that is not a tuple (EOF / garbage), or a socket error while reading,
    is treated as that worker disconnecting.

    Args:
        server: listening per-run worker socket.
        progress_conn: client socket for throttled `{'progress': (done, target)}`
            frames; sending stops silently once the client is gone.
        report_conn: client socket for streamed `{'report': …}` frames; same
            best-effort rule.
        selection: nodeids to run; None runs every collected item.

    Returns:
        `(results, worker_stats, bus, t_ready, run_total)` where `bus` holds the
        tx/rx byte counts and round-trip count, `t_ready` is the `perf_counter`
        timestamp of the last worker 'ready' (or of the end of the accept phase
        if no worker ever reported ready), and `run_total` is the number of items
        this run was supposed to execute.
    """
    # Worker connect with timeout: if a forked worker died BEFORE connect (warmup
    # crash), we don't block in accept() forever — start with whoever made it.
    sel = selectors.DefaultSelector()
    # try/finally so the selector's kernel fd (kqueue/epoll) is always closed: the
    # selector is part of a reference cycle (BaseSelector ↔ its map), so refcounting
    # alone won't free it until cyclic GC — and the resident daemon gc.freeze()s at boot
    # and rarely GCs → one leaked fd per run → eventual EMFILE.
    try:
        server.settimeout(_WORKER_ACCEPT_TIMEOUT)
        for _ in range(self.num_workers):
            try:
                conn, _addr = server.accept()
            except TimeoutError:
                break
            sel.register(conn, selectors.EVENT_READ)
        server.settimeout(None)
        active = len(sel.get_map())

        # Fallback "ready" timestamp. It is refreshed on every 'ready', so when all
        # workers report in it ends up at the moment the LAST one became ready. If
        # nobody connects, or a connected worker dies before 'ready', it stays a real
        # timestamp — the caller's warmup / run-wall arithmetic never sees a 0.0
        # placeholder (which produced a negative warmup and an absurd run wall).
        t_ready = time.perf_counter()

        total: int | None = None
        # When `selection` is set (the --fast plugin forwards the controller's collected
        # nodeids), pick_list holds the daemon-side item indices to actually run, built from
        # the first worker 'ready' that carries the nodeid list. None → run the full suite.
        pick_list: list[int] | None = None
        queue_pos = 0
        results: list[RunResult] = []
        worker_stats: list[WorkerStats] = []
        tx = rx = req_count = 0
        last_emit = 0.0

        def drop(worker: socket.socket) -> None:
            # Forget a worker connection: stop polling it, close it, count it out.
            nonlocal active
            sel.unregister(worker)
            worker.close()
            active -= 1

        def emit_progress(*, force: bool = False) -> None:
            nonlocal progress_conn, last_emit
            tgt = len(pick_list) if pick_list is not None else total  # how many we actually run
            if progress_conn is None or tgt is None:
                return
            now = time.perf_counter()
            done = len(results)
            if not force and done < tgt and now - last_emit < _PROGRESS_THROTTLE_SEC:
                return  # throttled to _PROGRESS_THROTTLE_SEC; the final frame (done==tgt) is always sent
            last_emit = now
            try:
                _send(progress_conn, {"progress": (done, tgt)})
            except OSError:
                progress_conn = None  # client gone (Ctrl-C) — stop sending, but complete the run

        def emit_reports(result: RunResult) -> None:
            # Stream each phase report ({'report': <serialized>}) to the controller as it
            # arrives (full-report/plugin mode) so it can republish into a real terminalreporter
            # live. No-op unless report_conn is set and the result carries full reports.
            nonlocal report_conn
            if report_conn is None:
                return
            reps = result.get("reports")
            if not reps:
                return
            for rep in reps:
                try:
                    _send(report_conn, {"report": rep})
                except OSError:
                    report_conn = None  # controller gone — stop streaming, complete the run
                    return

        while active > 0:
            for key, _mask in sel.select():
                conn = key.fileobj
                assert isinstance(conn, socket.socket)
                try:
                    msg, nbytes = _recv(conn)
                except OSError:
                    # Reset / broken worker connection (e.g. the worker was killed
                    # mid-frame). Same policy as a failed send below: treat it as a
                    # disconnect and let the undercount / exit code mark the run
                    # untrusted, rather than crash the daemon.
                    drop(conn)
                    continue
                rx += nbytes
                if not isinstance(msg, tuple):
                    drop(conn)  # EOF or garbage — the worker is gone
                    continue
                parts = cast("tuple[object, ...]", msg)  # worker msg: ('ready'|'req'|'fin', …)
                kind = parts[0]
                if kind == "ready":
                    total = cast("int", parts[2])
                    # Selection mode: resolve the controller's nodeids → daemon item indices
                    # from the first 'ready' that carries the worker's nodeid list. Unknown
                    # nodeids (not in this daemon's collection) are dropped — the controller's
                    # collection-match guard reports them.
                    if selection is not None and pick_list is None and len(parts) > 4 and parts[4] is not None:
                        idx_of = {nid: i for i, nid in enumerate(cast("list[str]", parts[4]))}
                        pick_list = [idx_of[n] for n in selection if n in idx_of]
                    t_ready = time.perf_counter()
                elif kind == "req":
                    result = cast("RunResult | None", parts[2])
                    if result is not None:
                        results.append(result)
                        emit_progress()
                        emit_reports(result)
                    if pick_list is not None:
                        pick = pick_list[queue_pos] if queue_pos < len(pick_list) else None
                    else:
                        pick = queue_pos if total is not None and queue_pos < total else None
                    if pick is not None:
                        queue_pos += 1
                    try:
                        tx += _send(conn, ("idx", pick))
                    except OSError:
                        # Worker died after sending 'req' (rare). Treat as a disconnect and
                        # finish the run — the result undercount (and the worker's nonzero
                        # exitcode) flag it untrusted in _run_once, rather than crashing the daemon.
                        drop(conn)
                        continue
                    req_count += 1
                else:  # "fin"
                    result = cast("RunResult | None", parts[2])
                    if result is not None:
                        results.append(result)
                        emit_reports(result)
                    worker_stats.append(cast("WorkerStats", parts[3]))
                    drop(conn)

        emit_progress(force=True)  # final frame (done==target) — guaranteed
        bus = {"tx": float(tx), "rx": float(rx), "req_count": float(req_count)}
        run_total = len(pick_list) if pick_list is not None else (total or 0)
        return results, worker_stats, bus, t_ready, run_total
    finally:
        sel.close()
1629
+
1630
+ def _report(
1631
+ self,
1632
+ results: list[RunResult],
1633
+ worker_stats: list[WorkerStats],
1634
+ bus: dict[str, float],
1635
+ total: int,
1636
+ warmup: float,
1637
+ run: float,
1638
+ label: str,
1639
+ full_report: bool = False,
1640
+ detailed: bool = False,
1641
+ ) -> str:
1642
+ from collections import Counter
1643
+
1644
+ counts = Counter(r["outcome"] for r in results)
1645
+ failed = counts["failed"] + counts["error"]
1646
+ sum_busy = sum(s["busy"] for s in worker_stats)
1647
+ run_walls = [s["run_wall"] for s in worker_stats]
1648
+ breakdown = ", ".join(f"{n} {cat}" for cat, n in sorted(counts.items()))
1649
+ line = "═" * 66
1650
+ out = [
1651
+ f"\n{line}",
1652
+ f" {self.start_method.upper()} DAEMON — {self.num_workers}w — {label}",
1653
+ line,
1654
+ f" results : {breakdown} (n={len(results)}/{total})",
1655
+ f" warmup : {warmup:6.2f}s (fork+spawn; ~0 for resident rerun)",
1656
+ f" RUN : {run:6.2f}s ← wall",
1657
+ f" par. : {(sum_busy / run if run else 0):.2f}x of {self.num_workers}"
1658
+ f" (run-wall max={max(run_walls) if run_walls else 0:.2f} min={min(run_walls) if run_walls else 0:.2f})",
1659
+ ]
1660
+ if detailed:
1661
+ metrics = _parallelism_metrics(worker_stats, run, self.num_workers, results)
1662
+ # cores = perf-core count (the throughput baseline + the default worker count);
1663
+ # logical = the hard cap for any worker-count suggestion.
1664
+ cores = _default_workers()
1665
+ logical = os.cpu_count() or cores
1666
+ out.extend(_detailed_par_lines(metrics, run, self.num_workers, cores, logical))
1667
+ out.append(f" bus : {int(bus['req_count'])} round-trips, {bus['rx'] / 1024:.0f}KB rx")
1668
+ if failed:
1669
+ out.append(f" FAILURES ({failed}):")
1670
+ for r in results:
1671
+ if r["outcome"] not in {"failed", "error"}:
1672
+ continue
1673
+ out.append(f" ✗ {r['nodeid']}")
1674
+ longrepr = r.get("longrepr")
1675
+ if isinstance(longrepr, str) and longrepr.strip():
1676
+ out.extend(f" {ln}" for ln in longrepr.splitlines())
1677
+ xpassed = [r for r in results if r["outcome"] == "xpassed"]
1678
+ if xpassed:
1679
+ out.append(f" XPASS ({len(xpassed)}) — stale xfail entries (now pass, drop them):")
1680
+ out.extend(f" ? {r['nodeid']}" for r in xpassed)
1681
+ if full_report:
1682
+ # Full-report mode: a real per-phase --durations table (from serialized reports).
1683
+ out.extend(_durations_lines(results))
1684
+ else:
1685
+ # Lean mode: only whole-test durations are known — show the ≥1s offenders.
1686
+ slow = sorted(results, key=lambda r: r["duration"], reverse=True)
1687
+ slow = [r for r in slow if r["duration"] >= 1.0][:10]
1688
+ if slow:
1689
+ out.append(f" SLOWEST (≥1s, top {len(slow)}):")
1690
+ out.extend(f" {r['duration']:7.2f}s {r['nodeid']}" for r in slow)
1691
+ out.append(line)
1692
+ return "\n".join(out)
1693
+
1694
+
1695
+ # ── client-side: request a run from the resident daemon ──────────────────────
1696
+
1697
+
1698
def request_run(
    address: str, *, full_report: bool = False, detailed: bool = False, bench: int = 0
) -> dict[str, object]:
    """Trigger a run on the daemon; stream progress to stdout, return the final frame
    (`{rc, summary}` or `{stale: True}`). The daemon sends N `{'progress': (done,total)}`
    frames then one final frame — we recv in a loop until a non-progress frame arrives.
    If the daemon closes the connection before a final frame, a synthetic
    `{rc: 1, summary: …}` frame is returned instead.

    `full_report` asks the daemon to run workers in full-report mode (per-phase reports →
    a real --durations table in the summary). The flag rides right after the env
    fingerprint in the request tuple, so a daemon that predates it simply ignores it
    and runs lean.

    Connection errors (`OSError` from `connect`) propagate to the caller; the socket
    is closed in every case.

    Module-level (not a method of `Daemon`) — this is the **client**, not a server
    method; keeping it on `Daemon` would mix both protocol sides into one class."""
    # The socket is owned by the `with` from creation on: a refused / missing daemon
    # socket makes `connect` raise, and the fd must not be left open in that case.
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        with _short_unix_path(address) as connect_path:
            sock.connect(connect_path)
        # ('run', fp, full_report, stream, nodeids, detailed, bench) — non-streamed CLI run, so
        # stream=False / nodeids=None; `bench` (an int run-count, 0=off) makes the daemon render the
        # bottleneck report (it forces full reports itself). Trailing elements are ignored by a
        # daemon predating them (back-compatible).
        _send(sock, ("run", _env_fingerprint(), full_report, False, None, detailed, bench))
        while True:
            raw, _ = _recv(sock)
            if not isinstance(raw, dict):
                return {"rc": 1, "summary": "[pytest-fast] daemon closed connection mid-run"}
            frame = cast("dict[str, object]", raw)  # daemon frame: progress | stale | rc/summary
            if "progress" in frame:
                done, total = cast("tuple[int, int]", frame["progress"])
                print(f"\r running {done}/{total} …", end="", flush=True)
                continue
            print("\r" + " " * 32 + "\r", end="", flush=True)  # erase the progress line
            return frame
1731
+
1732
+
1733
def request_run_streamed(
    address: str, on_report: Callable[[dict[str, object]], None], nodeids: list[str] | None = None
) -> dict[str, object]:
    """Client for full-report **streaming** (the `pytest --fast` plugin controller). Triggers a
    run in stream mode, invokes `on_report(serialized_report)` for each per-phase report as it
    arrives, and returns the final frame (`{rc, summary}` or `{stale: True}`). If the daemon
    closes the connection before a final frame, a synthetic `{rc: 1, summary: …}` frame is
    returned instead.

    `nodeids` restricts the run to the controller's collected selection (so -k/-m/paths work);
    None runs the daemon's full suite.

    The controller replays the streamed reports through its own real terminalreporter, so native
    pytest reporting (--durations / junit / -v/-s / plugins) all work — while the warm forkserver
    daemon does the actual execution (amortized collect, fork-warm workers).

    Connection errors (`OSError` from `connect`) propagate to the caller; the socket is closed
    in every case."""
    # Own the socket from creation: if `connect` raises (no daemon / refused), the fd is
    # still closed instead of lingering until garbage collection.
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        with _short_unix_path(address) as connect_path:
            sock.connect(connect_path)
        _send(sock, ("run", _env_fingerprint(), True, True, nodeids))  # full_report + stream + selection
        while True:
            raw, _ = _recv(sock)
            if not isinstance(raw, dict):
                return {"rc": 1, "summary": "[pytest-fast] daemon closed connection mid-run"}
            frame = cast("dict[str, object]", raw)  # daemon frame: report | stale | rc/summary
            rep = frame.get("report")
            if rep is not None:
                on_report(cast("dict[str, object]", rep))
                continue
            return frame  # stale | {rc, summary}
1761
+
1762
+
1763
+ # ── orchestration: ensure resident daemon + run / stale-restart ──────────────
1764
+
1765
+
1766
+ def _split_env_list(name: str, default: list[str]) -> list[str]:
1767
+ """Parse a comma/colon-separated env var into a list, falling back to `default`
1768
+ when unset. PATH-style semantics: env REPLACES the default (does not add to it).
1769
+ An explicit empty value (`PYTEST_FAST_WATCH_DIRS=""`) yields an empty list — that
1770
+ is, "scan nothing", which is occasionally useful for tooling."""
1771
+ raw = os.environ.get(name)
1772
+ if raw is None:
1773
+ return default
1774
+ return [p.strip() for p in raw.replace(":", ",").split(",") if p.strip()]
1775
+
1776
+
1777
def _watch_dirs() -> list[str]:
    """Repo-relative directories whose `*.py` files are scanned (recursively) for mtime.

    Defaults to `src` and `tests`. Setting `PYTEST_FAST_WATCH_DIRS` (comma- or
    colon-separated) REPLACES that default — a flat-layout project would use
    e.g. `PYTEST_FAST_WATCH_DIRS=mypkg,tests`.
    """
    builtin_dirs = ["src", "tests"]
    return _split_env_list(name="PYTEST_FAST_WATCH_DIRS", default=builtin_dirs)
1782
+
1783
+
1784
def _watch_files() -> list[str]:
    """Repo-relative standalone config files that take part in the mtime scan.

    Defaults to `pyproject.toml` and `pytest.ini`. Setting `PYTEST_FAST_WATCH_FILES`
    (comma- or colon-separated) REPLACES that default — list `setup.cfg`,
    `tox.ini`, `conftest.py`, etc. there as the project requires.
    """
    builtin_files = ["pyproject.toml", "pytest.ini"]
    return _split_env_list(name="PYTEST_FAST_WATCH_FILES", default=builtin_files)
1790
+
1791
+
1792
+ def _project_root() -> Path:
1793
+ """Project root for the `*.py` mtime scan. Default — `os.getcwd()` at call time
1794
+ (where `pytest-fast` was launched from). Override — `PYTEST_FAST_ROOT` (absolute
1795
+ or relative path); useful if you launch outside the repo root, or for pytest-fast
1796
+ self-tests."""
1797
+ override = os.environ.get("PYTEST_FAST_ROOT")
1798
+ return Path(override).resolve() if override else Path.cwd()
1799
+
1800
+
1801
def _iter_source_paths() -> Iterator[Path]:
    """Lazily yield every path the staleness scan looks at.

    First every `*.py` found recursively under each watch dir, then each watch
    file (yielded whether or not it exists — callers handle the stat failure).
    This is the single traversal shared by `_max_source_mtime` (needs the max)
    and `_any_source_newer` (needs an early exit).
    """
    project = _project_root()
    for rel_dir in _watch_dirs():
        for source in project.joinpath(rel_dir).rglob("*.py"):
            yield source
    for rel_file in _watch_files():
        yield project.joinpath(rel_file)
1810
+
1811
+
1812
def _max_source_mtime() -> float:
    """Newest mtime across the watch dirs + watch files (0.0 if nothing is readable).

    A cheap change detector for code/config. This full-scan variant is for places
    that need the actual maximum as a baseline (boot, watcher polling); the
    hot-path stale check uses `_any_source_newer`, which can stop early.
    """
    newest = 0.0
    for path in _iter_source_paths():
        try:
            mtime = path.stat().st_mtime
        except OSError:
            continue  # missing / unreadable entry — ignore it
        if mtime > newest:
            newest = mtime
    return newest
1824
+
1825
+
1826
def _any_source_newer(threshold: float) -> bool:
    """Return True as soon as one watched path has an mtime above `threshold`.

    Early-exit counterpart of `_max_source_mtime`, used by the stale check: the
    scan stops at the first newer file instead of stat-ing everything, which
    makes requests against an already-stale daemon cheaper. When nothing has
    changed there is no saving — every path is visited.
    """
    for path in _iter_source_paths():
        try:
            mtime = path.stat().st_mtime
        except OSError:
            continue  # missing / unreadable entry — cannot be "newer"
        if mtime > threshold:
            return True
    return False
1839
+
1840
+
1841
# Env vars whose change must invalidate the warm daemon: collection/patch-time
# inputs the forkserver baked at boot, which DON'T touch any source file mtime.
# Flipping any of these from the caller auto-triggers a stale-respawn — no manual
# daemon kill. The explicit keys affect collection/run (marker filter, addopts,
# dump, watch-root). User-app env prefixes are configurable via
# `PYTEST_FAST_ENV_PREFIXES` (comma-separated) — set e.g. `MYAPP_,FEATURE_` so any
# `MYAPP_DB__HOST=...` or `FEATURE_X=...` shift triggers a respawn.
#
# These names are matched EXACTLY by `_env_fingerprint`; only the user-configured
# prefixes are matched with `startswith`.
_FINGERPRINT_KEYS = (
    "PYTEST_FAST_MARK",
    "PYTEST_ADDOPTS",
    "OUTCOME_DUMP",
    "PYTEST_FAST_WATCH_DIRS",
    "PYTEST_FAST_WATCH_FILES",
    "PYTEST_FAST_ROOT",
    "PYTEST_FAST_ENV_PREFIXES",  # change in the prefix list itself must respawn too
)
1857
+
1858
+
1859
+ def _fingerprint_prefixes() -> tuple[str, ...]:
1860
+ """User-configured env-var prefixes that should drive staleness. Parsed from
1861
+ `PYTEST_FAST_ENV_PREFIXES` (comma-separated). Empty by default — only the
1862
+ explicit `_FINGERPRINT_KEYS` matter unless the caller opts in to app config."""
1863
+ raw = os.environ.get("PYTEST_FAST_ENV_PREFIXES", "")
1864
+ return tuple(p.strip() for p in raw.split(",") if p.strip())
1865
+
1866
+
1867
def _env_fingerprint() -> str:
    """Hex SHA-1 over the env vars that influence collection/patching.

    Covers every variable named in `_FINGERPRINT_KEYS` plus any whose name starts
    with one of `_fingerprint_prefixes()`, serialized as sorted `KEY=value`
    entries joined by NUL. The daemon snapshots this at boot and callers send
    their current value on run/status; a mismatch means stale → respawn.
    """
    prefixes = _fingerprint_prefixes()
    # `str.startswith` takes the whole tuple; an empty tuple matches nothing.
    relevant = sorted(
        (key, value)
        for key, value in os.environ.items()
        if key in _FINGERPRINT_KEYS or key.startswith(prefixes)
    )
    blob = "\0".join(f"{key}={value}" for key, value in relevant)
    # Encode with `surrogateescape` rather than strictly: a non-UTF-8 byte in an env
    # value shows up in os.environ as surrogate-escaped chars (\udc80–\udcff), and a
    # strict UTF-8 encode would raise UnicodeEncodeError on them. This function runs
    # on every client request, so that would break the whole run; surrogateescape
    # maps those chars back to the original bytes, keeping the hash stable and total.
    encoded = blob.encode("utf-8", "surrogateescape")
    return hashlib.sha1(encoded).hexdigest()
1879
+
1880
+
1881
def _stale_reason(boot_mtime: float, boot_fp: str, client_fp: str | None) -> str | None:
    """Explain why a warm daemon has to be discarded; None means it is still fresh.

    Either condition alone forces a respawn; when both hold, the source change
    is the one reported. A `client_fp` of None (legacy caller that sends no
    fingerprint) skips the env comparison.

    The source check goes through `_any_source_newer`, which stops at the first
    newer file, rather than computing the full maximum with `_max_source_mtime`.
    """
    sources_changed = _any_source_newer(boot_mtime)
    if sources_changed:
        return "sources changed"
    env_changed = client_fp is not None and client_fp != boot_fp
    return "env changed" if env_changed else None
1893
+
1894
+
1895
+ # ── lifecycle helpers: pidfile + control-socket bind + status/shutdown/promote ─
1896
+
1897
+
1898
+ def _pid_path(address: str) -> Path:
1899
+ return Path(address + ".pid")
1900
+
1901
+
1902
def _write_pid(address: str) -> None:
    """Publish this process's pid in the pidfile, atomically.

    The pid is written to a sibling `.tmp` file which is then renamed over the
    pidfile. A plain `write_text` is open+truncate+write+close: a concurrent
    `_read_pid` landing between truncate and write would read an empty file,
    fail to parse it, and make `_daemon_alive` wrongly report False. A POSIX
    rename is atomic, so readers see the old or the new content, never an empty
    file.
    """
    final_path = _pid_path(address)
    staging_path = final_path.with_suffix(final_path.suffix + ".tmp")
    own_pid = os.getpid()
    staging_path.write_text(str(own_pid))
    staging_path.replace(final_path)
1911
+
1912
+
1913
def _read_pid(address: str) -> int | None:
    """Return the pid recorded in the pidfile, or None if it is missing, unreadable or not an integer."""
    try:
        recorded = _pid_path(address).read_text()
        return int(recorded.strip())
    except (OSError, ValueError):
        return None
1918
+
1919
+
1920
def _remove_pid(address: str) -> None:
    """Delete the pidfile for `address`; a pidfile that is already gone is not an error."""
    pid_file = _pid_path(address)
    pid_file.unlink(missing_ok=True)
1922
+
1923
+
1924
def _bind_ctl(address: str, ttl: float) -> socket.socket:
    """Bind the control unix socket at `address` (unlink+bind+listen) and write the pidfile.

    Args:
        address: filesystem path of the control socket; any stale file there is removed first.
        ttl: accept timeout in seconds set on the listening socket (the caller
            treats an accept timeout as "idle — shut down").

    Returns:
        The listening socket. If any setup step raises, the socket is closed
        before the exception propagates, so a failed bind does not leak an fd.
    """
    Path(address).unlink(missing_ok=True)
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        with _short_unix_path(address) as bind_path:
            s.bind(bind_path)
        # Generous backlog: the accept loop is serial (one control message at a time), so a
        # burst of near-simultaneous probes (e.g. parallel `status` pings) can pile up faster
        # than we accept them. 8 was small enough that a flood got connections refused; 64
        # absorbs realistic bursts without dropping callers.
        s.listen(64)
        s.settimeout(ttl)
        _write_pid(address)
    except BaseException:
        # Ownership has not been handed to the caller yet — release the fd ourselves.
        s.close()
        raise
    return s
1938
+
1939
+
1940
def _daemon_alive(address: str) -> bool:
    """Liveness probe via pidfile + `os.kill(pid, 0)`. Cheap, and it does NOT block while
    the daemon is busy with a run (a `status` ping would: a daemon in the middle of a run
    is not in accept and won't reply in time).

    No readable pidfile → False. Signal 0 only checks that the pid exists: "no such
    process" → False; "not permitted" still means a process with that pid exists → True."""
    pid = _read_pid(address)
    if pid is None:
        return False
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        pass  # the process exists, we just may not signal it
    return True
1954
+
1955
+
1956
def _status(address: str) -> dict[str, object] | None:
    """Ping the daemon: ('status', fingerprint) → {ready, stale}. None if there's no socket /
    the daemon is busy with a run (settimeout: a busy daemon isn't in the accept loop and
    won't reply in time), or if the reply is not a dict.

    The socket is closed on every path. This function is polled in tight loops
    (`_await_ready`), so a connect failure must not leave an open descriptor behind."""
    try:
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    except OSError:
        return None
    with s:  # guarantees close() even when connect/send/recv fails
        try:
            s.settimeout(_STATUS_PING_TIMEOUT)
            with _short_unix_path(address) as connect_path:
                s.connect(connect_path)
            _send(s, ("status", _env_fingerprint()))  # fp → status accounts for env change, not only mtime
            reply, _ = _recv(s)
        except OSError:
            return None
    return cast("dict[str, object]", reply) if isinstance(reply, dict) else None
1974
+
1975
+
1976
+ def _await_ready(address: str, proc: subprocess.Popen[bytes], timeout: float) -> bool:
1977
+ """Wait for the daemon to be ready (ready=True). Early exit if the process DIED
1978
+ (broken edit → forkserver-preload/collect crashed at startup): we don't wait the
1979
+ whole timeout, return False immediately."""
1980
+ deadline = time.monotonic() + timeout
1981
+ while time.monotonic() < deadline:
1982
+ if proc.poll() is not None:
1983
+ return False # successor crashed (didn't collect) — give up at once
1984
+ st = _status(address)
1985
+ if st is not None and st.get("ready"):
1986
+ return True
1987
+ time.sleep(_READY_POLL_INTERVAL)
1988
+ return False
1989
+
1990
+
1991
+ def _await_socket_gone(address: str, timeout: float) -> bool:
1992
+ """Wait until the daemon's control socket file disappears — that's the signal
1993
+ "its `finally` in `serve()` ran and released the address". Used as a replacement
1994
+ for the "pid is dead" check: `os.kill(pid, 0)` on a zombie child returns success
1995
+ until an explicit `wait()` (which may never happen if the parent doesn't reap),
1996
+ whereas the socket file is simply there-or-not, regardless of reap status."""
1997
+ deadline = time.monotonic() + timeout
1998
+ sock_path = Path(address)
1999
+ while time.monotonic() < deadline:
2000
+ if not sock_path.exists():
2001
+ return True
2002
+ time.sleep(_PID_DEAD_POLL_INTERVAL)
2003
+ return False
2004
+
2005
+
2006
def _shutdown_daemon(address: str) -> None:
    """Ask the daemon to exit cleanly. The message is serialized through its accept
    loop AFTER the current run — an active run is never torn (unlike SIGKILL).

    Best effort: any socket error (no daemon, connection dropped) is ignored. No timeout is
    set, so the call waits for the daemon's answer. The socket is closed on every path,
    including a failed connect."""
    try:
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    except OSError:
        return
    with s:  # guarantees close() even when connect fails
        try:
            with _short_unix_path(address) as connect_path:
                s.connect(connect_path)
            _send(s, ("shutdown",))
            _recv(s)  # {bye} (or close) → daemon released resources and is exiting
        except OSError:
            pass  # nothing listening / peer went away — there is nothing left to shut down
2021
+
2022
+
2023
def _promote(staging: str, canonical: str) -> bool:
    """Tell the staging daemon to rebind to the canonical address.

    Returns True only when the daemon replies with a dict whose `promoted` entry is truthy;
    any socket error or a non-dict reply yields False. The socket is closed on every path,
    including a failed connect."""
    try:
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    except OSError:
        return False
    with s:  # guarantees close() even when connect fails
        try:
            with _short_unix_path(staging) as connect_path:
                s.connect(connect_path)
            _send(s, ("promote", canonical))
            reply, _ = _recv(s)
        except OSError:
            return False
    if not isinstance(reply, dict):
        return False
    return bool(cast("dict[str, object]", reply).get("promoted"))
2040
+
2041
+
2042
+ @contextmanager
2043
+ def _respawn_lock(address: str) -> Iterator[None]:
2044
+ """Exclusive flock around (re)spawning the daemon: watcher-promote and the
2045
+ client's stale-respawn don't race for the canonical socket (otherwise double-boot
2046
+ / orphan daemon)."""
2047
+ with Path(address + ".respawn.lock").open("w") as f:
2048
+ try:
2049
+ fcntl.flock(f, fcntl.LOCK_EX)
2050
+ yield
2051
+ finally:
2052
+ fcntl.flock(f, fcntl.LOCK_UN)
2053
+
2054
+
2055
+ def _daemon_log_path(address: str) -> Path:
2056
+ """Daemon log file for `address`: a staging daemon (*.staging) writes into a separate
2057
+ log to avoid clobbering canonical's. Single source of truth for _spawn_daemon and
2058
+ promote-redirect.
2059
+
2060
+ Derived from `address` (per-worktree socket → per-worktree log): otherwise two daemons
2061
+ from different worktrees would write to the same file and interleave lines. We strip
2062
+ `.staging`/`.sock` and append `-daemon[.staging].log`."""
2063
+ staging = address.endswith(".staging")
2064
+ base = address.removesuffix(".staging") if staging else address
2065
+ base = base.removesuffix(".sock")
2066
+ suffix = ".staging" if staging else ""
2067
+ return Path(f"{base}-daemon{suffix}.log")
2068
+
2069
+
2070
def _redirect_stdio(path: Path) -> None:
    """Point fd 1 and fd 2 of the CURRENT process at `path` (opened for append, created
    with mode 0o600 if missing).

    Needed on promote: the daemon was spawned with stdout→staging-log; once it rebinds to
    canonical its lifecycle logs should land in canonical's log (otherwise the "current"
    daemon keeps writing into …-daemon.staging.log → debugging confusion). `dup2` copies the
    new fd over 1/2, so sys.stdout/sys.stderr (wrappers around fd 1/2) follow automatically.

    O_NOFOLLOW: refuse to follow a symlink at the log path. The control socket is connectable
    by any same-user process, so a stray/hostile peer could pre-plant a symlink there to
    capture the daemon's stdio. On any open error the current stdio is kept rather than
    crashing."""
    flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND | os.O_NOFOLLOW
    try:
        log_fd = os.open(str(path), flags, 0o600)
    except OSError:
        return
    try:
        for std_fd in (1, 2):
            os.dup2(log_fd, std_fd)
    finally:
        # fds 1/2 now hold their own duplicates; the original descriptor is no longer needed.
        os.close(log_fd)
2089
+
2090
+
2091
+ def _self_invocation() -> list[str]:
2092
+ """Command for re-exec'ing pytest-fast itself in a background process (daemon/watcher).
2093
+
2094
+ We use `python -m pytest_fast` rather than a file path — the package is already on
2095
+ sys.path (we got imported, after all); it doesn't depend on how the package is
2096
+ installed (editable, wheel, src-layout). `__main__.py` proxies argv into `main()`."""
2097
+ return [sys.executable, "-m", "pytest_fast"]
2098
+
2099
+
2100
+ def _subprocess_env() -> dict[str, str]:
2101
+ """Env for spawning a fresh pytest-fast subprocess (daemon or watcher).
2102
+
2103
+ We scrub `_PYTEST_FAST_COLLECT`: if the parent is another pytest-fast whose
2104
+ `Daemon.__init__` armed the flag, the child's main process would needlessly
2105
+ run `_collect()` at the top of `__init__.py`. The child's own `Daemon.__init__`
2106
+ will arm the flag again right before booting forkserver, where collect is actually
2107
+ needed — env flows into the forkserver through `ctx.Process.start()`."""
2108
+ env = os.environ.copy()
2109
+ env.pop("_PYTEST_FAST_COLLECT", None)
2110
+ return env
2111
+
2112
+
2113
+ def _append_restart_marker(log: Path) -> None:
2114
+ """Append a `=== restart at TS ===` separator to the log before a new spawn.
2115
+ Append mode (rather than truncate): keep the post-mortem of the previous daemon/
2116
+ watcher incarnation. Without this `_spawn_daemon` on every stale-respawn wiped
2117
+ out the previous logs (which matters when debugging a flapping daemon)."""
2118
+ with log.open("a") as f:
2119
+ f.write(f"\n=== restart at {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n")
2120
+
2121
+
2122
def _spawn_daemon(workers: int, start_method: str, address: str, ttl: float) -> subprocess.Popen[bytes]:
    """Start a resident daemon (`--serve`) as a detached process in its own session, so it
    survives the caller, with stdout/stderr appended to the daemon log for `address`.

    Returns the Popen handle → the caller can detect an early death (broken collect). A
    staging daemon (address ends in `.staging`) gets a separate log via `_daemon_log_path`,
    so it doesn't disturb canonical's."""
    log = _daemon_log_path(address)
    argv = list(_self_invocation())
    argv += [
        "--serve",
        "--address",
        address,
        "--ttl",
        str(ttl),
        "--workers",
        str(workers),
        "--start-method",
        start_method,
    ]
    _append_restart_marker(log)
    with log.open("a") as sink:
        # The child keeps its own copy of the log descriptor; ours is closed on leaving the `with`.
        proc = subprocess.Popen(
            argv,
            stdout=sink,
            stderr=subprocess.STDOUT,
            start_new_session=True,
            env=_subprocess_env(),
        )
    print(f"[pytest-fast] starting resident daemon (socket {address}, ttl {int(ttl)}s, log {log})", file=sys.stderr)
    return proc
2144
+
2145
+
2146
def _coordinated_spawn(workers: int, start_method: str, address: str, ttl: float) -> None:
    """Spawn the canonical daemon while holding the respawn lock.

    If a daemon at `address` already reports ready and not stale (the watcher pre-warmed
    it), nothing is spawned — the client just reconnects. A status reply without a `stale`
    entry is treated as stale."""
    with _respawn_lock(address):
        status = _status(address)
        already_fresh = status is not None and status.get("ready") and not status.get("stale", True)
        if not already_fresh:
            _spawn_daemon(workers, start_method, address, ttl)
2154
+
2155
+
2156
def _run_via_daemon(
    workers: int,
    start_method: str,
    address: str,
    ttl: float,
    *,
    with_watcher: bool,
    run: Callable[[str], dict[str, object]],
) -> dict[str, object]:
    """Ensure a resident daemon at `address`, then execute `run(address)` against it.

    The daemon is spawned when the connect fails (no socket / connection refused) and
    respawned whenever it replies `{stale}`; both retry paths are bounded by one boot
    deadline measured from entry. Returns the final frame from `run` (`{rc, summary}`), or
    `{rc: 1}` if the daemon never came up / kept reporting stale. Shared by the CLI client
    (`_ensure_and_run`) and the `--fast` plugin controller (`pytest_runtestloop`).

    Stale is detected by the daemon BEFORE it runs anything (it replies `{stale}` and exits),
    so a streaming `run` that gets respawned never double-emits reports."""
    if with_watcher:
        _ensure_watcher(workers, start_method, address, ttl)

    give_up_at = time.monotonic() + _DAEMON_BOOT_TIMEOUT
    already_spawned = False
    while True:
        try:
            frame = run(address)
        except (FileNotFoundError, ConnectionRefusedError):
            # Nothing is listening yet: spawn at most once on this path, then keep retrying.
            if not already_spawned:
                _coordinated_spawn(workers, start_method, address, ttl)
                already_spawned = True
            timeout_message = "[pytest-fast] daemon failed to start in time"
            backoff = _DAEMON_BACKOFF_AFTER_SPAWN
        else:
            if not frame.get("stale"):
                return frame
            print("[pytest-fast] sources/env changed — restarting daemon (fresh collect)", file=sys.stderr)
            _coordinated_spawn(workers, start_method, address, ttl)
            already_spawned = True
            # Perpetual staleness (e.g. two callers with different env fingerprints sharing
            # one socket, or a watched file with a future mtime) — give up at the deadline
            # instead of spinning the client forever.
            timeout_message = "[pytest-fast] daemon kept reporting stale past boot deadline"
            backoff = _DAEMON_BACKOFF_AFTER_STALE  # let the old release the socket and the new boot
        if time.monotonic() > give_up_at:
            print(timeout_message, file=sys.stderr)
            return {"rc": 1}
        time.sleep(backoff)
2202
+
2203
+
2204
def _ensure_and_run(
    workers: int,
    start_method: str,
    address: str,
    ttl: float,
    *,
    with_watcher: bool,
    full_report: bool = False,
    detailed: bool = False,
    bench: int = 0,
) -> int:
    """CLI client (front A): connect to the daemon → run → print the daemon-rendered summary.

    Returns the reply's `rc` when it is an int, otherwise 1. The summary is printed only
    when the reply carries one."""

    def run_once(addr: str) -> dict[str, object]:
        return request_run(addr, full_report=full_report, detailed=detailed, bench=bench)

    reply = _run_via_daemon(
        workers,
        start_method,
        address,
        ttl,
        with_watcher=with_watcher,
        run=run_once,
    )
    if (summary := reply.get("summary")) is not None:
        print(summary)
    exit_code = reply.get("rc")
    if isinstance(exit_code, int):
        return exit_code
    return 1
2229
+
2230
+
2231
+ # ── pytest plugin front-end (`pytest -p pytest_fast --fast`) ─────────────────
2232
+ #
2233
+ # Run the suite through the resident warm daemon while THIS process stays a real pytest
2234
+ # session — so native reporting (terminalreporter, --durations, junit, -v/-s, plugins) all
2235
+ # work: we just republish the daemon's streamed per-phase reports through the controller's
2236
+ # own `pytest_runtest_logreport` hook (the same mechanism xdist uses). The forkserver daemon
2237
+ # does the execution (amortized collect + fork-warm workers), so it stays fast across reruns.
2238
+ #
2239
+ # The hooks are INERT unless --fast is passed (like xdist with -n), so loading `-p pytest_fast`
2240
+ # for the OUTCOME_DUMP reference mode is unaffected.
2241
+
2242
+
2243
+ def _default_workers() -> int:
2244
+ """Default worker count — the number of PERFORMANCE cores.
2245
+
2246
+ On Apple Silicon (and other big.LITTLE designs) cores split into performance (P) and
2247
+ efficiency (E) cores; E-cores run roughly half the throughput. The work-stealing dispatch
2248
+ finishes when the SLOWEST worker drains, so a worker scheduled onto an E-core becomes a
2249
+ straggler that bounds the whole run — more workers than P-cores doesn't speed things up,
2250
+ it just adds stragglers plus memory/scheduler contention. So default to the P-core count
2251
+ (macOS: `hw.perflevel0.physicalcpu`). Other platforms fall back to the logical CPU count."""
2252
+ if sys.platform == "darwin":
2253
+ try:
2254
+ out = subprocess.run(
2255
+ ["sysctl", "-n", "hw.perflevel0.physicalcpu"],
2256
+ capture_output=True,
2257
+ text=True,
2258
+ timeout=2.0,
2259
+ check=False,
2260
+ )
2261
+ n = int(out.stdout.strip())
2262
+ if n > 0:
2263
+ return n
2264
+ except (OSError, ValueError):
2265
+ pass # not Apple Silicon / sysctl unavailable → fall through
2266
+ return os.cpu_count() or 1
2267
+
2268
+
2269
+ def _resolve_workers(cli_value: int | None) -> int:
2270
+ """Worker count precedence: explicit CLI/option value → `PYTEST_FAST_WORKERS` env →
2271
+ performance-core auto-detect (`_default_workers`).
2272
+
2273
+ The single chokepoint that guarantees a VALID (`>= 1`) count for every caller — the CLI,
2274
+ the `--fast` plugin, and external tooling via the public `resolve_workers`. An explicit
2275
+ `< 1` (a `--workers`/`--fast-workers` option or a *parseable* `PYTEST_FAST_WORKERS`) is a
2276
+ user error and raises `ValueError`: 0 workers means no worker ever runs, so the suite
2277
+ exits green having executed nothing — a silent false-pass a test runner must never produce.
2278
+ Callers surface the error idiomatically (CLI → `parser.error`, plugin → `pytest.UsageError`).
2279
+ An UNPARSEABLE env value (e.g. `garbage`) is treated as unset and falls back to auto-detect,
2280
+ which always returns `>= 1` — so this function never returns a value below 1."""
2281
+ if cli_value is not None:
2282
+ if cli_value < 1:
2283
+ msg = f"worker count must be >= 1, got {cli_value}"
2284
+ raise ValueError(msg)
2285
+ return cli_value
2286
+ env = os.environ.get("PYTEST_FAST_WORKERS")
2287
+ if env:
2288
+ try:
2289
+ n = int(env)
2290
+ except ValueError:
2291
+ n = None # unparseable → treat as unset, fall through to auto-detect
2292
+ if n is not None:
2293
+ if n < 1:
2294
+ msg = f"PYTEST_FAST_WORKERS must be >= 1, got {n}"
2295
+ raise ValueError(msg)
2296
+ return n
2297
+ return _default_workers()
2298
+
2299
+
2300
def resolve_workers(cli_value: int | None = None) -> int:
    """Public entry point for the worker count pytest-fast will use.

    Precedence: an explicit `cli_value` → `PYTEST_FAST_WORKERS` → performance-core
    auto-detect. Stable API for external tooling that needs to size a per-worker resource
    pool to match the run; prefer it over the private `_resolve_workers`, to which it
    delegates unchanged. Raises `ValueError` on an explicit value below 1; an unparseable
    env var falls back to auto-detect. See also the `pytest-fast --print-inferred-workers`
    CLI, which prints exactly `resolve_workers()`."""
    resolved = _resolve_workers(cli_value)
    return resolved
2308
+
2309
+
2310
def default_workers() -> int:
    """Public accessor for the auto-detected default worker count.

    Performance cores on Apple Silicon, logical CPUs elsewhere; any `--workers` /
    `PYTEST_FAST_WORKERS` override is ignored. Always `>= 1`. Use `resolve_workers`
    instead when overrides should win."""
    detected = _default_workers()
    return detected
2315
+
2316
+
2317
+ def _resolve_ttl(cli_value: float | None) -> float:
2318
+ """Idle-TTL precedence: explicit CLI/option value → `PYTEST_FAST_TTL` env → 600s."""
2319
+ if cli_value is not None:
2320
+ return cli_value
2321
+ env = os.environ.get("PYTEST_FAST_TTL")
2322
+ if env:
2323
+ try:
2324
+ return float(env)
2325
+ except ValueError:
2326
+ pass
2327
+ return 600.0
2328
+
2329
+
2330
def _default_fast_address() -> str:
    """Per-project daemon socket used when no address is given.

    A short, stable name in the temp directory: `pytest-fast-<slug>.sock`, where the slug is
    the first 10 hex digits of the SHA-1 of the project root path — so two checkouts don't
    share one daemon."""
    root = str(_project_root())
    slug = hashlib.sha1(root.encode()).hexdigest()[:10]
    tmp_dir = tempfile.gettempdir()
    return f"{tmp_dir}/pytest-fast-{slug}.sock"
2335
+
2336
+
2337
+ def _resolve_fast_address(cli_value: str | None) -> str:
2338
+ """Daemon address precedence: `--fast-address` option → `PYTEST_FAST_ADDRESS` env →
2339
+ per-project default.
2340
+
2341
+ ⚠ Prefer `PYTEST_FAST_ADDRESS` (or `--fast-address=PATH`, with an `=`) over the space form
2342
+ `--fast-address PATH`: pytest determines rootdir/inifile from the raw argv BEFORE any plugin
2343
+ loads, scanning it for existing paths — so once the daemon's socket file exists, a bare
2344
+ `--fast-address /tmp/x.sock` makes pytest root at `/tmp`, silently losing `pythonpath`/ini
2345
+ discovery. The `=` form and the env var keep the path out of that positional scan."""
2346
+ return cli_value or os.environ.get("PYTEST_FAST_ADDRESS") or _default_fast_address()
2347
+
2348
+
2349
def pytest_addoption(parser: Parser) -> None:
    """Register the `--fast*` command-line options in a dedicated "pytest-fast" option group.

    Every option defaults to "not given" (False / None), so the resolvers can fall back to
    the corresponding environment variable or built-in default."""
    group = parser.getgroup("pytest-fast", "resident forkserver accelerator")
    # (flag, addoption keyword arguments), registered in this order.
    option_specs = (
        (
            "--fast",
            {
                "action": "store_true",
                "default": False,
                "help": "run the suite via a resident pytest-fast daemon (warm forkserver workers, native reporting)",
            },
        ),
        (
            "--fast-address",
            {
                "default": None,
                "help": "daemon unix socket (or $PYTEST_FAST_ADDRESS; default: derived from the project root). "
                "Use the '=' form (--fast-address=PATH) or the env var — a bare space-separated path can be "
                "mistaken for the rootdir once the socket exists.",
            },
        ),
        (
            "--fast-workers",
            {
                "type": int,
                "default": None,
                "help": "worker count for --fast (or $PYTEST_FAST_WORKERS; default: performance-core count)",
            },
        ),
        (
            "--fast-ttl",
            {
                "type": float,
                "default": None,
                "help": "daemon idle TTL seconds for --fast (or $PYTEST_FAST_TTL; default 600)",
            },
        ),
        (
            "--fast-watch",
            {
                "action": "store_true",
                "default": False,
                "help": "also keep a background watcher pre-warming the daemon on source changes",
            },
        ),
    )
    for flag, spec in option_specs:
        group.addoption(flag, **spec)
2382
+
2383
+
2384
def pytest_runtestloop(session: Session) -> bool | None:
    """When --fast: hand execution to the resident daemon and republish its streamed reports
    through this controller's hooks (native reporting). Returns True (loop handled). Inert
    (returns None → pytest's normal in-process loop) otherwise.

    Raises `session.Interrupted` on collection errors (unless continuing on them is enabled)
    and `pytest.UsageError` for an invalid worker count or when the daemon did not report
    every selected test."""
    config = session.config
    if not config.getoption("fast", default=False):
        return None
    if session.testsfailed and not config.getvalue("continue_on_collection_errors"):
        raise session.Interrupted(f"{session.testsfailed} error(s) during collection")

    # Imported lazily, and only on the --fast path, for the UsageError raises below.
    import pytest

    address = _resolve_fast_address(cast("str | None", config.getoption("fast_address")))
    try:
        workers = _resolve_workers(cast("int | None", config.getoption("fast_workers")))
    except ValueError as exc:
        # An invalid --fast-workers / PYTEST_FAST_WORKERS must fail this session cleanly, NOT
        # spawn a daemon with 0 workers (which would run nothing and exit green). UsageError is
        # pytest's idiom for a bad invocation — same as the collection-match guard below.
        raise pytest.UsageError(f"--fast: {exc}") from exc
    ttl = _resolve_ttl(cast("float | None", config.getoption("fast_ttl")))
    with_watcher = bool(config.getoption("fast_watch"))

    selected = [item.nodeid for item in session.items]
    expected = set(selected)
    reported: set[str] = set()

    def on_report(data: dict[str, object]) -> None:
        rep = config.hook.pytest_report_from_serializable(config=config, data=data)
        reported.add(rep.nodeid)
        # Republish into the controller's real terminalreporter / plugins / pass-fail accounting.
        config.hook.pytest_runtest_logreport(report=rep)

    def stream_run(addr: str) -> dict[str, object]:
        return request_run_streamed(addr, on_report, selected)

    # Forward THIS session's collected nodeids → the daemon runs exactly that selection (so
    # -k/-m/explicit paths work; the full suite is just "all nodeids").
    reply = _run_via_daemon(
        workers,
        "forkserver",
        address,
        ttl,
        with_watcher=with_watcher,
        run=stream_run,
    )

    # Collection-match guard: every selected test must have been run by the daemon. A `missing`
    # nodeid means the daemon's collection lacks it (drifted/stale despite the fingerprint check)
    # — fail loudly rather than silently under-report.
    missing = expected - reported
    if missing:
        raise pytest.UsageError(
            f"--fast: {len(missing)} selected test(s) were not run by the daemon "
            f"(e.g. {sorted(missing)[:3]}) — its collection may differ from this session. "
            "Try again (the daemon will respawn on a source/env change), or run without --fast."
        )

    rc = reply.get("rc")
    if rc not in (0, None) and not session.testsfailed:
        # Daemon flagged the run untrusted (worker crash / result undercount) but no republished
        # report marked a failure — surface it so a green exit can't hide a broken run.
        session.shouldfail = "pytest-fast: daemon reported an untrusted run (see daemon log)"
    return True
2447
+
2448
+
2449
+ # ── source watcher (--watch): pre-warm staging successor, then promote ────────
2450
+ #
2451
+ # Optional (--with-watcher on the client spawns it detached). Lives in THIS file —
2452
+ # spawns itself as `… --watch` (same trick as _spawn_daemon). No extra dependencies:
2453
+ # poll mtime + staging-promote. Idea: ~2.8s of new-forkserver boot is amortized into
2454
+ # the idle gap AFTER an edit, so by the time the user re-runs tests the daemon is
2455
+ # already warm and fresh.
2456
+
2457
+ # Watch poll/debounce are env-overridable (`PYTEST_FAST_WATCH_POLL` / `_DEBOUNCE`, seconds): tune
2458
+ # reactivity vs CPU, and let the test suite drop them to ~0.05 so watcher tests run in ~0.3s instead
2459
+ # of ~2.7s. Read at module load → a freshly-spawned watcher subprocess picks up the caller's env.
2460
+ _WATCH_POLL = float(os.environ.get("PYTEST_FAST_WATCH_POLL", "0.5")) # seconds between max(mtime) polls
2461
+ _WATCH_DEBOUNCE = float(os.environ.get("PYTEST_FAST_WATCH_DEBOUNCE", "0.7")) # silence after last edit → one reboot
2462
+ _WATCH_GONE_GRACE = 3.0 # seconds without the daemon → watcher exits (lifetime tied to daemon ttl)
2463
+ _STAGING_BOOT_TIMEOUT = 90.0 # upper bound on successor boot (normal ~3s;
2464
+ # a broken edit is caught immediately via process death in _await_ready, not this timeout)
2465
+
2466
+ # Poll intervals inside await-loops (sleep between two condition checks). Smaller =
2467
+ # faster response + slightly more CPU; larger = more reaction delay. Kept small — a daemon boots
2468
+ # in well under 100ms, so a 0.2s ready-poll was pure dead time on every spawn (×20+ tests).
2469
+ _READY_POLL_INTERVAL = 0.02
2470
+ _PID_DEAD_POLL_INTERVAL = 0.02
2471
+ _DEBOUNCE_POLL_INTERVAL = 0.05
2472
+
2473
+ # Network/IPC timeouts.
2474
+ _WORKER_ACCEPT_TIMEOUT = 60.0 # seconds for each worker's connect to the master server
2475
+ _WORKER_JOIN_TIMEOUT = 10.0 # seconds master waits for a worker process to exit after `fin`
2476
+ _STATUS_PING_TIMEOUT = 2.0 # seconds for a status ping; a daemon busy with a run isn't in accept
2477
+ _CONTROL_CMD_TIMEOUT = 5.0 # seconds to read a control command before dropping the conn (anti-slowloris)
2478
+ _PROGRESS_THROTTLE_SEC = 0.1 # 10 frames/s; the final frame is force-flushed anyway
2479
+
2480
+ # Daemon-spawn orchestration (only in `_ensure_and_run` / client side).
2481
+ _DAEMON_BOOT_TIMEOUT = 120.0 # upper bound waiting for the spawned daemon to answer status
2482
+ _DAEMON_BACKOFF_AFTER_SPAWN = 0.3 # pause between a failed connect and the next attempt
2483
+ _DAEMON_BACKOFF_AFTER_STALE = 0.5 # pause between a {stale} reply and the connect to the fresh daemon
2484
+
2485
+
2486
def _await_stable_mtime() -> float:
    """Block until max(mtime) has stayed unchanged for `_WATCH_DEBOUNCE` seconds, then return it.

    Every observed change restarts the quiet window, which protects against rebooting
    mid-batch when an agent makes N consecutive edits."""
    latest = _max_source_mtime()
    settle_at = time.monotonic() + _WATCH_DEBOUNCE
    while time.monotonic() < settle_at:
        time.sleep(_DEBOUNCE_POLL_INTERVAL)
        observed = _max_source_mtime()
        if observed == latest:
            continue
        # Another edit landed: remember it and start a fresh quiet window.
        latest = observed
        settle_at = time.monotonic() + _WATCH_DEBOUNCE
    return latest
2498
+
2499
+
2500
def _staging_promote(workers: int, start_method: str, address: str, ttl: float) -> bool:
    """Build the successor on the staging socket, await ready, then softly shut down
    the old one (after its current run) and rebind the successor to canonical.

    Returns True when the canonical daemon is fresh afterwards — either it already was (the
    client raced us) or the successor was promoted. Returns False otherwise. Broken edit →
    successor doesn't collect → the current daemon is left untouched. On every failure
    branch, including a failed final promote, the staging successor is asked to shut down
    and its socket/pidfile are removed, so no orphan daemon is left behind on the staging
    address."""
    staging = address + ".staging"

    def discard_staging() -> None:
        # Best-effort teardown: ask the successor to exit, then drop its on-disk leftovers.
        _shutdown_daemon(staging)
        Path(staging).unlink(missing_ok=True)
        _remove_pid(staging)

    Path(staging).unlink(missing_ok=True)
    _remove_pid(staging)
    with _respawn_lock(address):
        st = _status(address)
        if st is not None and st.get("ready") and not st.get("stale", True):
            return True  # already fresh (the client raced us) — nothing to pre-warm
        proc = _spawn_daemon(workers, start_method, staging, ttl)
        if not _await_ready(staging, proc, _STAGING_BOOT_TIMEOUT):
            discard_staging()  # best effort: in case it came up but too late
            return False
        _shutdown_daemon(address)  # blocks until the current run finishes — we don't tear it
        if not _await_socket_gone(address, 30.0):
            # Old daemon never released the canonical socket (stuck in a very long run). Abort
            # rather than bind over a live socket; shut the staging successor so it isn't orphaned.
            _log("watcher", "old daemon didn't release canonical socket — aborting promote")
            discard_staging()
            return False
        # old's finally released the canonical socket → the successor can bind
        if _promote(staging, address):
            return True
        # The successor did not confirm the rebind. If it is still on the staging socket it
        # would sit there unused, so tear it down like the other failure branches; if it did
        # move to canonical, the staging connect simply fails and this is a no-op.
        _log("watcher", "successor did not confirm promote — discarding staging daemon")
        discard_staging()
        return False
2528
+
2529
+
2530
def _spawn_watcher(workers: int, start_method: str, address: str, ttl: float, cwd: str | None = None) -> None:
    """Start the watcher (`--watch`) as a detached process in its own session, with
    stdout/stderr appended to a per-address watcher log.

    `cwd` controls where the watcher (and, via `_staging_promote → _spawn_daemon`, the
    staging daemons it spawns) runs pytest collection. Default `None` = inherit from the
    caller; for external users that's the project root (where they invoked `pytest-fast`).
    pytest-fast's own tests pass `cwd=tmp_project` explicitly, otherwise staging-spawn under
    self-test would collect itself — infinite recursion."""
    # Per-worktree log (derived from address): otherwise watchers from different worktrees
    # would write into the same file.
    log_name = address.removesuffix(".sock") + "-watcher.log"
    watcher_log = Path(log_name)
    argv = list(_self_invocation())
    argv += [
        "--watch",
        "--address",
        address,
        "--ttl",
        str(ttl),
        "--workers",
        str(workers),
        "--start-method",
        start_method,
    ]
    _append_restart_marker(watcher_log)
    with watcher_log.open("a") as sink:
        subprocess.Popen(
            argv,
            stdout=sink,
            stderr=subprocess.STDOUT,
            start_new_session=True,
            env=_subprocess_env(),
            cwd=cwd,
        )
    print(f"[pytest-fast] starting source watcher (pre-warm; log {watcher_log})", file=sys.stderr)
2564
+
2565
+
2566
+ def _ensure_watcher(workers: int, start_method: str, address: str, ttl: float) -> None:
2567
+ """Bring up the watcher if it's not already running (single-instance via watcher
2568
+ flock). Spawn is idempotent: a redundant watcher exits on its own when it can't
2569
+ take the lock."""
2570
+ with Path(address + ".watcher.lock").open("w") as probe:
2571
+ try:
2572
+ fcntl.flock(probe, fcntl.LOCK_EX | fcntl.LOCK_NB)
2573
+ except OSError:
2574
+ return # lock taken → a watcher is already alive
2575
+ fcntl.flock(probe, fcntl.LOCK_UN) # free → release and spawn the real one
2576
+ _spawn_watcher(workers, start_method, address, ttl)
2577
+
2578
+
2579
def _watch(workers: int, start_method: str, address: str, ttl: float) -> int:
    """Resident watcher: poll mtime → debounce → staging-promote the daemon.

    Single instance via a non-blocking flock on `<address>.watcher.lock`; a second watcher
    logs and returns 0 at once. The loop exits (returning 0) once the daemon has been gone
    for longer than `_WATCH_GONE_GRACE` — the watcher is NOT a keep-alive, its lifetime
    follows the daemon's idle-ttl. The lock file is removed when the loop exits."""
    lock_path = address + ".watcher.lock"
    with Path(lock_path).open("w") as lock_handle:
        try:
            fcntl.flock(lock_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            _log("watcher", "another watcher already holds the lock — exiting")
            return 0
        _log(
            "watcher",
            f"up; pre-warming {address} on source change (poll {_WATCH_POLL}s, debounce {_WATCH_DEBOUNCE}s)",
        )
        warmed_mtime = _max_source_mtime()  # newest source state a promoted daemon has seen
        attempted_mtime = warmed_mtime  # newest source state we already tried to pre-warm
        missing_since: float | None = None
        try:
            while True:
                time.sleep(_WATCH_POLL)
                if not _daemon_alive(address):
                    now = time.monotonic()
                    if missing_since is None:
                        missing_since = now
                    elif now - missing_since > _WATCH_GONE_GRACE:
                        _log("watcher", "daemon gone (idle-ttl) — exiting")
                        return 0
                    continue
                missing_since = None
                current = _max_source_mtime()
                if current <= warmed_mtime or current == attempted_mtime:
                    continue  # no new edits (or we already tried exactly this state)
                settled = _await_stable_mtime()
                if settled <= warmed_mtime:
                    continue  # edits rolled back
                attempted_mtime = settled
                _log("watcher", "source change settled — pre-warming successor…")
                promoted = _staging_promote(workers, start_method, address, ttl)
                if promoted:
                    warmed_mtime = settled
                    _log("watcher", "promoted fresh warm daemon")
                else:
                    _log("watcher", "successor did not collect (broken edit?) — kept current daemon")
        finally:
            Path(lock_path).unlink(missing_ok=True)
2623
+
2624
+
2625
def main(argv: list[str]) -> int:
    """Parse *argv* and dispatch to the matching pytest-fast mode.

    Modes, checked in this order:
      * ``--print-inferred-workers``: print the resolved worker count, return 0.
      * ``--watch``: run the resident source watcher (address required).
      * ``--serve``: run the resident daemon (address required).
      * an address given (flag or ``$PYTEST_FAST_ADDRESS``): ensure a daemon and run.
      * otherwise: local single-process run.

    Args:
        argv: command-line arguments without the program name.

    Returns:
        The exit code of the selected mode.

    Raises:
        SystemExit: raised by argparse on ``--help``, on an invalid command line,
            on an invalid worker count, or when ``--watch`` / ``--serve`` is used
            without an address.
    """
    cli = argparse.ArgumentParser(description="pytest-fast: resident forkserver test accelerator")
    add = cli.add_argument
    add("--workers", type=int, default=None, help="worker count (or $PYTEST_FAST_WORKERS; default: performance cores)")
    add(
        "--print-inferred-workers",
        action="store_true",
        help="print the resolved worker count (honoring --workers / $PYTEST_FAST_WORKERS / "
        "performance-core auto-detect) and exit — so external tooling can size a per-worker "
        "pool to match the run without importing pytest-fast internals",
    )
    add("--start-method", choices=["spawn", "forkserver", "fork"], default="forkserver")
    add("--address", help="unix socket of the resident daemon (or $PYTEST_FAST_ADDRESS)")
    add("--ttl", type=float, default=None, help="serve/ensure: idle seconds before self-shutdown (or $PYTEST_FAST_TTL)")
    add("--serve", action="store_true", help="be the resident daemon (needs --address)")
    add("--watch", action="store_true", help="(internal) be the resident source watcher (needs --address)")
    add(
        "--with-watcher",
        action="store_true",
        help="ensure a background source watcher pre-warms the daemon on every src/tests change",
    )
    add("--runs", type=int, default=1, help="local single-process mode: number of in-process runs")
    add("--dump", help="local mode: write {nodeid: outcome} JSON (for the outcome-diff harness)")
    add(
        "--full-report",
        action="store_true",
        help="ship full per-phase reports → a real --durations table in the summary (heavier bus)",
    )
    add(
        "--detailed",
        action="store_true",
        help="add the extended parallelism block to the summary (eff%%, CPU vs I/O, lost-time "
        "breakdown, per-worker spread, the wall-bounding test)",
    )
    add(
        "--bench",
        nargs="?",
        const=2,
        default=0,
        type=int,
        metavar="N",
        help="run the suite N times (default 2; the first is dropped as warmup) and print a "
        "deterministic bottleneck report instead of the run summary — shared-setup clusters, slowest "
        "CPU/IO calls, the wall ceiling — what to optimize to go faster. More runs → steadier ranking",
    )
    opts = cli.parse_args(argv)

    try:
        workers = _resolve_workers(opts.workers)
    except ValueError as exc:
        cli.error(str(exc))  # exits 2 — never proceeds with an invalid count

    if opts.print_inferred_workers:
        print(workers)
        return 0

    ttl = _resolve_ttl(opts.ttl)
    address = opts.address or os.environ.get("PYTEST_FAST_ADDRESS")

    # Resident modes: --watch takes precedence over --serve, and both need an address.
    resident_flag = "--watch" if opts.watch else "--serve" if opts.serve else None
    if resident_flag is not None:
        if not address:
            cli.error(f"{resident_flag} requires --address")
        if opts.watch:
            return _watch(workers, opts.start_method, address, ttl)
        return Daemon(num_workers=workers, start_method=opts.start_method).serve(address, ttl)

    # Reporting switches shared by the client (ensure+run) and local modes.
    report_opts = {"full_report": opts.full_report, "detailed": opts.detailed, "bench": opts.bench}
    if address:
        return _ensure_and_run(
            workers,
            opts.start_method,
            address,
            ttl,
            with_watcher=opts.with_watcher,
            **report_opts,
        )

    local = Daemon(num_workers=workers, start_method=opts.start_method, dump_path=opts.dump)
    return local.run(opts.runs, **report_opts)
2708
+
2709
+
2710
def main_cli() -> int:
    """Console-script entry point for the `pytest-fast` command.

    Forwards the process arguments (program name stripped) to `main()` and returns
    its exit code, so the entry point never has to call `main(argv=None)`.
    """
    cli_args = sys.argv[1:]
    return main(cli_args)
2714
+
2715
+
2716
# ── forkserver-preload trigger (AT THE BOTTOM of the file — see rationale near `_collect`) ──
#
# The forkserver preloads this package via `__import__("pytest_fast")`, which executes
# the WHOLE __init__.py and reaches this block last. Every public/private symbol is
# therefore defined before collection starts, so test files imported during collect can
# do `from pytest_fast import _env_fingerprint`, `Daemon`, `_max_source_mtime`, ...
# and find them all.
#
# If the trigger sat higher up (as in the original single-file PoC under bin/), those
# test-file imports would receive the partially-loaded module from the import cache and
# raise ImportError for every symbol declared below the trigger — pytest swallows those
# ImportErrors during collect and just skips the file entirely.

if __name__ == "pytest_fast" and os.environ.get("_PYTEST_FAST_COLLECT"):
    # forkserver/multiprocessing swallows ImportError from `__import__(preload)` (see
    # `Lib/multiprocessing/forkserver.py:main`). Without this guard, any other failure
    # inside `_collect()` would leave `collected_config` as None and workers would die
    # on the assert with a mysterious "config is None". So EVERYTHING is caught here:
    # the traceback goes to stderr (lands in daemon.log) and the exception is re-raised
    # so the forkserver sees that preload failed.
    import traceback as _tb

    try:
        _collect()
    except BaseException:
        sys.stderr.write("[pytest-fast] FATAL: _collect() raised in forkserver preload:\n")
        _tb.print_exc(file=sys.stderr)
        sys.stderr.flush()
        raise
2745
+
2746
+
2747
# Script entry (`python -m pytest_fast …`): run main() and exit with its return code.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))