dispatch-kit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ """dispatch-kit — a pure, fail-closed library for gating expensive remote/external work.
2
+
3
+ Decide whether an expensive operation may run, how much it will cost, and where it should run —
4
+ the same machinery for a cloud GPU job (Cloud Run L4 over a tailnet) and a paid LLM/SDK API call:
5
+
6
+ from dispatch_kit import BudgetCap, BudgetState, admits, estimate_cost # the hard $ cap
7
+ from dispatch_kit import select_backend, BackendKind, ToolRequirements # where it runs
8
+ from dispatch_kit import SecretRef, ExternalEndpoint, log_egress # opt-in API egress
9
+ from dispatch_kit import Approval # the approval audit fact
10
+
11
+ Pure domain — stdlib only, no I/O, no provider SDK code; every check is fail-closed (default budget
12
+ ``0`` = paid work off; SDK egress opt-in only; a missing key/over-budget refuses). The transport
13
+ auth (who) is a separate concern — pair this with ``tailnet-guard``. The consuming app keeps its job
14
+ entity, persistence, and executor; this owns the policy.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from .approval import Approval, ApprovalOutcome
20
+ from .budget import (
21
+ AdmissionDecision,
22
+ BudgetCap,
23
+ BudgetState,
24
+ BudgetWindow,
25
+ CostRates,
26
+ admits,
27
+ estimate_cost,
28
+ )
29
+ from .dispatch import (
30
+ DispatchError,
31
+ JobStore,
32
+ Lease,
33
+ Transport,
34
+ WorkerExecutor,
35
+ is_lease_stale,
36
+ should_give_up,
37
+ )
38
+ from .egress import (
39
+ EnvLookup,
40
+ ExternalEndpoint,
41
+ SecretMissingError,
42
+ SecretRef,
43
+ log_egress,
44
+ )
45
+ from .estimate import CostEstimate, HostCapabilities, vram_fits
46
+ from .routing import (
47
+ BackendCapabilities,
48
+ BackendKind,
49
+ NodeIdentity,
50
+ NoEligibleBackendError,
51
+ Routable,
52
+ ToolRequirements,
53
+ select_backend,
54
+ )
55
+
56
+ __all__ = [
57
+ "AdmissionDecision",
58
+ "Approval",
59
+ "ApprovalOutcome",
60
+ "BackendCapabilities",
61
+ "BackendKind",
62
+ "BudgetCap",
63
+ "BudgetState",
64
+ "BudgetWindow",
65
+ "CostEstimate",
66
+ "CostRates",
67
+ "DispatchError",
68
+ "EnvLookup",
69
+ "ExternalEndpoint",
70
+ "HostCapabilities",
71
+ "JobStore",
72
+ "Lease",
73
+ "NoEligibleBackendError",
74
+ "NodeIdentity",
75
+ "Routable",
76
+ "SecretMissingError",
77
+ "SecretRef",
78
+ "ToolRequirements",
79
+ "Transport",
80
+ "WorkerExecutor",
81
+ "admits",
82
+ "estimate_cost",
83
+ "is_lease_stale",
84
+ "log_egress",
85
+ "select_backend",
86
+ "should_give_up",
87
+ "vram_fits",
88
+ ]
@@ -0,0 +1,44 @@
1
+ """Approval — the human decision + audit record that gates an expensive (non-local) job.
2
+
3
+ A gated job leaves "awaiting approval" only by a human decision; recording WHO decided, WHEN, and
4
+ WHY keeps a paid run attributable (append-only in spirit). The state machine — which states are
5
+ gated and the transitions between them — lives in the consuming app's job entity; this is the
6
+ shared audit fact and the binary outcome both apps record.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import enum
12
+ from dataclasses import dataclass
13
+
14
+
15
+ class ApprovalOutcome(enum.StrEnum):
16
+ """The decision a human made on a gated job."""
17
+
18
+ APPROVED = "approved"
19
+ REJECTED = "rejected"
20
+
21
+
22
+ @dataclass(frozen=True, slots=True)
23
+ class Approval:
24
+ """Who approved or rejected a gated job, when, and why — the audit fact.
25
+
26
+ ``author``/``reason``/``decided_at`` are all required: a decision is never anonymous,
27
+ unexplained, or untimestamped. ``decided_at`` is ISO-8601, server-assigned at the decision.
28
+ ``outcome`` defaults to APPROVED so the common "approve" path constructs positionally; a
29
+ rejection passes ``ApprovalOutcome.REJECTED``.
30
+ """
31
+
32
+ author: str
33
+ reason: str
34
+ decided_at: str
35
+ outcome: ApprovalOutcome = ApprovalOutcome.APPROVED
36
+
37
+ def __post_init__(self) -> None:
38
+ for name, value in (
39
+ ("author", self.author),
40
+ ("reason", self.reason),
41
+ ("decided_at", self.decided_at),
42
+ ):
43
+ if not value:
44
+ raise ValueError(f"an approval needs a non-empty {name}")
dispatch_kit/budget.py ADDED
@@ -0,0 +1,156 @@
1
+ """Budget ledger — a hard, fail-closed spend cap that gates expensive (non-local) work.
2
+
3
+ Cloud GPU jobs and paid API calls cost real money, so a **hard** spend cap is a circuit breaker
4
+ against errant testing or an agent flooding dispatch. The cap is enforced by **reserving** a job's
5
+ upper-bound cost the moment it is approved/queued, against BOTH a per-run window and a per-month
6
+ window: a job is admitted only if ``reserved + spent + its estimate`` stays within both caps. A
7
+ burst of approvals reserves cumulatively, so once a cap is reached the next job is refused — you
8
+ cannot even *queue* past it. The default cap is **zero**, so paid work stays off until you
9
+ deliberately set a budget.
10
+
11
+ This is pure domain (the rule); the persisted ledger rows + the per-backend rates live in the
12
+ consuming app's storage layer. Money is a :class:`~decimal.Decimal` (exact), never a float — a
13
+ fraction of a cent per GPU-second compounds, and a cost guard that drifts is not a guard.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import enum
19
+ from dataclasses import dataclass
20
+ from decimal import Decimal
21
+
22
+
23
+ class BudgetWindow(enum.StrEnum):
24
+ """The windows a reservation is checked against — BOTH must have room (fail closed).
25
+
26
+ ``RUN`` bounds a single batch/session (stops one runaway loop); ``MONTH`` bounds slow
27
+ accumulation across sessions. A job must fit under both to be admitted.
28
+ """
29
+
30
+ RUN = "run"
31
+ MONTH = "month"
32
+
33
+
34
+ @dataclass(frozen=True, slots=True)
35
+ class BudgetCap:
36
+ """The hard ceiling per window, in USD. Default ``0`` means paid work is OFF (fail closed)."""
37
+
38
+ run_usd: Decimal = Decimal(0)
39
+ month_usd: Decimal = Decimal(0)
40
+
41
+ def __post_init__(self) -> None:
42
+ if self.run_usd < 0 or self.month_usd < 0:
43
+ raise ValueError("a budget cap cannot be negative")
44
+
45
+ def for_window(self, window: BudgetWindow) -> Decimal:
46
+ """The cap for ``window`` (the single source the admission rule reads)."""
47
+ return self.run_usd if window is BudgetWindow.RUN else self.month_usd
48
+
49
+
50
+ @dataclass(frozen=True, slots=True)
51
+ class BudgetState:
52
+ """Committed USD for a window: ``reserved`` (approved, not yet run) + ``spent`` (reconciled).
53
+
54
+ Reserving on approval and only reconciling ``reserved -> spent`` on completion is what makes a
55
+ burst of approvals count immediately — the cap sees the whole queue's cost, not just what ran.
56
+ """
57
+
58
+ reserved_usd: Decimal = Decimal(0)
59
+ spent_usd: Decimal = Decimal(0)
60
+
61
+ def __post_init__(self) -> None:
62
+ if self.reserved_usd < 0 or self.spent_usd < 0:
63
+ raise ValueError("budget reserved/spent cannot be negative")
64
+
65
+ def committed(self) -> Decimal:
66
+ """The total already committed against the cap (reserved + spent)."""
67
+ return self.reserved_usd + self.spent_usd
68
+
69
+
70
+ @dataclass(frozen=True, slots=True)
71
+ class CostRates:
72
+ """Per-second USD rates for a backend — plain config data (no provider SDK code here).
73
+
74
+ ``idle_tail_s`` is the post-request warm tail you still pay (a GPU Cloud Run instance bills for
75
+ ~10 min after the last request before scaling to zero); folding it into the estimate keeps the
76
+ reservation an upper bound. A token-priced API can model itself with ``gpu_usd_per_s`` = 0 and a
77
+ flat per-call cost via :func:`estimate_cost` inputs, or extend this with its own rate object.
78
+ """
79
+
80
+ gpu_usd_per_s: Decimal
81
+ vcpu_usd_per_s: Decimal
82
+ gib_usd_per_s: Decimal
83
+ idle_tail_s: Decimal = Decimal(0)
84
+
85
+ def __post_init__(self) -> None:
86
+ for name, value in (
87
+ ("gpu_usd_per_s", self.gpu_usd_per_s),
88
+ ("vcpu_usd_per_s", self.vcpu_usd_per_s),
89
+ ("gib_usd_per_s", self.gib_usd_per_s),
90
+ ("idle_tail_s", self.idle_tail_s),
91
+ ):
92
+ if value < 0:
93
+ raise ValueError(f"{name} cannot be negative")
94
+
95
+
96
+ @dataclass(frozen=True, slots=True)
97
+ class AdmissionDecision:
98
+ """Whether a job fits the budget, and — when it does not — which window refused and why.
99
+
100
+ ``refused_window`` is the first window (run, then month) that lacked room; ``None`` on admit.
101
+ The reason is human-facing, so the refusal is legible at approval, never a silent drop.
102
+ """
103
+
104
+ admitted: bool
105
+ refused_window: BudgetWindow | None
106
+ reason: str
107
+
108
+
109
def admits(
    estimate_usd: Decimal,
    run_state: BudgetState,
    month_state: BudgetState,
    cap: BudgetCap,
) -> AdmissionDecision:
    """Decide whether a job with upper-bound cost ``estimate_usd`` may be admitted. Fail-closed.

    The job is admitted only if ``reserved + spent + estimate <= cap`` holds for the run window
    AND the month window. The default cap (zero) admits nothing that costs money, so paid work
    stays off until a budget is set; a zero-cost job within a zero cap is fine.

    Args:
        estimate_usd: the job's upper-bound cost in USD.
        run_state: what is already committed in the run window.
        month_state: what is already committed in the month window.
        cap: the hard ceilings for both windows.

    Returns:
        An :class:`AdmissionDecision`. A NaN or negative estimate is refused with
        ``refused_window=None`` (its cost cannot be reasoned about); a cap refusal names the
        first window (run, then month) that lacked room.
    """
    # A NaN Decimal makes every ordering comparison below raise InvalidOperation, so refuse it
    # explicitly: the gate should answer "no", not crash the approval path.
    if isinstance(estimate_usd, Decimal) and estimate_usd.is_nan():
        return AdmissionDecision(False, None, "a cost estimate cannot be NaN")
    if estimate_usd < 0:
        return AdmissionDecision(False, None, "a cost estimate cannot be negative")
    for window, state in ((BudgetWindow.RUN, run_state), (BudgetWindow.MONTH, month_state)):
        cap_window = cap.for_window(window)
        committed = state.committed()
        if committed + estimate_usd > cap_window:
            return AdmissionDecision(
                False,
                window,
                (
                    f"would exceed the {window.value} budget cap (${cap_window}): "
                    f"${committed} committed + ${estimate_usd} estimate"
                ),
            )
    return AdmissionDecision(True, None, "within budget")
135
+
136
+
137
def estimate_cost(
    rates: CostRates,
    *,
    max_runtime_s: int,
    vcpus: int,
    memory_gib: int,
) -> Decimal:
    """Return the upper-bound USD cost of one job. Pure.

    The cost is ``(max_runtime_s + rates.idle_tail_s)`` multiplied by the combined per-second
    price of the GPU, ``vcpus`` vCPUs and ``memory_gib`` GiB of memory. It is based on the
    caller's declared MAX runtime plus the warm idle tail, so it deliberately over-estimates;
    the ledger later reconciles ``reserved -> spent`` from the runtime the backend reports.
    Over-reserving is the safe direction for a hard cap.

    Raises:
        ValueError: if the runtime, vCPU count or memory size is negative.
    """
    if any(quantity < 0 for quantity in (max_runtime_s, vcpus, memory_gib)):
        raise ValueError("runtime/vcpus/memory cannot be negative")

    # Price of one billable second for this job's resource shape.
    vcpu_rate = rates.vcpu_usd_per_s * vcpus
    memory_rate = rates.gib_usd_per_s * memory_gib
    rate_per_second = rates.gpu_usd_per_s + vcpu_rate + memory_rate

    # Seconds that are billed: the declared maximum plus the post-request warm tail.
    seconds_billed = Decimal(max_runtime_s) + rates.idle_tail_s
    return seconds_billed * rate_per_second
@@ -0,0 +1,102 @@
1
+ """Dispatch contract for expensive async jobs that run anywhere on the tailnet.
2
+
3
+ Both a PULL worker (it polls: claim a job, run it, complete it) and a PUSH orchestrator (it posts a
4
+ job to a worker and waits) need the SAME guarantees: a job is CLAIMED atomically so it runs exactly
5
+ once, a stale result is REJECTED, and a worker that dies mid-job has its lease RECOVERED. This
6
+ module is the shared CONTRACT — the pure lease rules + the store/transport/worker protocols. The
7
+ store (SQLite vs SQLAlchemy), the transport (pull vs push), and the payload (a transcribe request vs
8
+ a tool invocation) are per-app ADAPTERS, so two apps converge on one model WITHOUT a shared DB.
9
+
10
+ Auth (who may run a job) is :mod:`tailnet_guard`; policy (afford / route / approve) is the rest of
11
+ :mod:`dispatch_kit`. This is only the run-it-once-recoverably engine on top of those.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Sequence
17
+ from dataclasses import dataclass
18
+ from typing import Any, Protocol
19
+
20
+
21
class DispatchError(RuntimeError):
    """Signals a broken dispatch invariant.

    Examples are a stale ``complete`` for a job that is no longer leased, or a job that was
    recovered more often than its attempt cap allows.
    """
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class Lease:
27
+ """A claim on a job: when it was leased and how many times it has been recovered.
28
+
29
+ ``leased_at`` is epoch seconds, stamped at the atomic claim. ``attempts`` counts recoveries (a
30
+ worker that died and had its lease reclaimed); a job past ``max_attempts`` fails rather than
31
+ re-leasing forever.
32
+ """
33
+
34
+ job_id: str
35
+ leased_at: float
36
+ attempts: int = 0
37
+
38
+
39
def is_lease_stale(leased_at: float, now: float, ttl_seconds: float) -> bool:
    """Return True once a lease is at least ``ttl_seconds`` old and may be reclaimed.

    This is the one place the staleness rule is defined, so every store's ``recover_stale``
    reclaims "leased but silent" jobs on the same schedule. Choose ``ttl_seconds`` comfortably
    above the longest healthy run, otherwise a slow job is taken away from a live worker.
    """
    lease_age = now - leased_at
    return lease_age >= ttl_seconds
47
+
48
+
49
def should_give_up(attempts: int, max_attempts: int) -> bool:
    """Return True when a job has used up its recoveries and must FAIL instead of re-leasing.

    This fails closed against a poison job that crashes every worker it lands on: once
    ``attempts`` reaches ``max_attempts`` the job is marked failed (with its recorded error)
    rather than being re-queued indefinitely.
    """
    attempts_exhausted = attempts >= max_attempts
    return attempts_exhausted
56
+
57
+
58
class JobStore(Protocol):
    """The authoritative job store: the single place where claim/complete is decided, ATOMICALLY.

    Contract for implementers:

    * ``claim`` must be atomic (one transaction / compare-and-set). Two concurrent workers can
      never both claim the same job; that single claim is the run-exactly-once guarantee.
    * ``complete`` must REJECT a result for a job that is not currently leased/running (a stale
      resubmit after the lease was recovered) by returning ``False``, so a re-run cannot clobber
      a fresh result.
    * ``recover_stale`` re-leases jobs whose lease outlived the TTL (see :func:`is_lease_stale`),
      bumps their attempt count, and fails a job past ``max_attempts`` (see
      :func:`should_give_up`).

    The payload type belongs to the app (a transcribe request, a tool invocation, ...).
    """

    def claim(self, lanes: Sequence[str]) -> tuple[str, Any] | None:
        """Atomically claim ONE runnable job in ``lanes``; return ``(job_id, payload)`` or ``None``."""
        ...

    def complete(self, job_id: str, result: Any) -> bool:
        """Apply ``result`` only if the job is leased/running; return ``False`` when stale."""
        ...

    def recover_stale(self, *, now: float, ttl_seconds: float, max_attempts: int) -> Sequence[str]:
        """Re-lease jobs with a stale lease and return their ids; fail those past the cap."""
        ...
78
+
79
+
80
class Transport(Protocol):
    """The hop that carries a job to where it runs — the only part that differs push vs pull.

    In PULL mode the worker calls the store's claim/complete directly, so no Transport is
    involved. In PUSH mode the orchestrator submits the encoded job to a worker endpoint and
    receives the encoded result. In both modes the worker's auth (``tailnet_guard``) and the
    policy are unchanged; this is purely the byte-moving seam, so an app can swap pull for push
    without touching its job model.
    """

    def submit(self, envelope: dict[str, Any], token: str) -> dict[str, Any]:
        """Send an encoded job plus its capability token to a worker; return the response."""
        ...
91
+
92
+
93
class WorkerExecutor(Protocol):
    """The worker-side run contract: decode (integrity-checked) -> run -> encode.

    The decode step MUST verify the content hash of every artifact, and the run happens only
    AFTER the auth guard has passed (guard-before-decode), so a tampered or unauthorized job
    never reaches the executor. The return value is the encoded result that the store or
    transport carries back.
    """

    def execute(self, payload: Any) -> Any:
        """Run a payload that is already authorized and integrity-checked; return its result."""
        ...
dispatch_kit/egress.py ADDED
@@ -0,0 +1,108 @@
1
+ """External-API egress — the ONE sanctioned way data leaves the trust boundary, opt-in + logged.
2
+
3
+ A hosted SDK or LLM API (Rowan, Gemini, Claude, ...) is public TLS + an API key — it intentionally
4
+ breaks the tailnet-only / local-only default. This module gives the three non-negotiable properties
5
+ WITHOUT each caller re-rolling them:
6
+
7
+ * the key is a SECRET REFERENCE (an env-var name), resolved at call time, never inlined/logged;
8
+ * a missing key fails closed (raises) — never an unauthenticated call;
9
+ * every dispatch logs the egress (that data left the boundary, to which host) — never the key.
10
+
11
+ The actual HTTP request stays in the consuming app (it knows its payload shape); this module owns
12
+ the credential discipline + the egress audit so every external call is consistent and never the
13
+ default. Pair it with :func:`dispatch_kit.routing.select_backend` (SDK is opt-in) and the budget.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ import os
20
+ from collections.abc import Callable
21
+ from dataclasses import dataclass
22
+ from urllib.parse import urlsplit
23
+
24
+ _LOGGER = logging.getLogger("dispatch_kit.egress")
25
+
26
+ #: A lookup from an env-var name to its value (or ``None``) — injectable so tests need no real env.
27
+ EnvLookup = Callable[[str], "str | None"]
28
+
29
+
30
class SecretMissingError(RuntimeError):
    """Raised when the referenced secret (an API key) is not set in the environment.

    Fail closed: a missing key stops the call outright instead of letting it go out
    unauthenticated. The opt-in egress exception is valid only with an explicit, configured
    credential.
    """
36
+
37
+
38
+ @dataclass(frozen=True, slots=True)
39
+ class SecretRef:
40
+ """A reference to a secret by ENV VAR NAME — never the value.
41
+
42
+ The credential is sourced from a secret at call time (:meth:`resolve`) and cannot leak into
43
+ source or a serialized config: a config object holds only the *name* of the env var.
44
+ """
45
+
46
+ env_var: str
47
+
48
+ def __post_init__(self) -> None:
49
+ if not self.env_var:
50
+ raise ValueError("a SecretRef needs a non-empty env var name")
51
+
52
+ def resolve(self, env: EnvLookup = os.environ.get) -> str:
53
+ """Read the secret from the environment now; raise :class:`SecretMissingError` if unset."""
54
+ value = env(self.env_var)
55
+ if not value:
56
+ raise SecretMissingError(
57
+ f"secret env var {self.env_var!r} is unset — refusing an unauthenticated call"
58
+ )
59
+ return value
60
+
61
+
62
+ @dataclass(frozen=True, slots=True)
63
+ class ExternalEndpoint:
64
+ """A hosted external API the app may call (opt-in egress): an https base URL + its key ref.
65
+
66
+ Construction refuses a non-``https://`` URL (public egress must be TLS). ``name`` is a stable
67
+ display label used in the egress audit; ``secret`` references the key by env-var name only.
68
+ """
69
+
70
+ name: str
71
+ base_url: str
72
+ secret: SecretRef
73
+
74
+ def __post_init__(self) -> None:
75
+ if not self.name:
76
+ raise ValueError("an external endpoint needs a non-empty name")
77
+ if not self.base_url.startswith("https://"):
78
+ raise ValueError(
79
+ f"external endpoint {self.name!r} must use https:// (public TLS); got "
80
+ f"{self.base_url!r}"
81
+ )
82
+
83
+ def host(self) -> str:
84
+ """The endpoint host, for the egress audit (never the full URL with query/credentials)."""
85
+ return urlsplit(self.base_url).hostname or self.base_url
86
+
87
+ def bearer(self, env: EnvLookup = os.environ.get) -> str:
88
+ """Resolve the key now and return the ``Authorization`` header value (``Bearer <key>``).
89
+
90
+ Raises :class:`SecretMissingError` if the key is unset, so a request is never built without
91
+ a credential. The returned string contains the key — it is for the header only, never a log.
92
+ """
93
+ return f"Bearer {self.secret.resolve(env)}"
94
+
95
+
96
def log_egress(endpoint: ExternalEndpoint, *, detail: str = "") -> None:
    """Record in the audit log that data is about to leave the trust boundary.

    Call this right before the external request. Only the endpoint's name and host are logged,
    never the secret. ``detail`` is an optional non-sensitive note (for example the tool or model
    name) that is appended in square brackets when given.
    """
    note = ""
    if detail:
        note = f" [{detail}]"
    _LOGGER.warning(
        "egress: %r -> host %r (data leaves the trust boundary)%s",
        endpoint.name,
        endpoint.host(),
        note,
    )
@@ -0,0 +1,58 @@
1
+ """Pre-dispatch cost + feasibility estimates — refuse a job that cannot fit BEFORE it runs.
2
+
3
+ A backend declares how much compute it has (:class:`HostCapabilities`); an adapter declares what a
4
+ job will cost and need (:class:`CostEstimate`). The shared :func:`vram_fits` rule — "no GPU means a
5
+ GPU job is infeasible" — lives in exactly one place so the local cost gate and the backend router
6
+ agree. All pure: no I/O, no side effects.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+
13
+
14
def vram_fits(available_gb: float | None, required_gb: float | None) -> bool:
    """Return whether a job's VRAM requirement fits the available VRAM. Pure.

    ``required_gb`` of ``None`` marks a CPU job, which always fits. A GPU job fits only when a
    GPU is present (``available_gb`` is not ``None``) and offers at least the required amount.
    The host cost gate (:meth:`HostCapabilities.can_fit_vram`) and the backend capability match
    both call this, so the fail-closed rule "no GPU means a GPU job is infeasible" exists in
    exactly one place.
    """
    if required_gb is None:
        return True
    return available_gb is not None and available_gb >= required_gb
27
+
28
+
29
+ @dataclass(frozen=True, slots=True)
30
+ class HostCapabilities:
31
+ """A machine's compute budget, supplied per dispatch so the cost gate can fail closed.
32
+
33
+ ``available_vram_gb`` of ``None`` means no GPU is present (a GPU job is then infeasible). A
34
+ CPU-only tool ignores this; a GPU tool compares its ``estimated_vram_gb`` against the budget so
35
+ a too-large job is refused before the executor runs. The default models a no-GPU host so a
36
+ missing budget fails closed, never open.
37
+ """
38
+
39
+ available_vram_gb: float | None = None
40
+
41
+ def can_fit_vram(self, required_gb: float | None) -> bool:
42
+ """True if a job needing ``required_gb`` of VRAM fits here (a CPU job always fits)."""
43
+ return vram_fits(self.available_vram_gb, required_gb)
44
+
45
+
46
+ @dataclass(frozen=True, slots=True)
47
+ class CostEstimate:
48
+ """A pre-dispatch estimate so a too-large/too-long job is refused before it runs.
49
+
50
+ ``feasible_locally`` is the gate result (does it fit the host that would run it); ``reason`` is
51
+ the human-facing rationale carried on a refusal. ``estimated_seconds`` + ``estimated_vram_gb``
52
+ also feed the budget reservation (:func:`dispatch_kit.budget.estimate_cost`).
53
+ """
54
+
55
+ estimated_seconds: float
56
+ estimated_vram_gb: float | None
57
+ feasible_locally: bool
58
+ reason: str
dispatch_kit/py.typed ADDED
File without changes
@@ -0,0 +1,157 @@
1
+ """Backend routing — the PURE preference policy that picks where a job runs.
2
+
3
+ A job carries its :class:`ToolRequirements` (what it needs); a backend declares its
4
+ :class:`BackendCapabilities` (what it can run). :func:`select_backend` is pure: it filters
5
+ to the backends that can satisfy the job, then applies the fixed policy preference —
6
+ **LOCAL -> LAN worker -> CLOUD worker -> SDK** — returning the first match. The SDK (the deliberate
7
+ public-egress exception) is admitted only when the caller explicitly opts in; otherwise it is never
8
+ selected, even if it is the only fit (fail closed on egress).
9
+
10
+ The router never runs anything and holds no executor — it only *chooses*, over any objects that are
11
+ :class:`Routable` (have a ``kind`` and a ``can_satisfy``). The consuming app binds the actual
12
+ executor onto its own backend type; this library stays free of that I/O.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import enum
18
+ from collections.abc import Iterable
19
+ from dataclasses import dataclass
20
+ from typing import Protocol, TypeVar
21
+
22
+ from .estimate import vram_fits
23
+
24
+
25
+ class BackendKind(enum.StrEnum):
26
+ """Where a job's compute runs — sets the router's preference and lands on provenance/audit."""
27
+
28
+ LOCAL = "local"
29
+ """In-process, or a local subprocess/container on the orchestrator host."""
30
+ LAN_WORKER = "lan_worker"
31
+ """A spare LAN desktop running a worker agent, reached over the tailnet."""
32
+ CLOUD_WORKER = "cloud_worker"
33
+ """A cloud GPU (e.g. GCP L4 on Cloud Run) running a worker agent, reached over the tailnet."""
34
+ SDK = "sdk"
35
+ """A third-party hosted SDK / API over public TLS — the one opt-in egress exception."""
36
+
37
+
38
# The fixed policy preference order, most preferred first: LOCAL (cheapest, no egress), then a
# LAN worker, then a cloud worker, and SDK last. The router walks this tuple in order and skips
# SDK unless the caller explicitly opts in. This tuple IS the policy, so changing the preference
# is a one-line edit here rather than a change scattered across the router.
_PREFERENCE_ORDER: tuple[BackendKind, ...] = (
    BackendKind.LOCAL,
    BackendKind.LAN_WORKER,
    BackendKind.CLOUD_WORKER,
    BackendKind.SDK,
)
47
+
48
+
49
+ @dataclass(frozen=True, slots=True)
50
+ class NodeIdentity:
51
+ """The identity of a node a job ran on — pinned for audit + reproducibility.
52
+
53
+ For a local job this is the orchestrator host. For a remote worker it is the worker's pinned
54
+ tailnet identity (its MagicDNS name / tailnet IP), so an audit records not just *that* a job ran
55
+ remotely but *on which authorized worker* — the same identity the trust barrier pins.
56
+ """
57
+
58
+ name: str
59
+ address: str | None = None
60
+
61
+ def __post_init__(self) -> None:
62
+ if not self.name:
63
+ raise ValueError("a NodeIdentity requires a non-empty name")
64
+
65
+
66
+ @dataclass(frozen=True, slots=True)
67
+ class ToolRequirements:
68
+ """What a job needs of a backend. Pure data.
69
+
70
+ ``min_vram_gb`` of ``None`` is a CPU job (any backend's compute fits). ``image`` set means the
71
+ job runs in a pinned container, so a backend must be able to run that image. ``tool_id`` lets a
72
+ backend that ships only certain tools (no image) declare it can run them. The consuming app
73
+ derives these from its own manifest/invocation — this library does not couple to those types.
74
+ """
75
+
76
+ tool_id: str
77
+ min_vram_gb: float | None = None
78
+ image: str | None = None
79
+
80
+
81
+ @dataclass(frozen=True, slots=True)
82
+ class BackendCapabilities:
83
+ """What a backend can run — matched against :class:`ToolRequirements` by the router. Pure data.
84
+
85
+ ``available_vram_gb`` of ``None`` means no GPU (a GPU job cannot run here). ``images`` /
86
+ ``tool_ids`` of ``None`` mean "no restriction" (a general-purpose backend that runs any pinned
87
+ image or any tool); a non-empty set restricts the backend to exactly those.
88
+ """
89
+
90
+ has_gpu: bool = False
91
+ available_vram_gb: float | None = None
92
+ images: frozenset[str] | None = None
93
+ tool_ids: frozenset[str] | None = None
94
+
95
+ def can_satisfy(self, requirements: ToolRequirements) -> bool:
96
+ """True if a backend with these capabilities can run a job with ``requirements``. Pure.
97
+
98
+ VRAM feasibility defers to the shared :func:`dispatch_kit.estimate.vram_fits` rule (the one
99
+ home of "no GPU means a GPU job is infeasible"); the image/tool gate is local.
100
+ """
101
+ if not vram_fits(self.available_vram_gb, requirements.min_vram_gb):
102
+ return False
103
+ if requirements.image is not None and self.images is not None:
104
+ return requirements.image in self.images
105
+ if requirements.image is None and self.tool_ids is not None:
106
+ return requirements.tool_id in self.tool_ids
107
+ return True
108
+
109
+
110
class Routable(Protocol):
    """Anything the router can choose among.

    A routable backend exposes a :class:`BackendKind` and can say whether it is able to run a
    job. The consuming app's executor-bound backend type satisfies this structurally, so
    :func:`select_backend` can choose without this library knowing anything about executors.
    """

    @property
    def kind(self) -> BackendKind:
        """The kind of this backend; it determines the routing preference."""
        ...

    def can_satisfy(self, requirements: ToolRequirements) -> bool:
        """Return whether this backend can run a job with ``requirements``."""
        ...
121
+
122
+
123
class NoEligibleBackendError(RuntimeError):
    """Raised when, under the policy, no registered backend can run the job.

    Fail closed: having nowhere to run is a hard error. It is never turned into a silent local
    fallback that might run out of memory, nor into an SDK call the caller did not opt into.
    """
129
+
130
+
131
+ _R = TypeVar("_R", bound=Routable)
132
+
133
+
134
def select_backend(
    backends: Iterable[_R],
    requirements: ToolRequirements,
    *,
    allow_sdk: bool = False,
) -> _R:
    """Choose the most preferred backend that can run ``requirements`` — PURE policy.

    Kinds are tried in the fixed preference order (LOCAL -> LAN -> CLOUD -> SDK). Within a kind,
    the first backend (in the order given) whose ``can_satisfy`` accepts the job wins. SDK
    backends are looked at ONLY when ``allow_sdk`` is true: the egress exception is opt-in, so by
    default an SDK backend is skipped even when it is the only fit.

    Raises:
        NoEligibleBackendError: if no eligible backend can satisfy the job.
    """
    # Materialize once: the iterable is scanned again for every kind in the preference order.
    pool = list(backends)
    for wanted_kind in _PREFERENCE_ORDER:
        if not allow_sdk and wanted_kind is BackendKind.SDK:
            continue
        chosen = next(
            (
                candidate
                for candidate in pool
                if candidate.kind is wanted_kind and candidate.can_satisfy(requirements)
            ),
            None,
        )
        if chosen is not None:
            return chosen
    raise NoEligibleBackendError(
        f"no eligible backend for tool {requirements.tool_id!r} "
        f"(vram>={requirements.min_vram_gb}, image={requirements.image}, allow_sdk={allow_sdk})"
    )
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.4
2
+ Name: dispatch-kit
3
+ Version: 0.1.0
4
+ Summary: Pure, fail-closed cost-gating for expensive remote/external work: a hard $ budget cap, backend routing (local->LAN->cloud->SDK), and opt-in audited API egress.
5
+ Author-email: Aryan Falahatpisheh <aryanfalahat@gmail.com>
6
+ License: MIT
7
+ Keywords: budget,cost,cloud,gpu,llm,dispatch,egress,approval
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.11
17
+ Description-Content-Type: text/markdown
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=7.0; extra == "dev"
20
+ Requires-Dist: mypy>=1.0; extra == "dev"
21
+ Requires-Dist: black>=23.0; extra == "dev"
22
+ Requires-Dist: flake8>=6.0; extra == "dev"
23
+ Requires-Dist: pylint>=3.0; extra == "dev"
24
+ Requires-Dist: ruff>=0.1; extra == "dev"
25
+
26
+ # dispatch-kit
27
+
28
+ A tiny, **pure, dependency-free** library for gating expensive remote/external work — the same
29
+ machinery for a **cloud GPU job** (a Cloud Run L4 reached over a tailnet) and a **paid LLM/SDK API
30
+ call** (Gemini, Claude, Rowan). It answers three questions, fail-closed:
31
+
32
+ - **Can we afford it?** — a hard, reserve-on-approval **budget cap** (per-run + per-month).
33
+ - **Where should it run?** — a pure **router**: `LOCAL → LAN → CLOUD → SDK`, SDK opt-in only.
34
+ - **Is the external call safe?** — opt-in, audited **API egress** with reference-only secrets.
35
+
36
+ It owns the *policy* (afford / route / approve / egress); your app keeps its job entity,
37
+ persistence, and executor. The transport *auth* (who may talk) is a separate concern — pair this
38
+ with [`tailnet-guard`](https://github.com/falahat/tailnet-guard). Stdlib only; every check is
39
+ fail-closed (default budget `0` = paid work off; SDK never auto-selected; a missing key refuses).
40
+
41
+ ## Use
42
+
43
+ ```python
44
+ from decimal import Decimal
45
+ from dispatch_kit import (
46
+ BudgetCap, BudgetState, CostRates, admits, estimate_cost, # the hard $ cap
47
+ select_backend, BackendKind, ToolRequirements, # the where
48
+ SecretRef, ExternalEndpoint, log_egress, # opt-in API egress
49
+ Approval, # the approval audit fact
50
+ )
51
+
52
+ # 1. Reserve-on-approval: refuse a job that would push past the cap (both windows).
53
+ rates = CostRates(gpu_usd_per_s=Decimal("0.0008"), vcpu_usd_per_s=Decimal("0.00001"),
54
+ gib_usd_per_s=Decimal("0.000002"), idle_tail_s=Decimal(600))
55
+ cost = estimate_cost(rates, max_runtime_s=3600, vcpus=8, memory_gib=32) # an UPPER bound
56
+ decision = admits(cost, run_state, month_state, BudgetCap(run_usd=Decimal(50), month_usd=Decimal(500)))
57
+ if not decision.admitted:
58
+ raise OverBudget(decision.reason) # default cap is $0 — paid work is off until you set one
59
+
60
+ # 2. Pick where it runs — LOCAL first, SDK only if explicitly allowed (pass allow_sdk=True).
61
+ backend = select_backend(my_backends, ToolRequirements(tool_id="cofold", min_vram_gb=24.0))
62
+
63
+ # 3. An LLM/SDK key is a REFERENCE (env var name), resolved at call time, never logged.
64
+ gemini = ExternalEndpoint("gemini", "https://generativelanguage.googleapis.com",
65
+ SecretRef("GEMINI_API_KEY"))
66
+ log_egress(gemini, detail="summarize") # audit that data left the boundary
67
+ headers = {"Authorization": gemini.bearer()} # raises if the key is unset (never an unauth call)
68
+ ```
69
+
70
+ ## What's in the box
71
+
72
+ | Module | Purpose |
73
+ |---|---|
74
+ | `budget` | `BudgetCap` / `BudgetState` / `CostRates` / `admits` / `estimate_cost` — the hard, Decimal-exact, reserve-on-approval cap across a run + month window |
75
+ | `estimate` | `CostEstimate` / `HostCapabilities` / `vram_fits` — the one "no GPU ⇒ a GPU job is infeasible" rule, shared by the gate and the router |
76
+ | `routing` | `BackendKind` / `BackendCapabilities` / `ToolRequirements` / `select_backend` (generic over a `Routable`) — the pure `LOCAL→LAN→CLOUD→SDK` policy; SDK opt-in |
77
+ | `egress` | `SecretRef` / `ExternalEndpoint` / `log_egress` — reference-only API keys, https-only, fail-closed on a missing key, audited egress (SDKs **and** LLM APIs) |
78
+ | `approval` | `Approval` / `ApprovalOutcome` — the who/when/why audit fact for a gated job |
79
+ | `dispatch` | `JobStore` / `Transport` / `WorkerExecutor` protocols + `is_lease_stale` / `should_give_up` / `Lease` — the run-it-once-recoverably contract (atomic claim, stale-reject, lease recovery); push vs pull is only the `Transport` adapter |
80
+
81
+ ## Notes
82
+
83
+ - **The budget cap lives in your dispatch service, never the UI** — an agent hitting the API
84
+ directly is still gated. Default cap `$0`; if spend can't be computed, refuse.
85
+ - **Reserve on approval, reconcile on completion** — approving reserves the estimate immediately so
86
+ a burst counts against the cap; the worker's true runtime reconciles `reserved → spent`.
87
+ - **SDK / external egress is the one deliberate exception** — never the default (`allow_sdk` /
88
+ opt-in), always logged, the key sourced from a secret at call time and never written to a log.
@@ -0,0 +1,12 @@
1
+ dispatch_kit/__init__.py,sha256=PUWm1pmCcji8QpDA9ZLZ2m3mzZhOzhumFSFypqlVLoA,2356
2
+ dispatch_kit/approval.py,sha256=6capLUQJbognPTSAzsoRyrNwWPJwRbxt6tkyDLqvxgs,1552
3
+ dispatch_kit/budget.py,sha256=m-2MAkyWq0kg7asp1g5t6lfnyOAXjjcvjM_oJy8K9wI,6329
4
+ dispatch_kit/dispatch.py,sha256=h2u2wVOFoQMWT_K-3affUilPm6E28VNDnRWPap3zPPk,5003
5
+ dispatch_kit/egress.py,sha256=HPNCAJh0LTVG9pfkjyQbHWpaiPkSyUG4jUkKMpX4RRY,4392
6
+ dispatch_kit/estimate.py,sha256=dnUv4iDAh3UyAjCgkq-mRWrhZP1t984aUhlfL9PHN6g,2507
7
+ dispatch_kit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ dispatch_kit/routing.py,sha256=BO0z8lUvteNOl19cyCAgEoDhFfOzDVvi_ffmnITkW1M,6698
9
+ dispatch_kit-0.1.0.dist-info/METADATA,sha256=CfBSqIDhM-wX5e7TfPuw6qhOLo0d1dJOK9TXDKM13xU,5256
10
+ dispatch_kit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ dispatch_kit-0.1.0.dist-info/top_level.txt,sha256=MQSrew1pPSR4Ei9U80LdMbq3gEjKk03Xa597gflZrsE,13
12
+ dispatch_kit-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ dispatch_kit