dispatch-kit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dispatch_kit-0.1.0/PKG-INFO +88 -0
- dispatch_kit-0.1.0/README.md +63 -0
- dispatch_kit-0.1.0/pyproject.toml +82 -0
- dispatch_kit-0.1.0/setup.cfg +9 -0
- dispatch_kit-0.1.0/src/dispatch_kit/__init__.py +88 -0
- dispatch_kit-0.1.0/src/dispatch_kit/approval.py +44 -0
- dispatch_kit-0.1.0/src/dispatch_kit/budget.py +156 -0
- dispatch_kit-0.1.0/src/dispatch_kit/dispatch.py +102 -0
- dispatch_kit-0.1.0/src/dispatch_kit/egress.py +108 -0
- dispatch_kit-0.1.0/src/dispatch_kit/estimate.py +58 -0
- dispatch_kit-0.1.0/src/dispatch_kit/py.typed +0 -0
- dispatch_kit-0.1.0/src/dispatch_kit/routing.py +157 -0
- dispatch_kit-0.1.0/src/dispatch_kit.egg-info/PKG-INFO +88 -0
- dispatch_kit-0.1.0/src/dispatch_kit.egg-info/SOURCES.txt +22 -0
- dispatch_kit-0.1.0/src/dispatch_kit.egg-info/dependency_links.txt +1 -0
- dispatch_kit-0.1.0/src/dispatch_kit.egg-info/requires.txt +8 -0
- dispatch_kit-0.1.0/src/dispatch_kit.egg-info/top_level.txt +1 -0
- dispatch_kit-0.1.0/tests/test_approval.py +31 -0
- dispatch_kit-0.1.0/tests/test_budget.py +74 -0
- dispatch_kit-0.1.0/tests/test_dispatch.py +25 -0
- dispatch_kit-0.1.0/tests/test_egress.py +64 -0
- dispatch_kit-0.1.0/tests/test_estimate.py +31 -0
- dispatch_kit-0.1.0/tests/test_routing.py +87 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dispatch-kit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pure, fail-closed cost-gating for expensive remote/external work: a hard $ budget cap, backend routing (local->LAN->cloud->SDK), and opt-in audited API egress.
|
|
5
|
+
Author-email: Aryan Falahatpisheh <aryanfalahat@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: budget,cost,cloud,gpu,llm,dispatch,egress,approval
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
20
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
21
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
22
|
+
Requires-Dist: flake8>=6.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pylint>=3.0; extra == "dev"
|
|
24
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# dispatch-kit
|
|
27
|
+
|
|
28
|
+
A tiny, **pure, dependency-free** library for gating expensive remote/external work — the same
|
|
29
|
+
machinery for a **cloud GPU job** (a Cloud Run L4 reached over a tailnet) and a **paid LLM/SDK API
|
|
30
|
+
call** (Gemini, Claude, Rowan). It answers three questions, fail-closed:
|
|
31
|
+
|
|
32
|
+
- **Can we afford it?** — a hard, reserve-on-approval **budget cap** (per-run + per-month).
|
|
33
|
+
- **Where should it run?** — a pure **router**: `LOCAL → LAN → CLOUD → SDK`, SDK opt-in only.
|
|
34
|
+
- **Is the external call safe?** — opt-in, audited **API egress** with reference-only secrets.
|
|
35
|
+
|
|
36
|
+
It owns the *policy* (afford / route / approve / egress); your app keeps its job entity,
|
|
37
|
+
persistence, and executor. The transport *auth* (who may talk) is a separate concern — pair this
|
|
38
|
+
with [`tailnet-guard`](https://github.com/falahat/tailnet-guard). Stdlib only; every check is
|
|
39
|
+
fail-closed (default budget `0` = paid work off; SDK never auto-selected; a missing key refuses).
|
|
40
|
+
|
|
41
|
+
## Use
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from decimal import Decimal
|
|
45
|
+
from dispatch_kit import (
|
|
46
|
+
BudgetCap, BudgetState, CostRates, admits, estimate_cost, # the hard $ cap
|
|
47
|
+
select_backend, BackendKind, ToolRequirements, # the where
|
|
48
|
+
SecretRef, ExternalEndpoint, log_egress, # opt-in API egress
|
|
49
|
+
Approval, # the approval audit fact
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# 1. Reserve-on-approval: refuse a job that would push past the cap (both windows).
|
|
53
|
+
rates = CostRates(gpu_usd_per_s=Decimal("0.0008"), vcpu_usd_per_s=Decimal("0.00001"),
|
|
54
|
+
gib_usd_per_s=Decimal("0.000002"), idle_tail_s=Decimal(600))
|
|
55
|
+
cost = estimate_cost(rates, max_runtime_s=3600, vcpus=8, memory_gib=32) # an UPPER bound
|
|
56
|
+
decision = admits(cost, run_state, month_state, BudgetCap(run_usd=Decimal(50), month_usd=Decimal(500)))
|
|
57
|
+
if not decision.admitted:
|
|
58
|
+
raise OverBudget(decision.reason) # OverBudget is your app's own exception (not part of dispatch-kit); the default cap is $0, so paid work is off until you set one
|
|
59
|
+
|
|
60
|
+
# 2. Pick where it runs — LOCAL first, SDK only if explicitly allowed.
|
|
61
|
+
backend = select_backend(my_backends, ToolRequirements(tool_id="cofold", min_vram_gb=24.0))
|
|
62
|
+
|
|
63
|
+
# 3. An LLM/SDK key is a REFERENCE (env var name), resolved at call time, never logged.
|
|
64
|
+
gemini = ExternalEndpoint("gemini", "https://generativelanguage.googleapis.com",
|
|
65
|
+
SecretRef("GEMINI_API_KEY"))
|
|
66
|
+
log_egress(gemini, detail="summarize") # audit that data left the boundary
|
|
67
|
+
headers = {"Authorization": gemini.bearer()} # raises if the key is unset (never an unauth call)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## What's in the box
|
|
71
|
+
|
|
72
|
+
| Module | Purpose |
|
|
73
|
+
|---|---|
|
|
74
|
+
| `budget` | `BudgetCap` / `BudgetState` / `CostRates` / `admits` / `estimate_cost` — the hard, Decimal-exact, reserve-on-approval cap across a run + month window |
|
|
75
|
+
| `estimate` | `CostEstimate` / `HostCapabilities` / `vram_fits` — the one "no GPU ⇒ a GPU job is infeasible" rule, shared by the gate and the router |
|
|
76
|
+
| `routing` | `BackendKind` / `BackendCapabilities` / `ToolRequirements` / `select_backend` (generic over a `Routable`) — the pure `LOCAL→LAN→CLOUD→SDK` policy; SDK opt-in |
|
|
77
|
+
| `egress` | `SecretRef` / `ExternalEndpoint` / `log_egress` — reference-only API keys, https-only, fail-closed on a missing key, audited egress (SDKs **and** LLM APIs) |
|
|
78
|
+
| `approval` | `Approval` / `ApprovalOutcome` — the who/when/why audit fact for a gated job |
|
|
79
|
+
| `dispatch` | `JobStore` / `Transport` / `WorkerExecutor` protocols + `is_lease_stale` / `should_give_up` / `Lease` — the run-it-once-recoverably contract (atomic claim, stale-reject, lease recovery); push vs pull is only the `Transport` adapter |
|
|
80
|
+
|
|
81
|
+
## Notes
|
|
82
|
+
|
|
83
|
+
- **The budget cap lives in your dispatch service, never the UI** — an agent hitting the API
|
|
84
|
+
directly is still gated. Default cap `$0`; if spend can't be computed, refuse.
|
|
85
|
+
- **Reserve on approval, reconcile on completion** — approving reserves the estimate immediately so
|
|
86
|
+
a burst counts against the cap; the worker's true runtime reconciles `reserved → spent`.
|
|
87
|
+
- **SDK / external egress is the one deliberate exception** — never the default (`allow_sdk` /
|
|
88
|
+
opt-in), always logged, the key sourced from a secret at call time and never written to a log.
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# dispatch-kit
|
|
2
|
+
|
|
3
|
+
A tiny, **pure, dependency-free** library for gating expensive remote/external work — the same
|
|
4
|
+
machinery for a **cloud GPU job** (a Cloud Run L4 reached over a tailnet) and a **paid LLM/SDK API
|
|
5
|
+
call** (Gemini, Claude, Rowan). It answers three questions, fail-closed:
|
|
6
|
+
|
|
7
|
+
- **Can we afford it?** — a hard, reserve-on-approval **budget cap** (per-run + per-month).
|
|
8
|
+
- **Where should it run?** — a pure **router**: `LOCAL → LAN → CLOUD → SDK`, SDK opt-in only.
|
|
9
|
+
- **Is the external call safe?** — opt-in, audited **API egress** with reference-only secrets.
|
|
10
|
+
|
|
11
|
+
It owns the *policy* (afford / route / approve / egress); your app keeps its job entity,
|
|
12
|
+
persistence, and executor. The transport *auth* (who may talk) is a separate concern — pair this
|
|
13
|
+
with [`tailnet-guard`](https://github.com/falahat/tailnet-guard). Stdlib only; every check is
|
|
14
|
+
fail-closed (default budget `0` = paid work off; SDK never auto-selected; a missing key refuses).
|
|
15
|
+
|
|
16
|
+
## Use
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from decimal import Decimal
|
|
20
|
+
from dispatch_kit import (
|
|
21
|
+
BudgetCap, BudgetState, CostRates, admits, estimate_cost, # the hard $ cap
|
|
22
|
+
select_backend, BackendKind, ToolRequirements, # the where
|
|
23
|
+
SecretRef, ExternalEndpoint, log_egress, # opt-in API egress
|
|
24
|
+
Approval, # the approval audit fact
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# 1. Reserve-on-approval: refuse a job that would push past the cap (both windows).
|
|
28
|
+
rates = CostRates(gpu_usd_per_s=Decimal("0.0008"), vcpu_usd_per_s=Decimal("0.00001"),
|
|
29
|
+
gib_usd_per_s=Decimal("0.000002"), idle_tail_s=Decimal(600))
|
|
30
|
+
cost = estimate_cost(rates, max_runtime_s=3600, vcpus=8, memory_gib=32) # an UPPER bound
|
|
31
|
+
decision = admits(cost, run_state, month_state, BudgetCap(run_usd=Decimal(50), month_usd=Decimal(500)))
|
|
32
|
+
if not decision.admitted:
|
|
33
|
+
raise OverBudget(decision.reason) # OverBudget is your app's own exception (not part of dispatch-kit); the default cap is $0, so paid work is off until you set one
|
|
34
|
+
|
|
35
|
+
# 2. Pick where it runs — LOCAL first, SDK only if explicitly allowed.
|
|
36
|
+
backend = select_backend(my_backends, ToolRequirements(tool_id="cofold", min_vram_gb=24.0))
|
|
37
|
+
|
|
38
|
+
# 3. An LLM/SDK key is a REFERENCE (env var name), resolved at call time, never logged.
|
|
39
|
+
gemini = ExternalEndpoint("gemini", "https://generativelanguage.googleapis.com",
|
|
40
|
+
SecretRef("GEMINI_API_KEY"))
|
|
41
|
+
log_egress(gemini, detail="summarize") # audit that data left the boundary
|
|
42
|
+
headers = {"Authorization": gemini.bearer()} # raises if the key is unset (never an unauth call)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## What's in the box
|
|
46
|
+
|
|
47
|
+
| Module | Purpose |
|
|
48
|
+
|---|---|
|
|
49
|
+
| `budget` | `BudgetCap` / `BudgetState` / `CostRates` / `admits` / `estimate_cost` — the hard, Decimal-exact, reserve-on-approval cap across a run + month window |
|
|
50
|
+
| `estimate` | `CostEstimate` / `HostCapabilities` / `vram_fits` — the one "no GPU ⇒ a GPU job is infeasible" rule, shared by the gate and the router |
|
|
51
|
+
| `routing` | `BackendKind` / `BackendCapabilities` / `ToolRequirements` / `select_backend` (generic over a `Routable`) — the pure `LOCAL→LAN→CLOUD→SDK` policy; SDK opt-in |
|
|
52
|
+
| `egress` | `SecretRef` / `ExternalEndpoint` / `log_egress` — reference-only API keys, https-only, fail-closed on a missing key, audited egress (SDKs **and** LLM APIs) |
|
|
53
|
+
| `approval` | `Approval` / `ApprovalOutcome` — the who/when/why audit fact for a gated job |
|
|
54
|
+
| `dispatch` | `JobStore` / `Transport` / `WorkerExecutor` protocols + `is_lease_stale` / `should_give_up` / `Lease` — the run-it-once-recoverably contract (atomic claim, stale-reject, lease recovery); push vs pull is only the `Transport` adapter |
|
|
55
|
+
|
|
56
|
+
## Notes
|
|
57
|
+
|
|
58
|
+
- **The budget cap lives in your dispatch service, never the UI** — an agent hitting the API
|
|
59
|
+
directly is still gated. Default cap `$0`; if spend can't be computed, refuse.
|
|
60
|
+
- **Reserve on approval, reconcile on completion** — approving reserves the estimate immediately so
|
|
61
|
+
a burst counts against the cap; the worker's true runtime reconciles `reserved → spent`.
|
|
62
|
+
- **SDK / external egress is the one deliberate exception** — never the default (`allow_sdk` /
|
|
63
|
+
opt-in), always logged, the key sourced from a secret at call time and never written to a log.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dispatch-kit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Pure, fail-closed cost-gating for expensive remote/external work: a hard $ budget cap, backend routing (local->LAN->cloud->SDK), and opt-in audited API egress."
|
|
9
|
+
authors = [{ name = "Aryan Falahatpisheh", email = "aryanfalahat@gmail.com" }]
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["budget", "cost", "cloud", "gpu", "llm", "dispatch", "egress", "approval"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Software Development :: Libraries",
|
|
22
|
+
]
|
|
23
|
+
requires-python = ">=3.11"
|
|
24
|
+
dependencies = []
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=7.0",
|
|
29
|
+
"mypy>=1.0",
|
|
30
|
+
"black>=23.0",
|
|
31
|
+
"flake8>=6.0",
|
|
32
|
+
"pylint>=3.0",
|
|
33
|
+
"ruff>=0.1",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["src"]
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.package-data]
|
|
40
|
+
dispatch_kit = ["py.typed"]
|
|
41
|
+
|
|
42
|
+
# --- Quality gates (strict; zero inline suppressions — fix root causes) ---
|
|
43
|
+
|
|
44
|
+
[tool.black]
|
|
45
|
+
line-length = 100
|
|
46
|
+
target-version = ["py311"]
|
|
47
|
+
|
|
48
|
+
[tool.ruff]
|
|
49
|
+
target-version = "py311"
|
|
50
|
+
line-length = 100
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint]
|
|
53
|
+
select = ["E", "F", "W", "I", "N", "UP", "B", "C4", "SIM", "PTH", "YTT", "ARG", "RUF"]
|
|
54
|
+
|
|
55
|
+
[tool.ruff.lint.per-file-ignores]
|
|
56
|
+
# pytest injects fixtures by name; ARG can't see the framework consumes them.
|
|
57
|
+
"tests/**" = ["ARG001", "ARG002"]
|
|
58
|
+
|
|
59
|
+
[tool.mypy]
|
|
60
|
+
python_version = "3.11"
|
|
61
|
+
strict = true
|
|
62
|
+
mypy_path = "src"
|
|
63
|
+
|
|
64
|
+
[tool.pytest.ini_options]
|
|
65
|
+
testpaths = ["tests"]
|
|
66
|
+
python_files = ["test_*.py"]
|
|
67
|
+
addopts = ["-q"]
|
|
68
|
+
|
|
69
|
+
[tool.coverage.run]
|
|
70
|
+
source = ["src/dispatch_kit"]
|
|
71
|
+
omit = ["*/tests/*"]
|
|
72
|
+
|
|
73
|
+
[tool.pylint.main]
|
|
74
|
+
source-roots = ["src"]
|
|
75
|
+
|
|
76
|
+
[tool.pylint.format]
|
|
77
|
+
max-line-length = 100
|
|
78
|
+
|
|
79
|
+
[tool.pylint.design]
|
|
80
|
+
# Pure value-object domain: frozen data records with few/no methods are expected.
|
|
81
|
+
min-public-methods = 0
|
|
82
|
+
max-args = 6
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""dispatch-kit — a pure, fail-closed library for gating expensive remote/external work.
|
|
2
|
+
|
|
3
|
+
Decide whether an expensive operation may run, how much it will cost, and where it should run —
|
|
4
|
+
the same machinery for a cloud GPU job (Cloud Run L4 over a tailnet) and a paid LLM/SDK API call:
|
|
5
|
+
|
|
6
|
+
from dispatch_kit import BudgetCap, BudgetState, admits, estimate_cost # the hard $ cap
|
|
7
|
+
from dispatch_kit import select_backend, BackendKind, ToolRequirements # where it runs
|
|
8
|
+
from dispatch_kit import SecretRef, ExternalEndpoint, log_egress # opt-in API egress
|
|
9
|
+
from dispatch_kit import Approval # the approval audit fact
|
|
10
|
+
|
|
11
|
+
Pure domain — stdlib only, no I/O, no provider SDK code; every check is fail-closed (default budget
|
|
12
|
+
``0`` = paid work off; SDK egress opt-in only; a missing key/over-budget refuses). The transport
|
|
13
|
+
auth (who) is a separate concern — pair this with ``tailnet-guard``. The consuming app keeps its job
|
|
14
|
+
entity, persistence, and executor; this owns the policy.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from .approval import Approval, ApprovalOutcome
|
|
20
|
+
from .budget import (
|
|
21
|
+
AdmissionDecision,
|
|
22
|
+
BudgetCap,
|
|
23
|
+
BudgetState,
|
|
24
|
+
BudgetWindow,
|
|
25
|
+
CostRates,
|
|
26
|
+
admits,
|
|
27
|
+
estimate_cost,
|
|
28
|
+
)
|
|
29
|
+
from .dispatch import (
|
|
30
|
+
DispatchError,
|
|
31
|
+
JobStore,
|
|
32
|
+
Lease,
|
|
33
|
+
Transport,
|
|
34
|
+
WorkerExecutor,
|
|
35
|
+
is_lease_stale,
|
|
36
|
+
should_give_up,
|
|
37
|
+
)
|
|
38
|
+
from .egress import (
|
|
39
|
+
EnvLookup,
|
|
40
|
+
ExternalEndpoint,
|
|
41
|
+
SecretMissingError,
|
|
42
|
+
SecretRef,
|
|
43
|
+
log_egress,
|
|
44
|
+
)
|
|
45
|
+
from .estimate import CostEstimate, HostCapabilities, vram_fits
|
|
46
|
+
from .routing import (
|
|
47
|
+
BackendCapabilities,
|
|
48
|
+
BackendKind,
|
|
49
|
+
NodeIdentity,
|
|
50
|
+
NoEligibleBackendError,
|
|
51
|
+
Routable,
|
|
52
|
+
ToolRequirements,
|
|
53
|
+
select_backend,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
__all__ = [
|
|
57
|
+
"AdmissionDecision",
|
|
58
|
+
"Approval",
|
|
59
|
+
"ApprovalOutcome",
|
|
60
|
+
"BackendCapabilities",
|
|
61
|
+
"BackendKind",
|
|
62
|
+
"BudgetCap",
|
|
63
|
+
"BudgetState",
|
|
64
|
+
"BudgetWindow",
|
|
65
|
+
"CostEstimate",
|
|
66
|
+
"CostRates",
|
|
67
|
+
"DispatchError",
|
|
68
|
+
"EnvLookup",
|
|
69
|
+
"ExternalEndpoint",
|
|
70
|
+
"HostCapabilities",
|
|
71
|
+
"JobStore",
|
|
72
|
+
"Lease",
|
|
73
|
+
"NoEligibleBackendError",
|
|
74
|
+
"NodeIdentity",
|
|
75
|
+
"Routable",
|
|
76
|
+
"SecretMissingError",
|
|
77
|
+
"SecretRef",
|
|
78
|
+
"ToolRequirements",
|
|
79
|
+
"Transport",
|
|
80
|
+
"WorkerExecutor",
|
|
81
|
+
"admits",
|
|
82
|
+
"estimate_cost",
|
|
83
|
+
"is_lease_stale",
|
|
84
|
+
"log_egress",
|
|
85
|
+
"select_backend",
|
|
86
|
+
"should_give_up",
|
|
87
|
+
"vram_fits",
|
|
88
|
+
]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Approval — the human decision + audit record that gates an expensive (non-local) job.
|
|
2
|
+
|
|
3
|
+
A gated job leaves "awaiting approval" only by a human decision; recording WHO decided, WHEN, and
|
|
4
|
+
WHY keeps a paid run attributable (append-only in spirit). The state machine — which states are
|
|
5
|
+
gated and the transitions between them — lives in the consuming app's job entity; this is the
|
|
6
|
+
shared audit fact and the binary outcome both apps record.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import enum
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ApprovalOutcome(enum.StrEnum):
|
|
16
|
+
"""The decision a human made on a gated job."""
|
|
17
|
+
|
|
18
|
+
APPROVED = "approved"
|
|
19
|
+
REJECTED = "rejected"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True, slots=True)
class Approval:
    """Immutable audit record of a human decision on a gated job: who, why, when.

    ``author``, ``reason`` and ``decided_at`` are mandatory and must carry real
    content — an empty or whitespace-only value is rejected, so a decision can
    never be recorded as anonymous, unexplained, or untimestamped.
    ``decided_at`` is expected to be an ISO-8601 timestamp assigned by the
    server at decision time; only its non-blankness is checked here, not its
    format. ``outcome`` defaults to ``ApprovalOutcome.APPROVED`` so the common
    approve path can be built positionally; pass ``ApprovalOutcome.REJECTED``
    for a rejection.

    Raises:
        ValueError: if ``author``, ``reason`` or ``decided_at`` is empty or
            consists only of whitespace.
    """

    author: str
    reason: str
    decided_at: str
    outcome: ApprovalOutcome = ApprovalOutcome.APPROVED

    def __post_init__(self) -> None:
        for name, value in (
            ("author", self.author),
            ("reason", self.reason),
            ("decided_at", self.decided_at),
        ):
            # A whitespace-only string is truthy, so a plain ``not value`` check
            # would let a blank author/reason/timestamp through; treat it as empty.
            if not value or (isinstance(value, str) and not value.strip()):
                raise ValueError(f"an approval needs a non-empty {name}")
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Budget ledger — a hard, fail-closed spend cap that gates expensive (non-local) work.
|
|
2
|
+
|
|
3
|
+
Cloud GPU jobs and paid API calls cost real money, so a **hard** spend cap is a circuit breaker
|
|
4
|
+
against errant testing or an agent flooding dispatch. The cap is enforced by **reserving** a job's
|
|
5
|
+
upper-bound cost the moment it is approved/queued, against BOTH a per-run window and a per-month
|
|
6
|
+
window: a job is admitted only if ``reserved + spent + its estimate`` stays within both caps. A
|
|
7
|
+
burst of approvals reserves cumulatively, so once a cap is reached the next job is refused — you
|
|
8
|
+
cannot even *queue* past it. The default cap is **zero**, so paid work stays off until you
|
|
9
|
+
deliberately set a budget.
|
|
10
|
+
|
|
11
|
+
This is pure domain (the rule); the persisted ledger rows + the per-backend rates live in the
|
|
12
|
+
consuming app's storage layer. Money is a :class:`~decimal.Decimal` (exact), never a float — a
|
|
13
|
+
fraction of a cent per GPU-second compounds, and a cost guard that drifts is not a guard.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import enum
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from decimal import Decimal
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BudgetWindow(enum.StrEnum):
|
|
24
|
+
"""The windows a reservation is checked against — BOTH must have room (fail closed).
|
|
25
|
+
|
|
26
|
+
``RUN`` bounds a single batch/session (stops one runaway loop); ``MONTH`` bounds slow
|
|
27
|
+
accumulation across sessions. A job must fit under both to be admitted.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
RUN = "run"
|
|
31
|
+
MONTH = "month"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True, slots=True)
|
|
35
|
+
class BudgetCap:
|
|
36
|
+
"""The hard ceiling per window, in USD. Default ``0`` means paid work is OFF (fail closed)."""
|
|
37
|
+
|
|
38
|
+
run_usd: Decimal = Decimal(0)
|
|
39
|
+
month_usd: Decimal = Decimal(0)
|
|
40
|
+
|
|
41
|
+
def __post_init__(self) -> None:
|
|
42
|
+
if self.run_usd < 0 or self.month_usd < 0:
|
|
43
|
+
raise ValueError("a budget cap cannot be negative")
|
|
44
|
+
|
|
45
|
+
def for_window(self, window: BudgetWindow) -> Decimal:
|
|
46
|
+
"""The cap for ``window`` (the single source the admission rule reads)."""
|
|
47
|
+
return self.run_usd if window is BudgetWindow.RUN else self.month_usd
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True, slots=True)
|
|
51
|
+
class BudgetState:
|
|
52
|
+
"""Committed USD for a window: ``reserved`` (approved, not yet run) + ``spent`` (reconciled).
|
|
53
|
+
|
|
54
|
+
Reserving on approval and only reconciling ``reserved -> spent`` on completion is what makes a
|
|
55
|
+
burst of approvals count immediately — the cap sees the whole queue's cost, not just what ran.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
reserved_usd: Decimal = Decimal(0)
|
|
59
|
+
spent_usd: Decimal = Decimal(0)
|
|
60
|
+
|
|
61
|
+
def __post_init__(self) -> None:
|
|
62
|
+
if self.reserved_usd < 0 or self.spent_usd < 0:
|
|
63
|
+
raise ValueError("budget reserved/spent cannot be negative")
|
|
64
|
+
|
|
65
|
+
def committed(self) -> Decimal:
|
|
66
|
+
"""The total already committed against the cap (reserved + spent)."""
|
|
67
|
+
return self.reserved_usd + self.spent_usd
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True, slots=True)
|
|
71
|
+
class CostRates:
|
|
72
|
+
"""Per-second USD rates for a backend — plain config data (no provider SDK code here).
|
|
73
|
+
|
|
74
|
+
``idle_tail_s`` is the post-request warm tail you still pay (a GPU Cloud Run instance bills for
|
|
75
|
+
~10 min after the last request before scaling to zero); folding it into the estimate keeps the
|
|
76
|
+
reservation an upper bound. A token-priced API can model itself with ``gpu_usd_per_s`` = 0 and a
|
|
77
|
+
flat per-call cost via :func:`estimate_cost` inputs, or extend this with its own rate object.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
gpu_usd_per_s: Decimal
|
|
81
|
+
vcpu_usd_per_s: Decimal
|
|
82
|
+
gib_usd_per_s: Decimal
|
|
83
|
+
idle_tail_s: Decimal = Decimal(0)
|
|
84
|
+
|
|
85
|
+
def __post_init__(self) -> None:
|
|
86
|
+
for name, value in (
|
|
87
|
+
("gpu_usd_per_s", self.gpu_usd_per_s),
|
|
88
|
+
("vcpu_usd_per_s", self.vcpu_usd_per_s),
|
|
89
|
+
("gib_usd_per_s", self.gib_usd_per_s),
|
|
90
|
+
("idle_tail_s", self.idle_tail_s),
|
|
91
|
+
):
|
|
92
|
+
if value < 0:
|
|
93
|
+
raise ValueError(f"{name} cannot be negative")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass(frozen=True, slots=True)
|
|
97
|
+
class AdmissionDecision:
|
|
98
|
+
"""Whether a job fits the budget, and — when it does not — which window refused and why.
|
|
99
|
+
|
|
100
|
+
``refused_window`` is the first window (run, then month) that lacked room; ``None`` on admit.
|
|
101
|
+
The reason is human-facing, so the refusal is legible at approval, never a silent drop.
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
admitted: bool
|
|
105
|
+
refused_window: BudgetWindow | None
|
|
106
|
+
reason: str
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def admits(
    estimate_usd: Decimal,
    run_state: BudgetState,
    month_state: BudgetState,
    cap: BudgetCap,
) -> AdmissionDecision:
    """Decide whether a job with upper-bound cost ``estimate_usd`` fits the budget.

    The job is admitted only if ``reserved + spent + estimate <= cap`` holds for
    the run window AND the month window; the run window is checked first. With
    the default cap of zero nothing that costs money is admitted, although a
    zero-cost job still fits a zero cap.

    Args:
        estimate_usd: Upper-bound cost of the job in USD.
        run_state: Amounts already committed in the run window.
        month_state: Amounts already committed in the month window.
        cap: The ceilings for both windows.

    Returns:
        An ``AdmissionDecision``. On refusal ``refused_window`` names the first
        window without room, or is ``None`` when the estimate itself is
        unusable (negative or NaN).
    """
    # A NaN cannot be ordered: ``Decimal("NaN") < 0`` raises
    # decimal.InvalidOperation. A cost that cannot be computed must be a
    # legible refusal rather than an arithmetic error escaping the gate.
    if isinstance(estimate_usd, Decimal) and estimate_usd.is_nan():
        return AdmissionDecision(False, None, "a cost estimate must be a number, not NaN")
    if estimate_usd < 0:
        return AdmissionDecision(False, None, "a cost estimate cannot be negative")
    for window, state in ((BudgetWindow.RUN, run_state), (BudgetWindow.MONTH, month_state)):
        cap_window = cap.for_window(window)
        committed = state.committed()
        if committed + estimate_usd > cap_window:
            return AdmissionDecision(
                False,
                window,
                (
                    f"would exceed the {window.value} budget cap (${cap_window}): "
                    f"${committed} committed + ${estimate_usd} estimate"
                ),
            )
    return AdmissionDecision(True, None, "within budget")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def estimate_cost(
    rates: CostRates,
    *,
    max_runtime_s: int,
    vcpus: int,
    memory_gib: int,
) -> Decimal:
    """Return an upper-bound USD cost for one job.

    The result is ``(max_runtime_s + idle tail) x (GPU rate + vCPU rate x vcpus
    + memory rate x memory_gib)``. It deliberately over-estimates, because it
    uses the caller's declared MAXIMUM runtime plus the billed idle tail;
    over-reserving is the safe direction for a hard cap, and the ledger later
    reconciles ``reserved -> spent`` from the true runtime.

    Args:
        rates: Per-second prices and idle tail of the backend.
        max_runtime_s: Declared maximum runtime in seconds.
        vcpus: Number of vCPUs billed.
        memory_gib: Memory billed, in GiB.

    Raises:
        ValueError: if ``max_runtime_s``, ``vcpus`` or ``memory_gib`` is negative.
    """
    if any(quantity < 0 for quantity in (max_runtime_s, vcpus, memory_gib)):
        raise ValueError("runtime/vcpus/memory cannot be negative")

    # Combined price of every billed resource for one second.
    vcpu_rate = rates.vcpu_usd_per_s * vcpus
    memory_rate = rates.gib_usd_per_s * memory_gib
    usd_per_second = rates.gpu_usd_per_s + vcpu_rate + memory_rate

    # Seconds paid for: the declared runtime plus the warm tail after it.
    seconds_billed = Decimal(max_runtime_s) + rates.idle_tail_s
    return seconds_billed * usd_per_second
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Dispatch contract for expensive async jobs that run anywhere on the tailnet.
|
|
2
|
+
|
|
3
|
+
Both a PULL worker (it polls: claim a job, run it, complete it) and a PUSH orchestrator (it posts a
|
|
4
|
+
job to a worker and waits) need the SAME guarantees: a job is CLAIMED atomically so it runs exactly
|
|
5
|
+
once, a stale result is REJECTED, and a worker that dies mid-job has its lease RECOVERED. This
|
|
6
|
+
module is the shared CONTRACT — the pure lease rules + the store/transport/worker protocols. The
|
|
7
|
+
store (SQLite vs SQLAlchemy), the transport (pull vs push), and the payload (a transcribe request vs
|
|
8
|
+
a tool invocation) are per-app ADAPTERS, so two apps converge on one model WITHOUT a shared DB.
|
|
9
|
+
|
|
10
|
+
Auth (who may run a job) is :mod:`tailnet_guard`; policy (afford / route / approve) is the rest of
|
|
11
|
+
:mod:`dispatch_kit`. This is only the run-it-once-recoverably engine on top of those.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from collections.abc import Sequence
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from typing import Any, Protocol
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DispatchError(RuntimeError):
    """Raised when a dispatch invariant is broken.

    Examples are a stale ``complete`` call, or a job recovered past its
    attempt cap.
    """
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True, slots=True)
|
|
26
|
+
class Lease:
|
|
27
|
+
"""A claim on a job: when it was leased and how many times it has been recovered.
|
|
28
|
+
|
|
29
|
+
``leased_at`` is epoch seconds, stamped at the atomic claim. ``attempts`` counts recoveries (a
|
|
30
|
+
worker that died and had its lease reclaimed); a job past ``max_attempts`` fails rather than
|
|
31
|
+
re-leasing forever.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
job_id: str
|
|
35
|
+
leased_at: float
|
|
36
|
+
attempts: int = 0
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def is_lease_stale(leased_at: float, now: float, ttl_seconds: float) -> bool:
    """Tell whether a lease has reached its TTL and may therefore be reclaimed.

    This is the one place the staleness rule lives, so every store's
    ``recover_stale`` reclaims a "leased but silent" job on the same schedule.
    Pick ``ttl_seconds`` comfortably longer than the slowest healthy run,
    otherwise a slow job is taken away from a worker that is still alive.

    Args:
        leased_at: When the lease was taken.
        now: The current time, on the same clock as ``leased_at``.
        ttl_seconds: How long a lease stays valid.

    Returns:
        ``True`` once the lease age (``now - leased_at``) is at least
        ``ttl_seconds``.
    """
    lease_age = now - leased_at
    return lease_age >= ttl_seconds
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def should_give_up(attempts: int, max_attempts: int) -> bool:
    """Tell whether a job has been recovered often enough that it must FAIL.

    This guards against a poison job that crashes every worker it lands on:
    once ``attempts`` reaches ``max_attempts`` the job is to be marked failed
    instead of being re-leased again.

    Args:
        attempts: Recoveries recorded for the job so far.
        max_attempts: The recovery cap.

    Returns:
        ``True`` when ``attempts`` has reached or passed ``max_attempts``.
    """
    cap_reached = attempts >= max_attempts
    return cap_reached
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class JobStore(Protocol):
    """Structural interface of the authoritative job store.

    The store is the single place where a job's claim and completion are
    decided, and implementations must decide them ATOMICALLY:

    * ``claim`` is one transaction / compare-and-set, so two concurrent workers
      can never both claim the same job. That single claim is the
      run-exactly-once guarantee.
    * ``complete`` must reject a result for a job that is not currently
      leased/running (for example a resubmit after the lease was recovered) and
      return ``False``, so a re-run cannot clobber a fresh result.
    * ``recover_stale`` re-leases jobs whose lease outlived the TTL (see
      ``is_lease_stale``), bumps their attempt count, and fails jobs past
      ``max_attempts`` (see ``should_give_up``).

    The payload type belongs to the consuming app (a transcribe request, a tool
    invocation, ...).
    """

    def claim(self, lanes: Sequence[str]) -> tuple[str, Any] | None:
        """Atomically claim ONE runnable job in ``lanes``.

        Returns ``(job_id, payload)`` for the claimed job, or ``None``.
        """
        ...

    def complete(self, job_id: str, result: Any) -> bool:
        """Apply ``result`` if and only if the job is leased/running.

        Returns ``False`` for a stale submission (the job is already done or
        its lease was recovered).
        """
        ...

    def recover_stale(self, *, now: float, ttl_seconds: float, max_attempts: int) -> Sequence[str]:
        """Re-lease jobs whose lease is stale and fail those past the cap.

        Returns the ids of the re-leased jobs.
        """
        ...
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Transport(Protocol):
    """Structural interface for the hop that moves a job to where it runs.

    This is the only part that differs between push and pull. In PULL mode the
    worker calls the store's claim/complete itself, so no Transport is
    involved. In PUSH mode the orchestrator submits the encoded job to a worker
    endpoint and receives the encoded result. The worker's auth
    (``tailnet_guard``) and the policy are the same either way; this is purely
    the byte-moving seam, so an app can swap pull for push without touching its
    job model.
    """

    def submit(self, envelope: dict[str, Any], token: str) -> dict[str, Any]:
        """Deliver an encoded job to a worker along with its capability token.

        Returns the worker's response.
        """
        ...
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class WorkerExecutor(Protocol):
    """Structural interface for the worker-side run: decode -> run -> encode.

    Implementations are required to verify every artifact's content hash while
    decoding, and to run only AFTER the auth guard has passed
    (guard-before-decode), so that a tampered or unauthorized job never reaches
    the executor. The return value is the encoded result that the store or
    transport carries back.
    """

    def execute(self, payload: Any) -> Any:
        """Run an already-authorized, integrity-checked payload.

        Returns the encoded result.
        """
        ...
|