forgesight-governance 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forgesight_governance-0.1.0/.gitignore +38 -0
- forgesight_governance-0.1.0/PKG-INFO +89 -0
- forgesight_governance-0.1.0/README.md +64 -0
- forgesight_governance-0.1.0/pyproject.toml +43 -0
- forgesight_governance-0.1.0/src/forgesight_governance/__init__.py +42 -0
- forgesight_governance-0.1.0/src/forgesight_governance/_settings.py +15 -0
- forgesight_governance-0.1.0/src/forgesight_governance/budget.py +296 -0
- forgesight_governance-0.1.0/src/forgesight_governance/kill_switch.py +127 -0
- forgesight_governance-0.1.0/src/forgesight_governance/policy.py +127 -0
- forgesight_governance-0.1.0/src/forgesight_governance/py.typed +0 -0
- forgesight_governance-0.1.0/tests/test_governance.py +414 -0
- forgesight_governance-0.1.0/tests/test_projection.py +242 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
*.so
|
|
9
|
+
|
|
10
|
+
# venv / tooling
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
.uv/
|
|
14
|
+
uv.lock
|
|
15
|
+
|
|
16
|
+
# test / type / lint caches
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.ruff_cache/
|
|
20
|
+
.coverage
|
|
21
|
+
.coverage.*
|
|
22
|
+
coverage.xml
|
|
23
|
+
htmlcov/
|
|
24
|
+
|
|
25
|
+
# secrets / local env (never commit)
|
|
26
|
+
.env
|
|
27
|
+
.env.*
|
|
28
|
+
|
|
29
|
+
# editor / OS
|
|
30
|
+
.DS_Store
|
|
31
|
+
.idea/
|
|
32
|
+
.vscode/
|
|
33
|
+
|
|
34
|
+
# local-only session working state (per the workspace pipeline)
|
|
35
|
+
.claude/state/
|
|
36
|
+
|
|
37
|
+
# local-only launch planning (not part of the published repo)
|
|
38
|
+
/launch/
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forgesight-governance
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ForgeSight governance — cost budgets, policy enforcement, and a kill-switch on the interceptor chain.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Scaffoldic/forgesight
|
|
6
|
+
Project-URL: Repository, https://github.com/Scaffoldic/forgesight
|
|
7
|
+
Project-URL: Issues, https://github.com/Scaffoldic/forgesight/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/Scaffoldic/forgesight/blob/main/docs/releases/v0.1.md
|
|
9
|
+
Author: kjoshi
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
Keywords: ai-agents,budgets,finops,forgesight,governance,observability
|
|
12
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Information Technology
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: System :: Monitoring
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.11
|
|
23
|
+
Requires-Dist: forgesight-core
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# forgesight-governance
|
|
27
|
+
|
|
28
|
+
Cost budgets, policy enforcement, and a kill-switch for [ForgeSight](https://github.com/Scaffoldic/forgesight).
|
|
29
|
+
The SDK already knows what every LLM call costs and already has a veto point (the interceptor
|
|
30
|
+
chain) — this package turns that cost signal into a **control**: stop the spend, deny a model,
|
|
31
|
+
or cut off one scope's runs in seconds.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install forgesight-governance
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import forgesight
|
|
39
|
+
from forgesight_governance import BudgetInterceptor, PolicyInterceptor, KillSwitch
|
|
40
|
+
|
|
41
|
+
forgesight.configure(interceptors=[
|
|
42
|
+
KillSwitch.from_config(), # cheapest veto first
|
|
43
|
+
PolicyInterceptor.from_config(),
|
|
44
|
+
BudgetInterceptor.from_config(),
|
|
45
|
+
])
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or purely by name (entry-point auto-load) — `interceptors: [{name: kill-switch}, {name: policy}, {name: budget}]` plus a `governance:` block.
|
|
49
|
+
|
|
50
|
+
## What you get
|
|
51
|
+
|
|
52
|
+
- **Budgets.** Per-run / per-team / per-repo / per-environment USD or token caps, keyed on the
|
|
53
|
+
business metadata the SDK already attaches (FR-5). A breach raises `BudgetExceeded` →
|
|
54
|
+
`RunStatus.BUDGET_EXCEEDED`; the run record still flushes (telemetry is never lost).
|
|
55
|
+
- **Policy.** First-match-wins rules over metadata: `deny` a model set (e.g. unpriced/preview
|
|
56
|
+
models in prod), `allow`-list the models a team may call, or `redact` content for PII-tagged
|
|
57
|
+
runs. A denial raises `PolicyDenied` → `RunStatus.GUARDRAIL`.
|
|
58
|
+
- **Kill-switch.** Flip `FORGESIGHT_KILL_REPO_PAYMENTS_AGENT=true` (or a file entry) and every
|
|
59
|
+
run for that repo trips immediately, while every other agent keeps running — no redeploy.
|
|
60
|
+
|
|
61
|
+
```yaml
|
|
62
|
+
governance:
|
|
63
|
+
budgets:
|
|
64
|
+
per_run: { usd: 5.0 }
|
|
65
|
+
per_team: { growth: { usd: 200.0 }, research: { usd: 2000.0 } }
|
|
66
|
+
on_breach: "raise" # raise | drop | mark
|
|
67
|
+
policies:
|
|
68
|
+
rules:
|
|
69
|
+
- match: { environment: "prod" }
|
|
70
|
+
action: "deny"
|
|
71
|
+
models: ["*-experimental", "gpt-*-preview"]
|
|
72
|
+
- match: { pii: "true" }
|
|
73
|
+
action: "redact"
|
|
74
|
+
kill_switch:
|
|
75
|
+
source: "env" # env | file
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
A budget/policy/kill trip is the **one sanctioned case** where an interceptor's exception
|
|
79
|
+
propagates to the caller (a deliberate control, not a telemetry failure, P6). All of
|
|
80
|
+
`governance.*` is absent → disabled; install is necessary, config is the enabler.
|
|
81
|
+
|
|
82
|
+
## Out of scope (0.3)
|
|
83
|
+
|
|
84
|
+
Fleet-wide / cross-process caps (process-local for now; the `BudgetCap` + total-store split
|
|
85
|
+
leaves room for a shared store), spend forecasting, and a general policy language.
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
|
|
89
|
+
Apache-2.0
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# forgesight-governance
|
|
2
|
+
|
|
3
|
+
Cost budgets, policy enforcement, and a kill-switch for [ForgeSight](https://github.com/Scaffoldic/forgesight).
|
|
4
|
+
The SDK already knows what every LLM call costs and already has a veto point (the interceptor
|
|
5
|
+
chain) — this package turns that cost signal into a **control**: stop the spend, deny a model,
|
|
6
|
+
or cut off one scope's runs in seconds.
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install forgesight-governance
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
import forgesight
|
|
14
|
+
from forgesight_governance import BudgetInterceptor, PolicyInterceptor, KillSwitch
|
|
15
|
+
|
|
16
|
+
forgesight.configure(interceptors=[
|
|
17
|
+
KillSwitch.from_config(), # cheapest veto first
|
|
18
|
+
PolicyInterceptor.from_config(),
|
|
19
|
+
BudgetInterceptor.from_config(),
|
|
20
|
+
])
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or purely by name (entry-point auto-load) — `interceptors: [{name: kill-switch}, {name: policy}, {name: budget}]` plus a `governance:` block.
|
|
24
|
+
|
|
25
|
+
## What you get
|
|
26
|
+
|
|
27
|
+
- **Budgets.** Per-run / per-team / per-repo / per-environment USD or token caps, keyed on the
|
|
28
|
+
business metadata the SDK already attaches (FR-5). A breach raises `BudgetExceeded` →
|
|
29
|
+
`RunStatus.BUDGET_EXCEEDED`; the run record still flushes (telemetry is never lost).
|
|
30
|
+
- **Policy.** First-match-wins rules over metadata: `deny` a model set (e.g. unpriced/preview
|
|
31
|
+
models in prod), `allow`-list the models a team may call, or `redact` content for PII-tagged
|
|
32
|
+
runs. A denial raises `PolicyDenied` → `RunStatus.GUARDRAIL`.
|
|
33
|
+
- **Kill-switch.** Flip `FORGESIGHT_KILL_REPO_PAYMENTS_AGENT=true` (or a file entry) and every
|
|
34
|
+
run for that repo trips immediately, while every other agent keeps running — no redeploy.
|
|
35
|
+
|
|
36
|
+
```yaml
|
|
37
|
+
governance:
|
|
38
|
+
budgets:
|
|
39
|
+
per_run: { usd: 5.0 }
|
|
40
|
+
per_team: { growth: { usd: 200.0 }, research: { usd: 2000.0 } }
|
|
41
|
+
on_breach: "raise" # raise | drop | mark
|
|
42
|
+
policies:
|
|
43
|
+
rules:
|
|
44
|
+
- match: { environment: "prod" }
|
|
45
|
+
action: "deny"
|
|
46
|
+
models: ["*-experimental", "gpt-*-preview"]
|
|
47
|
+
- match: { pii: "true" }
|
|
48
|
+
action: "redact"
|
|
49
|
+
kill_switch:
|
|
50
|
+
source: "env" # env | file
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
A budget/policy/kill trip is the **one sanctioned case** where an interceptor's exception
|
|
54
|
+
propagates to the caller (a deliberate control, not a telemetry failure, P6). All of
|
|
55
|
+
`governance.*` is absent → disabled; install is necessary, config is the enabler.
|
|
56
|
+
|
|
57
|
+
## Out of scope (0.3)
|
|
58
|
+
|
|
59
|
+
Fleet-wide / cross-process caps (process-local for now; the `BudgetCap` + total-store split
|
|
60
|
+
leaves room for a shared store), spend forecasting, and a general policy language.
|
|
61
|
+
|
|
62
|
+
## License
|
|
63
|
+
|
|
64
|
+
Apache-2.0
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "forgesight-governance"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "ForgeSight governance — cost budgets, policy enforcement, and a kill-switch on the interceptor chain."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "Apache-2.0"
|
|
8
|
+
authors = [{ name = "kjoshi" }]
|
|
9
|
+
keywords = ["observability", "finops", "budgets", "governance", "ai-agents", "forgesight"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"Intended Audience :: Information Technology",
|
|
14
|
+
"Topic :: System :: Monitoring",
|
|
15
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
16
|
+
"License :: OSI Approved :: Apache Software License",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Typing :: Typed",
|
|
21
|
+
]
|
|
22
|
+
dependencies = ["forgesight-core"]
|
|
23
|
+
|
|
24
|
+
[project.entry-points."forgesight.interceptors"]
|
|
25
|
+
budget = "forgesight_governance.budget:BudgetInterceptor.from_config"
|
|
26
|
+
policy = "forgesight_governance.policy:PolicyInterceptor.from_config"
|
|
27
|
+
kill-switch = "forgesight_governance.kill_switch:KillSwitch.from_config"
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://github.com/Scaffoldic/forgesight"
|
|
31
|
+
Repository = "https://github.com/Scaffoldic/forgesight"
|
|
32
|
+
Issues = "https://github.com/Scaffoldic/forgesight/issues"
|
|
33
|
+
Changelog = "https://github.com/Scaffoldic/forgesight/blob/main/docs/releases/v0.1.md"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["hatchling"]
|
|
37
|
+
build-backend = "hatchling.build"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/forgesight_governance"]
|
|
41
|
+
|
|
42
|
+
[tool.uv.sources]
|
|
43
|
+
forgesight-core = { workspace = true }
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""ForgeSight governance — budgets, policy, and a kill-switch on the interceptor chain."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from forgesight_api import GovernanceSignal
|
|
6
|
+
|
|
7
|
+
from .budget import (
|
|
8
|
+
BudgetCap,
|
|
9
|
+
BudgetExceeded,
|
|
10
|
+
BudgetInterceptor,
|
|
11
|
+
BudgetScope,
|
|
12
|
+
ProjectionConfig,
|
|
13
|
+
)
|
|
14
|
+
from .kill_switch import (
|
|
15
|
+
EnvKillSwitchSource,
|
|
16
|
+
FileKillSwitchSource,
|
|
17
|
+
KillSwitch,
|
|
18
|
+
KillSwitchEngaged,
|
|
19
|
+
KillSwitchSource,
|
|
20
|
+
)
|
|
21
|
+
from .policy import PolicyAction, PolicyDenied, PolicyInterceptor, PolicyRule
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"BudgetCap",
|
|
27
|
+
"BudgetExceeded",
|
|
28
|
+
"BudgetInterceptor",
|
|
29
|
+
"BudgetScope",
|
|
30
|
+
"EnvKillSwitchSource",
|
|
31
|
+
"FileKillSwitchSource",
|
|
32
|
+
"GovernanceSignal",
|
|
33
|
+
"KillSwitch",
|
|
34
|
+
"KillSwitchEngaged",
|
|
35
|
+
"KillSwitchSource",
|
|
36
|
+
"PolicyAction",
|
|
37
|
+
"PolicyDenied",
|
|
38
|
+
"PolicyInterceptor",
|
|
39
|
+
"PolicyRule",
|
|
40
|
+
"ProjectionConfig",
|
|
41
|
+
"__version__",
|
|
42
|
+
]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Shared helper to read the ``governance:`` block from the SDK's layered settings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from forgesight_core.config import load_settings
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def governance_settings(settings: Mapping[str, Any] | None = None) -> dict[str, Any]:
    """Extract the ``governance`` section of the SDK settings as a plain ``dict``.

    Args:
        settings: Settings mapping to read from. When ``None``, the settings are
            obtained by calling ``load_settings()``.

    Returns:
        A shallow ``dict`` copy of the ``governance`` entry, or ``{}`` when that
        entry is missing or is not a mapping.
    """
    if settings is None:
        settings = load_settings()
    section = settings.get("governance")
    if not isinstance(section, Mapping):
        return {}
    return dict(section)
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""``BudgetInterceptor`` — turn the cost signal into a control (feat-020).
|
|
2
|
+
|
|
3
|
+
Runs on each completed LLM record: adds the call's cost / tokens to the per-scope running
|
|
4
|
+
totals (keyed on the business metadata the SDK already attaches — FR-5) and, if a cap would
|
|
5
|
+
be breached, enforces ``on_breach``. ``raise`` halts the run with ``BudgetExceeded`` (a
|
|
6
|
+
:class:`~forgesight_api.GovernanceSignal`, the one sanctioned interceptor-raises case) →
|
|
7
|
+
``RunStatus.BUDGET_EXCEEDED``; the run record still flushes. Process-local totals (a shared
|
|
8
|
+
store is a follow-up). It rides the locked ``Interceptor`` SPI — no new core surface.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Mapping, Sequence
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from enum import StrEnum
|
|
16
|
+
from typing import Any, Literal
|
|
17
|
+
|
|
18
|
+
from forgesight_api import GovernanceSignal, PricingProvider, Record, RunStatus, TokenUsage
|
|
19
|
+
from forgesight_core import get_runtime
|
|
20
|
+
|
|
21
|
+
from ._settings import governance_settings
|
|
22
|
+
|
|
23
|
+
# Record attribute key that ``BudgetInterceptor`` sets to ``True`` when ``on_breach == "mark"``.
BUDGET_EXCEEDED_ATTR = "forgesight.budget.exceeded"
# Budget scope value -> name of the record attribute that carries that scope's key.
_SCOPE_METADATA = {"team": "team", "repo": "repo", "environment": "environment"}
# Accepted values for ``ProjectionConfig.output_token_estimate``.
_ESTIMATES = ("max_tokens", "input_ratio", "fixed")
# Accepted values for ``ProjectionConfig.on_unpriced``.
_ON_UNPRICED = ("allow", "deny")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True, slots=True)
class ProjectionConfig:
    """Settings for the pre-call cost projection (feat-026).

    When projection is off, budgets are only enforced post-hoc (feat-020).
    """

    enabled: bool = False
    output_token_estimate: Literal["max_tokens", "input_ratio", "fixed"] = "max_tokens"
    fixed_output_tokens: int = 0  # only consulted when the estimate is "fixed"
    input_ratio: float = 1.0  # projected output ~= input_ratio * declared input
    on_unpriced: Literal["allow", "deny"] = "allow"  # what to do when no cost can be computed

    def __post_init__(self) -> None:
        """Reject unknown option values and non-positive parameters for the chosen estimate.

        Raises:
            ValueError: If ``output_token_estimate`` or ``on_unpriced`` is not an
                accepted value, or the parameter required by the selected
                estimate (``fixed_output_tokens`` / ``input_ratio``) is <= 0.
        """
        estimate = self.output_token_estimate
        if estimate not in _ESTIMATES:
            raise ValueError(f"output_token_estimate must be one of {_ESTIMATES}")
        if self.on_unpriced not in _ON_UNPRICED:
            raise ValueError(f"on_unpriced must be one of {_ON_UNPRICED}")
        if estimate == "fixed":
            if self.fixed_output_tokens <= 0:
                raise ValueError("fixed_output_tokens must be > 0 when estimate == 'fixed'")
        elif estimate == "input_ratio":
            if self.input_ratio <= 0:
                raise ValueError("input_ratio must be > 0 when estimate == 'input_ratio'")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class BudgetScope(StrEnum):
    """Granularity at which a budget cap is tracked."""

    RUN = "run"  # totals are keyed by the record's run id
    TEAM = "team"  # totals are keyed by the record's "team" attribute
    REPO = "repo"  # totals are keyed by the record's "repo" attribute
    ENVIRONMENT = "environment"  # totals are keyed by the record's "environment" attribute
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True, slots=True)
class BudgetCap:
    """A spending limit for one budget scope, expressed in USD and/or tokens."""

    scope: BudgetScope  # the scope whose running total this cap is checked against
    key: str | None = None  # e.g. "growth" for scope=team; None = the per-run cap / every value
    usd: float | None = None  # USD limit; None means this cap has no USD limit
    tokens: int | None = None  # token limit; None means this cap has no token limit
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class BudgetExceeded(GovernanceSignal):
    """Raised when a cap would be breached. Carries the trip context.

    Every constructor argument is keyword-only and is stored on the exception as
    an attribute of the same name. The signal is created with
    ``run_status=RunStatus.BUDGET_EXCEEDED``.
    """

    def __init__(
        self,
        *,
        scope: BudgetScope,
        key: str | None,
        cap_usd: float | None,
        cap_tokens: int | None,
        accumulated_usd: float,
        projected_usd: float,
    ) -> None:
        """Build the message and record the trip context.

        Args:
            scope: Scope of the cap that tripped.
            key: Accumulator key within that scope (``None`` for the per-run cap).
            cap_usd: USD limit of the cap, if it has one.
            cap_tokens: Token limit of the cap, if it has one.
            accumulated_usd: USD total before the call that tripped the cap.
            projected_usd: USD total including the call that tripped the cap.
        """
        usd_breached = cap_usd is not None and projected_usd > cap_usd
        if usd_breached or cap_tokens is None:
            detail = f"${projected_usd:.4f} > cap ${cap_usd}"
        else:
            # The USD limit (if any) still holds, so the token limit is what tripped;
            # reporting "$x > cap $None" here would be misleading.
            detail = f"token cap {cap_tokens} exceeded (spend ${projected_usd:.4f})"
        super().__init__(
            f"budget exceeded for {scope}={key}: {detail}",
            run_status=RunStatus.BUDGET_EXCEEDED,
        )
        self.scope = scope
        self.key = key
        self.cap_usd = cap_usd
        self.cap_tokens = cap_tokens
        self.accumulated_usd = accumulated_usd
        self.projected_usd = projected_usd
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class BudgetInterceptor:
    """Accumulate per-scope spend and enforce caps on the LLM-call path.

    Running totals are kept in a dict on the instance, keyed by
    ``(scope, accumulator key)``. ``intercept`` is the only method that writes
    them; ``precall`` only reads them.
    """

    def __init__(
        self,
        *,
        caps: Sequence[BudgetCap],
        on_breach: Literal["raise", "drop", "mark"] = "raise",
        pricing: PricingProvider | None = None,
        projection: ProjectionConfig | None = None,
    ) -> None:
        """Validate and store the configuration.

        Args:
            caps: Caps to enforce; each must set ``usd``, ``tokens`` or both.
            on_breach: ``"raise"`` raises ``BudgetExceeded``; ``"drop"`` makes
                ``intercept`` return ``None``; ``"mark"`` makes it return a copy
                of the record flagged with ``BUDGET_EXCEEDED_ATTR``.
            pricing: Provider used for pre-call projection; ``None`` falls back
                to the runtime's configured provider.
            projection: Pre-call projection settings; ``None`` disables ``precall``.

        Raises:
            ValueError: If ``on_breach`` is not a known mode, or a cap sets
                neither ``usd`` nor ``tokens``.
        """
        if on_breach not in ("raise", "drop", "mark"):
            raise ValueError(f"on_breach must be raise|drop|mark, got {on_breach!r}")
        for cap in caps:
            if cap.usd is None and cap.tokens is None:
                raise ValueError(f"BudgetCap for {cap.scope}={cap.key} sets neither usd nor tokens")
        self._caps = list(caps)
        self._on_breach = on_breach
        self._pricing = pricing  # None ⇒ resolve the runtime's configured PricingProvider
        self._projection = projection  # None ⇒ post-hoc only (feat-020 behaviour)
        self._totals: dict[tuple[BudgetScope, str], dict[str, float]] = {}

    @classmethod
    def from_config(cls, settings: Mapping[str, Any] | None = None) -> BudgetInterceptor:
        """Build an interceptor from the ``budgets`` entry of the governance settings.

        A missing or non-mapping ``budgets`` entry is treated as empty, which
        yields an interceptor with no caps.
        """
        budgets = governance_settings(settings).get("budgets")
        budgets = budgets if isinstance(budgets, Mapping) else {}
        caps = _parse_caps(budgets)
        on_breach = str(budgets.get("on_breach", "raise"))
        projection = _parse_projection(budgets.get("projection"))
        return cls(caps=caps, on_breach=on_breach, projection=projection)  # type: ignore[arg-type]

    # --- pre-call projection (feat-026) -----------------------------------
    def precall(self, record: Record) -> None:
        """Estimate this LLM call's cost *before* it is made and deny if a cap would be
        breached. A **guard only** — it never commits to the running totals; the realized
        cost on the completed record (``intercept``) stays the sole writer, so a
        conservative over-estimate can't permanently inflate the accumulator."""
        if self._projection is None or not self._projection.enabled or record.llm is None:
            return
        projected = self._project_cost(record)
        if projected is None:
            if self._projection.on_unpriced == "deny":
                # No price available: an infinite projection trips every applicable USD cap.
                self._raise_for(record, projected_usd=float("inf"))
            return
        self._raise_for(record, projected_usd=projected, additive=True)

    def _raise_for(self, record: Record, *, projected_usd: float, additive: bool = False) -> None:
        """Raise ``BudgetExceeded`` for the first applicable USD cap the projection exceeds.

        With ``additive=True`` the projection is added to the accumulated total
        before comparing; otherwise it is compared to the cap on its own. Caps
        without a USD limit, and caps that do not apply to ``record``, are skipped.
        """
        for cap in self._caps:
            if cap.usd is None:
                continue
            acc_key = _accumulator_key(cap, record)
            if acc_key is None:
                continue
            accumulated = self._totals.get((cap.scope, acc_key), {"usd": 0.0})["usd"]
            total = accumulated + projected_usd if additive else projected_usd
            if total > cap.usd:
                raise BudgetExceeded(
                    scope=cap.scope,
                    key=acc_key if cap.scope is not BudgetScope.RUN else None,
                    cap_usd=cap.usd,
                    cap_tokens=cap.tokens,
                    accumulated_usd=accumulated,
                    projected_usd=total,
                )

    def _project_cost(self, record: Record) -> float | None:
        """Price the call using the declared input tokens and an estimated output count.

        Returns ``None`` when no pricing provider is available; otherwise returns
        whatever the provider's ``price`` call returns.
        """
        assert record.llm is not None
        assert self._projection is not None
        usage = record.llm.usage
        estimate = self._projection.output_token_estimate
        if estimate == "input_ratio":
            output = int(usage.input * self._projection.input_ratio)
        elif estimate == "fixed":
            output = self._projection.fixed_output_tokens
        else:  # max_tokens — the caller-declared worst case (stuffed into usage.output)
            output = usage.output
        projected_usage = TokenUsage(input=usage.input, output=output)
        pricing = self._pricing or get_runtime().pricing
        if pricing is None:
            return None
        return pricing.price(record.llm.provider, record.llm.request_model, projected_usage)

    # --- Interceptor SPI --------------------------------------------------
    def intercept(self, record: Record) -> Record | None:
        """Add the record's realized cost/tokens to the totals and enforce the caps.

        Returns:
            ``record`` itself when it is not an LLM call or no cap is breached;
            otherwise the result of ``_enforce`` (``None`` for ``"drop"``, a
            flagged copy for ``"mark"``).

        Raises:
            BudgetExceeded: When a cap is breached and ``on_breach == "raise"``.
        """
        if record.llm is None:
            return record  # governance acts only on LLM calls
        cost = record.llm.cost_usd or 0.0
        tokens = record.llm.usage.total
        # Several caps can resolve to the same accumulator (e.g. a team cap with
        # key=None plus a cap keyed on that same team). Remember each accumulator's
        # pre-call USD total so this call's spend is added to it only once.
        counted: dict[tuple[BudgetScope, str], float] = {}
        for cap in self._caps:
            acc_key = _accumulator_key(cap, record)
            if acc_key is None:
                continue
            slot = (cap.scope, acc_key)
            total = self._totals.setdefault(slot, {"usd": 0.0, "tokens": 0.0})
            already_counted = slot in counted
            if already_counted:
                # The stored totals already include this call.
                accumulated_usd = counted[slot]
                projected_usd = total["usd"]
                projected_tokens = total["tokens"]
            else:
                accumulated_usd = total["usd"]
                projected_usd = accumulated_usd + cost
                projected_tokens = total["tokens"] + tokens
            if _breaches(cap, projected_usd, projected_tokens):
                breached = self._enforce(cap, acc_key, record, accumulated_usd, projected_usd)
                if breached is not record:
                    return breached
            if not already_counted:
                total["usd"] = projected_usd
                total["tokens"] = projected_tokens
                counted[slot] = accumulated_usd
            self._emit_utilization(cap, acc_key, projected_usd)
        return record

    def _emit_utilization(self, cap: BudgetCap, acc_key: str, accumulated_usd: float) -> None:
        """Record forgesight.cost.budget_utilization (spend/cap) through the runtime's
        metrics subsystem (feat-026). Core never depends on governance — governance records
        through core's public surface. No-op when metrics are off or the cap has no usd."""
        if cap.usd is None or cap.usd <= 0:
            return
        metrics = get_runtime().metrics
        if metrics is None:
            return
        metrics.set_budget_utilization(
            accumulated_usd / cap.usd,
            {"budget.scope": cap.scope.value, "budget.key": acc_key},
        )

    def _enforce(
        self,
        cap: BudgetCap,
        acc_key: str,
        record: Record,
        accumulated_usd: float,
        projected_usd: float,
    ) -> Record | None:
        """Apply the configured ``on_breach`` mode to a breached cap.

        Returns:
            ``None`` for ``"drop"``; for ``"mark"`` a copy of ``record`` whose
            attributes additionally carry ``BUDGET_EXCEEDED_ATTR = True``.

        Raises:
            BudgetExceeded: For ``"raise"``.
        """
        if self._on_breach == "raise":
            raise BudgetExceeded(
                scope=cap.scope,
                key=acc_key if cap.scope is not BudgetScope.RUN else None,
                cap_usd=cap.usd,
                cap_tokens=cap.tokens,
                accumulated_usd=accumulated_usd,
                projected_usd=projected_usd,
            )
        if self._on_breach == "drop":
            return None
        # mark: flag the record but let the run continue
        from dataclasses import replace
        from types import MappingProxyType

        attrs = dict(record.attributes)
        attrs[BUDGET_EXCEEDED_ATTR] = True
        return replace(record, attributes=MappingProxyType(attrs))
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _accumulator_key(cap: BudgetCap, record: Record) -> str | None:
    """Return the running-total key ``cap`` uses for ``record``, or ``None`` if it does not apply.

    A run-scoped cap is keyed by the record's run id. Any other cap is keyed by
    the string form of the record attribute named in ``_SCOPE_METADATA``; it
    does not apply when that attribute is absent, or when the cap names a
    specific key and the attribute differs from it.
    """
    if cap.scope is BudgetScope.RUN:
        # The per-run cap applies to every run, keyed by run id.
        return record.run_id
    attribute_name = _SCOPE_METADATA[cap.scope.value]
    raw = record.attributes.get(attribute_name)
    if raw is None:
        return None
    scope_value = str(raw)
    targets_other_key = cap.key is not None and scope_value != cap.key
    return None if targets_other_key else scope_value
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _breaches(cap: BudgetCap, usd: float, tokens: float) -> bool:
|
|
249
|
+
if cap.usd is not None and usd > cap.usd:
|
|
250
|
+
return True
|
|
251
|
+
return cap.tokens is not None and tokens > cap.tokens
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _parse_caps(budgets: Mapping[str, Any]) -> list[BudgetCap]:
    """Translate the ``budgets`` config mapping into a list of ``BudgetCap`` objects.

    ``per_run`` yields one run-scoped cap with no key. Each entry of
    ``per_team`` / ``per_repo`` / ``per_environment`` yields one cap keyed by
    the entry's name. Blocks and entries that are not mappings are ignored.
    """
    parsed: list[BudgetCap] = []

    run_block = budgets.get("per_run")
    if isinstance(run_block, Mapping):
        run_cap = BudgetCap(
            BudgetScope.RUN, None, _f(run_block.get("usd")), _i(run_block.get("tokens"))
        )
        parsed.append(run_cap)

    keyed_blocks = (
        ("per_team", BudgetScope.TEAM),
        ("per_repo", BudgetScope.REPO),
        ("per_environment", BudgetScope.ENVIRONMENT),
    )
    for block_name, scope in keyed_blocks:
        section = budgets.get(block_name)
        if not isinstance(section, Mapping):
            continue
        parsed.extend(
            BudgetCap(scope, str(name), _f(limits.get("usd")), _i(limits.get("tokens")))
            for name, limits in section.items()
            if isinstance(limits, Mapping)
        )
    return parsed
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _parse_projection(raw: Any) -> ProjectionConfig | None:
|
|
280
|
+
if not isinstance(raw, Mapping):
|
|
281
|
+
return None
|
|
282
|
+
return ProjectionConfig(
|
|
283
|
+
enabled=bool(raw.get("enabled", False)),
|
|
284
|
+
output_token_estimate=str(raw.get("output_token_estimate", "max_tokens")), # type: ignore[arg-type]
|
|
285
|
+
fixed_output_tokens=int(raw.get("fixed_output_tokens", 0)),
|
|
286
|
+
input_ratio=float(raw.get("input_ratio", 1.0)),
|
|
287
|
+
on_unpriced=str(raw.get("on_unpriced", "allow")), # type: ignore[arg-type]
|
|
288
|
+
)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _f(value: Any) -> float | None:
|
|
292
|
+
return float(value) if value is not None else None
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _i(value: Any) -> int | None:
|
|
296
|
+
return int(value) if value is not None else None
|