goodput-http 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goodput/CLAUDE.md +115 -0
- goodput/__init__.py +139 -0
- goodput/api.py +33 -0
- goodput/checkpoint.py +88 -0
- goodput/circuit.py +139 -0
- goodput/classify.py +191 -0
- goodput/cli.py +178 -0
- goodput/clock.py +61 -0
- goodput/config.py +323 -0
- goodput/control/__init__.py +9 -0
- goodput/control/aimd.py +112 -0
- goodput/control/base.py +109 -0
- goodput/control/gradient.py +83 -0
- goodput/control_mode.py +268 -0
- goodput/engine.py +588 -0
- goodput/events.py +79 -0
- goodput/exceptions.py +35 -0
- goodput/models.py +217 -0
- goodput/plugins.py +63 -0
- goodput/py.typed +0 -0
- goodput/ratelimit.py +140 -0
- goodput/redaction.py +103 -0
- goodput/report.py +261 -0
- goodput/retry.py +146 -0
- goodput/retry_after.py +105 -0
- goodput/routing/__init__.py +26 -0
- goodput/routing/health.py +138 -0
- goodput/routing/route.py +110 -0
- goodput/routing/selector.py +143 -0
- goodput/scope.py +64 -0
- goodput/sim.py +111 -0
- goodput/sinks/__init__.py +9 -0
- goodput/sinks/base.py +34 -0
- goodput/sinks/jsonl.py +76 -0
- goodput/sinks/memory.py +46 -0
- goodput/sources.py +126 -0
- goodput/stats.py +182 -0
- goodput/transport.py +158 -0
- goodput_http-0.1.0.dist-info/METADATA +222 -0
- goodput_http-0.1.0.dist-info/RECORD +43 -0
- goodput_http-0.1.0.dist-info/WHEEL +4 -0
- goodput_http-0.1.0.dist-info/entry_points.txt +10 -0
- goodput_http-0.1.0.dist-info/licenses/LICENSE +19 -0
goodput/CLAUDE.md
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# CLAUDE.md — src/goodput
|
|
2
|
+
|
|
3
|
+
Module-level guidance for the library source. Read the root `CLAUDE.md` first for
|
|
4
|
+
the invariants and toolchain; this file maps the modules and their contracts.
|
|
5
|
+
|
|
6
|
+
## Dependency direction
|
|
7
|
+
|
|
8
|
+
Keep imports flowing one way to avoid cycles. Rough layers (low → high):
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
exceptions, clock, redaction, stats, models, control_mode (leaf primitives)
|
|
12
|
+
│
|
|
13
|
+
retry_after, classify, ratelimit, circuit, retry, scope, events
|
|
14
|
+
│
|
|
15
|
+
routing/ (route → health → selector), control/ (base → aimd/gradient)
|
|
16
|
+
│
|
|
17
|
+
transport, sources, report, sinks/, checkpoint, plugins
|
|
18
|
+
│
|
|
19
|
+
engine ──► api ──► __init__ (public surface), cli
|
|
20
|
+
│
|
|
21
|
+
sim (test/simulation transport; depends on transport+routing, used by tests)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
`engine.py` is the only module that wires everything together. Do not import
|
|
25
|
+
`engine` from lower layers. `config.py` sits just above the two lowest layers (it depends on
|
|
26
|
+
`control_mode`, `circuit`, `retry`) and is imported widely.
|
|
27
|
+
|
|
28
|
+
## Module responsibilities (contracts)
|
|
29
|
+
|
|
30
|
+
- **`control_mode.py`** — the authority primitive `Controlled[T]` and the
|
|
31
|
+
`fixed/adaptive/bounded_adaptive/disabled/inherit` constructors. **This is the
|
|
32
|
+
most important file in the project.** `Controlled` is a frozen dataclass;
|
|
33
|
+
"changing" a value calls `.propose()` which returns `(new, accepted, reason)`
|
|
34
|
+
and *refuses* to mutate FIXED/DISABLED/INHERIT. `.as_float()` widens int knobs
|
|
35
|
+
for controllers. Never add a mutation path that bypasses `.propose()`.
|
|
36
|
+
- **`models.py`** — `Request` (immutable template), `LogicalRequest`,
|
|
37
|
+
`AttemptRecord`, `LogicalResult`, `Outcome`, `ErrorClass`, `ThrottleSignal`,
|
|
38
|
+
`Classification`. `LogicalResult.retry_count == attempt_count - 1`. Idempotency
|
|
39
|
+
is resolved by `Request.is_idempotent()` (explicit override, else method).
|
|
40
|
+
- **`config.py`** — `EngineConfig` (pydantic v2, `extra="forbid"`),
|
|
41
|
+
`Objective`/`Mode`/`ThrottleScopeMode` enums, `SafetyCeilings`,
|
|
42
|
+
`AuthorizationContext`. `validate_constraints()` returns a `ValidationPlan`
|
|
43
|
+
with pre-execution conflict detection and a `render()` dry-run report. Add new
|
|
44
|
+
contradiction checks here, not in the engine.
|
|
45
|
+
- **`classify.py`** — `Classifiers` bundle (success/throttle/error) over a
|
|
46
|
+
transport-agnostic `ResponseView`. Defaults: 2xx success, 429/Retry-After/
|
|
47
|
+
exhausted-RateLimit throttle, 5xx retryable, 4xx not. Custom classifiers plug
|
|
48
|
+
in via `Classifiers.build(...)`.
|
|
49
|
+
- **`retry.py` / `retry_after.py`** — `decide_retry()` is pure and takes injected
|
|
50
|
+
`rng` (no internal randomness — the engine owns the RNG for determinism).
|
|
51
|
+
`RetryBudget` bounds amplification. `retry_after.py` parsers are total (never
|
|
52
|
+
raise; return `None` on malformed input).
|
|
53
|
+
- **`ratelimit.py`** — `TokenBucket` (clock-injected, weighted costs) and
|
|
54
|
+
`ScopedRateLimiter` keyed by scope string. Rate ≠ concurrency: this is pacing
|
|
55
|
+
only. Isolation is achieved purely by giving distinct keys.
|
|
56
|
+
- **`circuit.py`** — `CircuitBreaker` (CLOSED/OPEN/HALF_OPEN) + `CircuitRegistry`.
|
|
57
|
+
Throttles count toward opening by default; auth errors do not.
|
|
58
|
+
- **`routing/`** — see below.
|
|
59
|
+
- **`control/`** — see below.
|
|
60
|
+
- **`transport.py`** — `Transport` protocol + `HttpxTransport` (one persistent
|
|
61
|
+
`AsyncClient` **per route**, never per request). `_map_httpx_error()` maps
|
|
62
|
+
exceptions to `ErrorClass`. `AttemptOutcome` carries a `ResponseView`.
|
|
63
|
+
- **`scope.py`** — `make_scope_resolver(mode)` returns `(Request, Route) -> str`.
|
|
64
|
+
ISOLATED_FOR_TEST keys by route+credential+IP; SHARED uses one key; DECLARED
|
|
65
|
+
keys per origin. `_host_of()` is reused by the engine's authorization check.
|
|
66
|
+
- **`engine.py`** — the orchestrator: bounded pipeline, permit accounting,
|
|
67
|
+
control loop, watchdog, checkpoint integration. Structured-concurrency: an
|
|
68
|
+
inner task group runs producer+workers; when it drains, the outer group's
|
|
69
|
+
background loops are cancelled.
|
|
70
|
+
- **`report.py`** — `MetricsAccumulator` (incremental, bounded) → `Report`.
|
|
71
|
+
Strictly separates logical-request counts from attempt counts.
|
|
72
|
+
- **`stats.py`** — bounded-memory `LogHistogram`, `EwmaRate`, `RollingWindow`,
|
|
73
|
+
and `ExactQuantiles` (small workloads only).
|
|
74
|
+
- **`events.py`** — failure-isolated synchronous `EventBus`. Subscribers can't
|
|
75
|
+
crash the engine. Event `data` must already be redaction-safe.
|
|
76
|
+
- **`redaction.py`** — the single `Redactor`. All serialization paths use it.
|
|
77
|
+
- **`sinks/`**, **`checkpoint.py`**, **`plugins.py`**, **`cli.py`**, **`api.py`**,
|
|
78
|
+
**`sim.py`** — see their docstrings; `sim.py` is the deterministic test server.
|
|
79
|
+
|
|
80
|
+
## routing/
|
|
81
|
+
|
|
82
|
+
- **`route.py`** — `Route` (id, kind, proxy_url, local_address, weight, per-route
|
|
83
|
+
`Controlled` concurrency) and `RoutePool` (with `active_count` authority). A
|
|
84
|
+
FIXED `active_count` means "keep exactly N active whenever possible."
|
|
85
|
+
- **`health.py`** — `RouteHealthTracker`. `_is_route_attributable()` decides
|
|
86
|
+
whether a failure can quarantine a route (transport-level: yes; HTTP/app: no).
|
|
87
|
+
This distinction is a hard requirement — do not blur it.
|
|
88
|
+
- **`selector.py`** — selectors take an injected `rng` (0..1) for determinism and
|
|
89
|
+
only choose among `health.is_available()` routes. They never change the route
|
|
90
|
+
set or active count.
|
|
91
|
+
|
|
92
|
+
## control/
|
|
93
|
+
|
|
94
|
+
- **`base.py`** — `Sample` (observations), `Decision` (auditable record),
|
|
95
|
+
`Controller` protocol, and `make_decision()` which routes a proposal through
|
|
96
|
+
the knob's `.propose()` so authority/bounds are always enforced — even a buggy
|
|
97
|
+
controller cannot exceed bounds or move a FIXED knob.
|
|
98
|
+
- **`aimd.py`** — additive-increase/multiplicative-decrease with a Vegas-style
|
|
99
|
+
min-latency baseline and post-decrease cooldown (anti-oscillation).
|
|
100
|
+
- **`gradient.py`** — smoothed latency-gradient controller.
|
|
101
|
+
|
|
102
|
+
When adding a controller: implement `decide(knob, sample) -> (knob, Decision)`,
|
|
103
|
+
always go through `make_decision()`, register it under the `goodput.controllers`
|
|
104
|
+
entry point in `pyproject.toml`, and add a simulation test.
|
|
105
|
+
|
|
106
|
+
## Style
|
|
107
|
+
|
|
108
|
+
- `from __future__ import annotations` at the top of every module; prefer
|
|
109
|
+
`X | None` over `Optional[X]` (ruff enforces).
|
|
110
|
+
- Full type hints; `mypy --strict` must pass on `src`. Avoid `# type: ignore`
|
|
111
|
+
except the few documented spots (pydantic field variance, optional `tomli`).
|
|
112
|
+
- Match the existing docstring density: each module opens with a docstring citing
|
|
113
|
+
the relevant brief section(s); public classes/functions are documented.
|
|
114
|
+
- No new required dependencies. Substantial integrations go behind extras in
|
|
115
|
+
`pyproject.toml` and import lazily inside functions.
|
goodput/__init__.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""goodput — an adaptive HTTP execution engine that maximizes successful
|
|
2
|
+
logical-request goodput.
|
|
3
|
+
|
|
4
|
+
Public API (brief §32). The most common entry points::
|
|
5
|
+
|
|
6
|
+
import goodput as gp
|
|
7
|
+
|
|
8
|
+
result = gp.run_sync(
|
|
9
|
+
gp.repeat(gp.Request("GET", "https://example.com/health"), times=1000),
|
|
10
|
+
config=gp.EngineConfig(
|
|
11
|
+
global_concurrency=gp.bounded_adaptive(minimum=1, maximum=200, initial=10),
|
|
12
|
+
),
|
|
13
|
+
)
|
|
14
|
+
print(result.report.summary())
|
|
15
|
+
|
|
16
|
+
The library configures no logging and performs no networking at import time
|
|
17
|
+
(brief §4).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from .api import run, run_sync
|
|
23
|
+
from .classify import Classifiers, ResponseView
|
|
24
|
+
from .config import (
|
|
25
|
+
AuthorizationContext,
|
|
26
|
+
EngineConfig,
|
|
27
|
+
Mode,
|
|
28
|
+
Objective,
|
|
29
|
+
SafetyCeilings,
|
|
30
|
+
ThrottleScopeMode,
|
|
31
|
+
ValidationPlan,
|
|
32
|
+
load_config,
|
|
33
|
+
)
|
|
34
|
+
from .control_mode import (
|
|
35
|
+
Controlled,
|
|
36
|
+
ControlMode,
|
|
37
|
+
FixedValueError,
|
|
38
|
+
adaptive,
|
|
39
|
+
bounded_adaptive,
|
|
40
|
+
disabled,
|
|
41
|
+
fixed,
|
|
42
|
+
inherit,
|
|
43
|
+
)
|
|
44
|
+
from .engine import Engine, RunResult
|
|
45
|
+
from .exceptions import (
|
|
46
|
+
AuthorizationError,
|
|
47
|
+
ConfigError,
|
|
48
|
+
ConstraintViolation,
|
|
49
|
+
EmergencyStop,
|
|
50
|
+
GoodputError,
|
|
51
|
+
PluginError,
|
|
52
|
+
SinkError,
|
|
53
|
+
)
|
|
54
|
+
from .models import (
|
|
55
|
+
AttemptRecord,
|
|
56
|
+
Classification,
|
|
57
|
+
ErrorClass,
|
|
58
|
+
LogicalRequest,
|
|
59
|
+
LogicalResult,
|
|
60
|
+
Outcome,
|
|
61
|
+
Request,
|
|
62
|
+
ThrottleSignal,
|
|
63
|
+
)
|
|
64
|
+
from .report import Report
|
|
65
|
+
from .retry import RetryBudget, RetryPolicy
|
|
66
|
+
from .routing import Route, RouteKind, RoutePool
|
|
67
|
+
from .sources import (
|
|
68
|
+
from_async_iterable,
|
|
69
|
+
from_factory,
|
|
70
|
+
from_iterable,
|
|
71
|
+
from_jsonl,
|
|
72
|
+
parameter_matrix,
|
|
73
|
+
repeat,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
__version__ = "0.1.0"
|
|
77
|
+
|
|
78
|
+
__all__ = [
|
|
79
|
+
"__version__",
|
|
80
|
+
# high-level
|
|
81
|
+
"run",
|
|
82
|
+
"run_sync",
|
|
83
|
+
"Engine",
|
|
84
|
+
"RunResult",
|
|
85
|
+
# config
|
|
86
|
+
"EngineConfig",
|
|
87
|
+
"Mode",
|
|
88
|
+
"Objective",
|
|
89
|
+
"ThrottleScopeMode",
|
|
90
|
+
"SafetyCeilings",
|
|
91
|
+
"AuthorizationContext",
|
|
92
|
+
"ValidationPlan",
|
|
93
|
+
"load_config",
|
|
94
|
+
# control authority
|
|
95
|
+
"ControlMode",
|
|
96
|
+
"Controlled",
|
|
97
|
+
"FixedValueError",
|
|
98
|
+
"fixed",
|
|
99
|
+
"adaptive",
|
|
100
|
+
"bounded_adaptive",
|
|
101
|
+
"disabled",
|
|
102
|
+
"inherit",
|
|
103
|
+
# models
|
|
104
|
+
"Request",
|
|
105
|
+
"LogicalRequest",
|
|
106
|
+
"LogicalResult",
|
|
107
|
+
"AttemptRecord",
|
|
108
|
+
"Classification",
|
|
109
|
+
"ThrottleSignal",
|
|
110
|
+
"Outcome",
|
|
111
|
+
"ErrorClass",
|
|
112
|
+
# classify
|
|
113
|
+
"Classifiers",
|
|
114
|
+
"ResponseView",
|
|
115
|
+
# retry
|
|
116
|
+
"RetryPolicy",
|
|
117
|
+
"RetryBudget",
|
|
118
|
+
# routing
|
|
119
|
+
"Route",
|
|
120
|
+
"RouteKind",
|
|
121
|
+
"RoutePool",
|
|
122
|
+
# sources
|
|
123
|
+
"repeat",
|
|
124
|
+
"from_iterable",
|
|
125
|
+
"from_async_iterable",
|
|
126
|
+
"from_factory",
|
|
127
|
+
"from_jsonl",
|
|
128
|
+
"parameter_matrix",
|
|
129
|
+
# report
|
|
130
|
+
"Report",
|
|
131
|
+
# exceptions
|
|
132
|
+
"GoodputError",
|
|
133
|
+
"ConfigError",
|
|
134
|
+
"ConstraintViolation",
|
|
135
|
+
"AuthorizationError",
|
|
136
|
+
"EmergencyStop",
|
|
137
|
+
"SinkError",
|
|
138
|
+
"PluginError",
|
|
139
|
+
]
|
goodput/api.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""High-level convenience API (brief §32).
|
|
2
|
+
|
|
3
|
+
Small, intuitive entry points that wrap the :class:`~goodput.engine.Engine`:
|
|
4
|
+
|
|
5
|
+
* :func:`run` — execute an async request source and return a :class:`RunResult`.
|
|
6
|
+
* :func:`run_sync` — a synchronous wrapper for scripts/notebooks.
|
|
7
|
+
|
|
8
|
+
Power users instantiate :class:`~goodput.engine.Engine` directly for full control
|
|
9
|
+
over transport, routes, selectors, controllers, and sinks.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import anyio
|
|
15
|
+
|
|
16
|
+
from .config import EngineConfig
|
|
17
|
+
from .engine import Engine, RunResult
|
|
18
|
+
from .sources import RequestSource
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def run(source: RequestSource, *, config: EngineConfig | None = None, **kwargs: object) -> RunResult:
    """Run ``source`` to completion on a freshly constructed engine.

    The engine is entered as an async context manager and is exited before this
    coroutine returns, so it lives only for the duration of the call.

    Args:
        source: The request source handed to ``Engine.run``.
        config: Forwarded unchanged as the engine's first positional argument
            (``None`` included).
        **kwargs: Forwarded verbatim to the :class:`~goodput.engine.Engine`
            constructor (e.g. ``routes=``, ``selector=``, ``controller=``,
            ``sinks=``, ``seed=``).

    Returns:
        Whatever ``Engine.run`` returns for ``source``.
    """
    # ``kwargs`` is deliberately typed as ``object``, hence the ignore on the
    # constructor call.
    engine_cm = Engine(config, **kwargs)  # type: ignore[arg-type]
    async with engine_cm as engine:
        return await engine.run(source)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def run_sync(source: RequestSource, *, config: EngineConfig | None = None, **kwargs: object) -> RunResult:
    """Blocking counterpart of :func:`run` for scripts and notebooks.

    Args:
        source: The request source to execute.
        config: Engine configuration, forwarded to :func:`run`.
        **kwargs: Extra engine keyword arguments, forwarded to :func:`run`.

    Returns:
        The value produced by :func:`run`.
    """

    async def _entry() -> RunResult:
        # Zero-argument coroutine function so ``anyio.run`` can call it directly
        # while the arguments are captured by closure.
        return await run(source, config=config, **kwargs)

    return anyio.run(_entry)
|
goodput/checkpoint.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Checkpointing and resume (brief §29).
|
|
2
|
+
|
|
3
|
+
A checkpoint records which logical-request IDs have reached a terminal state so a
|
|
4
|
+
resumed run can skip them. We deliberately make a narrow, honest guarantee
|
|
5
|
+
(brief §29): resume avoids *re-executing terminal requests we recorded*; it does
|
|
6
|
+
**not** provide network-side exactly-once semantics — that requires server-side
|
|
7
|
+
idempotency or equivalent application guarantees.
|
|
8
|
+
|
|
9
|
+
The default store is a JSONL append log (crash-safe-ish: each terminal ID is one
|
|
10
|
+
line, flushed periodically). A config hash + schema version guard against
|
|
11
|
+
resuming with an incompatible configuration.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TextIO
|
|
19
|
+
|
|
20
|
+
SCHEMA_VERSION = 1
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CheckpointStore:
|
|
24
|
+
"""Append-only checkpoint of completed logical-request IDs."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, path: str | Path, *, config_hash: str = "", flush_every: int = 50) -> None:
|
|
27
|
+
self.path = Path(path)
|
|
28
|
+
self._config_hash = config_hash
|
|
29
|
+
self._flush_every = flush_every
|
|
30
|
+
self._fh: TextIO | None = None
|
|
31
|
+
self._since_flush = 0
|
|
32
|
+
self._completed: set[str] = set()
|
|
33
|
+
|
|
34
|
+
def load(self) -> set[str]:
|
|
35
|
+
"""Load previously-completed IDs (validating schema + config hash)."""
|
|
36
|
+
if not self.path.exists():
|
|
37
|
+
return set()
|
|
38
|
+
with open(self.path, encoding="utf-8") as fh:
|
|
39
|
+
first = fh.readline().strip()
|
|
40
|
+
if first:
|
|
41
|
+
header = json.loads(first)
|
|
42
|
+
if header.get("schema") != SCHEMA_VERSION:
|
|
43
|
+
raise ValueError(
|
|
44
|
+
f"checkpoint schema {header.get('schema')} != {SCHEMA_VERSION}"
|
|
45
|
+
)
|
|
46
|
+
if self._config_hash and header.get("config_hash") != self._config_hash:
|
|
47
|
+
raise ValueError(
|
|
48
|
+
"checkpoint config_hash mismatch: refusing to resume with a "
|
|
49
|
+
"different configuration"
|
|
50
|
+
)
|
|
51
|
+
for line in fh:
|
|
52
|
+
line = line.strip()
|
|
53
|
+
if not line:
|
|
54
|
+
continue
|
|
55
|
+
obj = json.loads(line)
|
|
56
|
+
if "id" in obj:
|
|
57
|
+
self._completed.add(obj["id"])
|
|
58
|
+
return set(self._completed)
|
|
59
|
+
|
|
60
|
+
def open(self) -> None:
|
|
61
|
+
new_file = not self.path.exists()
|
|
62
|
+
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
63
|
+
self._fh = open(self.path, "a", encoding="utf-8")
|
|
64
|
+
if new_file:
|
|
65
|
+
self._fh.write(
|
|
66
|
+
json.dumps({"schema": SCHEMA_VERSION, "config_hash": self._config_hash}) + "\n"
|
|
67
|
+
)
|
|
68
|
+
self._fh.flush()
|
|
69
|
+
|
|
70
|
+
def is_completed(self, logical_id: str) -> bool:
|
|
71
|
+
return logical_id in self._completed
|
|
72
|
+
|
|
73
|
+
def record(self, logical_id: str) -> None:
|
|
74
|
+
if self._fh is None:
|
|
75
|
+
self.open()
|
|
76
|
+
assert self._fh is not None
|
|
77
|
+
self._completed.add(logical_id)
|
|
78
|
+
self._fh.write(json.dumps({"id": logical_id}) + "\n")
|
|
79
|
+
self._since_flush += 1
|
|
80
|
+
if self._since_flush >= self._flush_every:
|
|
81
|
+
self._fh.flush()
|
|
82
|
+
self._since_flush = 0
|
|
83
|
+
|
|
84
|
+
def close(self) -> None:
|
|
85
|
+
if self._fh is not None:
|
|
86
|
+
self._fh.flush()
|
|
87
|
+
self._fh.close()
|
|
88
|
+
self._fh = None
|
goodput/circuit.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Circuit breakers (brief §21).
|
|
2
|
+
|
|
3
|
+
A circuit breaker protects a scope (origin/route/IP/credential/...) from
|
|
4
|
+
hammering a failing dependency. States: CLOSED → OPEN (on failure threshold) →
|
|
5
|
+
HALF_OPEN (after reset delay; admits limited probes) → CLOSED (on probe success)
|
|
6
|
+
or back to OPEN (on probe failure).
|
|
7
|
+
|
|
8
|
+
Throttle, overload, auth, and transport errors can be weighted differently
|
|
9
|
+
(brief §21): by default throttles count toward opening (they indicate the server
|
|
10
|
+
wants less traffic) but auth failures do *not* (they will not be fixed by
|
|
11
|
+
waiting).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import enum
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
|
|
19
|
+
from .clock import Clock, MonotonicClock
|
|
20
|
+
from .models import ErrorClass
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CircuitState(str, enum.Enum):
    """State of a circuit breaker; each member is also its lowercase ``str`` value."""

    # Attempts are allowed.
    CLOSED = "closed"
    # Attempts are rejected until the reset delay has elapsed.
    OPEN = "open"
    # A bounded number of probe attempts are admitted.
    HALF_OPEN = "half_open"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class CircuitConfig:
    """Thresholds and switches that govern when a circuit breaker opens and resets."""

    # Consecutive counted failures before opening.
    failure_threshold: int = 5
    # Failure ratio over the outcome window at or above which the breaker opens.
    error_ratio_threshold: float = 0.5
    # Minimum samples in the window before the ratio can trip the breaker.
    min_samples: int = 20
    # Seconds spent OPEN before probes are admitted again.
    reset_seconds: float = 5.0
    # Upper bound on probes admitted while HALF_OPEN.
    half_open_max_probes: int = 3
    # Maximum number of recent outcomes kept in the window.
    window_size: int = 50
    # Whether throttle errors count as failures.
    count_throttle_as_failure: bool = True
    # Whether auth errors count as failures.
    count_auth_as_failure: bool = False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class CircuitBreaker:
    """A single circuit breaker for one scope key.

    Callers ask :meth:`allow` before each attempt and report the outcome with
    :meth:`record`. The breaker opens after ``failure_threshold`` consecutive
    counted failures, or when the failure ratio over the outcome window reaches
    ``error_ratio_threshold`` with at least ``min_samples`` outcomes. After
    ``reset_seconds`` it admits up to ``half_open_max_probes`` probes; a probe
    success closes it, a counted probe failure re-opens it.
    """

    config: CircuitConfig = field(default_factory=CircuitConfig)
    # Source of timestamps via ``clock.now()``.
    clock: Clock = field(default_factory=MonotonicClock)
    state: CircuitState = CircuitState.CLOSED
    # Run of counted failures since the last success (not updated in HALF_OPEN).
    _consecutive_failures: int = 0
    # ``clock.now()`` at the most recent trip; ``None`` once closed.
    _opened_at: float | None = None
    # Probe slots currently consumed in HALF_OPEN.
    _half_open_probes: int = 0
    _window: list[bool] = field(default_factory=list)  # True = failure

    def _counts_as_failure(self, error_class: ErrorClass) -> bool:
        """Return whether ``error_class`` counts toward opening the breaker."""
        if error_class is ErrorClass.NONE:
            return False
        if error_class is ErrorClass.THROTTLE and not self.config.count_throttle_as_failure:
            return False
        if error_class is ErrorClass.AUTH and not self.config.count_auth_as_failure:
            return False
        return True

    def allow(self) -> bool:
        """Whether a new attempt may proceed under the current state.

        May move the breaker from OPEN to HALF_OPEN once ``reset_seconds`` have
        elapsed since it opened. In HALF_OPEN each ``True`` result consumes one
        probe slot.
        """
        if self.state is CircuitState.CLOSED:
            return True
        if self.state is CircuitState.OPEN:
            assert self._opened_at is not None
            if self.clock.now() - self._opened_at < self.config.reset_seconds:
                return False
            self.state = CircuitState.HALF_OPEN
            # The attempt admitted by this transition is itself the first probe,
            # so it must occupy a slot; otherwise max_probes + 1 get through.
            self._half_open_probes = 1
            return True
        # HALF_OPEN: admit a bounded number of probes.
        if self._half_open_probes < self.config.half_open_max_probes:
            self._half_open_probes += 1
            return True
        return False

    def record(self, *, success: bool, error_class: ErrorClass = ErrorClass.NONE) -> CircuitState:
        """Record an attempt outcome and return the resulting state.

        Args:
            success: Whether the attempt succeeded.
            error_class: Error category of a failed attempt; decides (together
                with the config switches) whether it counts as a failure.

        Returns:
            The breaker state after the outcome has been applied.
        """
        is_failure = (not success) and self._counts_as_failure(error_class)

        self._window.append(is_failure)
        if len(self._window) > self.config.window_size:
            self._window.pop(0)

        if self.state is CircuitState.HALF_OPEN:
            if is_failure:
                self._trip()
            elif success:
                self._close()
            elif self._half_open_probes > 0:
                # Inconclusive probe (not a success, not a counted failure):
                # hand the slot back so another probe can run. Without this the
                # breaker could stay HALF_OPEN with no slots left forever.
                self._half_open_probes -= 1
            return self.state

        if is_failure:
            self._consecutive_failures += 1
        elif success:
            self._consecutive_failures = 0

        if self._should_trip():
            self._trip()
        return self.state

    def _should_trip(self) -> bool:
        """Return whether the consecutive-failure or ratio threshold is reached."""
        if self._consecutive_failures >= self.config.failure_threshold:
            return True
        if len(self._window) >= self.config.min_samples:
            ratio = sum(self._window) / len(self._window)
            if ratio >= self.config.error_ratio_threshold:
                return True
        return False

    def _trip(self) -> None:
        """Enter OPEN, stamp the opening time, and reset the probe counter."""
        self.state = CircuitState.OPEN
        self._opened_at = self.clock.now()
        self._half_open_probes = 0

    def _close(self) -> None:
        """Enter CLOSED and clear the failure run, opening time, and window."""
        self.state = CircuitState.CLOSED
        self._consecutive_failures = 0
        self._opened_at = None
        self._window.clear()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class CircuitRegistry:
    """Lazily created circuit breakers, one per scope key (brief §21 scopes).

    Every breaker created here is given the registry's config object and clock.
    """

    def __init__(self, config: CircuitConfig | None = None, clock: Clock | None = None) -> None:
        """Store the config and clock, falling back to default instances."""
        self._breakers: dict[str, CircuitBreaker] = {}
        self._config = config or CircuitConfig()
        self._clock = clock or MonotonicClock()

    def breaker(self, key: str) -> CircuitBreaker:
        """Return the breaker registered under ``key``, creating it on first use."""
        try:
            return self._breakers[key]
        except KeyError:
            created = CircuitBreaker(config=self._config, clock=self._clock)
            self._breakers[key] = created
            return created
|