hedge-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hedge/__init__.py +17 -0
- hedge/_options.py +35 -0
- hedge/_stats.py +75 -0
- hedge/budget/__init__.py +5 -0
- hedge/budget/_token_bucket.py +52 -0
- hedge/interceptor/__init__.py +23 -0
- hedge/interceptor/_grpc.py +302 -0
- hedge/py.typed +0 -0
- hedge/sketch/__init__.py +6 -0
- hedge/sketch/_ddsketch.py +173 -0
- hedge/sketch/_windowed.py +115 -0
- hedge/transport/__init__.py +24 -0
- hedge/transport/_aiohttp.py +135 -0
- hedge/transport/_base.py +159 -0
- hedge/transport/_httpx.py +84 -0
- hedge_python-0.1.0.dist-info/METADATA +367 -0
- hedge_python-0.1.0.dist-info/RECORD +19 -0
- hedge_python-0.1.0.dist-info/WHEEL +4 -0
- hedge_python-0.1.0.dist-info/licenses/LICENSE +21 -0
hedge/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Adaptive hedged request library for Python.
|
|
2
|
+
|
|
3
|
+
Learns per-host latency distributions using DDSketch, fires a backup request
|
|
4
|
+
when the primary exceeds its estimated p90, and caps hedge rate with a token
|
|
5
|
+
bucket to prevent load amplification during outages.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from hedge._options import HedgeConfig
|
|
9
|
+
from hedge._stats import Stats, StatsSnapshot
|
|
10
|
+
|
|
11
|
+
# Names re-exported from the private submodules as the package's public API.
__all__ = [
    "HedgeConfig",
    "Stats",
    "StatsSnapshot",
]

# Version string of this package release.
__version__ = "0.1.0"
|
hedge/_options.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Configuration options for hedge transports and interceptors."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from hedge._stats import Stats
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class HedgeConfig:
    """Configuration for hedge behavior.

    Attributes:
        percentile: Sketch quantile used as hedge trigger (default: 0.90).
        max_hedges: Maximum concurrent hedge requests per call (default: 1).
        budget_percent: Max hedge rate as percent of total traffic (default: 10.0).
        estimated_rps: Expected requests per second; sets token bucket capacity (default: 100.0).
        min_delay: Floor on the hedge delay in seconds (default: 0.001).
        warmup_requests: Number of initial requests using fixed delay (default: 20).
        warmup_delay: Fixed hedge delay during warmup in seconds (default: 0.01).
        window_duration: Sketch window rotation interval in seconds (default: 30.0).
        stats: Optional ``Stats`` collector to record into (default: None).
            The gRPC interceptors create their own ``Stats`` when this is
            None. Excluded from the dataclass ``repr``.
    """

    percentile: float = 0.90
    # NOTE(review): the gRPC interceptors in this package never read
    # ``max_hedges`` and launch at most one hedge per call - confirm whether
    # the HTTP transports honour it.
    max_hedges: int = 1
    budget_percent: float = 10.0
    estimated_rps: float = 100.0
    min_delay: float = 0.001
    warmup_requests: int = 20
    warmup_delay: float = 0.01
    window_duration: float = 30.0
    stats: Stats | None = field(default=None, repr=False)
|
hedge/_stats.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Thread-safe statistics for hedge operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Stats:
    """Lock-protected counters describing hedging activity.

    Every counter update, as well as ``snapshot`` and ``hedge_rate``, runs
    while holding one shared ``threading.Lock``.
    """

    # Counter attributes copied into a snapshot.
    _COUNTERS = (
        "total_requests",
        "hedged_requests",
        "hedge_wins",
        "primary_wins",
        "budget_exhausted",
        "warmup_requests",
    )

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self.total_requests: int = 0
        self.hedged_requests: int = 0
        self.hedge_wins: int = 0
        self.primary_wins: int = 0
        self.budget_exhausted: int = 0
        self.warmup_requests: int = 0

    def _increment(self, field: str, value: int = 1) -> None:
        """Add ``value`` to the counter attribute named ``field`` under the lock."""
        with self._lock:
            current = getattr(self, field)
            setattr(self, field, current + value)

    def increment_total(self) -> None:
        """Bump ``total_requests`` by one."""
        self._increment("total_requests")

    def increment_hedged(self) -> None:
        """Bump ``hedged_requests`` by one."""
        self._increment("hedged_requests")

    def increment_hedge_wins(self) -> None:
        """Bump ``hedge_wins`` by one."""
        self._increment("hedge_wins")

    def increment_primary_wins(self) -> None:
        """Bump ``primary_wins`` by one."""
        self._increment("primary_wins")

    def increment_budget_exhausted(self) -> None:
        """Bump ``budget_exhausted`` by one."""
        self._increment("budget_exhausted")

    def increment_warmup(self) -> None:
        """Bump ``warmup_requests`` by one."""
        self._increment("warmup_requests")

    def snapshot(self) -> StatsSnapshot:
        """Return an immutable copy of all counters read under a single lock hold."""
        with self._lock:
            frozen = {name: getattr(self, name) for name in self._COUNTERS}
        return StatsSnapshot(**frozen)

    def hedge_rate(self) -> float:
        """Return ``hedged_requests / total_requests``; 0.0 before any request."""
        with self._lock:
            total = self.total_requests
            hedged = self.hedged_requests
        return hedged / total if total else 0.0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class StatsSnapshot:
    """Immutable point-in-time snapshot of Stats.

    Each field holds the value of the same-named ``Stats`` counter at the
    moment ``Stats.snapshot()`` was called. The dataclass is frozen, so the
    fields cannot be reassigned afterwards.
    """

    # Copy of Stats.total_requests.
    total_requests: int
    # Copy of Stats.hedged_requests.
    hedged_requests: int
    # Copy of Stats.hedge_wins.
    hedge_wins: int
    # Copy of Stats.primary_wins.
    primary_wins: int
    # Copy of Stats.budget_exhausted.
    budget_exhausted: int
    # Copy of Stats.warmup_requests.
    warmup_requests: int
|
hedge/budget/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Token bucket algorithm for controlling hedge request rate."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
import time
from collections.abc import Callable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TokenBucket:
    """Controls hedge request rate using a token bucket algorithm.

    The bucket refills at ``estimated_rps * budget_percent / 100`` tokens per
    second and holds at most ``max(2 * rate, 1)`` tokens. It starts full.
    During genuine outages the bucket drains and hedging stops, preventing
    the load-doubling spiral that would deepen the incident.

    Args:
        budget_percent: Max hedge rate as percent of total traffic.
        estimated_rps: Expected requests per second.
        clock: Zero-argument callable returning the current time in seconds.
            Defaults to ``time.monotonic``; injectable so refill behaviour
            can be driven deterministically.
    """

    def __init__(
        self,
        budget_percent: float = 10.0,
        estimated_rps: float = 100.0,
        *,
        clock: Callable[[], float] = time.monotonic,
    ) -> None:
        self._lock = threading.Lock()
        self._clock = clock
        self._budget_percent = budget_percent
        self._rate = self._rate_for(estimated_rps)
        self._max_burst = max(self._rate * 2, 1.0)
        self._tokens = self._max_burst
        self._last_refill = self._clock()

    def _rate_for(self, rps: float) -> float:
        """Return the refill rate (tokens/second) for ``rps``, never negative."""
        # A negative rate would drain tokens without bound while idle.
        return max(rps * (self._budget_percent / 100.0), 0.0)

    def _refill_locked(self) -> None:
        """Credit tokens for the time since the last refill. Caller holds the lock."""
        now = self._clock()
        elapsed = now - self._last_refill
        self._last_refill = now
        self._tokens = min(self._tokens + elapsed * self._rate, self._max_burst)

    def try_acquire(self) -> bool:
        """Return True if a hedge token is available, False if over budget.

        A successful call consumes exactly one token.
        """
        with self._lock:
            self._refill_locked()
            if self._tokens < 1.0:
                return False
            self._tokens -= 1.0
            return True

    def set_rps(self, rps: float) -> None:
        """Update the hedge rate as traffic changes, preserving the budget percentage.

        Time elapsed before this call is settled at the old rate first, so
        the new rate only applies from now on.
        """
        with self._lock:
            self._refill_locked()
            self._rate = self._rate_for(rps)
            self._max_burst = max(self._rate * 2, 1.0)
            # A lower rate shrinks the cap; drop tokens above the new capacity.
            if self._tokens > self._max_burst:
                self._tokens = self._max_burst
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""gRPC interceptors with adaptive hedging."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from hedge.interceptor._grpc import HedgedServerStreamInterceptor, HedgedUnaryInterceptor
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def __getattr__(name: str):  # type: ignore[no-untyped-def]
    """Lazily resolve the interceptor classes on first attribute access.

    The ``hedge.interceptor._grpc`` import is deferred until one of the two
    public class names is requested; any other name raises ``AttributeError``.
    """
    if name == "HedgedServerStreamInterceptor":
        from hedge.interceptor._grpc import HedgedServerStreamInterceptor as resolved
    elif name == "HedgedUnaryInterceptor":
        from hedge.interceptor._grpc import HedgedUnaryInterceptor as resolved
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return resolved
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Public names of this package; both are resolved on demand by the
# module-level ``__getattr__`` rather than imported eagerly.
__all__ = ["HedgedUnaryInterceptor", "HedgedServerStreamInterceptor"]
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Hedged gRPC client interceptors.
|
|
2
|
+
|
|
3
|
+
Supports both Unary-Unary and Unary-Stream (server streaming) RPCs.
|
|
4
|
+
|
|
5
|
+
Usage::
|
|
6
|
+
|
|
7
|
+
from hedge import HedgeConfig
|
|
8
|
+
from hedge.interceptor import HedgedUnaryInterceptor
|
|
9
|
+
|
|
10
|
+
channel = grpc.aio.insecure_channel("localhost:50051")
|
|
11
|
+
hedged_channel = grpc.aio.insecure_channel(
|
|
12
|
+
"localhost:50051",
|
|
13
|
+
interceptors=[HedgedUnaryInterceptor(config=HedgeConfig())],
|
|
14
|
+
)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import contextlib
|
|
21
|
+
import math
|
|
22
|
+
import time
|
|
23
|
+
from collections import defaultdict
|
|
24
|
+
from typing import Any, Callable
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
import grpc # type: ignore[import-untyped]
|
|
28
|
+
import grpc.aio # type: ignore[import-untyped]
|
|
29
|
+
except ImportError as exc:
|
|
30
|
+
raise ImportError(
|
|
31
|
+
"grpcio is required for gRPC interceptors. "
|
|
32
|
+
"Install it with: pip install hedge-python[grpc]"
|
|
33
|
+
) from exc
|
|
34
|
+
|
|
35
|
+
from hedge._options import HedgeConfig
|
|
36
|
+
from hedge._stats import Stats
|
|
37
|
+
from hedge.budget import TokenBucket
|
|
38
|
+
from hedge.sketch import WindowedSketch
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class HedgedUnaryInterceptor(grpc.aio.UnaryUnaryClientInterceptor):  # type: ignore[misc]
    """gRPC Unary-Unary client interceptor with adaptive hedging.

    Keeps one latency sketch and one request counter per RPC method name
    (``client_call_details.method``) and fires a single backup RPC when the
    primary exceeds the configured latency percentile, subject to the hedge
    budget.

    Args:
        config: Hedge configuration. Defaults to ``HedgeConfig()``.
    """

    def __init__(self, config: HedgeConfig | None = None) -> None:
        self._config = config or HedgeConfig()
        self.stats = self._config.stats or Stats()
        self._budget = TokenBucket(self._config.budget_percent, self._config.estimated_rps)
        self._sketches: dict[str, WindowedSketch] = {}
        self._counters: dict[str, int] = defaultdict(int)
        self._lock = asyncio.Lock()

    def _sketch_for(self, target: str) -> WindowedSketch:
        """Return the latency sketch for ``target``, creating and starting it on first use."""
        sketch = self._sketches.get(target)
        if sketch is None:
            sketch = WindowedSketch(0.01, self._config.window_duration)
            sketch.start_async()
            self._sketches[target] = sketch
        return sketch

    async def _increment_counter(self, target: str) -> int:
        """Increment and return the number of requests seen so far for ``target``."""
        async with self._lock:
            self._counters[target] += 1
            return self._counters[target]

    @staticmethod
    def _pick_winner(done: set[asyncio.Task[Any]], primary_task: asyncio.Task[Any]) -> asyncio.Task[Any]:
        """Choose the attempt whose outcome is reported to the caller.

        Prefers an attempt that finished without error, and the primary over
        the hedge when both qualify. If every finished attempt failed, the
        first candidate (the primary when present) is returned so its error
        propagates.
        """
        candidates = sorted(done, key=lambda task: task is not primary_task)
        for task in candidates:
            if not task.cancelled() and task.exception() is None:
                return task
        return candidates[0]

    @staticmethod
    def _abort(task: asyncio.Task[Any], call: Any) -> None:
        """Cancel the underlying RPC (best-effort) and then its asyncio task."""
        if call is not None:
            with contextlib.suppress(Exception):
                call.cancel()
        task.cancel()

    @classmethod
    async def _discard(cls, task: asyncio.Task[Any], call: Any) -> None:
        """Abort a losing attempt and wait for its task so it is not leaked.

        Awaiting also retrieves the outcome of an attempt that had already
        finished, so a failed loser does not produce an "exception was never
        retrieved" warning.
        """
        cls._abort(task, call)
        with contextlib.suppress(asyncio.CancelledError, Exception):
            await task

    async def intercept_unary_unary(
        self,
        continuation: Callable[..., Any],
        client_call_details: grpc.aio.ClientCallDetails,
        request: Any,
    ) -> Any:
        """Intercept a unary-unary RPC with adaptive hedging.

        Returns:
            The response message of the winning attempt.

        Raises:
            Whatever the reported attempt raised. If the interceptor is
            cancelled, every attempt still in flight is cancelled as well.
        """
        self.stats.increment_total()

        target = client_call_details.method
        sketch = self._sketch_for(target)
        request_number = await self._increment_counter(target)

        if request_number <= self._config.warmup_requests:
            self.stats.increment_warmup()
            hedge_delay = self._config.warmup_delay
        else:
            estimate = sketch.quantile(self._config.percentile)
            # An empty sketch yields NaN; fall back to the warmup delay.
            if estimate > 0 and not math.isnan(estimate):
                hedge_delay = estimate
            else:
                hedge_delay = self._config.warmup_delay

        hedge_delay = max(hedge_delay, self._config.min_delay)
        start = time.monotonic()

        # We track the live Call object for each in-flight attempt so we can
        # cancel the underlying RPC (not just the asyncio task) when a loser
        # is dropped. ``continuation`` returns a Call object almost
        # immediately; the actual RTT is spent in ``await call``. We must
        # combine both steps inside the task, otherwise the task completes
        # instantly and the hedge timer never fires.
        call_holder: dict[asyncio.Task[Any], Any] = {}

        async def invoke() -> Any:
            call = await continuation(client_call_details, request)
            current = asyncio.current_task()
            if current is not None:
                call_holder[current] = call
            return await call

        primary_task = asyncio.create_task(invoke())
        attempts = [primary_task]
        try:
            # Give the primary ``hedge_delay`` seconds to finish on its own.
            done, _ = await asyncio.wait({primary_task}, timeout=hedge_delay)
            if done:
                response = primary_task.result()
                sketch.add(time.monotonic() - start)
                return response

            # Budget check: without a token we simply keep waiting on the primary.
            if not self._budget.try_acquire():
                self.stats.increment_budget_exhausted()
                response = await primary_task
                sketch.add(time.monotonic() - start)
                return response

            # Launch hedge
            self.stats.increment_hedged()
            hedge_task = asyncio.create_task(invoke())
            attempts.append(hedge_task)

            done, _ = await asyncio.wait(set(attempts), return_when=asyncio.FIRST_COMPLETED)
            winner_task = self._pick_winner(done, primary_task)
            elapsed = time.monotonic() - start

            for task in attempts:
                if task is not winner_task:
                    await self._discard(task, call_holder.get(task))

            if winner_task is primary_task:
                self.stats.increment_primary_wins()
            else:
                self.stats.increment_hedge_wins()

            response = winner_task.result()
            sketch.add(elapsed)
            return response
        finally:
            # ``asyncio.wait`` does not cancel its tasks when this coroutine is
            # cancelled or fails, so abort whatever is still running.
            for task in attempts:
                if not task.done():
                    self._abort(task, call_holder.get(task))
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class HedgedServerStreamInterceptor(grpc.aio.UnaryStreamClientInterceptor):  # type: ignore[misc]
    """gRPC Unary-Stream (server streaming) client interceptor with adaptive hedging.

    Uses time-to-first-message (TTFM) as the hedge signal: if the primary
    stream does not yield its first message within the estimated percentile
    latency, a backup stream is started. Whichever stream yields first wins;
    the loser is cancelled.

    Sketches and request counters are kept per RPC method name
    (``client_call_details.method``).

    Args:
        config: Hedge configuration. Defaults to ``HedgeConfig()``.
    """

    def __init__(self, config: HedgeConfig | None = None) -> None:
        self._config = config or HedgeConfig()
        self.stats = self._config.stats or Stats()
        self._budget = TokenBucket(self._config.budget_percent, self._config.estimated_rps)
        self._sketches: dict[str, WindowedSketch] = {}
        self._counters: dict[str, int] = defaultdict(int)
        self._lock = asyncio.Lock()

    def _sketch_for(self, target: str) -> WindowedSketch:
        """Return the latency sketch for ``target``, creating and starting it on first use."""
        sketch = self._sketches.get(target)
        if sketch is None:
            sketch = WindowedSketch(0.01, self._config.window_duration)
            sketch.start_async()
            self._sketches[target] = sketch
        return sketch

    async def _increment_counter(self, target: str) -> int:
        """Increment and return the number of requests seen so far for ``target``."""
        async with self._lock:
            self._counters[target] += 1
            return self._counters[target]

    @staticmethod
    def _pick_winner(done: set[asyncio.Task[Any]], primary_task: asyncio.Task[Any]) -> asyncio.Task[Any]:
        """Choose the attempt whose stream is handed to the caller.

        Prefers an attempt that produced its first message without error, and
        the primary over the hedge when both qualify. If every finished
        attempt failed, the first candidate (the primary when present) is
        returned so its error propagates.
        """
        candidates = sorted(done, key=lambda task: task is not primary_task)
        for task in candidates:
            if not task.cancelled() and task.exception() is None:
                return task
        return candidates[0]

    @staticmethod
    def _abort(task: asyncio.Task[Any], call: Any) -> None:
        """Cancel the underlying stream (best-effort) and then its asyncio task."""
        if call is not None:
            with contextlib.suppress(Exception):
                call.cancel()
        task.cancel()

    @classmethod
    async def _discard(cls, task: asyncio.Task[Any], call: Any) -> None:
        """Abort a losing attempt and wait for its task so it is not leaked.

        The stream is cancelled even when the losing task already finished its
        first read; otherwise that open stream would never be closed.
        """
        cls._abort(task, call)
        with contextlib.suppress(asyncio.CancelledError, Exception):
            await task

    async def intercept_unary_stream(
        self,
        continuation: Callable[..., Any],
        client_call_details: grpc.aio.ClientCallDetails,
        request: Any,
    ) -> Any:
        """Intercept a unary-stream RPC with adaptive hedging based on TTFM.

        Returns:
            An async iterator that yields the already-read first message and
            then the remaining messages of the winning stream.

        Raises:
            Whatever the reported attempt raised while opening the stream or
            reading its first message. If the interceptor is cancelled, every
            attempt still in flight is cancelled as well.
        """
        self.stats.increment_total()

        target = client_call_details.method
        sketch = self._sketch_for(target)
        request_number = await self._increment_counter(target)

        if request_number <= self._config.warmup_requests:
            self.stats.increment_warmup()
            hedge_delay = self._config.warmup_delay
        else:
            estimate = sketch.quantile(self._config.percentile)
            # An empty sketch yields NaN; fall back to the warmup delay.
            if estimate > 0 and not math.isnan(estimate):
                hedge_delay = estimate
            else:
                hedge_delay = self._config.warmup_delay

        hedge_delay = max(hedge_delay, self._config.min_delay)
        start = time.monotonic()

        # Combine ``await continuation`` (which may itself include connection
        # setup / header round-trips) and ``call.read()`` (TTFM) in one task,
        # so the timer reflects the real time-to-first-message.
        call_holder: dict[asyncio.Task[Any], Any] = {}

        async def invoke_and_read_first() -> tuple[Any, Any]:
            call = await continuation(client_call_details, request)
            current = asyncio.current_task()
            if current is not None:
                call_holder[current] = call
            first_msg = await call.read()
            return call, first_msg

        primary_task = asyncio.create_task(invoke_and_read_first())
        attempts = [primary_task]
        try:
            # Give the primary ``hedge_delay`` seconds to yield its first message.
            done, _ = await asyncio.wait({primary_task}, timeout=hedge_delay)
            if done:
                primary_call, first_msg = primary_task.result()
                sketch.add(time.monotonic() - start)
                return _PrependedStream(first_msg, primary_call)

            # Budget check: without a token we simply keep waiting on the primary.
            if not self._budget.try_acquire():
                self.stats.increment_budget_exhausted()
                primary_call, first_msg = await primary_task
                sketch.add(time.monotonic() - start)
                return _PrependedStream(first_msg, primary_call)

            # Launch hedge
            self.stats.increment_hedged()
            hedge_task = asyncio.create_task(invoke_and_read_first())
            attempts.append(hedge_task)

            done, _ = await asyncio.wait(set(attempts), return_when=asyncio.FIRST_COMPLETED)
            winner_task = self._pick_winner(done, primary_task)
            elapsed = time.monotonic() - start

            # Drop the loser before inspecting the winner, so a failing winner
            # cannot leave the other attempt running.
            for task in attempts:
                if task is not winner_task:
                    await self._discard(task, call_holder.get(task))

            winner_call, first_msg = winner_task.result()
            sketch.add(elapsed)

            if winner_task is primary_task:
                self.stats.increment_primary_wins()
            else:
                self.stats.increment_hedge_wins()

            return _PrependedStream(first_msg, winner_call)
        finally:
            # ``asyncio.wait`` does not cancel its tasks when this coroutine is
            # cancelled or fails, so abort whatever is still running.
            for task in attempts:
                if not task.done():
                    self._abort(task, call_holder.get(task))
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class _PrependedStream:
    """Async iterator that replays one buffered message before the live stream.

    The interceptor reads the first message itself to measure TTFM; this
    wrapper hands that message back first and then continues with
    ``call.read()`` until the EOF sentinel appears.
    """

    def __init__(self, first_msg: Any, call: Any) -> None:
        self._first_msg = first_msg
        self._call = call
        self._first_yielded = False

    def __aiter__(self) -> _PrependedStream:
        return self

    async def __anext__(self) -> Any:
        if self._first_yielded:
            item = await self._call.read()
        else:
            # First pull: serve the buffered message without touching the call.
            self._first_yielded = True
            item = self._first_msg
        if item is grpc.aio.EOF:
            raise StopAsyncIteration
        return item
|
hedge/py.typed
ADDED
|
File without changes
|
hedge/sketch/__init__.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""DDSketch streaming quantile estimator.
|
|
2
|
+
|
|
3
|
+
Based on Masson et al., "DDSketch: A fast and fully-mergeable quantile sketch
|
|
4
|
+
with relative-error guarantees", VLDB 2019.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _LogMapping:
    """Maps values to bucket indices using logarithmic scaling.

    For a given relative accuracy alpha, gamma = (1+alpha)/(1-alpha).
    A positive value x maps to bucket index ceil(ln(x) / ln(gamma)), so
    bucket i covers the interval (gamma^(i-1), gamma^i].

    The representative value of bucket i is 2 * gamma^i / (gamma + 1). Its
    relative distance to either end of the bucket is exactly
    (gamma - 1) / (gamma + 1) = alpha, so every value in the bucket is
    estimated with relative error <= alpha.
    """

    __slots__ = ("gamma", "multiplier")

    def __init__(self, relative_accuracy: float) -> None:
        self.gamma = (1 + relative_accuracy) / (1 - relative_accuracy)
        # 1 / ln(gamma), cached so ``index`` is a single multiply.
        self.multiplier = 1.0 / math.log(self.gamma)

    def index(self, value: float) -> int:
        """Return the bucket index for a strictly positive value."""
        return math.ceil(math.log(value) * self.multiplier)

    def value(self, index: int) -> float:
        """Return the representative value for a bucket.

        The geometric midpoint gamma^(index - 0.5) would have a worst-case
        relative error of sqrt(gamma) - 1, which is slightly larger than
        alpha; 2 * gamma^index / (gamma + 1) meets the bound at both ends.
        """
        upper_bound = math.exp(index / self.multiplier)  # gamma ** index
        return 2.0 * upper_bound / (self.gamma + 1.0)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _Store:
    """Sparse mapping from bucket index to that bucket's count, plus a running total."""

    __slots__ = ("bins", "count")

    def __init__(self) -> None:
        self.bins: dict[int, float] = defaultdict(float)
        self.count: float = 0.0

    def add(self, index: int) -> None:
        """Record one observation in bucket ``index``."""
        buckets = self.bins
        buckets[index] = buckets[index] + 1.0
        self.count = self.count + 1.0

    def merge(self, other: _Store) -> None:
        """Add every bucket count and the total of ``other`` into this store."""
        buckets = self.bins
        for bucket, weight in other.bins.items():
            buckets[bucket] += weight
        self.count += other.count

    def reset(self) -> None:
        """Drop all buckets by installing a fresh map and zeroing the total."""
        self.count = 0.0
        self.bins = defaultdict(float)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class DDSketch:
    """Streaming quantile sketch with relative-error guarantees.

    Positive and negative values are stored in separate sparse bucket maps.
    Zero values are counted separately. Min and max are tracked exactly, and
    every quantile estimate is clamped to that exact range.

    Property: for any quantile q, the returned estimate satisfies
        |estimate - true_value| / |true_value| <= relative_accuracy

    Args:
        relative_accuracy: Target relative accuracy. 0.01 means estimates
            are within +/-1% of the true value. Must be in (0, 1).

    Raises:
        ValueError: If ``relative_accuracy`` is not in (0, 1).
    """

    def __init__(self, relative_accuracy: float = 0.01) -> None:
        if not (0 < relative_accuracy < 1):
            raise ValueError("relative_accuracy must be in (0, 1)")
        self._mapping = _LogMapping(relative_accuracy)
        self._positive = _Store()
        self._negative = _Store()
        self._zero_count: float = 0.0
        self._count: int = 0
        self._min: float = math.inf
        self._max: float = -math.inf

    @property
    def count(self) -> int:
        """Total number of values added."""
        return self._count

    def add(self, value: float) -> None:
        """Record a single value. O(1) per insert.

        NaN and infinite values are silently ignored.
        """
        if math.isnan(value) or math.isinf(value):
            return

        if value > 0:
            self._positive.add(self._mapping.index(value))
        elif value < 0:
            # Negative values are bucketed by magnitude in their own store.
            self._negative.add(self._mapping.index(-value))
        else:
            self._zero_count += 1.0

        self._count += 1
        if value < self._min:
            self._min = value
        if value > self._max:
            self._max = value

    def quantile(self, q: float) -> float:
        """Return the estimated value at quantile q in [0, 1].

        Returns math.nan if the sketch is empty. ``q <= 0`` returns the exact
        minimum and ``q >= 1`` the exact maximum.

        The estimate satisfies the relative-error guarantee:
            |estimate - true_value| / |true_value| <= relative_accuracy
        """
        if self._count == 0:
            return math.nan
        if q <= 0:
            return self._min
        if q >= 1:
            return self._max

        # A bucket representative can lie outside the observed range (for
        # example above the exact maximum). The true quantile never does, so
        # clamping can only bring the estimate closer to it, and it keeps
        # results consistent with quantile(0) and quantile(1).
        estimate = self._bucket_estimate(q)
        return min(max(estimate, self._min), self._max)

    def _bucket_estimate(self, q: float) -> float:
        """Return the unclamped bucket representative for quantile ``q`` in (0, 1)."""
        rank: float = float(math.ceil(q * self._count))

        # Negative values: iterate descending (most negative -> least negative)
        if self._negative.count > 0:
            cumulative = 0.0
            for idx in sorted(self._negative.bins, reverse=True):
                cumulative += self._negative.bins[idx]
                if cumulative >= rank:
                    return -self._mapping.value(idx)
            rank -= self._negative.count

        # Zero values
        if self._zero_count > 0:
            rank -= self._zero_count
            if rank <= 0:
                return 0.0

        # Positive values: iterate ascending (least positive -> most positive)
        if self._positive.count > 0:
            cumulative = 0.0
            for idx in sorted(self._positive.bins):
                cumulative += self._positive.bins[idx]
                if cumulative >= rank:
                    return self._mapping.value(idx)

        return self._max

    def merge(self, other: DDSketch) -> None:
        """Combine other into self. The merge is exact: no additional error accumulates.

        Raises:
            ValueError: If ``other`` was built with a different relative
                accuracy. Its bucket indices would then refer to different
                value ranges and adding the counts would corrupt this sketch.
        """
        if other._mapping.gamma != self._mapping.gamma:
            raise ValueError("cannot merge sketches with different relative accuracies")
        self._positive.merge(other._positive)
        self._negative.merge(other._negative)
        self._zero_count += other._zero_count
        self._count += other._count
        # An empty sketch still carries the +/-inf sentinels; skip them.
        if other._count > 0:
            if other._min < self._min:
                self._min = other._min
            if other._max > self._max:
                self._max = other._max

    def reset(self) -> None:
        """Clear all state. Used for windowed / tumbling-window decay."""
        self._positive.reset()
        self._negative.reset()
        self._zero_count = 0.0
        self._count = 0
        self._min = math.inf
        self._max = -math.inf
|