rollgate 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rollgate/__init__.py +135 -0
- rollgate/cache.py +260 -0
- rollgate/circuit_breaker.py +240 -0
- rollgate/client.py +562 -0
- rollgate/dedup.py +172 -0
- rollgate/errors.py +162 -0
- rollgate/evaluate.py +345 -0
- rollgate/metrics.py +567 -0
- rollgate/reasons.py +115 -0
- rollgate/retry.py +177 -0
- rollgate/tracing.py +434 -0
- rollgate-1.0.0.dist-info/METADATA +288 -0
- rollgate-1.0.0.dist-info/RECORD +14 -0
- rollgate-1.0.0.dist-info/WHEEL +4 -0
rollgate/metrics.py
ADDED
@@ -0,0 +1,567 @@
"""
SDK Metrics Collection.
Tracks request performance, cache efficiency, error rates, and flag evaluations.
"""

import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Callable, Any
from enum import Enum
from collections import defaultdict


class CircuitStateValue(Enum):
    """Circuit breaker state values."""
    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half-open"


@dataclass
class WindowedStats:
    """Statistics for a time window."""
    requests: int = 0
    errors: int = 0
    avg_latency_ms: float = 0
    error_rate: float = 0


@dataclass
class FlagStats:
    """Statistics for a single flag."""
    evaluations: int = 0
    true_count: int = 0
    false_count: int = 0
    true_rate: float = 0
    avg_evaluation_time_ms: float = 0


@dataclass
class FlagEvaluationMetrics:
    """Flag evaluation metrics."""
    total_evaluations: int = 0
    evaluations_per_flag: Dict[str, FlagStats] = field(default_factory=dict)
    avg_evaluation_time_ms: float = 0


@dataclass
class TimeWindowMetrics:
    """Time-windowed metrics."""
    one_minute: WindowedStats = field(default_factory=WindowedStats)
    five_minutes: WindowedStats = field(default_factory=WindowedStats)
    fifteen_minutes: WindowedStats = field(default_factory=WindowedStats)
    one_hour: WindowedStats = field(default_factory=WindowedStats)


@dataclass
class MetricsSnapshot:
    """Complete snapshot of all metrics."""
    # Request metrics
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    success_rate: float = 0
    error_rate: float = 0

    # Latency metrics (in milliseconds)
    avg_latency_ms: float = 0
    min_latency_ms: float = 0
    max_latency_ms: float = 0
    p50_latency_ms: float = 0
    p95_latency_ms: float = 0
    p99_latency_ms: float = 0

    # Cache metrics
    cache_hits: int = 0
    cache_misses: int = 0
    cache_hit_rate: float = 0
    not_modified_responses: int = 0

    # Error metrics
    errors_by_category: Dict[str, int] = field(default_factory=dict)

    # Circuit breaker metrics
    circuit_opens: int = 0
    circuit_closes: int = 0
    circuit_state: str = "closed"

    # Flag evaluation metrics
    flag_evaluations: FlagEvaluationMetrics = field(default_factory=FlagEvaluationMetrics)

    # Time-windowed metrics
    windows: TimeWindowMetrics = field(default_factory=TimeWindowMetrics)

    # Timing
    uptime_ms: int = 0
    last_request_at: Optional[int] = None


@dataclass
class RequestMetrics:
    """Metrics for a single request."""
    endpoint: str
    status_code: int
    latency_ms: float
    cache_hit: bool = False
    not_modified: bool = False
    error: Optional[str] = None
    error_category: Optional[str] = None


@dataclass
class TimestampedRequest:
    """Request with timestamp for time windows."""
    timestamp: float
    latency_ms: float
    success: bool


@dataclass
class FlagEvaluation:
    """Record of a flag evaluation."""
    flag_key: str
    result: bool
    evaluation_time_ms: float
    timestamp: float


# Time windows in seconds
TIME_WINDOWS = {
    "1m": 60,
    "5m": 5 * 60,
    "15m": 15 * 60,
    "1h": 60 * 60,
}


class SDKMetrics:
    """
    Collects and reports SDK metrics.

    Example:
        ```python
        metrics = SDKMetrics()

        # Record a request
        metrics.record_request(RequestMetrics(
            endpoint="/api/v1/flags",
            status_code=200,
            latency_ms=45.2,
            cache_hit=False,
        ))

        # Record a flag evaluation
        metrics.record_evaluation("my-feature", True, 0.5)

        # Get snapshot
        snap = metrics.snapshot()
        print(f"Success rate: {snap.success_rate}%")

        # Export to Prometheus format
        print(metrics.to_prometheus())
        ```
    """

    def __init__(self):
        """Initialize metrics collector."""
        self._total_requests = 0
        self._successful_requests = 0
        self._failed_requests = 0
        self._cache_hits = 0
        self._cache_misses = 0
        self._not_modified_responses = 0
        self._circuit_opens = 0
        self._circuit_closes = 0
        self._circuit_state = CircuitStateValue.CLOSED

        self._latencies: List[float] = []
        self._max_latency_history = 1000

        self._errors_by_category: Dict[str, int] = defaultdict(int)

        # Time-windowed request tracking
        self._timestamped_requests: List[TimestampedRequest] = []
        self._max_timestamped_requests = 10000

        # Flag evaluation tracking
        self._flag_stats: Dict[str, Dict[str, Any]] = {}
        self._timestamped_evaluations: List[FlagEvaluation] = []
        self._max_timestamped_evaluations = 10000
        self._total_evaluations = 0
        self._total_evaluation_time_ms = 0.0

        self._start_time = time.time()
        self._last_request_at: Optional[float] = None

        # Event listeners
        self._listeners: Dict[str, List[Callable[[MetricsSnapshot], None]]] = defaultdict(list)

    def record_request(self, metrics: RequestMetrics) -> None:
        """
        Record a completed request.

        Args:
            metrics: Request metrics to record
        """
        now = time.time()
        self._total_requests += 1
        self._last_request_at = now

        success = 200 <= metrics.status_code < 400

        if success:
            self._successful_requests += 1
        else:
            self._failed_requests += 1

        # Track cache metrics
        if metrics.not_modified:
            self._not_modified_responses += 1
            self._cache_hits += 1
        elif metrics.cache_hit:
            self._cache_hits += 1
        else:
            self._cache_misses += 1

        # Track latency
        self._latencies.append(metrics.latency_ms)
        if len(self._latencies) > self._max_latency_history:
            self._latencies.pop(0)

        # Track timestamped request for time windows
        self._timestamped_requests.append(TimestampedRequest(
            timestamp=now,
            latency_ms=metrics.latency_ms,
            success=success,
        ))
        if len(self._timestamped_requests) > self._max_timestamped_requests:
            self._timestamped_requests.pop(0)

        # Track errors by category
        if metrics.error_category:
            self._errors_by_category[metrics.error_category] += 1

        # Emit update event
        self._emit("request", self.snapshot())

    def record_evaluation(
        self,
        flag_key: str,
        result: bool,
        evaluation_time_ms: float = 0,
    ) -> None:
        """
        Record a flag evaluation.

        Args:
            flag_key: The flag that was evaluated
            result: Evaluation result
            evaluation_time_ms: Time taken to evaluate
        """
        now = time.time()
        self._total_evaluations += 1
        self._total_evaluation_time_ms += evaluation_time_ms

        # Update per-flag stats
        if flag_key not in self._flag_stats:
            self._flag_stats[flag_key] = {
                "count": 0,
                "true_count": 0,
                "total_time_ms": 0,
            }

        stats = self._flag_stats[flag_key]
        stats["count"] += 1
        if result:
            stats["true_count"] += 1
        stats["total_time_ms"] += evaluation_time_ms

        # Track timestamped evaluation for time windows
        self._timestamped_evaluations.append(FlagEvaluation(
            flag_key=flag_key,
            result=result,
            evaluation_time_ms=evaluation_time_ms,
            timestamp=now,
        ))
        if len(self._timestamped_evaluations) > self._max_timestamped_evaluations:
            self._timestamped_evaluations.pop(0)

        # Emit update event
        self._emit("evaluation", self.snapshot())

    def record_circuit_state_change(self, new_state: CircuitStateValue) -> None:
        """
        Record a circuit breaker state change.

        Args:
            new_state: New circuit state
        """
        old_state = self._circuit_state
        self._circuit_state = new_state

        if new_state == CircuitStateValue.OPEN and old_state != CircuitStateValue.OPEN:
            self._circuit_opens += 1
        elif new_state == CircuitStateValue.CLOSED and old_state != CircuitStateValue.CLOSED:
            self._circuit_closes += 1

        self._emit("circuit-change", self.snapshot())

    def get_circuit_state(self) -> CircuitStateValue:
        """Get current circuit breaker state."""
        return self._circuit_state

    def on(
        self,
        event: str,
        callback: Callable[[MetricsSnapshot], None],
    ) -> None:
        """
        Subscribe to metrics events.

        Args:
            event: Event name ('request', 'evaluation', 'circuit-change')
            callback: Callback function
        """
        self._listeners[event].append(callback)

    def off(
        self,
        event: str,
        callback: Callable[[MetricsSnapshot], None],
    ) -> None:
        """
        Unsubscribe from metrics events.

        Args:
            event: Event name
            callback: Callback function to remove
        """
        if callback in self._listeners[event]:
            self._listeners[event].remove(callback)

    def clear_listeners(self) -> None:
        """Clear all listeners (for cleanup)."""
        self._listeners.clear()

    def _emit(self, event: str, data: MetricsSnapshot) -> None:
        """Emit an event to all listeners."""
        for callback in self._listeners.get(event, []):
            try:
                callback(data)
            except Exception:
                pass  # Ignore callback errors

    def snapshot(self) -> MetricsSnapshot:
        """
        Get a snapshot of all metrics.

        Returns:
            Complete metrics snapshot
        """
        sorted_latencies = sorted(self._latencies)
        total_cache_requests = self._cache_hits + self._cache_misses

        return MetricsSnapshot(
            total_requests=self._total_requests,
            successful_requests=self._successful_requests,
            failed_requests=self._failed_requests,
            success_rate=(
                (self._successful_requests / self._total_requests) * 100
                if self._total_requests > 0 else 0
            ),
            error_rate=(
                (self._failed_requests / self._total_requests) * 100
                if self._total_requests > 0 else 0
            ),

            avg_latency_ms=self._calculate_average(sorted_latencies),
            min_latency_ms=sorted_latencies[0] if sorted_latencies else 0,
            max_latency_ms=sorted_latencies[-1] if sorted_latencies else 0,
            p50_latency_ms=self._calculate_percentile(sorted_latencies, 50),
            p95_latency_ms=self._calculate_percentile(sorted_latencies, 95),
            p99_latency_ms=self._calculate_percentile(sorted_latencies, 99),

            cache_hits=self._cache_hits,
            cache_misses=self._cache_misses,
            cache_hit_rate=(
                (self._cache_hits / total_cache_requests) * 100
                if total_cache_requests > 0 else 0
            ),
            not_modified_responses=self._not_modified_responses,

            errors_by_category=dict(self._errors_by_category),

            circuit_opens=self._circuit_opens,
            circuit_closes=self._circuit_closes,
            circuit_state=self._circuit_state.value,

            flag_evaluations=self._get_flag_evaluation_metrics(),
            windows=self._get_time_window_metrics(),

            uptime_ms=int((time.time() - self._start_time) * 1000),
            last_request_at=(
                int(self._last_request_at * 1000)
                if self._last_request_at else None
            ),
        )

    def _get_flag_evaluation_metrics(self) -> FlagEvaluationMetrics:
        """Get flag evaluation metrics."""
        evaluations_per_flag: Dict[str, FlagStats] = {}

        for flag_key, stats in self._flag_stats.items():
            count = stats["count"]
            true_count = stats["true_count"]
            evaluations_per_flag[flag_key] = FlagStats(
                evaluations=count,
                true_count=true_count,
                false_count=count - true_count,
                true_rate=(true_count / count) * 100 if count > 0 else 0,
                avg_evaluation_time_ms=(
                    stats["total_time_ms"] / count if count > 0 else 0
                ),
            )

        return FlagEvaluationMetrics(
            total_evaluations=self._total_evaluations,
            evaluations_per_flag=evaluations_per_flag,
            avg_evaluation_time_ms=(
                self._total_evaluation_time_ms / self._total_evaluations
                if self._total_evaluations > 0 else 0
            ),
        )

    def _get_time_window_metrics(self) -> TimeWindowMetrics:
        """Get time-windowed metrics."""
        now = time.time()
        return TimeWindowMetrics(
            one_minute=self._calculate_window_stats(now, TIME_WINDOWS["1m"]),
            five_minutes=self._calculate_window_stats(now, TIME_WINDOWS["5m"]),
            fifteen_minutes=self._calculate_window_stats(now, TIME_WINDOWS["15m"]),
            one_hour=self._calculate_window_stats(now, TIME_WINDOWS["1h"]),
        )

    def _calculate_window_stats(self, now: float, window_seconds: float) -> WindowedStats:
        """Calculate stats for a time window."""
        cutoff = now - window_seconds
        window_requests = [r for r in self._timestamped_requests if r.timestamp >= cutoff]

        requests = len(window_requests)
        errors = sum(1 for r in window_requests if not r.success)
        total_latency = sum(r.latency_ms for r in window_requests)

        return WindowedStats(
            requests=requests,
            errors=errors,
            avg_latency_ms=total_latency / requests if requests > 0 else 0,
            error_rate=(errors / requests) * 100 if requests > 0 else 0,
        )

    def to_prometheus(self, prefix: str = "rollgate_sdk") -> str:
        """
        Export metrics in Prometheus format.

        Args:
            prefix: Metric name prefix

        Returns:
            Metrics in Prometheus text format
        """
        snap = self.snapshot()
        lines: List[str] = []

        def metric(name: str, value: float, help_text: str = "", metric_type: str = ""):
            full_name = f"{prefix}_{name}"
            if help_text:
                lines.append(f"# HELP {full_name} {help_text}")
            if metric_type:
                lines.append(f"# TYPE {full_name} {metric_type}")
            lines.append(f"{full_name} {value}")

        # Request metrics
        metric("requests_total", snap.total_requests, "Total number of requests", "counter")
        metric("requests_success_total", snap.successful_requests, "Total successful requests", "counter")
        metric("requests_failed_total", snap.failed_requests, "Total failed requests", "counter")

        # Latency metrics
        metric("latency_avg_ms", snap.avg_latency_ms, "Average request latency in milliseconds", "gauge")
        metric("latency_p50_ms", snap.p50_latency_ms, "50th percentile latency", "gauge")
        metric("latency_p95_ms", snap.p95_latency_ms, "95th percentile latency", "gauge")
        metric("latency_p99_ms", snap.p99_latency_ms, "99th percentile latency", "gauge")

        # Cache metrics
        metric("cache_hits_total", snap.cache_hits, "Total cache hits", "counter")
        metric("cache_misses_total", snap.cache_misses, "Total cache misses", "counter")
        metric("cache_hit_rate", snap.cache_hit_rate, "Cache hit rate percentage", "gauge")

        # Circuit breaker metrics
        metric("circuit_opens_total", snap.circuit_opens, "Total circuit breaker opens", "counter")
        circuit_value = 0 if snap.circuit_state == "closed" else (1 if snap.circuit_state == "open" else 0.5)
        metric("circuit_state", circuit_value, "Circuit breaker state (0=closed, 0.5=half-open, 1=open)", "gauge")

        # Flag evaluation metrics
        metric("evaluations_total", snap.flag_evaluations.total_evaluations, "Total flag evaluations", "counter")
        metric(
            "evaluation_avg_time_ms",
            snap.flag_evaluations.avg_evaluation_time_ms,
            "Average evaluation time in milliseconds",
            "gauge",
        )

        # Uptime
        metric("uptime_seconds", snap.uptime_ms / 1000, "SDK uptime in seconds", "gauge")

        return "\n".join(lines)

    def reset(self) -> None:
        """Reset all metrics."""
        self._total_requests = 0
        self._successful_requests = 0
        self._failed_requests = 0
        self._cache_hits = 0
        self._cache_misses = 0
        self._not_modified_responses = 0
        self._circuit_opens = 0
        self._circuit_closes = 0
        self._circuit_state = CircuitStateValue.CLOSED
        self._latencies = []
        self._errors_by_category = defaultdict(int)
        self._timestamped_requests = []
        self._flag_stats = {}
        self._timestamped_evaluations = []
        self._total_evaluations = 0
        self._total_evaluation_time_ms = 0.0
        self._start_time = time.time()
        self._last_request_at = None

    @staticmethod
    def _calculate_average(sorted_values: List[float]) -> float:
        """Calculate average of sorted values."""
        if not sorted_values:
            return 0
        return sum(sorted_values) / len(sorted_values)

    @staticmethod
    def _calculate_percentile(sorted_values: List[float], percentile: float) -> float:
        """Calculate percentile of sorted values."""
        if not sorted_values:
            return 0
        index = int((percentile / 100) * len(sorted_values)) - 1
        return sorted_values[max(0, index)]


# Global metrics instance
_global_metrics: Optional[SDKMetrics] = None


def get_metrics() -> SDKMetrics:
    """Get or create the global metrics instance."""
    global _global_metrics
    if _global_metrics is None:
        _global_metrics = SDKMetrics()
    return _global_metrics


def create_metrics() -> SDKMetrics:
    """Create a new metrics instance (useful for testing)."""
    return SDKMetrics()
rollgate/reasons.py
ADDED
@@ -0,0 +1,115 @@
"""
Evaluation reasons for Rollgate SDK.

Provides detailed information about why a flag evaluated to a particular value.
"""

from dataclasses import dataclass
from enum import Enum
from typing import Optional, TypeVar, Generic

T = TypeVar("T")


class EvaluationReasonKind(str, Enum):
    """The category of reason for a flag evaluation."""

    OFF = "OFF"  # Flag is disabled
    TARGET_MATCH = "TARGET_MATCH"  # User is in the target users list
    RULE_MATCH = "RULE_MATCH"  # User matched a targeting rule
    FALLTHROUGH = "FALLTHROUGH"  # No rules matched, using default rollout
    ERROR = "ERROR"  # An error occurred during evaluation
    UNKNOWN = "UNKNOWN"  # Flag not found or unknown reason


class EvaluationErrorKind(str, Enum):
    """Types of errors that can occur during evaluation."""

    FLAG_NOT_FOUND = "FLAG_NOT_FOUND"  # The flag key does not exist
    MALFORMED_FLAG = "MALFORMED_FLAG"  # The flag configuration is invalid
    USER_NOT_SPECIFIED = "USER_NOT_SPECIFIED"  # No user context was provided
    CLIENT_NOT_READY = "CLIENT_NOT_READY"  # The SDK client is not initialized
    EXCEPTION = "EXCEPTION"  # An unexpected error occurred


@dataclass
class EvaluationReason:
    """Explains why a flag evaluated to a particular value."""

    kind: EvaluationReasonKind
    rule_id: Optional[str] = None
    rule_index: Optional[int] = None
    in_rollout: Optional[bool] = None
    error_kind: Optional[EvaluationErrorKind] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        result = {"kind": self.kind.value}
        if self.rule_id is not None:
            result["ruleId"] = self.rule_id
        if self.rule_index is not None:
            result["ruleIndex"] = self.rule_index
        if self.in_rollout is not None:
            result["inRollout"] = self.in_rollout
        if self.error_kind is not None:
            result["errorKind"] = self.error_kind.value
        return result


@dataclass
class EvaluationDetail(Generic[T]):
    """Contains the full result of a flag evaluation."""

    value: T
    reason: EvaluationReason
    variation_index: Optional[int] = None
    variation_id: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        result = {"value": self.value, "reason": self.reason.to_dict()}
        if self.variation_index is not None:
            result["variationIndex"] = self.variation_index
        if self.variation_id is not None:
            result["variationId"] = self.variation_id
        return result


# Helper functions to create common reasons


def off_reason() -> EvaluationReason:
    """Create a reason for a disabled flag."""
    return EvaluationReason(kind=EvaluationReasonKind.OFF)


def target_match_reason() -> EvaluationReason:
    """Create a reason for a target user match."""
    return EvaluationReason(kind=EvaluationReasonKind.TARGET_MATCH)


def rule_match_reason(
    rule_id: str, rule_index: int, in_rollout: bool = True
) -> EvaluationReason:
    """Create a reason for a rule match."""
    return EvaluationReason(
        kind=EvaluationReasonKind.RULE_MATCH,
        rule_id=rule_id,
        rule_index=rule_index,
        in_rollout=in_rollout,
    )


def fallthrough_reason(in_rollout: bool = True) -> EvaluationReason:
    """Create a reason for fallthrough to default rollout."""
    return EvaluationReason(kind=EvaluationReasonKind.FALLTHROUGH, in_rollout=in_rollout)


def error_reason(error_kind: EvaluationErrorKind) -> EvaluationReason:
    """Create a reason for an error."""
    return EvaluationReason(kind=EvaluationReasonKind.ERROR, error_kind=error_kind)


def unknown_reason() -> EvaluationReason:
    """Create a reason for an unknown flag."""
    return EvaluationReason(kind=EvaluationReasonKind.UNKNOWN)
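
The helpers at the bottom of reasons.py construct EvaluationReason values that an EvaluationDetail wraps together with the evaluated value and serializes with camelCase keys via to_dict(). A minimal sketch using only the names defined above; the import path, flag values, rule id, and variation index are illustrative assumptions.

# Sketch of composing reasons into evaluation details (assumed import path
# `rollgate.reasons`; values below are illustrative).
from rollgate.reasons import (
    EvaluationDetail,
    EvaluationErrorKind,
    error_reason,
    rule_match_reason,
)

# A successful evaluation that matched the second targeting rule.
detail = EvaluationDetail(
    value=True,
    reason=rule_match_reason(rule_id="rule-beta-users", rule_index=1),
    variation_index=0,
)
print(detail.to_dict())
# {'value': True, 'reason': {'kind': 'RULE_MATCH', 'ruleId': 'rule-beta-users',
#  'ruleIndex': 1, 'inRollout': True}, 'variationIndex': 0}

# A failed evaluation because the flag key does not exist.
fallback = EvaluationDetail(
    value=False,
    reason=error_reason(EvaluationErrorKind.FLAG_NOT_FOUND),
)
print(fallback.to_dict()["reason"]["errorKind"])  # FLAG_NOT_FOUND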