ledger-sdk 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ledger/__init__.py +4 -0
- ledger/_version.py +1 -0
- ledger/core/__init__.py +3 -0
- ledger/core/buffer.py +45 -0
- ledger/core/client.py +297 -0
- ledger/core/flusher.py +249 -0
- ledger/core/http_client.py +67 -0
- ledger/core/rate_limiter.py +78 -0
- ledger/core/settings.py +30 -0
- ledger/core/validator.py +124 -0
- ledger/integrations/__init__.py +3 -0
- ledger/integrations/fastapi.py +113 -0
- ledger/py.typed +0 -0
- ledger_sdk-1.0.0.dist-info/METADATA +678 -0
- ledger_sdk-1.0.0.dist-info/RECORD +18 -0
- ledger_sdk-1.0.0.dist-info/WHEEL +5 -0
- ledger_sdk-1.0.0.dist-info/licenses/LICENSE +21 -0
- ledger_sdk-1.0.0.dist-info/top_level.txt +1 -0
ledger/__init__.py
ADDED
ledger/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# SDK version string; ledger/core/client.py imports it to stamp log entries
# ("sdk_version") and to report the version in its metrics.
__version__ = "1.0.0"
|
ledger/core/__init__.py
ADDED
ledger/core/buffer.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import sys
|
|
3
|
+
from collections import deque
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LogBuffer:
    """Bounded FIFO buffer of pending log entries.

    When the buffer is full, adding a new entry discards the oldest one.
    Every discard is counted and reported on stderr.
    """

    def __init__(self, max_size: int = 10000):
        """Create an empty buffer.

        Args:
            max_size: Maximum number of entries held at any one time.

        Raises:
            ValueError: If ``max_size`` is not positive. A zero-capacity
                buffer would make ``add`` pop from an empty deque.
        """
        if max_size <= 0:
            raise ValueError(f"max_size must be positive, got {max_size}")

        self.max_size = max_size
        self._queue: deque[dict[str, Any]] = deque(maxlen=max_size)
        self._lock = asyncio.Lock()
        self._dropped_count = 0

    def add(self, log_entry: dict[str, Any]) -> None:
        """Append ``log_entry``, evicting the oldest entry if the buffer is full."""
        if len(self._queue) >= self.max_size:
            # Evict explicitly (rather than relying on deque's maxlen) so the
            # drop can be counted and reported.
            self._queue.popleft()
            self._dropped_count += 1
            sys.stderr.write(
                f"[Ledger SDK] WARNING: Buffer full ({self.max_size} logs), "
                f"dropped oldest log (total dropped: {self._dropped_count})\n"
            )
            sys.stderr.flush()

        self._queue.append(log_entry)

    async def get_batch(self, max_batch_size: int) -> list[dict[str, Any]]:
        """Remove and return up to ``max_batch_size`` of the oldest entries.

        Returns an empty list when the buffer is empty or when
        ``max_batch_size`` is not positive.
        """
        async with self._lock:
            batch_size = min(len(self._queue), max_batch_size)
            if batch_size <= 0:
                return []
            return [self._queue.popleft() for _ in range(batch_size)]

    def size(self) -> int:
        """Return the number of entries currently buffered."""
        return len(self._queue)

    def is_empty(self) -> bool:
        """Return True when no entries are buffered."""
        return not self._queue

    def clear(self) -> None:
        """Discard all buffered entries (the dropped counter is not changed)."""
        self._queue.clear()

    def get_dropped_count(self) -> int:
        """Return how many entries have been evicted because the buffer was full."""
        return self._dropped_count
|
ledger/core/client.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import ledger.core.buffer as buffer_module
|
|
6
|
+
import ledger.core.flusher as flusher_module
|
|
7
|
+
import ledger.core.http_client as http_client_module
|
|
8
|
+
import ledger.core.rate_limiter as rate_limiter_module
|
|
9
|
+
import ledger.core.settings as settings_module
|
|
10
|
+
import ledger.core.validator as validator_module
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LedgerClient:
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
api_key: str,
|
|
17
|
+
base_url: str = "http://localhost:8000",
|
|
18
|
+
flush_interval: float = 5.0,
|
|
19
|
+
flush_size: int = 100,
|
|
20
|
+
max_buffer_size: int = 10000,
|
|
21
|
+
http_timeout: float = 5.0,
|
|
22
|
+
http_pool_size: int = 10,
|
|
23
|
+
rate_limit_buffer: float = 0.9,
|
|
24
|
+
):
|
|
25
|
+
self._validate_config(
|
|
26
|
+
api_key=api_key,
|
|
27
|
+
base_url=base_url,
|
|
28
|
+
flush_interval=flush_interval,
|
|
29
|
+
flush_size=flush_size,
|
|
30
|
+
max_buffer_size=max_buffer_size,
|
|
31
|
+
http_timeout=http_timeout,
|
|
32
|
+
http_pool_size=http_pool_size,
|
|
33
|
+
rate_limit_buffer=rate_limit_buffer,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
self.api_key = api_key
|
|
37
|
+
self.base_url = base_url
|
|
38
|
+
|
|
39
|
+
self._http_client = http_client_module.HTTPClient(
|
|
40
|
+
base_url=base_url,
|
|
41
|
+
api_key=api_key,
|
|
42
|
+
timeout=http_timeout,
|
|
43
|
+
pool_size=http_pool_size,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
self._settings_manager = settings_module.SettingsManager()
|
|
47
|
+
|
|
48
|
+
self._buffer = buffer_module.LogBuffer(max_size=max_buffer_size)
|
|
49
|
+
|
|
50
|
+
rate_limits = self._settings_manager.get_rate_limits()
|
|
51
|
+
self._rate_limiter = rate_limiter_module.RateLimiter(
|
|
52
|
+
requests_per_minute=rate_limits["requests_per_minute"],
|
|
53
|
+
requests_per_hour=rate_limits["requests_per_hour"],
|
|
54
|
+
buffer=rate_limit_buffer,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
constraints = self._settings_manager.get_constraints()
|
|
58
|
+
self._validator = validator_module.Validator(constraints)
|
|
59
|
+
|
|
60
|
+
self._flusher = flusher_module.BackgroundFlusher(
|
|
61
|
+
buffer=self._buffer,
|
|
62
|
+
http_client=self._http_client,
|
|
63
|
+
rate_limiter=self._rate_limiter,
|
|
64
|
+
flush_interval=flush_interval,
|
|
65
|
+
max_batch_size=constraints["max_batch_size"],
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
self._flusher.start()
|
|
69
|
+
|
|
70
|
+
self._sdk_start_time = datetime.now(timezone.utc)
|
|
71
|
+
|
|
72
|
+
def _validate_config(
|
|
73
|
+
self,
|
|
74
|
+
api_key: str,
|
|
75
|
+
base_url: str,
|
|
76
|
+
flush_interval: float,
|
|
77
|
+
flush_size: int,
|
|
78
|
+
max_buffer_size: int,
|
|
79
|
+
http_timeout: float,
|
|
80
|
+
http_pool_size: int,
|
|
81
|
+
rate_limit_buffer: float,
|
|
82
|
+
) -> None:
|
|
83
|
+
errors = []
|
|
84
|
+
|
|
85
|
+
if not api_key or not isinstance(api_key, str):
|
|
86
|
+
errors.append("api_key must be a non-empty string")
|
|
87
|
+
|
|
88
|
+
if not api_key.startswith("ldg_"):
|
|
89
|
+
errors.append("api_key must start with 'ldg_' prefix")
|
|
90
|
+
|
|
91
|
+
if not base_url or not isinstance(base_url, str):
|
|
92
|
+
errors.append("base_url must be a non-empty string")
|
|
93
|
+
|
|
94
|
+
if not base_url.startswith(("http://", "https://")):
|
|
95
|
+
errors.append("base_url must start with 'http://' or 'https://'")
|
|
96
|
+
|
|
97
|
+
if flush_interval <= 0:
|
|
98
|
+
errors.append(f"flush_interval must be positive, got {flush_interval}")
|
|
99
|
+
|
|
100
|
+
if flush_size <= 0:
|
|
101
|
+
errors.append(f"flush_size must be positive, got {flush_size}")
|
|
102
|
+
|
|
103
|
+
if max_buffer_size <= 0:
|
|
104
|
+
errors.append(f"max_buffer_size must be positive, got {max_buffer_size}")
|
|
105
|
+
|
|
106
|
+
if http_timeout <= 0:
|
|
107
|
+
errors.append(f"http_timeout must be positive, got {http_timeout}")
|
|
108
|
+
|
|
109
|
+
if http_pool_size <= 0:
|
|
110
|
+
errors.append(f"http_pool_size must be positive, got {http_pool_size}")
|
|
111
|
+
|
|
112
|
+
if not 0 < rate_limit_buffer <= 1:
|
|
113
|
+
errors.append(f"rate_limit_buffer must be between 0 and 1, got {rate_limit_buffer}")
|
|
114
|
+
|
|
115
|
+
if errors:
|
|
116
|
+
raise ValueError("Invalid Ledger SDK configuration:\n - " + "\n - ".join(errors))
|
|
117
|
+
|
|
118
|
+
def log_info(
|
|
119
|
+
self,
|
|
120
|
+
message: str,
|
|
121
|
+
attributes: dict[str, Any] | None = None,
|
|
122
|
+
) -> None:
|
|
123
|
+
self._log(
|
|
124
|
+
level="info",
|
|
125
|
+
log_type="console",
|
|
126
|
+
importance="standard",
|
|
127
|
+
message=message,
|
|
128
|
+
attributes=attributes,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
def log_error(
|
|
132
|
+
self,
|
|
133
|
+
message: str,
|
|
134
|
+
attributes: dict[str, Any] | None = None,
|
|
135
|
+
) -> None:
|
|
136
|
+
self._log(
|
|
137
|
+
level="error",
|
|
138
|
+
log_type="console",
|
|
139
|
+
importance="high",
|
|
140
|
+
message=message,
|
|
141
|
+
attributes=attributes,
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def log_exception(
|
|
145
|
+
self,
|
|
146
|
+
exception: Exception,
|
|
147
|
+
message: str | None = None,
|
|
148
|
+
attributes: dict[str, Any] | None = None,
|
|
149
|
+
) -> None:
|
|
150
|
+
stack_trace = "".join(
|
|
151
|
+
traceback.format_exception(
|
|
152
|
+
type(exception),
|
|
153
|
+
exception,
|
|
154
|
+
exception.__traceback__,
|
|
155
|
+
)
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
self._log(
|
|
159
|
+
level="error",
|
|
160
|
+
log_type="exception",
|
|
161
|
+
importance="high",
|
|
162
|
+
message=message or str(exception),
|
|
163
|
+
error_type=exception.__class__.__name__,
|
|
164
|
+
error_message=str(exception),
|
|
165
|
+
stack_trace=stack_trace,
|
|
166
|
+
attributes=attributes,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
def _log(
|
|
170
|
+
self,
|
|
171
|
+
level: str,
|
|
172
|
+
log_type: str,
|
|
173
|
+
importance: str,
|
|
174
|
+
message: str | None = None,
|
|
175
|
+
error_type: str | None = None,
|
|
176
|
+
error_message: str | None = None,
|
|
177
|
+
stack_trace: str | None = None,
|
|
178
|
+
attributes: dict[str, Any] | None = None,
|
|
179
|
+
) -> None:
|
|
180
|
+
from ledger._version import __version__
|
|
181
|
+
|
|
182
|
+
log_entry: dict[str, Any] = {
|
|
183
|
+
"timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
|
|
184
|
+
"level": level,
|
|
185
|
+
"log_type": log_type,
|
|
186
|
+
"importance": importance,
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
if message:
|
|
190
|
+
log_entry["message"] = message
|
|
191
|
+
|
|
192
|
+
if error_type:
|
|
193
|
+
log_entry["error_type"] = error_type
|
|
194
|
+
|
|
195
|
+
if error_message:
|
|
196
|
+
log_entry["error_message"] = error_message
|
|
197
|
+
|
|
198
|
+
if stack_trace:
|
|
199
|
+
log_entry["stack_trace"] = stack_trace
|
|
200
|
+
|
|
201
|
+
if attributes:
|
|
202
|
+
log_entry["attributes"] = attributes
|
|
203
|
+
|
|
204
|
+
log_entry["sdk_version"] = __version__
|
|
205
|
+
log_entry["platform"] = "python"
|
|
206
|
+
|
|
207
|
+
validated_log = self._validator.validate_log(log_entry)
|
|
208
|
+
|
|
209
|
+
self._buffer.add(validated_log)
|
|
210
|
+
|
|
211
|
+
def is_healthy(self) -> bool:
|
|
212
|
+
flusher_metrics = self._flusher.get_metrics()
|
|
213
|
+
|
|
214
|
+
if flusher_metrics["circuit_breaker_open"]:
|
|
215
|
+
return False
|
|
216
|
+
|
|
217
|
+
if flusher_metrics["consecutive_failures"] >= 3:
|
|
218
|
+
return False
|
|
219
|
+
|
|
220
|
+
buffer_utilization = (self._buffer.size() / self._buffer.max_size) * 100
|
|
221
|
+
if buffer_utilization > 90:
|
|
222
|
+
return False
|
|
223
|
+
|
|
224
|
+
return True
|
|
225
|
+
|
|
226
|
+
def get_health_status(self) -> dict[str, Any]:
|
|
227
|
+
flusher_metrics = self._flusher.get_metrics()
|
|
228
|
+
buffer_utilization = (self._buffer.size() / self._buffer.max_size) * 100
|
|
229
|
+
|
|
230
|
+
status = "healthy"
|
|
231
|
+
issues = []
|
|
232
|
+
|
|
233
|
+
if flusher_metrics["circuit_breaker_open"]:
|
|
234
|
+
status = "unhealthy"
|
|
235
|
+
issues.append("Circuit breaker is open (too many failures)")
|
|
236
|
+
|
|
237
|
+
if flusher_metrics["consecutive_failures"] >= 3:
|
|
238
|
+
status = "degraded" if status == "healthy" else status
|
|
239
|
+
issues.append(f"Consecutive failures: {flusher_metrics['consecutive_failures']}")
|
|
240
|
+
|
|
241
|
+
if buffer_utilization > 90:
|
|
242
|
+
status = "degraded" if status == "healthy" else status
|
|
243
|
+
issues.append(f"Buffer nearly full: {buffer_utilization:.1f}%")
|
|
244
|
+
|
|
245
|
+
if self._buffer.get_dropped_count() > 0:
|
|
246
|
+
status = "degraded" if status == "healthy" else status
|
|
247
|
+
issues.append(f"Dropped logs: {self._buffer.get_dropped_count()}")
|
|
248
|
+
|
|
249
|
+
return {
|
|
250
|
+
"status": status,
|
|
251
|
+
"healthy": status == "healthy",
|
|
252
|
+
"issues": issues if issues else None,
|
|
253
|
+
"buffer_utilization_percent": round(buffer_utilization, 2),
|
|
254
|
+
"circuit_breaker_open": flusher_metrics["circuit_breaker_open"],
|
|
255
|
+
"consecutive_failures": flusher_metrics["consecutive_failures"],
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
async def shutdown(self, timeout: float = 10.0) -> None:
|
|
259
|
+
await self._flusher.shutdown(timeout=timeout)
|
|
260
|
+
await self._http_client.close()
|
|
261
|
+
|
|
262
|
+
def get_metrics(self) -> dict[str, Any]:
|
|
263
|
+
from ledger._version import __version__
|
|
264
|
+
|
|
265
|
+
flusher_metrics = self._flusher.get_metrics()
|
|
266
|
+
uptime = (datetime.now(timezone.utc) - self._sdk_start_time).total_seconds()
|
|
267
|
+
|
|
268
|
+
return {
|
|
269
|
+
"sdk": {
|
|
270
|
+
"uptime_seconds": round(uptime, 2),
|
|
271
|
+
"version": __version__,
|
|
272
|
+
},
|
|
273
|
+
"buffer": {
|
|
274
|
+
"current_size": self._buffer.size(),
|
|
275
|
+
"max_size": self._buffer.max_size,
|
|
276
|
+
"total_dropped": self._buffer.get_dropped_count(),
|
|
277
|
+
"utilization_percent": round(
|
|
278
|
+
(self._buffer.size() / self._buffer.max_size) * 100, 2
|
|
279
|
+
),
|
|
280
|
+
},
|
|
281
|
+
"flusher": {
|
|
282
|
+
"total_flushes": flusher_metrics["total_flushes"],
|
|
283
|
+
"successful_flushes": flusher_metrics["successful_flushes"],
|
|
284
|
+
"failed_flushes": flusher_metrics["failed_flushes"],
|
|
285
|
+
"total_logs_sent": flusher_metrics["total_logs_sent"],
|
|
286
|
+
"total_logs_failed": flusher_metrics["total_logs_failed"],
|
|
287
|
+
"consecutive_failures": flusher_metrics["consecutive_failures"],
|
|
288
|
+
"circuit_breaker_open": flusher_metrics["circuit_breaker_open"],
|
|
289
|
+
"last_flush_time": flusher_metrics["last_flush_time"],
|
|
290
|
+
"last_error": flusher_metrics["last_error"],
|
|
291
|
+
},
|
|
292
|
+
"rate_limiter": {
|
|
293
|
+
"current_rate": self._rate_limiter.get_current_rate(),
|
|
294
|
+
"limit_per_minute": self._rate_limiter.limit_per_minute,
|
|
295
|
+
},
|
|
296
|
+
"errors": flusher_metrics["errors_by_type"],
|
|
297
|
+
}
|
ledger/core/flusher.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
import ledger.core.buffer as buffer_module
|
|
9
|
+
import ledger.core.http_client as http_client_module
|
|
10
|
+
import ledger.core.rate_limiter as rate_limiter_module
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BackgroundFlusher:
    """Periodically drains a log buffer and posts the batches over HTTP.

    Failed sends are retried with exponential backoff. After enough
    consecutive failed flushes a circuit breaker pauses sending for a
    cool-down period. Diagnostics are written to stderr.
    """

    def __init__(
        self,
        buffer: "buffer_module.LogBuffer",
        http_client: "http_client_module.HTTPClient",
        rate_limiter: "rate_limiter_module.RateLimiter",
        flush_interval: float = 5.0,
        max_batch_size: int = 1000,
        max_retries: int = 3,
        retry_backoff_base: float = 2.0,
    ):
        """Store the collaborators and initialise metrics and breaker state.

        Args:
            buffer: Source of log entries (``get_batch``, ``is_empty``, ``size``).
            http_client: Client whose ``post`` coroutine sends a batch.
            rate_limiter: Object whose ``wait_if_needed`` coroutine is awaited
                before every send attempt.
            flush_interval: Seconds between background flush attempts.
            max_batch_size: Maximum number of entries per request.
            max_retries: Send attempts per batch before it counts as failed.
            retry_backoff_base: Base of the exponential retry delay.
        """
        self.buffer = buffer
        self.http_client = http_client
        self.rate_limiter = rate_limiter
        self.flush_interval = flush_interval
        self.max_batch_size = max_batch_size
        self.max_retries = max_retries
        self.retry_backoff_base = retry_backoff_base

        self._task: asyncio.Task[Any] | None = None
        self._shutdown_event = asyncio.Event()
        # Set when the server answers 401. Kept separate from the shutdown
        # event because shutdown() also sets that event and must still be
        # able to flush when the key is valid.
        self._auth_failed = False

        self._metrics: dict[str, Any] = {
            "total_flushes": 0,
            "successful_flushes": 0,
            "failed_flushes": 0,
            "total_logs_sent": 0,
            "total_logs_failed": 0,
            "consecutive_failures": 0,
            "last_flush_time": None,
            "last_error": None,
            "errors_by_type": {},
        }

        self._circuit_breaker_open = False
        # time.monotonic() reading taken when the breaker last opened.
        self._circuit_breaker_opened_at: float | None = None
        self._circuit_breaker_threshold = 5
        self._circuit_breaker_timeout = 60.0

    def start(self) -> None:
        """Schedule the flush loop as an asyncio task unless one is already running."""
        if self._task is None or self._task.done():
            self._task = asyncio.create_task(self._run())

    async def _run(self) -> None:
        """Flush loop: wait one interval, flush if anything is buffered, repeat.

        The wait is on the shutdown event so that a shutdown request ends
        the loop immediately instead of after the remaining sleep; the final
        drain is left to ``shutdown``.
        """
        while not self._shutdown_event.is_set():
            try:
                try:
                    await asyncio.wait_for(
                        self._shutdown_event.wait(), timeout=self.flush_interval
                    )
                except asyncio.TimeoutError:
                    pass  # Interval elapsed without a shutdown request.
                else:
                    break  # Shutdown requested while waiting.

                if self.buffer.is_empty():
                    continue

                await self._flush_with_retry()

            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive; a single bad iteration must not stop flushing.
                self._log_error(f"Unexpected error in flusher: {e}")

    async def _flush_with_retry(self) -> None:
        """Take one batch from the buffer and try to send it, with retries.

        Does nothing while the circuit breaker is cooling down or after an
        authentication failure (entries then stay in the buffer). A batch
        that is still unsent after all attempts is counted as failed and is
        not put back into the buffer.
        """
        if self._auth_failed:
            # The API key was rejected; further requests cannot succeed.
            return

        if self._circuit_breaker_open:
            opened_at = self._circuit_breaker_opened_at
            if (
                opened_at is not None
                and time.monotonic() - opened_at < self._circuit_breaker_timeout
            ):
                return

            self._log_info("Circuit breaker: attempting recovery")
            self._circuit_breaker_open = False

        batch = await self.buffer.get_batch(self.max_batch_size)
        if not batch:
            return

        self._metrics["total_flushes"] += 1

        for attempt in range(self.max_retries):
            has_attempts_left = attempt < self.max_retries - 1
            try:
                await self.rate_limiter.wait_if_needed()

                success = await self._send_batch(batch)

                if success:
                    self._metrics["successful_flushes"] += 1
                    self._metrics["total_logs_sent"] += len(batch)
                    self._metrics["consecutive_failures"] = 0
                    self._metrics["last_flush_time"] = time.time()
                    return

                if self._auth_failed:
                    # A rejected API key is permanent; retrying only adds delay.
                    break

                if has_attempts_left:
                    await asyncio.sleep(self.retry_backoff_base**attempt)

            except httpx.TimeoutException as e:
                self._handle_network_error("Timeout", e, attempt)
                if has_attempts_left:
                    await asyncio.sleep(self.retry_backoff_base**attempt * 5.0)

            except httpx.ConnectError as e:
                self._handle_network_error("Connection refused", e, attempt)
                if has_attempts_left:
                    await asyncio.sleep(self.retry_backoff_base**attempt * 5.0)

            except Exception as e:
                self._handle_network_error("Unexpected error", e, attempt)
                if has_attempts_left:
                    await asyncio.sleep(self.retry_backoff_base**attempt)

        self._metrics["failed_flushes"] += 1
        self._metrics["total_logs_failed"] += len(batch)
        self._metrics["consecutive_failures"] += 1

        if self._metrics["consecutive_failures"] >= self._circuit_breaker_threshold:
            self._circuit_breaker_open = True
            self._circuit_breaker_opened_at = time.monotonic()
            self._log_error(
                f"Circuit breaker OPEN: {self._metrics['consecutive_failures']} consecutive failures"
            )

    @staticmethod
    def _parse_retry_after(raw: Any, default: int = 60) -> int:
        """Convert a ``Retry-After`` header value to a non-negative number of seconds.

        Returns ``default`` when the header is missing or is not an integer
        (for example the HTTP-date form), so a malformed header cannot raise.
        """
        if raw is None:
            return default
        try:
            seconds = int(raw)
        except (TypeError, ValueError):
            return default
        return max(seconds, 0)

    async def _send_batch(self, batch: list[dict[str, Any]]) -> bool:
        """Post one batch and interpret the response status.

        Returns:
            True when the batch needs no retry (202 accepted, or 400 where a
            retry would be rejected again); False when it should be retried
            or sending must stop. Exceptions from the HTTP client propagate.
        """
        response = await self.http_client.post(
            "/api/v1/ingest/batch",
            json_data={"logs": batch},
        )

        if response.status_code == 202:
            data = response.json()
            accepted = data.get("accepted", 0)
            rejected = data.get("rejected", 0)

            if rejected > 0:
                self._log_warning(f"Batch: {accepted} accepted, {rejected} rejected")
                errors = data.get("errors", [])
                # Only the first few errors are printed to keep stderr readable.
                for error in errors[:5]:
                    self._log_warning(f"  - {error}")

            return True

        if response.status_code == 429:
            retry_after = self._parse_retry_after(response.headers.get("Retry-After"))
            self._log_warning(f"Rate limit exceeded, sleeping {retry_after}s")
            self._increment_error_count("rate_limit")
            await asyncio.sleep(retry_after)
            return False

        if response.status_code == 503:
            retry_after = self._parse_retry_after(response.headers.get("Retry-After"))
            self._log_warning(f"Queue full (503), sleeping {retry_after}s")
            self._increment_error_count("queue_full")
            await asyncio.sleep(retry_after)
            return False

        if response.status_code == 401:
            self._log_error("Invalid API key (401), stopping ingestion")
            self._increment_error_count("auth_failure")
            self._auth_failed = True
            self._shutdown_event.set()
            return False

        if response.status_code == 400:
            self._log_error(f"Bad request (400): {response.text}")
            self._increment_error_count("validation_error")
            # Reported as handled: resending the same payload would fail again.
            return True

        self._log_error(f"Unexpected response: {response.status_code} - {response.text}")
        self._increment_error_count("server_error")
        return False

    def _handle_network_error(self, error_type: str, error: Exception, attempt: int) -> None:
        """Log a failed attempt, count it as a network error and remember it as the last error."""
        self._log_error(f"{error_type} (attempt {attempt + 1}/{self.max_retries}): {error}")
        self._increment_error_count("network_error")
        self._metrics["last_error"] = f"{error_type}: {error}"

    def _increment_error_count(self, error_type: str) -> None:
        """Add one to the per-category error counter for ``error_type``."""
        counts = self._metrics["errors_by_type"]
        counts[error_type] = counts.get(error_type, 0) + 1

    def _emit(self, level: str, message: str) -> None:
        """Write one timestamped diagnostic line to stderr."""
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        sys.stderr.write(f"[{timestamp}] [Ledger SDK] [{level}] {message}\n")
        sys.stderr.flush()

    def _log_info(self, message: str) -> None:
        """Write an INFO diagnostic line to stderr."""
        self._emit("INFO", message)

    def _log_warning(self, message: str) -> None:
        """Write a WARNING diagnostic line to stderr."""
        self._emit("WARNING", message)

    def _log_error(self, message: str) -> None:
        """Write an ERROR diagnostic line to stderr."""
        self._emit("ERROR", message)

    async def shutdown(self, timeout: float = 10.0) -> None:
        """Stop the flush loop and make a bounded attempt to drain the buffer.

        Args:
            timeout: Total budget in seconds for the final flushes; each of
                up to three flush attempts gets a third of it.
        """
        self._log_info("Shutting down, flushing remaining logs...")
        self._shutdown_event.set()

        if self._task and not self._task.done():
            try:
                # Grace period for a flush that is already in progress.
                await asyncio.wait_for(self._task, timeout=2.0)
            except asyncio.TimeoutError:
                self._task.cancel()
                try:
                    await self._task
                except asyncio.CancelledError:
                    pass

        remaining_attempts = 3
        while not self.buffer.is_empty() and remaining_attempts > 0:
            try:
                await asyncio.wait_for(self._flush_with_retry(), timeout=timeout / 3)
                remaining_attempts -= 1
            except asyncio.TimeoutError:
                self._log_warning("Flush timeout during shutdown")
                break
            except Exception as e:
                self._log_error(f"Shutdown flush error: {e}")
                break

        if not self.buffer.is_empty():
            dropped = self.buffer.size()
            self._log_warning(f"Shutdown: {dropped} logs still in buffer (not sent)")

        self._log_info("Shutdown complete")

    def get_metrics(self) -> dict[str, Any]:
        """Return a snapshot of the flush counters and circuit-breaker state.

        ``errors_by_type`` is a copy, so callers cannot alter the internal counters.
        """
        return {
            "total_flushes": self._metrics["total_flushes"],
            "successful_flushes": self._metrics["successful_flushes"],
            "failed_flushes": self._metrics["failed_flushes"],
            "total_logs_sent": self._metrics["total_logs_sent"],
            "total_logs_failed": self._metrics["total_logs_failed"],
            "consecutive_failures": self._metrics["consecutive_failures"],
            "circuit_breaker_open": self._circuit_breaker_open,
            "last_flush_time": self._metrics["last_flush_time"],
            "last_error": self._metrics["last_error"],
            "errors_by_type": dict(self._metrics["errors_by_type"]),
        }
|
ledger/core/http_client.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class HTTPClient:
    """Thin async wrapper around a shared ``httpx.AsyncClient``.

    The underlying client is configured once with the base URL, timeout,
    connection limits and the default headers (bearer token, JSON content
    type and SDK user agent).
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        timeout: float = 5.0,
        pool_size: int = 10,
    ):
        """Create the underlying client.

        Args:
            base_url: Service base URL; trailing slashes are stripped.
            api_key: Key sent as a bearer token in the Authorization header.
            timeout: Value passed to ``httpx.Timeout``.
            pool_size: Used for both the connection and keep-alive limits.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key

        request_timeout = httpx.Timeout(timeout)
        connection_limits = httpx.Limits(
            max_connections=pool_size,
            max_keepalive_connections=pool_size,
            keepalive_expiry=30.0,
        )
        default_headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "User-Agent": "ledger-sdk-python/1.0.0",
        }

        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=request_timeout,
            limits=connection_limits,
            headers=default_headers,
        )

    def _merged_headers(self, extra: dict[str, str] | None):
        """Return a copy of the client's default headers updated with ``extra``."""
        combined = self._client.headers.copy()
        if extra:
            combined.update(extra)
        return combined

    async def post(
        self,
        path: str,
        json_data: dict[str, Any],
        headers: dict[str, str] | None = None,
    ) -> httpx.Response:
        """POST ``json_data`` as JSON to ``path`` and return the response.

        Per-call ``headers`` override the client defaults.
        """
        return await self._client.post(
            path,
            json=json_data,
            headers=self._merged_headers(headers),
        )

    async def get(
        self,
        path: str,
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
    ) -> httpx.Response:
        """GET ``path`` with optional query ``params`` and return the response.

        Per-call ``headers`` override the client defaults.
        """
        return await self._client.get(
            path,
            params=params,
            headers=self._merged_headers(headers),
        )

    async def close(self) -> None:
        """Close the underlying httpx client."""
        await self._client.aclose()
|