elven-logs-interceptor-python 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elven_logs_interceptor_python-0.1.2.dist-info/METADATA +262 -0
- elven_logs_interceptor_python-0.1.2.dist-info/RECORD +56 -0
- elven_logs_interceptor_python-0.1.2.dist-info/WHEEL +4 -0
- logs_interceptor/__init__.py +333 -0
- logs_interceptor/application/__init__.py +27 -0
- logs_interceptor/application/config_service.py +232 -0
- logs_interceptor/application/log_service.py +383 -0
- logs_interceptor/config.py +190 -0
- logs_interceptor/domain/__init__.py +25 -0
- logs_interceptor/domain/entities.py +41 -0
- logs_interceptor/domain/interfaces.py +149 -0
- logs_interceptor/domain/value_objects.py +40 -0
- logs_interceptor/infrastructure/__init__.py +48 -0
- logs_interceptor/infrastructure/buffer/__init__.py +3 -0
- logs_interceptor/infrastructure/buffer/memory_buffer.py +187 -0
- logs_interceptor/infrastructure/circuit_breaker/__init__.py +3 -0
- logs_interceptor/infrastructure/circuit_breaker/circuit_breaker.py +110 -0
- logs_interceptor/infrastructure/compression/__init__.py +14 -0
- logs_interceptor/infrastructure/compression/base.py +20 -0
- logs_interceptor/infrastructure/compression/brotli_compressor.py +27 -0
- logs_interceptor/infrastructure/compression/factory.py +18 -0
- logs_interceptor/infrastructure/compression/gzip_compressor.py +20 -0
- logs_interceptor/infrastructure/compression/noop_compressor.py +14 -0
- logs_interceptor/infrastructure/context/__init__.py +3 -0
- logs_interceptor/infrastructure/context/context_provider.py +44 -0
- logs_interceptor/infrastructure/dlq/__init__.py +4 -0
- logs_interceptor/infrastructure/dlq/file_dlq.py +170 -0
- logs_interceptor/infrastructure/dlq/memory_dlq.py +59 -0
- logs_interceptor/infrastructure/filter/__init__.py +3 -0
- logs_interceptor/infrastructure/filter/log_filter.py +55 -0
- logs_interceptor/infrastructure/interceptors/__init__.py +3 -0
- logs_interceptor/infrastructure/interceptors/runtime_interceptor.py +139 -0
- logs_interceptor/infrastructure/memory/__init__.py +3 -0
- logs_interceptor/infrastructure/memory/memory_tracker.py +95 -0
- logs_interceptor/infrastructure/metrics/__init__.py +3 -0
- logs_interceptor/infrastructure/metrics/metrics_collector.py +104 -0
- logs_interceptor/infrastructure/transport/__init__.py +12 -0
- logs_interceptor/infrastructure/transport/loki_json_transport.py +226 -0
- logs_interceptor/infrastructure/transport/loki_protobuf_transport.py +209 -0
- logs_interceptor/infrastructure/transport/resilient_transport.py +161 -0
- logs_interceptor/infrastructure/transport/transport_factory.py +39 -0
- logs_interceptor/infrastructure/workers/__init__.py +3 -0
- logs_interceptor/infrastructure/workers/worker_pool.py +57 -0
- logs_interceptor/integrations/__init__.py +17 -0
- logs_interceptor/integrations/celery.py +53 -0
- logs_interceptor/integrations/django.py +44 -0
- logs_interceptor/integrations/fastapi.py +53 -0
- logs_interceptor/integrations/flask.py +50 -0
- logs_interceptor/integrations/logging_handler.py +43 -0
- logs_interceptor/integrations/loguru.py +36 -0
- logs_interceptor/integrations/structlog.py +21 -0
- logs_interceptor/preload.py +61 -0
- logs_interceptor/presentation/__init__.py +3 -0
- logs_interceptor/presentation/factory.py +128 -0
- logs_interceptor/types.py +89 -0
- logs_interceptor/utils.py +508 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
|
|
5
|
+
from ..config import (
|
|
6
|
+
BufferConfig,
|
|
7
|
+
CircuitBreakerConfig,
|
|
8
|
+
FilterConfig,
|
|
9
|
+
IntegrationsConfig,
|
|
10
|
+
LogsInterceptorConfig,
|
|
11
|
+
PerformanceConfig,
|
|
12
|
+
ResolvedBufferConfig,
|
|
13
|
+
ResolvedCircuitBreakerConfig,
|
|
14
|
+
ResolvedFilterConfig,
|
|
15
|
+
ResolvedLogsInterceptorConfig,
|
|
16
|
+
ResolvedPerformanceConfig,
|
|
17
|
+
ResolvedTransportConfig,
|
|
18
|
+
TransportConfig,
|
|
19
|
+
)
|
|
20
|
+
from ..types import LogLevel
|
|
21
|
+
|
|
22
|
+
# Levels accepted when the filter configuration does not list any.
DEFAULT_LEVELS: list[LogLevel] = [
    "debug",
    "info",
    "warn",
    "error",
    "fatal",
]

# Regex fragments used as the sensitive-data patterns when the filter
# configuration does not supply its own.
DEFAULT_SENSITIVE_PATTERNS = [
    # credentials
    r"password",
    r"token",
    r"secret",
    r"api[_-]?key",
    r"authorization",
    # financial / personal identifiers
    r"credit[_-]?card",
    r"ssn",
    r"cpf",
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ConfigService:
    """Validate user configuration and resolve it into a fully-defaulted form.

    The class is a stateless namespace: every method is static.
    """

    @staticmethod
    def validate(config: LogsInterceptorConfig) -> list[str]:
        """Collect every problem found in *config*.

        The optional sections (``buffer``, ``filter``, ``circuit_breaker``,
        ``performance``) are only checked when they are present.

        Args:
            config: The user-supplied configuration; ``config.transport`` is
                read unconditionally.

        Returns:
            Human-readable error messages in check order; empty when no
            problem was found.
        """
        errors: list[str] = []
        transport = config.transport

        if not transport.url:
            errors.append("Transport URL is required")
        if not transport.tenant_id:
            errors.append("Tenant ID is required")
        if not config.app_name:
            errors.append("App name is required")

        if transport.url:
            try:
                parsed = urlparse(transport.url)
                url_is_valid = bool(parsed.scheme and parsed.netloc)
            except ValueError:
                # urlparse raises on malformed netlocs (e.g. an unbalanced
                # IPv6 bracket); report that as a validation error instead of
                # letting validate() itself crash.
                url_is_valid = False
            if not url_is_valid:
                errors.append("Transport URL must be a valid URL")

        ConfigService._validate_non_negative(errors, "Transport timeout", transport.timeout)
        ConfigService._validate_non_negative(errors, "Transport max retries", transport.max_retries)
        ConfigService._validate_non_negative(errors, "Transport retry delay", transport.retry_delay)

        if config.buffer:
            ConfigService._validate_positive(errors, "Buffer max size", config.buffer.max_size)
            ConfigService._validate_positive(errors, "Flush interval", config.buffer.flush_interval)
            ConfigService._validate_positive(errors, "Buffer max memory", config.buffer.max_memory_mb)

        if config.filter and config.filter.sampling_rate is not None:
            if config.filter.sampling_rate < 0 or config.filter.sampling_rate > 1:
                errors.append("Sampling rate must be between 0 and 1")

        if config.circuit_breaker:
            ConfigService._validate_positive(
                errors,
                "Circuit breaker failure threshold",
                config.circuit_breaker.failure_threshold,
            )
            ConfigService._validate_positive(
                errors,
                "Circuit breaker reset timeout",
                config.circuit_breaker.reset_timeout,
            )
            ConfigService._validate_positive(
                errors,
                "Circuit breaker half-open requests",
                config.circuit_breaker.half_open_requests,
            )

        if config.performance:
            ConfigService._validate_positive(
                errors,
                "Max concurrent flushes",
                config.performance.max_concurrent_flushes,
            )

        if transport.compression_level is not None and transport.compression_level < 0:
            errors.append("Compression level must be greater than or equal to 0")

        return errors

    @staticmethod
    def resolve(config: LogsInterceptorConfig) -> ResolvedLogsInterceptorConfig:
        """Return *config* with every unset option replaced by its default.

        Options left as ``None`` fall back to the defaults spelled out below;
        explicitly provided values (including ``False`` and ``0`` for the
        ``is None``-checked options) are kept.
        """
        transport = ConfigService._resolve_transport(config.transport, config.performance)
        return ResolvedLogsInterceptorConfig(
            transport=transport,
            app_name=config.app_name,
            version=config.version or "1.0.0",
            environment=config.environment or "production",
            labels=config.labels or {},
            dynamic_labels=config.dynamic_labels or {},
            buffer=ConfigService._resolve_buffer(config.buffer),
            filter=ConfigService._resolve_filter(config.filter),
            circuit_breaker=ConfigService._resolve_circuit_breaker(config.circuit_breaker),
            integrations=config.integrations or IntegrationsConfig(),
            performance=ConfigService._resolve_performance(config.performance),
            dead_letter_queue=config.dead_letter_queue,
            enable_metrics=True if config.enable_metrics is None else config.enable_metrics,
            enable_health_check=True
            if config.enable_health_check is None
            else config.enable_health_check,
            intercept_console=False if config.intercept_console is None else config.intercept_console,
            preserve_original_console=True
            if config.preserve_original_console is None
            else config.preserve_original_console,
            debug=False if config.debug is None else config.debug,
            silent_errors=False if config.silent_errors is None else config.silent_errors,
        )

    @staticmethod
    def _resolve_transport(
        transport: TransportConfig,
        performance: PerformanceConfig | None,
    ) -> ResolvedTransportConfig:
        """Resolve transport options.

        For compression level, worker usage, worker count and worker timeout
        the transport value wins, then the matching ``performance`` value,
        then the built-in default.
        """
        # "gzip" is the default: it covers True, "gzip", None and any value
        # that is not recognised below.
        compression = "gzip"
        if transport.compression in (False, "none"):
            compression = "none"
        elif transport.compression == "brotli":
            compression = "brotli"
        elif transport.compression == "snappy":
            compression = "snappy"

        return ResolvedTransportConfig(
            url=transport.url,
            tenant_id=transport.tenant_id,
            auth_token=transport.auth_token or "",
            timeout=transport.timeout if transport.timeout is not None else 10_000,
            max_retries=transport.max_retries if transport.max_retries is not None else 3,
            retry_delay=transport.retry_delay if transport.retry_delay is not None else 1_000,
            compression=compression,  # type: ignore[arg-type]
            compression_level=(
                transport.compression_level
                if transport.compression_level is not None
                else (performance.compression_level if performance and performance.compression_level is not None else 6)
            ),
            compression_threshold=(
                transport.compression_threshold if transport.compression_threshold is not None else 1024
            ),
            use_workers=(
                transport.use_workers
                if transport.use_workers is not None
                else (performance.use_workers if performance and performance.use_workers is not None else True)
            ),
            max_workers=(
                transport.max_workers
                if transport.max_workers is not None
                else (performance.max_workers if performance else None)
            ),
            worker_timeout=(
                transport.worker_timeout
                if transport.worker_timeout is not None
                else (performance.worker_timeout if performance and performance.worker_timeout is not None else 30_000)
            ),
            enable_connection_pooling=(
                True if transport.enable_connection_pooling is None else transport.enable_connection_pooling
            ),
            max_sockets=transport.max_sockets if transport.max_sockets is not None else 50,
        )

    @staticmethod
    def _resolve_buffer(buffer: BufferConfig | None) -> ResolvedBufferConfig:
        """Resolve buffer options, using an empty ``BufferConfig`` when none is given."""
        source = buffer or BufferConfig()
        return ResolvedBufferConfig(
            max_size=100 if source.max_size is None else source.max_size,
            flush_interval=5000 if source.flush_interval is None else source.flush_interval,
            max_age=30_000 if source.max_age is None else source.max_age,
            auto_flush=True if source.auto_flush is None else source.auto_flush,
            max_memory_mb=50 if source.max_memory_mb is None else source.max_memory_mb,
        )

    @staticmethod
    def _resolve_filter(filter_cfg: FilterConfig | None) -> ResolvedFilterConfig:
        """Resolve filter options, using an empty ``FilterConfig`` when none is given.

        The module-level default lists are copied so that mutating a resolved
        configuration can never alter the defaults seen by later calls.
        """
        source = filter_cfg or FilterConfig()
        return ResolvedFilterConfig(
            levels=source.levels or list(DEFAULT_LEVELS),
            patterns=source.patterns or [],
            sampling_rate=1.0 if source.sampling_rate is None else source.sampling_rate,
            max_message_length=8192
            if source.max_message_length is None
            else source.max_message_length,
            sanitize=True if source.sanitize is None else source.sanitize,
            sensitive_patterns=source.sensitive_patterns or list(DEFAULT_SENSITIVE_PATTERNS),
        )

    @staticmethod
    def _resolve_circuit_breaker(
        circuit_breaker: CircuitBreakerConfig | None,
    ) -> ResolvedCircuitBreakerConfig:
        """Resolve circuit-breaker options, using an empty config when none is given."""
        source = circuit_breaker or CircuitBreakerConfig()
        return ResolvedCircuitBreakerConfig(
            enabled=True if source.enabled is None else source.enabled,
            failure_threshold=50 if source.failure_threshold is None else source.failure_threshold,
            reset_timeout=30_000 if source.reset_timeout is None else source.reset_timeout,
            half_open_requests=3 if source.half_open_requests is None else source.half_open_requests,
        )

    @staticmethod
    def _resolve_performance(performance: PerformanceConfig | None) -> ResolvedPerformanceConfig:
        """Resolve performance options, using an empty config when none is given."""
        source = performance or PerformanceConfig()
        return ResolvedPerformanceConfig(
            use_workers=True if source.use_workers is None else source.use_workers,
            max_concurrent_flushes=3
            if source.max_concurrent_flushes is None
            else source.max_concurrent_flushes,
            compression_level=6 if source.compression_level is None else source.compression_level,
            max_workers=source.max_workers,
            worker_timeout=30_000 if source.worker_timeout is None else source.worker_timeout,
        )

    @staticmethod
    def _validate_non_negative(errors: list[str], field: str, value: float | None) -> None:
        """Append an error to *errors* when *value* is set and below zero."""
        if value is not None and value < 0:
            errors.append(f"{field} must be greater than or equal to 0")

    @staticmethod
    def _validate_positive(errors: list[str], field: str, value: float | None) -> None:
        """Append an error to *errors* when *value* is set and not above zero."""
        if value is not None and value <= 0:
            errors.append(f"{field} must be greater than 0")
|
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import socket
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from collections.abc import Callable
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from typing import Any, cast
|
|
12
|
+
|
|
13
|
+
from ..domain.entities import LogEntryEntity
|
|
14
|
+
from ..domain.interfaces import IContextProvider, ILogBuffer, ILogFilter, ILogger, ILogTransport
|
|
15
|
+
from ..infrastructure.metrics.metrics_collector import MetricsCollector
|
|
16
|
+
from ..types import CircuitBreakerState, HealthStatus, LoggerMetrics, LogLevel
|
|
17
|
+
from ..utils import internal_warn
|
|
18
|
+
|
|
19
|
+
resource: Any = None
|
|
20
|
+
try:
|
|
21
|
+
import resource
|
|
22
|
+
except Exception: # pragma: no cover
|
|
23
|
+
pass
|
|
24
|
+
|
|
25
|
+
otel_trace: Any = None
|
|
26
|
+
try:
|
|
27
|
+
from opentelemetry import trace as otel_trace
|
|
28
|
+
except Exception: # pragma: no cover - optional dependency
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(slots=True)
class _FlushTask:
    """One batch of log entries queued for delivery by a flush worker thread."""

    # Entries passed to the transport in a single send() call.
    entries: list[LogEntryEntity]
    # Set by the worker once the send attempt has finished, whether it
    # succeeded or failed; the enqueuing caller blocks on it.
    event: threading.Event
    # Exception raised by the send attempt, or None when it succeeded.
    error: Exception | None = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class LogService(ILogger):
    """Filter, buffer and ship log entries through a transport.

    Entries accepted by the filter are stored in the buffer; ``flush`` drains
    the buffer and sends each batch on a short-lived daemon thread, with at
    most ``max_concurrent_flushes`` batches in flight at once.
    """

    # Number of failed flushes in a row after which get_health() reports
    # the service as unhealthy.
    _MAX_CONSECUTIVE_ERRORS = 10

    def __init__(
        self,
        filter_service: ILogFilter,
        buffer: ILogBuffer,
        transport: ILogTransport,
        context_provider: IContextProvider,
        config: dict[str, Any],
    ) -> None:
        """Wire the collaborators together and register the buffer flush callback.

        Args:
            filter_service: Decides which entries are kept and sanitizes them.
            buffer: Holds entries between flushes.
            transport: Delivers batches of entries.
            context_provider: Supplies the ambient context merged into entries.
            config: Plain-dict settings; the keys ``app_name``, ``version`` and
                ``environment`` are required when an entry is created.
        """
        self._filter = filter_service
        self._buffer = buffer
        self._transport = transport
        self._context_provider = context_provider
        self._config = config

        self._start_time = time.time()
        self._hostname = socket.gethostname()
        self._pid = str(os.getpid())
        # `or 1` tolerates an explicit None (or 0) in the config dict.
        self._max_concurrent_flushes = max(1, int(config.get("max_concurrent_flushes") or 1))

        self._metrics: dict[str, Any] = {
            "logs_processed": 0,
            "logs_dropped": 0,
            "logs_sanitized": 0,
            "flush_count": 0,
            "error_count": 0,
            "buffer_size": 0,
            "avg_flush_time": 0.0,
            "last_flush_time": 0.0,
            "memory_usage": 0.0,
            "cpu_usage": 0.0,
            "circuit_breaker_trips": 0,
            "dropped_by_backpressure": 0,
            "dropped_by_dlq": 0,
        }

        self._destroyed = False
        self._log_sequence = 0
        self._last_resource_sample_at = 0.0
        self._resource_sample_interval = 1.0
        # Failed flushes since the last successful one; drives get_health().
        self._consecutive_errors = 0

        self._flush_queue: list[_FlushTask] = []
        self._in_flight_flushes = 0
        self._queue_lock = threading.RLock()
        self._queue_cond = threading.Condition(self._queue_lock)

        self._metrics_collector = MetricsCollector()

        if hasattr(self._buffer, "set_flush_callback"):
            try:
                self._buffer.set_flush_callback(lambda: self.flush())
            except Exception as exc:
                # Best effort: the service still works with manual flushes.
                internal_warn("Failed to register buffer flush callback", exc)

    def debug(self, message: str, context: dict[str, Any] | None = None) -> None:
        """Log *message* at the ``debug`` level."""
        self.log("debug", message, context)

    def info(self, message: str, context: dict[str, Any] | None = None) -> None:
        """Log *message* at the ``info`` level."""
        self.log("info", message, context)

    def warn(self, message: str, context: dict[str, Any] | None = None) -> None:
        """Log *message* at the ``warn`` level."""
        self.log("warn", message, context)

    def error(self, message: str, context: dict[str, Any] | None = None) -> None:
        """Log *message* at the ``error`` level."""
        self.log("error", message, context)

    def fatal(self, message: str, context: dict[str, Any] | None = None) -> None:
        """Log *message* at the ``fatal`` level and flush immediately.

        A failing flush is reported through ``internal_warn`` and never
        raised to the caller.
        """
        self.log("fatal", message, context)
        try:
            self.flush()
        except Exception as exc:
            internal_warn("Failed to flush after fatal log", exc)

    def with_context(self, context: dict[str, Any], fn: Callable[[], Any]) -> Any:
        """Run *fn* via the context provider with *context* and return its result."""
        return self._context_provider.run_with_context(context, fn)

    async def with_context_async(self, context: dict[str, Any], fn: Callable[[], Any]) -> Any:
        """Async variant of ``with_context``; awaits the provider's result."""
        return await self._context_provider.run_with_context_async(context, fn)

    def log(self, level: LogLevel, message: str, context: dict[str, Any] | None = None) -> None:
        """Create an entry, run it through the filter and add it to the buffer.

        Entries are counted as dropped when the service is destroyed or the
        filter rejects them; a disabled level returns without counting.
        """
        if self._destroyed:
            self._metrics["logs_dropped"] += 1
            return

        if not self._filter.is_level_enabled(level):
            return

        entry = self._create_log_entry(level, message, context)

        if not self._filter.should_process(entry):
            self._metrics["logs_dropped"] += 1
            return

        filtered = self._filter.filter(entry)
        if filtered.message != entry.message or filtered.context != entry.context:
            self._metrics["logs_sanitized"] += 1

        self._buffer.add(filtered)
        self._metrics["logs_processed"] += 1
        self._update_metrics()

    def track_event(self, event_name: str, properties: dict[str, Any] | None = None) -> None:
        """Log an ``info`` entry whose message is ``[EVENT] <event_name>``."""
        self.info(f"[EVENT] {event_name}", properties)

    def _create_log_entry(
        self,
        level: LogLevel,
        message: str,
        context: dict[str, Any] | None,
    ) -> LogEntryEntity:
        """Build the entry for one log call.

        Trace and span IDs come from the current OpenTelemetry span when one
        with a valid context exists, otherwise from the ``trace_id`` /
        ``span_id`` dynamic labels. The explicit *context* overrides keys of
        the ambient context.
        """
        async_context = self._context_provider.get_context() or {}
        log_id = f"{int(time.time() * 1000):x}-{self._log_sequence:x}"
        self._log_sequence += 1

        dynamic_labels = self._collect_dynamic_labels()

        trace_id, span_id = self._current_otel_ids()
        if not trace_id:
            trace_id = dynamic_labels.get("trace_id")
        if not span_id:
            span_id = dynamic_labels.get("span_id")

        return LogEntryEntity(
            id=log_id,
            timestamp=datetime.now(timezone.utc).isoformat(),
            level=level,
            message=message,
            context={**async_context, **(context or {})} or None,
            trace_id=trace_id,
            span_id=span_id,
            request_id=dynamic_labels.get("request_id"),
            labels={
                "app": self._config["app_name"],
                "version": self._config["version"],
                "environment": self._config["environment"],
                "level": level,
                "hostname": self._hostname,
                "pid": self._pid,
                **(self._config.get("labels") or {}),
                **dynamic_labels,
            },
            metadata={
                "memory_usage": self._metrics.get("memory_usage", 0.0),
                "cpu_usage": self._metrics.get("cpu_usage", 0.0),
            },
        )

    def _collect_dynamic_labels(self) -> dict[str, str]:
        """Evaluate the configured label providers, skipping failed or empty ones."""
        labels: dict[str, str] = {}
        providers = self._config.get("dynamic_labels") or {}
        for key, provider in providers.items():
            try:
                raw = provider()
                # A provider returning None means "no value"; str(None) would
                # otherwise yield the bogus label "None".
                value = "" if raw is None else str(raw)
            except Exception:
                # A faulty provider must never break logging.
                continue
            if value and value != "undefined":
                labels[key] = value
        return labels

    @staticmethod
    def _current_otel_ids() -> tuple[str | None, str | None]:
        """Return ``(trace_id, span_id)`` of the current OpenTelemetry span, if any."""
        if otel_trace is None:
            return None, None
        try:
            span = otel_trace.get_current_span()
            ctx = span.get_span_context() if span is not None else None
            if ctx is None:
                return None, None
            return (
                LogService._format_otel_id(getattr(ctx, "trace_id", None), 32),
                LogService._format_otel_id(getattr(ctx, "span_id", None), 16),
            )
        except Exception:
            # Tracing is optional; never let it break logging.
            return None, None

    @staticmethod
    def _format_otel_id(raw: Any, width: int) -> str | None:
        """Render an integer ID as zero-padded hex; 0 (the invalid ID) becomes None."""
        if isinstance(raw, int):
            return f"{raw:0{width}x}" if raw else None
        return raw or None

    def flush(self) -> None:
        """Send everything currently buffered and wait until no flush is pending.

        Does nothing once the service is destroyed. An exception raised by
        the transport for this call's batch is re-raised here.
        """
        if self._destroyed:
            return

        if self._buffer.size() > 0:
            entries = self._buffer.flush()
            if entries:
                self._enqueue_flush(entries)

        self._wait_for_queue_idle()

    async def aflush(self) -> None:
        """Run ``flush`` in a worker thread so the event loop is not blocked."""
        await asyncio.to_thread(self.flush)

    def _enqueue_flush(self, entries: list[LogEntryEntity]) -> None:
        """Queue *entries* as one batch and block until it has been sent.

        Raises:
            Exception: Whatever the send attempt for this batch raised.
        """
        task = _FlushTask(entries=entries, event=threading.Event())

        with self._queue_cond:
            self._flush_queue.append(task)
            self._process_flush_queue_locked()

        task.event.wait()
        if task.error is not None:
            raise task.error

    def _process_flush_queue_locked(self) -> None:
        """Start workers for queued batches up to the concurrency limit.

        Must be called with ``self._queue_cond`` held.
        """
        while self._in_flight_flushes < self._max_concurrent_flushes and self._flush_queue:
            task = self._flush_queue.pop(0)
            self._in_flight_flushes += 1
            worker = threading.Thread(target=self._send_flush_batch, args=(task,), daemon=True)
            try:
                worker.start()
            except RuntimeError as exc:
                # The thread could not be started, so _send_flush_batch will
                # never run for this task: undo the accounting and fail the
                # task, otherwise its waiter would block forever.
                self._in_flight_flushes -= 1
                self._metrics["error_count"] += 1
                self._consecutive_errors += 1
                task.error = exc
                task.event.set()
                self._queue_cond.notify_all()

    def _send_flush_batch(self, task: _FlushTask) -> None:
        """Worker body: send one batch, record the outcome, release the slot."""
        start = time.perf_counter()
        error: Exception | None = None

        try:
            try:
                self._transport.send(task.entries)
            except Exception as exc:
                error = exc
            self._record_flush_outcome(error, (time.perf_counter() - start) * 1000)
        finally:
            task.error = error
            task.event.set()
            with self._queue_cond:
                self._in_flight_flushes = max(0, self._in_flight_flushes - 1)
                self._process_flush_queue_locked()
                self._queue_cond.notify_all()

    def _record_flush_outcome(self, error: Exception | None, flush_time_ms: float) -> None:
        """Update flush counters for one send attempt.

        Counter updates are read-modify-write operations performed from
        several worker threads, so they are done under the queue lock. A
        bookkeeping failure is only warned about: it must not be reported as
        a failed delivery.
        """
        try:
            with self._queue_cond:
                if error is not None:
                    self._metrics["error_count"] += 1
                    self._consecutive_errors += 1
                    return
                self._consecutive_errors = 0
                self._metrics["flush_count"] += 1
                self._metrics["last_flush_time"] = time.time()
                count = self._metrics["flush_count"]
                current_avg = self._metrics.get("avg_flush_time", 0.0)
                # Incremental mean over all successful flushes.
                self._metrics["avg_flush_time"] = ((current_avg * (count - 1)) + flush_time_ms) / count
            self._metrics_collector.record_latency(flush_time_ms)
            self._update_metrics()
        except Exception as exc:
            internal_warn("Failed to record flush metrics", exc)

    def _wait_for_queue_idle(self) -> None:
        """Block until no batch is queued or in flight."""
        with self._queue_cond:
            while self._in_flight_flushes > 0 or self._flush_queue:
                # The timeout only bounds each wait; the loop re-checks state.
                self._queue_cond.wait(timeout=1)

    def get_metrics(self) -> LoggerMetrics:
        """Return a snapshot of counters plus latency, compression and throughput data."""
        self._update_metrics(force=True)
        latency_metrics = self._metrics_collector.get_latency_metrics()
        compression_metrics = self._metrics_collector.get_compression_metrics()

        metrics: dict[str, Any] = {
            **self._metrics,
            "buffer_size": self._buffer.size(),
            "latency": {
                "p50": latency_metrics["p50"],
                "p95": latency_metrics["p95"],
                "p99": latency_metrics["p99"],
                "avg": latency_metrics["avg"],
            },
            "compression": {
                "avg_ratio": float(compression_metrics["avg_ratio"]),
                "avg_time": float(compression_metrics["avg_time"]),
                "total_saved_bytes": int(compression_metrics["total_saved_bytes"]),
            },
            "throughput": self._metrics_collector.get_throughput(60),
        }
        return cast(LoggerMetrics, metrics)

    def get_health(self) -> HealthStatus:
        """Return the current health summary.

        The service is healthy while the transport reports healthy and fewer
        than ``_MAX_CONSECUTIVE_ERRORS`` flushes have failed in a row; a
        successful flush resets that streak.
        """
        buffer_metrics = self._buffer.get_metrics()
        transport_health = self._transport.get_health()
        transport_ok = bool(transport_health.get("healthy", False))

        circuit_state: CircuitBreakerState = "closed"
        if not transport_ok:
            circuit_state = "open"
        elif "HALF_OPEN" in str(transport_health.get("error_message", "")):
            circuit_state = "half-open"

        consecutive_errors = self._consecutive_errors
        health: dict[str, Any] = {
            "healthy": consecutive_errors < self._MAX_CONSECUTIVE_ERRORS and transport_ok,
            "last_successful_flush": self._metrics["last_flush_time"],
            "consecutive_errors": consecutive_errors,
            "buffer_utilization": (
                (buffer_metrics["size"] / buffer_metrics["max_size"]) if buffer_metrics["max_size"] else 0
            ),
            "uptime": time.time() - self._start_time,
            "memory_usage_mb": self._metrics["memory_usage"],
            "circuit_breaker_state": circuit_state,
        }
        last_error = transport_health.get("error_message")
        if isinstance(last_error, str):
            health["last_error"] = last_error
        return cast(HealthStatus, health)

    def destroy(self) -> None:
        """Flush what is buffered, then destroy the buffer and the transport.

        Calling it again is a no-op. Buffer and transport are destroyed even
        when the final flush fails.

        Raises:
            Exception: The error of the final flush, re-raised after cleanup.
        """
        if self._destroyed:
            return
        self._destroyed = True

        flush_error: Exception | None = None
        try:
            # flush() is a no-op once _destroyed is set, so drain directly.
            if self._buffer.size() > 0:
                entries = self._buffer.flush()
                if entries:
                    self._enqueue_flush(entries)
            self._wait_for_queue_idle()
        except Exception as exc:
            flush_error = exc
        finally:
            try:
                self._buffer.destroy()
            except Exception as exc:
                internal_warn("Failed to destroy buffer", exc)
            try:
                self._transport.destroy()
            except Exception as exc:
                internal_warn("Failed to destroy transport", exc)

        if flush_error is not None:
            raise flush_error

    async def adestroy(self) -> None:
        """Run ``destroy`` in a worker thread so the event loop is not blocked."""
        await asyncio.to_thread(self.destroy)

    def _update_metrics(self, force: bool = False) -> None:
        """Refresh resource, buffer and transport gauges.

        Unless *force* is set, the full refresh runs at most once per
        ``_resource_sample_interval`` seconds; in between only the buffer
        size is updated. Does nothing when ``enable_metrics`` is false.
        """
        if not bool(self._config.get("enable_metrics", True)):
            return

        now = time.time()
        if not force and (now - self._last_resource_sample_at) < self._resource_sample_interval:
            self._metrics["buffer_size"] = self._buffer.size()
            return

        self._last_resource_sample_at = now

        self._metrics["memory_usage"] = self._peak_rss_mb()
        # NOTE: process_time() is cumulative CPU seconds, not a utilisation ratio.
        self._metrics["cpu_usage"] = time.process_time()

        buffer_metrics = self._buffer.get_metrics()
        self._metrics["buffer_size"] = buffer_metrics["size"]
        self._metrics["dropped_by_backpressure"] = int(buffer_metrics.get("dropped_entries", 0))

        transport_metrics = self._transport.get_metrics() or {}
        self._metrics["dropped_by_dlq"] = int(transport_metrics.get("dlq_dropped_entries", 0))

    @staticmethod
    def _peak_rss_mb() -> float:
        """Return the process's peak resident set size in MiB, or 0.0 if unavailable."""
        if resource is None:
            return 0.0
        try:
            peak = float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
            # ru_maxrss is reported in bytes on macOS and in KiB on Linux.
            divisor = 1024 * 1024 if os.uname().sysname == "Darwin" else 1024
            return peak / divisor
        except Exception:
            return 0.0
|