rebrandly-otel 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rebrandly-otel might be problematic. Click here for more details.
- rebrandly_otel-0.1.1.dist-info/METADATA +327 -0
- rebrandly_otel-0.1.1.dist-info/RECORD +11 -0
- rebrandly_otel-0.1.1.dist-info/WHEEL +5 -0
- rebrandly_otel-0.1.1.dist-info/licenses/LICENSE +19 -0
- rebrandly_otel-0.1.1.dist-info/top_level.txt +1 -0
- src/__init__.py +0 -0
- src/logs.py +112 -0
- src/metrics.py +229 -0
- src/otel_utils.py +54 -0
- src/rebrandly_otel.py +492 -0
- src/traces.py +189 -0
src/rebrandly_otel.py
ADDED
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
# rebrandly_otel.py
|
|
2
|
+
"""
|
|
3
|
+
Rebrandly OpenTelemetry SDK - Simplified instrumentation for Rebrandly services.
|
|
4
|
+
"""
|
|
5
|
+
import time
|
|
6
|
+
import psutil
|
|
7
|
+
import functools
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from opentelemetry.trace import Status, StatusCode, SpanKind
|
|
11
|
+
from typing import Optional, Dict, Any, Callable, TypeVar
|
|
12
|
+
from opentelemetry import baggage, propagate, context
|
|
13
|
+
|
|
14
|
+
from src.traces import RebrandlyTracer
|
|
15
|
+
from src.metrics import RebrandlyMeter
|
|
16
|
+
from src.logs import RebrandlyLogger
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
T = TypeVar('T')
|
|
20
|
+
|
|
21
|
+
class RebrandlyOTEL:
|
|
22
|
+
"""Main entry point for Rebrandly's OpenTelemetry instrumentation."""
|
|
23
|
+
|
|
24
|
+
_instance: Optional['RebrandlyOTEL'] = None
|
|
25
|
+
_initialized: bool = False
|
|
26
|
+
|
|
27
|
+
def __new__(cls):
    """Return the shared instance stored on the class, creating it on the first call."""
    shared = cls._instance
    if shared is None:
        # First construction: allocate the object and remember it on the class.
        shared = super().__new__(cls)
        cls._instance = shared
    return shared
|
|
31
|
+
|
|
32
|
+
def __init__(self):
|
|
33
|
+
if not self._initialized:
|
|
34
|
+
self._tracer: Optional[RebrandlyTracer] = None
|
|
35
|
+
self._meter: Optional[RebrandlyMeter] = None
|
|
36
|
+
self._logger: Optional[RebrandlyLogger] = None
|
|
37
|
+
self.__class__._initialized = True
|
|
38
|
+
|
|
39
|
+
def initialize(self, **kwargs) -> 'RebrandlyOTEL':
    """
    Eagerly build the tracer, meter and logger, in that order.

    Args:
        **kwargs: Accepted but not read by this method.

    Returns:
        This instance, so the call can be chained.
    """
    # Reading each property triggers its lazy construction.
    for component in ('tracer', 'meter', 'logger'):
        getattr(self, component)

    return self
|
|
46
|
+
|
|
47
|
+
@property
def tracer(self) -> RebrandlyTracer:
    """Return the RebrandlyTracer, constructing it on first access."""
    if self._tracer is not None:
        return self._tracer
    self._tracer = RebrandlyTracer()
    return self._tracer
|
|
53
|
+
|
|
54
|
+
@property
def meter(self) -> RebrandlyMeter:
    """Return the RebrandlyMeter, constructing it on first access."""
    if self._meter is not None:
        return self._meter
    self._meter = RebrandlyMeter()
    return self._meter
|
|
60
|
+
|
|
61
|
+
@property
def logger(self) -> RebrandlyLogger:
    """Return the RebrandlyLogger, constructing it on first access."""
    if self._logger is not None:
        return self._logger
    self._logger = RebrandlyLogger()
    return self._logger
|
|
67
|
+
|
|
68
|
+
# Convenience methods for common operations
|
|
69
|
+
|
|
70
|
+
@contextmanager
def span(self,
         name: str,
         attributes: Optional[Dict[str, Any]] = None,
         kind: SpanKind = SpanKind.INTERNAL,
         message=None):
    """
    Create a span using context manager.

    The span comes from ``self.tracer.start_span``. An exception raised in the
    ``with`` body is recorded on the span, the status is set to ERROR with the
    exception text, and the exception is re-raised. On a clean exit the status
    is set to OK only when the body did not set a status itself.

    Args:
        name: Span name
        attributes: Optional span attributes
        kind: Span kind (defaults to INTERNAL)
        message: Unused here; accepted so callers can pass the same keyword
            arguments they pass to aws_message_span

    Yields:
        The span produced by the tracer
    """
    with self.tracer.start_span(name, attributes=attributes, kind=kind) as span:
        try:
            yield span
        except Exception as e:
            span.record_exception(e)
            span.set_status(Status(StatusCode.ERROR, str(e)))
            raise
        else:
            # The body may already have set a status (lambda_handler sets ERROR
            # for statusCode >= 400). Setting OK unconditionally would replace
            # that ERROR, so only default to OK when the status is still unset.
            # Spans without a readable `status` are treated as unset.
            current_status = getattr(span, 'status', None)
            current_code = getattr(current_status, 'status_code', StatusCode.UNSET)
            if current_code is StatusCode.UNSET:
                span.set_status(Status(StatusCode.OK))
|
|
85
|
+
|
|
86
|
+
def trace_decorator(self,
                    name: Optional[str] = None,
                    attributes: Optional[Dict[str, Any]] = None,
                    kind: SpanKind = SpanKind.INTERNAL) -> Callable[[T], T]:
    """
    Build a decorator that runs the wrapped function inside ``self.span``.

    Args:
        name: Span name; falls back to '{module}.{function name}' of the
            decorated function
        attributes: Optional span attributes
        kind: Span kind (defaults to INTERNAL)

    Returns:
        A decorator preserving the wrapped function's metadata
    """
    def apply(func: T) -> T:
        # Resolved once, when the decorator is applied.
        if name:
            span_name = name
        else:
            span_name = f"{func.__module__}.{func.__name__}"

        @functools.wraps(func)
        def traced(*args, **kwargs):
            with self.span(span_name, attributes=attributes, kind=kind):
                outcome = func(*args, **kwargs)
                return outcome

        return traced
    return apply
|
|
101
|
+
|
|
102
|
+
def lambda_handler(self,
                   name: Optional[str] = None,
                   attributes: Optional[Dict[str, Any]] = None,
                   kind: SpanKind = SpanKind.CONSUMER,
                   auto_flush: bool = True,
                   skip_aws_link: bool = True):
    """
    Decorator specifically for Lambda handlers with automatic flushing.

    Each invocation increments the invocation counters, runs the handler
    inside a span, and (optionally) flushes telemetry in a ``finally`` block.

    Args:
        name: Optional span name (defaults to 'lambda.{function_name}')
        attributes: Additional span attributes; copied per invocation, never mutated
        kind: Span kind (defaults to CONSUMER)
        auto_flush: If True, automatically flush after handler completes
        skip_aws_link: If False and the first entry of event['Records'] has a
            'MessageAttributes' key, the span is created through
            aws_message_span with that record; otherwise a plain span is used

    Usage:
        @lambda_handler()
        def my_handler(event, context): ...

        @lambda_handler(name="custom_span_name")
        def my_handler(event, context): ...

        @lambda_handler(name="my_span", attributes={"env": "prod"})
        def my_handler(event, context): ...
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(event=None, context=None):
            # Determine span name
            span_name = name or f"lambda.{func.__name__}"
            start_func = datetime.now()

            # Build span attributes on a copy: `attributes` belongs to the
            # decorator and is shared by every invocation, so writing into it
            # would leak values (e.g. faas.execution) from one call to the next.
            span_attributes = dict(attributes or {})
            span_attributes['faas.trigger'] = self._detect_lambda_trigger(event)

            # Add Lambda-specific attributes if context is available
            if context is not None:
                span_attributes.update({
                    "faas.execution": getattr(context, 'aws_request_id', 'unknown'),
                    "faas.id": getattr(context, 'function_name', 'unknown'),
                    "cloud.provider": "aws",
                    "cloud.platform": "aws_lambda"
                })

            result = None
            try:
                # Increment invocations counter
                self.meter.GlobalMetrics.invocations.add(1, {'function': span_name})

                # Pick the span factory; only dict events are inspected so a
                # list/str payload cannot break the lookup.
                record = None
                span_function = self.span
                if not skip_aws_link and isinstance(event, dict):
                    records = event.get('Records')
                    if (isinstance(records, (list, tuple)) and len(records) > 0
                            and isinstance(records[0], dict)
                            and 'MessageAttributes' in records[0]):
                        span_function = self.aws_message_span
                        record = records[0]

                with span_function(span_name, message=record, attributes=span_attributes, kind=kind) as span_context:
                    if event is not None:
                        # Add event type as span event
                        span_context.add_event("lambda.invocation.start", attributes={"event.type": type(event).__name__})
                        result = func(event, context)
                    else:
                        # Invoked without an event: the handler is called with no arguments
                        result = func()

                    # Add result information if applicable
                    if isinstance(result, dict) and 'statusCode' in result:
                        status_code = result['statusCode']
                        span_context.set_attribute("http.status_code", status_code)

                        # A non-numeric statusCode must not make the
                        # instrumentation raise on behalf of the handler.
                        try:
                            numeric_status = int(status_code)
                        except (TypeError, ValueError):
                            numeric_status = None

                        # Set span status based on HTTP status code
                        if numeric_status is not None:
                            if numeric_status >= 400:
                                span_context.set_status(Status(StatusCode.ERROR, f"HTTP {result['statusCode']}"))
                            else:
                                span_context.set_status(Status(StatusCode.OK))

                # Increment success counter
                self.meter.GlobalMetrics.successful_invocations.add(1, {'function': span_name})

                return result

            except Exception as e:
                # Increment error counter
                self.meter.GlobalMetrics.error_invocations.add(1, {'function': span_name, 'error': type(e).__name__})
                raise

            finally:
                if auto_flush:
                    self.logger.logger.info(f"[OTEL] Lambda handler '{span_name}' completed, flushing telemetry...")
                    self.force_flush(start_datetime=start_func)

        return wrapper
    return decorator
|
|
196
|
+
|
|
197
|
+
def aws_message_handler(self,
                        name: Optional[str] = None,
                        attributes: Optional[Dict[str, Any]] = None,
                        kind: SpanKind = SpanKind.CONSUMER,
                        auto_flush: bool = True):
    """
    Decorator for functions that process a single AWS message record.

    The decorated function must take the record as its first parameter.
    Each call increments the invocation counters, runs the function inside
    a span, and (optionally) flushes telemetry in a ``finally`` block.

    Args:
        name: Optional span name (defaults to 'lambda.{function_name}')
        attributes: Additional span attributes; copied per call, never mutated
        kind: Span kind (defaults to CONSUMER)
        auto_flush: If True, automatically flush after the function completes
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(record=None, *args, **kwargs):
            # Determine span name
            span_name = name or f"lambda.{func.__name__}"
            start_func = datetime.now()

            # Copy: `attributes` is shared by every call of the decorated
            # function and aws_message_span adds keys to the dict it is given.
            span_attributes = dict(attributes or {})

            result = None
            try:
                # Increment invocations counter
                self.meter.GlobalMetrics.invocations.add(1, {'handler': span_name})

                # Use aws_message_span for every attribute layout it knows how
                # to read: top-level 'MessageAttributes', lower-case
                # 'messageAttributes', and attributes nested under 'Sns'.
                span_function = self.span
                if isinstance(record, dict):
                    sns_part = record.get('Sns')
                    if ('MessageAttributes' in record
                            or 'messageAttributes' in record
                            or (isinstance(sns_part, dict) and 'MessageAttributes' in sns_part)):
                        span_function = self.aws_message_span

                with span_function(span_name, message=record, attributes=span_attributes, kind=kind) as span_context:
                    # Execute the actual handler function
                    result = func(record, *args, **kwargs)

                    # Add result attributes if applicable
                    if isinstance(result, dict) and result:
                        if 'statusCode' in result:
                            status_code = result['statusCode']
                            span_context.set_attribute("handler.status_code", status_code)

                            # A non-numeric statusCode must not make the
                            # instrumentation raise on behalf of the handler.
                            try:
                                numeric_status = int(status_code)
                            except (TypeError, ValueError):
                                numeric_status = None

                            # Set span status based on status code
                            if numeric_status is not None:
                                if numeric_status >= 400:
                                    span_context.set_status(
                                        Status(StatusCode.ERROR, f"Handler returned {result['statusCode']}")
                                    )
                                else:
                                    span_context.set_status(Status(StatusCode.OK))

                        # Add custom result attributes if present
                        if 'processed' in result:
                            span_context.set_attribute("handler.processed", result['processed'])
                        if 'skipped' in result:
                            span_context.set_attribute("handler.skipped", result['skipped'])

                    # Add completion event
                    span_context.add_event("lambda.invocation.complete", attributes={
                        "handler.success": True
                    })

                # Increment success counter
                self.meter.GlobalMetrics.successful_invocations.add(1, {'handler': span_name})

                return result

            except Exception as e:
                # Increment error counter
                self.meter.GlobalMetrics.error_invocations.add(1, {'handler': span_name, 'error': type(e).__name__})

                # The span is not touched here: self.span records the exception
                # and sets ERROR before the span closes, and `span_context` is
                # unbound when the failure happened before the span was opened.
                raise

            finally:
                if auto_flush:
                    self.logger.logger.info(f"[OTEL] Lambda handler '{span_name}' completed, flushing telemetry...")
                    self.force_flush(start_datetime=start_func)

        return wrapper
    return decorator
|
|
277
|
+
|
|
278
|
+
def force_flush(self, start_datetime: Optional[datetime] = None, timeout_millis: int = 1000) -> bool:
    """
    Force flush all telemetry data.
    This is CRITICAL for Lambda functions to ensure data is sent before function freezes.

    Args:
        start_datetime: When given, the elapsed time since this moment plus the
            current memory and CPU usage are recorded as metrics before flushing
        timeout_millis: Maximum time to wait for flush in milliseconds

    Returns:
        True if all flushes succeeded, False otherwise
    """
    success = True

    if start_datetime is not None:
        end_func = datetime.now()
        duration = (end_func - start_datetime).total_seconds() * 1000
        # interval=None returns immediately (usage since the previous call)
        # instead of sleeping for a full second on every flush.
        cpu_percent = psutil.cpu_percent(interval=None)
        memory = psutil.virtual_memory()

        # Record metrics using standardized names
        self.meter.GlobalMetrics.duration.record(duration, {'source': 'force_flush'})
        self.meter.GlobalMetrics.memory_usage_bytes.set(memory.used)
        self.meter.GlobalMetrics.cpu_usage_percentage.set(cpu_percent)
        self.logger.logger.info(f"[OTEL] Function duration: {duration}ms, Memory usage: {memory.percent}%, CPU usage: {cpu_percent}%")

    try:
        # Flush traces, metrics and logs; only components that were created are flushed
        for component in (self._tracer, self._meter, self._logger):
            if component and not component.force_flush(timeout_millis):
                success = False

        # Add a small delay to ensure network operations complete
        time.sleep(0.1)

    except Exception as e:
        # Flushing stays best-effort, but the failure is reported instead of vanishing.
        success = False
        self.logger.logger.warning(f"[OTEL] Error during force flush: {e}")

    return success
|
|
326
|
+
|
|
327
|
+
def shutdown(self):
    """
    Shutdown all OTEL components gracefully.
    Call this at the end of your Lambda handler if you want to ensure clean shutdown.

    Components that were never created are skipped. Each component is shut
    down independently, so a failure in one does not leave the others
    running; failures are logged and never raised.
    """
    failed = False
    for component in (self._tracer, self._meter, self._logger):
        if not component:
            continue
        try:
            component.shutdown()
        except Exception as e:
            failed = True
            self.logger.logger.info(f"[OTEL] Error during shutdown: {e}")

    if not failed:
        self.logger.logger.info("[OTEL] Shutdown completed")
|
|
342
|
+
|
|
343
|
+
def _detect_lambda_trigger(self, event: Any) -> str:
|
|
344
|
+
"""Detect Lambda trigger type from event."""
|
|
345
|
+
if not event or not isinstance(event, dict):
|
|
346
|
+
return 'direct'
|
|
347
|
+
|
|
348
|
+
if 'Records' in event:
|
|
349
|
+
first_record = event['Records'][0] if event['Records'] else None
|
|
350
|
+
if first_record:
|
|
351
|
+
event_source = first_record.get('eventSource', '')
|
|
352
|
+
if event_source == 'aws:sqs':
|
|
353
|
+
return 'sqs'
|
|
354
|
+
elif event_source == 'aws:sns':
|
|
355
|
+
return 'sns'
|
|
356
|
+
elif event_source == 'aws:s3':
|
|
357
|
+
return 's3'
|
|
358
|
+
elif event_source == 'aws:kinesis':
|
|
359
|
+
return 'kinesis'
|
|
360
|
+
elif event_source == 'aws:dynamodb':
|
|
361
|
+
return 'dynamodb'
|
|
362
|
+
|
|
363
|
+
if 'httpMethod' in event:
|
|
364
|
+
return 'api_gateway'
|
|
365
|
+
if 'requestContext' in event and 'http' in event.get('requestContext', {}):
|
|
366
|
+
return 'api_gateway_v2'
|
|
367
|
+
if event.get('source') == 'aws.events':
|
|
368
|
+
return 'eventbridge'
|
|
369
|
+
if event.get('source') == 'aws.scheduler':
|
|
370
|
+
return 'eventbridge_scheduler'
|
|
371
|
+
if 'jobName' in event:
|
|
372
|
+
return 'batch'
|
|
373
|
+
|
|
374
|
+
return 'unknown'
|
|
375
|
+
|
|
376
|
+
def set_baggage(self, key: str, value: str):
    """
    Set baggage item.

    Delegates to ``baggage.set_baggage`` and hands back whatever it returns.
    NOTE(review): that is presumably a new Context the caller still has to
    attach - confirm against the OpenTelemetry baggage API.
    """
    updated = baggage.set_baggage(key, value)
    return updated
|
|
379
|
+
|
|
380
|
+
def get_baggage(self, key: str) -> Optional[str]:
    """Look up ``key`` through ``baggage.get_baggage`` and return the result unchanged."""
    found = baggage.get_baggage(key)
    return found
|
|
383
|
+
|
|
384
|
+
def inject_context(self, carrier: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Inject trace context into headers for outbound requests.

    Args:
        carrier: Dict to write into; a fresh dict is created when omitted

    Returns:
        The carrier that was passed to ``propagate.inject``
    """
    target = {} if carrier is None else carrier
    propagate.inject(target)
    return target
|
|
390
|
+
|
|
391
|
+
def extract_context(self, carrier: Dict[str, Any]) -> context.Context:
    """Extract trace context from incoming request headers via ``propagate.extract``."""
    extracted = propagate.extract(carrier)
    return extracted
|
|
394
|
+
|
|
395
|
+
def attach_context(self, carrier: Dict[str, Any]) -> object:
    """
    Extract the context carried by ``carrier`` and attach it.

    Returns:
        The token from ``context.attach``; pass it to detach_context for cleanup.
    """
    token = context.attach(self.extract_context(carrier))
    return token
|
|
399
|
+
|
|
400
|
+
def detach_context(self, token):
    """Detach a context using the token that attach_context returned."""
    context.detach(token)
    return None
|
|
403
|
+
|
|
404
|
+
@contextmanager
def aws_message_span(self,
                     name: str,
                     message: Optional[Dict[str, Any]] = None,
                     attributes: Optional[Dict[str, Any]] = None,
                     kind: SpanKind = SpanKind.CONSUMER):
    """
    Create span from AWS message with extracted context.

    String message attributes are collected into a carrier from one of three
    layouts: top-level 'MessageAttributes' with 'StringValue' entries,
    'Sns' -> 'MessageAttributes' with 'Value' (or 'StringValue') entries, or
    lower-case 'messageAttributes' with 'stringValue' entries. When the
    carrier is non-empty its context is attached for the lifetime of the span
    and detached afterwards.

    Args:
        name: Span name
        message: The message record; ignored unless it is a non-empty dict
        attributes: Extra span attributes; copied, never mutated
        kind: Span kind (defaults to CONSUMER)

    Yields:
        The span created by ``self.span``
    """
    usable_message = bool(message) and isinstance(message, dict)

    # Collect propagation headers from whichever attribute layout is present
    carrier = {}
    if usable_message:
        if 'MessageAttributes' in message:
            # SQS format
            for key, value in message.get('MessageAttributes', {}).items():
                if isinstance(value, dict) and 'StringValue' in value:
                    carrier[key] = value['StringValue']
        elif 'Sns' in message and 'MessageAttributes' in message['Sns']:
            # SNS format - MessageAttributes are nested under 'Sns'
            for key, value in message['Sns'].get('MessageAttributes', {}).items():
                if isinstance(value, dict):
                    # SNS uses 'Value' instead of 'StringValue'
                    if 'Value' in value:
                        carrier[key] = value['Value']
                    elif 'StringValue' in value:
                        carrier[key] = value['StringValue']
        elif 'messageAttributes' in message:
            # Alternative format
            for key, value in message.get('messageAttributes', {}).items():
                if isinstance(value, dict) and 'stringValue' in value:
                    carrier[key] = value['stringValue']

    # Work on a copy so the caller's dict is not modified by the keys added below
    combined_attributes = dict(attributes or {})

    # Add message-specific attributes
    if usable_message:
        if 'Sns' in message:
            # Add SNS-specific attributes
            sns_msg = message['Sns']
            if 'MessageId' in sns_msg:
                combined_attributes['messaging.message_id'] = sns_msg['MessageId']
            if 'TopicArn' in sns_msg:
                combined_attributes['messaging.destination'] = sns_msg['TopicArn']
            combined_attributes['messaging.system'] = 'aws_sns'
        elif 'messageId' in message:
            # Add SQS-specific attributes
            combined_attributes['messaging.message_id'] = message['messageId']
            if 'eventSource' in message:
                combined_attributes['messaging.system'] = message['eventSource']

        # Add common attributes
        if 'awsRegion' in message:
            combined_attributes['cloud.region'] = message['awsRegion']

    token = None
    try:
        # Attach inside the try so the finally below always pairs with it
        if carrier:
            token = self.attach_context(carrier)

        # Use the regular span method which properly handles context
        with self.span(name, attributes=combined_attributes, kind=kind) as span:
            yield span
    finally:
        # Detach context if we attached one
        if token is not None:
            self.detach_context(token)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
# Create the module-level singleton; RebrandlyOTEL.__new__ returns the same
# object for every later RebrandlyOTEL() call.
otel = RebrandlyOTEL()

# Module-level aliases for the singleton's most used members.
# NOTE: `tracer`, `meter` and `logger` read the lazy properties, so those three
# components are constructed (in that order) when this module is imported.
span = otel.span
aws_message_span = otel.aws_message_span
traces = otel.trace_decorator
tracer = otel.tracer
meter = otel.meter
logger = otel.logger.logger
lambda_handler = otel.lambda_handler
aws_message_handler = otel.aws_message_handler
initialize = otel.initialize
inject_context = otel.inject_context
extract_context = otel.extract_context
attach_context = otel.attach_context
detach_context = otel.detach_context
force_flush = otel.force_flush
shutdown = otel.shutdown
|
src/traces.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# traces.py
|
|
2
|
+
"""Tracing implementation for Rebrandly OTEL SDK."""
|
|
3
|
+
from typing import Optional, Dict, Any, ContextManager
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
from opentelemetry import trace, propagate, context
|
|
6
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
7
|
+
from opentelemetry.sdk.trace import TracerProvider, Span
|
|
8
|
+
from opentelemetry.sdk.trace.export import (
|
|
9
|
+
ConsoleSpanExporter,
|
|
10
|
+
BatchSpanProcessor,
|
|
11
|
+
SimpleSpanProcessor
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from src.otel_utils import *
|
|
15
|
+
|
|
16
|
+
class RebrandlyTracer:
|
|
17
|
+
"""Wrapper for OpenTelemetry tracing with Rebrandly-specific features."""
|
|
18
|
+
|
|
19
|
+
def __init__(self):
|
|
20
|
+
self._tracer: Optional[trace.Tracer] = None
|
|
21
|
+
self._provider: Optional[TracerProvider] = None
|
|
22
|
+
self._setup_tracing()
|
|
23
|
+
|
|
24
|
+
def _setup_tracing(self):
    """
    Build the TracerProvider, attach exporters, register it globally and
    obtain the tracer.

    A console exporter is attached when is_otel_debug() is true, and an OTLP
    exporter behind a batch processor when get_otlp_endpoint() is not None.
    """
    # Create provider with resource
    self._provider = TracerProvider(resource=create_resource())
    provider = self._provider

    # Console exporter for local debugging
    if is_otel_debug():
        provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))

    # OTLP exporter, batched, when an endpoint is configured
    if get_otlp_endpoint() is not None:
        exporter = OTLPSpanExporter(endpoint=get_otlp_endpoint())
        provider.add_span_processor(
            BatchSpanProcessor(exporter, export_timeout_millis=get_millis_batch_time())
        )

    # Set as global provider, then fetch the tracer through the global API
    trace.set_tracer_provider(provider)
    self._tracer = trace.get_tracer(get_service_name(), get_service_version())
|
|
47
|
+
|
|
48
|
+
def force_flush(self, timeout_millis: int = 5000) -> bool:
    """
    Force flush all pending spans through the provider.

    Args:
        timeout_millis: Maximum time to wait for flush in milliseconds

    Returns:
        True when there is no provider; otherwise the provider's flush
        result, or False if flushing raised. Problems are printed, not raised.
    """
    provider = self._provider
    if not provider:
        return True

    try:
        # ForceFlush on the TracerProvider will flush all processors
        flushed = provider.force_flush(timeout_millis)
        if flushed:
            return flushed
        print(f"[Tracer] Force flush timed out after {timeout_millis}ms")
        return flushed
    except Exception as e:
        print(f"[Tracer] Error during force flush: {e}")
        return False
|
|
72
|
+
|
|
73
|
+
def shutdown(self):
    """Shut the provider down, printing the outcome; does nothing without a provider."""
    provider = self._provider
    if not provider:
        return

    try:
        provider.shutdown()
        print("[Tracer] Shutdown completed")
    except Exception as e:
        # Failures are reported on stdout and not propagated.
        print(f"[Tracer] Error during shutdown: {e}")
|
|
81
|
+
|
|
82
|
+
@property
def tracer(self) -> trace.Tracer:
    """Return the configured tracer, or one from ``trace.get_tracer(__name__)`` when none is set."""
    configured = self._tracer
    if configured:
        return configured
    # Nothing configured: fall back to whatever the global API hands out.
    return trace.get_tracer(__name__)
|
|
89
|
+
|
|
90
|
+
@contextmanager
def start_span(self,
               name: str,
               attributes: Optional[Dict[str, Any]] = None,
               kind: trace.SpanKind = trace.SpanKind.INTERNAL) -> ContextManager[Span]:
    """
    Open ``name`` as the current span and yield it.

    Args:
        name: Span name
        attributes: Optional span attributes
        kind: Span kind (defaults to INTERNAL)
    """
    current_span_cm = self.tracer.start_as_current_span(
        name,
        attributes=attributes,
        kind=kind
    )
    with current_span_cm as span:
        yield span
|
|
102
|
+
|
|
103
|
+
@contextmanager
def start_span_with_context(self,
                            name: str,
                            attributes: Dict[str, str],
                            context_attributes: Optional[Dict[str, Any]] = None):
    """
    Start a span with extracted context (e.g., from message headers).

    Usable in a ``with`` statement, like start_span.

    Args:
        name: Span name
        attributes: Span attributes
        context_attributes: Message dict whose 'MessageAttributes' carry the
            propagated trace context; when None the current context is used

    Yields:
        The started span, set as the current span
    """
    if context_attributes is not None:
        # The helper already returns an extracted Context; it is used as-is.
        # Running it through propagate.extract again would treat the Context
        # as a carrier and lose the parent.
        _, parent_context = self.__get_aws_message_context_attributes(context_attributes)
    else:
        # Start span with current context
        parent_context = context.get_current()

    with self.tracer.start_as_current_span(
        name,
        context=parent_context,
        attributes=attributes
    ) as span:
        yield span
|
|
129
|
+
|
|
130
|
+
def get_current_span(self) -> Span:
    """Return whatever ``trace.get_current_span()`` reports as the active span."""
    active = trace.get_current_span()
    return active
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def record_span_exception(self, exception: Optional[Exception] = None, span: Optional[Span] = None, msg: Optional[str] = None):
    """
    Mark a span as failed, recording the exception when one is given.

    Args:
        exception: Exception to record on the span (optional)
        span: Target span; defaults to the current span
        msg: Optional text for the ERROR status description; combined with
            the exception text when both are given
    """
    target_span = span or self.get_current_span()
    if not target_span or not hasattr(target_span, 'record_exception'):
        return

    if exception is not None:
        target_span.record_exception(exception)

    # Build one description and put it inside the Status object. Passing a
    # Status together with a separate description= argument meant `msg` never
    # reached the span.
    if msg is not None and exception is not None:
        description = f"{msg}: {exception}"
    elif msg is not None:
        description = msg
    elif exception is not None:
        description = str(exception)
    else:
        description = None

    target_span.set_status(trace.Status(trace.StatusCode.ERROR, description))
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def record_span_success(self, span: Optional[Span] = None, msg: Optional[str] = None):
    """
    Mark a span as successful by setting its status to OK.

    Args:
        span: Target span; defaults to the current span
        msg: Kept for backward compatibility and not used. NOTE(review): an
            OK status is expected to carry no description - confirm against
            the OpenTelemetry Status documentation.
    """
    target_span = span or self.get_current_span()
    # Check for the method that is actually called below.
    if target_span and hasattr(target_span, 'set_status'):
        target_span.set_status(trace.Status(trace.StatusCode.OK))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def add_event(self, name: str, attributes: Optional[Dict[str, Any]] = None, span: Optional[Span] = None):
    """
    Add an event to a span.

    Args:
        name: Event name
        attributes: Event attributes; an empty dict is sent when omitted
        span: Target span; defaults to the current span
    """
    target = span or self.get_current_span()
    if not target or not hasattr(target, 'add_event'):
        return
    target.add_event(name, attributes=attributes or {})
|
|
156
|
+
|
|
157
|
+
# AWS-specific helpers
|
|
158
|
+
def __get_aws_message_context_attributes(self, msg: dict):
    """
    Extract trace context from an AWS message's 'MessageAttributes'.
    Used for SQS/SNS message propagation.

    Args:
        msg: Message dict, or None

    Returns:
        A ``(carrier, context)`` tuple: the flat ``{name: string}`` carrier
        built from the message attributes, and the result of
        ``propagate.extract`` on that carrier.
    """
    carrier = {}
    if msg is not None and 'MessageAttributes' in msg:
        for key, value in msg['MessageAttributes'].items():
            # Attributes arrive wrapped ({'StringValue': ...} or {'Value': ...});
            # the propagator needs the plain string, so unwrap rather than wrap.
            if isinstance(value, dict):
                value = value.get('StringValue', value.get('Value'))
            if isinstance(value, str):
                carrier[key] = value
    context_extracted = propagate.extract(carrier)
    return carrier, context_extracted
|
|
174
|
+
|
|
175
|
+
def get_attributes_for_aws_from_context(self):
    """
    Inject the current trace context into a carrier and return it in message
    attribute form: ``{name: {'StringValue': value, 'DataType': 'String'}}``.
    """
    # Inject trace context into a fresh carrier
    injected = {}
    propagate.inject(injected)

    # Convert carrier to SQS message attributes format
    return {
        header: {
            'StringValue': header_value,
            'DataType': 'String'
        }
        for header, header_value in injected.items()
    }
|