rebrandly-otel 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,614 @@
1
+
2
+ import json
3
+ import time
4
+ import psutil
5
+ import functools
6
+ from contextlib import contextmanager
7
+ from datetime import datetime
8
+ from opentelemetry.trace import Status, StatusCode, SpanKind
9
+ from typing import Optional, Dict, Any, Callable, TypeVar
10
+ from opentelemetry import baggage, propagate, context as otel_context
11
+
12
+ from .traces import RebrandlyTracer
13
+ from .metrics import RebrandlyMeter
14
+ from .logs import RebrandlyLogger
15
+ from .otel_utils import extract_event_from
16
+ from .http_constants import (
17
+ HTTP_RESPONSE_STATUS_CODE,
18
+ )
19
+ from .api_gateway_utils import (
20
+ is_api_gateway_event,
21
+ extract_api_gateway_http_attributes,
22
+ extract_api_gateway_context
23
+ )
24
+
25
+
26
# Type variable used by RebrandlyOTEL.trace_decorator to annotate that the
# decorator returns the same kind of callable it was given.
T = TypeVar('T')
27
+
28
class RebrandlyOTEL:
    """Main entry point for Rebrandly's OpenTelemetry instrumentation.

    Every ``RebrandlyOTEL()`` call returns the same instance (see
    ``__new__``). The tracer, meter and logger wrappers are created lazily
    the first time the matching property is read.
    """

    _instance: Optional['RebrandlyOTEL'] = None
    _initialized: bool = False

    # Event-source marker of the first record -> value of ``faas.trigger``.
    _RECORD_SOURCE_TRIGGERS = {
        'aws:sqs': 'sqs',
        'aws:sns': 'sns',
        'aws:s3': 's3',
        'aws:kinesis': 'kinesis',
        'aws:dynamodb': 'dynamodb',
    }

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every RebrandlyOTEL() call; the class-level flag
        # keeps later calls from resetting already-created components.
        if not self._initialized:
            self._tracer: Optional[RebrandlyTracer] = None
            self._meter: Optional[RebrandlyMeter] = None
            self._logger: Optional[RebrandlyLogger] = None
            self.__class__._initialized = True

    def initialize(self, **kwargs) -> 'RebrandlyOTEL':
        """Eagerly create the tracer, meter and logger and return ``self``.

        ``kwargs`` are accepted for call-site compatibility but are not used.
        """
        # Reading each property forces its lazy construction.
        _ = self.tracer
        _ = self.meter
        _ = self.logger

        return self

    @property
    def tracer(self) -> RebrandlyTracer:
        """Get the tracer instance, creating it on first access."""
        if self._tracer is None:
            self._tracer = RebrandlyTracer()
        return self._tracer

    @property
    def meter(self) -> RebrandlyMeter:
        """Get the meter instance, creating it on first access."""
        if self._meter is None:
            self._meter = RebrandlyMeter()
        return self._meter

    @property
    def logger(self) -> RebrandlyLogger:
        """Get the logger instance, creating it on first access."""
        if self._logger is None:
            self._logger = RebrandlyLogger()
        return self._logger

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _status_is_unset(span) -> bool:
        """Return True when ``span`` exposes no status or an UNSET status.

        Spans without a ``status`` attribute are treated as unset.
        """
        status = getattr(span, 'status', None)
        return getattr(status, 'status_code', StatusCode.UNSET) == StatusCode.UNSET

    @staticmethod
    def _status_code_as_int(value: Any) -> Optional[int]:
        """Return ``value`` as an int status code, or None if it is not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _first_record(event: Any) -> Optional[Dict[str, Any]]:
        """Return the first entry of ``event['Records']`` when it is a dict, else None."""
        if not isinstance(event, dict):
            return None
        records = event.get('Records')
        if isinstance(records, (list, tuple)) and records and isinstance(records[0], dict):
            return records[0]
        return None

    @staticmethod
    def _carrier_from_record(record: Any) -> Dict[str, Any]:
        """Build a propagation carrier from one SQS/SNS record's message attributes."""
        carrier: Dict[str, Any] = {}
        if not isinstance(record, dict):
            return carrier

        # Capitalised attribute layout: copy every StringValue entry.
        upper_attrs = record.get('MessageAttributes')
        if isinstance(upper_attrs, dict):
            for key, value in upper_attrs.items():
                if isinstance(value, dict) and 'StringValue' in value:
                    carrier[key] = value['StringValue']

        # Lower-case attribute layout: only ``traceparent`` is read.
        lower_attrs = record.get('messageAttributes')
        traceparent = lower_attrs.get('traceparent') if isinstance(lower_attrs, dict) else None
        sns = record.get('Sns')

        if isinstance(traceparent, dict) and 'stringValue' in traceparent:
            carrier['traceparent'] = traceparent['stringValue']
        elif isinstance(sns, dict) and isinstance(sns.get('MessageAttributes'), dict):
            for key, value in sns['MessageAttributes'].items():
                if isinstance(value, dict):
                    if 'Value' in value:
                        carrier[key] = value['Value']
                    elif 'StringValue' in value:
                        carrier[key] = value['StringValue']

        return carrier

    # ------------------------------------------------------------------
    # Convenience methods for common operations
    # ------------------------------------------------------------------

    @contextmanager
    def span(self,
             name: str,
             attributes: Optional[Dict[str, Any]] = None,
             kind: SpanKind = SpanKind.INTERNAL,
             message=None):
        """Create a span using a context manager.

        Args:
            name: Span name.
            attributes: Initial span attributes.
            kind: Span kind.
            message: Unused; accepted so this method is call-compatible with
                ``aws_message_span``.

        Yields:
            The span returned by ``self.tracer.start_span``.

        An exception escaping the block is recorded on the span, the status
        is set to ERROR and the exception is re-raised. On a clean exit the
        status is set to OK only when the block left it unset, so a status
        set explicitly inside the block is preserved.
        """
        with self.tracer.start_span(name=name, attributes=attributes, kind=kind) as active_span:
            try:
                yield active_span
            except Exception as e:
                active_span.record_exception(e)
                active_span.set_status(Status(StatusCode.ERROR, str(e)))
                raise
            else:
                # Do not overwrite a status the caller already chose
                # (setting OK would otherwise replace an earlier ERROR).
                if self._status_is_unset(active_span):
                    active_span.set_status(Status(StatusCode.OK))

    def trace_decorator(self,
                        name: Optional[str] = None,
                        attributes: Optional[Dict[str, Any]] = None,
                        kind: SpanKind = SpanKind.INTERNAL) -> Callable[[T], T]:
        """Decorator that runs the wrapped function inside ``self.span``.

        The span name defaults to ``"<module>.<function name>"``.
        """
        def decorator(func: T) -> T:
            span_name = name or f"{func.__module__}.{func.__name__}"

            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                with self.span(span_name, attributes=attributes, kind=kind):
                    return func(*args, **kwargs)

            return wrapper
        return decorator

    def lambda_handler(self,
                       name: Optional[str] = None,
                       attributes: Optional[Dict[str, Any]] = None,
                       kind: SpanKind = SpanKind.SERVER,
                       auto_flush: bool = True,
                       skip_aws_link: bool = False):
        """
        Decorator specifically for Lambda handlers with automatic flushing.

        Args:
            name: Explicit span name. Defaults to ``lambda.<function name>``,
                or to the name derived from an API Gateway event.
            attributes: Extra span attributes (copied, never mutated).
            kind: Span kind for the invocation span.
            auto_flush: When True, ``force_flush`` runs after every invocation.
            skip_aws_link: When True, no trace context is extracted from the
                first entry of ``event['Records']``.
        """
        def decorator(func):
            @functools.wraps(func)
            def wrapper(event=None, lambda_context=None):
                span_name = name or f"lambda.{func.__name__}"

                # Build span attributes
                span_attributes = attributes.copy() if attributes else {}
                span_attributes['faas.trigger'] = self._detect_lambda_trigger(event)

                # Add Lambda-specific attributes if context is available
                if lambda_context is not None:
                    span_attributes.update({
                        "faas.execution": getattr(lambda_context, 'aws_request_id', 'unknown'),
                        "faas.id": getattr(lambda_context, 'function_arn', 'unknown'),
                        "faas.name": getattr(lambda_context, 'function_name', 'unknown'),
                        "faas.version": getattr(lambda_context, 'function_version', 'unknown')
                    })

                token = None
                is_api_gateway = False
                event_is_dict = isinstance(event, dict) and bool(event)

                # Detect and extract HTTP attributes for API Gateway events
                if event_is_dict and is_api_gateway_event(event):
                    is_api_gateway = True
                    http_attrs, updated_span_name = extract_api_gateway_http_attributes(event)
                    span_attributes.update(http_attrs)

                    # Update span name to HTTP format if not explicitly set
                    if not name and updated_span_name:
                        span_name = updated_span_name

                    # Extract traceparent from API Gateway headers
                    carrier = extract_api_gateway_context(event)
                    if carrier:
                        token = otel_context.attach(propagate.extract(carrier))

                # Otherwise try the first SQS/SNS record for a parent context
                if not is_api_gateway and not skip_aws_link and event_is_dict and 'Records' in event:
                    carrier = self._carrier_from_record(self._first_record(event))
                    if carrier:
                        token = otel_context.attach(propagate.extract(carrier))

                try:
                    with self.tracer.start_span(
                        name=span_name,
                        attributes=span_attributes,
                        kind=kind
                    ) as span:
                        # Add invocation start event with standardized attributes
                        start_event_attrs = {
                            'event.type': type(event).__name__ if event else 'None'
                        }

                        # Add records count if present
                        if event_is_dict and 'Records' in event and hasattr(event['Records'], '__len__'):
                            start_event_attrs['event.records'] = f"{len(event['Records'])}"

                        span.add_event("lambda.invocation.start", start_event_attrs)

                        # Execute handler. Failures are recorded here, inside
                        # the ``with``, so the span is still open.
                        # NOTE(review): if RebrandlyTracer.start_span also
                        # records exceptions on exit this yields a duplicate
                        # exception event - confirm.
                        try:
                            result = func(event, lambda_context)
                        except Exception as e:
                            span.add_event("lambda.invocation.complete", {
                                'success': False,
                                'error': type(e).__name__
                            })
                            span.record_exception(e)
                            span.set_status(Status(StatusCode.ERROR, str(e)))
                            raise

                        # Process result with standardized attributes
                        success = True
                        complete_event_attrs = {}

                        if isinstance(result, dict) and 'statusCode' in result:
                            raw_status = result['statusCode']
                            span.set_attribute(HTTP_RESPONSE_STATUS_CODE, raw_status)
                            complete_event_attrs['status_code'] = raw_status

                            # Compare on a coerced int so a string status code
                            # cannot raise TypeError from instrumentation.
                            status_code = self._status_code_as_int(raw_status)

                            # Per OTel spec for HTTP: only 5xx are server errors, 4xx are client errors
                            # Leave status UNSET for HTTP success - don't call set_status
                            if status_code is not None and status_code >= 500:
                                success = False
                                span.set_status(Status(StatusCode.ERROR, f"HTTP {raw_status}"))
                            elif status_code is not None and status_code >= 400:
                                success = False
                        else:
                            # Non-HTTP response: set OK for success
                            span.set_status(Status(StatusCode.OK))

                        # Add completion event with success indicator
                        complete_event_attrs['success'] = success
                        span.add_event("lambda.invocation.complete", complete_event_attrs)

                        return result

                except Exception as e:
                    # Log error
                    print(f"Lambda execution failed: {e}")
                    raise

                finally:
                    # Always detach context if we attached it
                    if token is not None:
                        otel_context.detach(token)

                    # Force flush if enabled
                    if auto_flush:
                        print(f"[Rebrandly OTEL] Lambda '{span_name}', flushing...")
                        flush_success = self.force_flush(timeout_millis=1000)
                        if not flush_success:
                            print("[Rebrandly OTEL] Force flush may not have completed fully")

            return wrapper
        return decorator

    def aws_message_handler(self,
                            name: Optional[str] = None,
                            attributes: Optional[Dict[str, Any]] = None,
                            kind: SpanKind = SpanKind.CONSUMER,
                            auto_flush: bool = True):
        """
        Decorator for AWS message handlers (SQS/SNS record processing).
        Requires a record object parameter to the function.

        Args:
            name: Explicit span name. Defaults to ``message.<function name>``,
                or to the name derived from an API Gateway payload in the body.
            attributes: Extra span attributes (copied, never mutated).
            kind: Span kind; switched to SERVER when the record body is an
                API Gateway event.
            auto_flush: When True, ``force_flush`` runs after every call.
        """
        def decorator(func):
            @functools.wraps(func)
            def wrapper(record=None, *args, **kwargs):
                span_name = name or f"message.{func.__name__}"
                start_func = datetime.now()

                # Build span attributes
                span_attributes = attributes.copy() if attributes else {}
                span_attributes['messaging.operation'] = 'process'

                record_is_dict = isinstance(record, dict)
                sns_part = record.get('Sns') if record_is_dict else None
                record_body = None
                actual_kind = kind

                if record and record_is_dict:
                    # SQS carries the payload in 'body', SNS in Sns.Message
                    raw_body = None
                    if 'body' in record:
                        raw_body = record['body']
                    elif isinstance(sns_part, dict) and 'Message' in sns_part:
                        raw_body = sns_part['Message']
                    if raw_body is not None:
                        try:
                            record_body = json.loads(raw_body)
                        except (json.JSONDecodeError, TypeError):
                            record_body = None

                # If the body looks like an API Gateway event, extract HTTP attributes
                if record_body and isinstance(record_body, dict) and is_api_gateway_event(record_body):
                    http_attrs, updated_span_name = extract_api_gateway_http_attributes(record_body)
                    span_attributes.update(http_attrs)
                    # Update span name to HTTP format if not explicitly set
                    if not name and updated_span_name:
                        span_name = updated_span_name
                    # Change span kind to SERVER for HTTP requests
                    actual_kind = SpanKind.SERVER

                try:
                    # Use the message-aware span when the record carries message attributes
                    span_function = self.span
                    has_message_attributes = record_is_dict and (
                        'MessageAttributes' in record
                        or 'messageAttributes' in record
                        or (isinstance(sns_part, dict) and 'MessageAttributes' in sns_part)
                    )
                    if has_message_attributes:
                        span_function = self.aws_message_span
                        evt = extract_event_from(record)
                        if evt:
                            span_attributes['event.type'] = evt

                    with span_function(span_name, message=record, attributes=span_attributes, kind=actual_kind) as span_context:
                        # Add processing start event with standardized name
                        span_context.add_event("message.processing.start", {})

                        # Execute the actual handler function. The failure
                        # event is added here while the span is still open;
                        # the span context manager records the exception and
                        # sets the ERROR status when it sees the re-raise.
                        try:
                            result = func(record, *args, **kwargs)
                        except Exception as e:
                            span_context.add_event("message.processing.complete", {
                                'success': False,
                                'error': type(e).__name__
                            })
                            raise

                        # Process result
                        success = True
                        complete_event_attrs = {}

                        has_http_status = False
                        if result and isinstance(result, dict):
                            if 'statusCode' in result:
                                has_http_status = True
                                raw_status = result['statusCode']
                                span_context.set_attribute("http.response.status_code", raw_status)

                                # Compare on a coerced int so a string status
                                # code cannot raise TypeError.
                                status_code = self._status_code_as_int(raw_status)

                                # Per OTel spec for HTTP: only 5xx are server errors
                                if status_code is not None and status_code >= 500:
                                    success = False
                                    span_context.set_status(
                                        Status(StatusCode.ERROR, f"Handler returned {raw_status}")
                                    )
                                elif status_code is not None and status_code >= 400:
                                    success = False
                                # NOTE: for HTTP results below 500 no status is
                                # set here, but the span context manager marks
                                # a still-unset span OK on clean exit.

                            # Add custom result attributes if present
                            if 'processed' in result:
                                complete_event_attrs['processed'] = result['processed']
                                span_context.set_attribute("message.processed", result['processed'])
                            if 'skipped' in result:
                                complete_event_attrs['skipped'] = result['skipped']
                                span_context.set_attribute("message.skipped", result['skipped'])

                        # Non-HTTP response: set OK for success
                        if not has_http_status:
                            span_context.set_status(Status(StatusCode.OK))

                        # Add completion event with standardized name
                        complete_event_attrs['success'] = success
                        span_context.add_event("message.processing.complete", complete_event_attrs)

                        return result

                finally:
                    if auto_flush:
                        self.force_flush(start_datetime=start_func)

            return wrapper
        return decorator

    def force_flush(self, start_datetime: Optional[datetime] = None, timeout_millis: int = 1000) -> bool:
        """
        Force flush all telemetry data.
        This is CRITICAL for Lambda functions to ensure data is sent before function freezes.

        Args:
            start_datetime: When not None, CPU and memory usage are sampled
                and recorded before flushing. Only its presence is checked.
            timeout_millis: Maximum time to wait for each flush in milliseconds

        Returns:
            True if all flushes succeeded, False otherwise
        """
        success = True

        if start_datetime is not None:
            try:
                cpu_percent = psutil.cpu_percent(interval=0.1)  # Shorter interval for Lambda
                memory = psutil.virtual_memory()
            except Exception as e:
                # Sampling is best-effort; it must never block the flush below.
                print(f"[Rebrandly OTEL] Warning: Could not record system metrics: {e}")
            else:
                # Record metrics using standardized names (with safety checks)
                try:
                    if self.meter.GlobalMetrics.memory_usage_bytes:
                        self.meter.GlobalMetrics.memory_usage_bytes.set(memory.used)
                    if self.meter.GlobalMetrics.cpu_usage_percentage:
                        self.meter.GlobalMetrics.cpu_usage_percentage.set(cpu_percent)
                except Exception as e:
                    print(f"[Rebrandly OTEL] Warning: Could not record system metrics: {e}")

                print(f"Function Memory usage: {memory.percent}%, CPU usage: {cpu_percent}%")

        # Flush traces, metrics and logs independently so that one failing
        # component does not prevent the others from being flushed.
        for component in (self._tracer, self._meter, self._logger):
            if not component:
                continue
            try:
                if not component.force_flush(timeout_millis):
                    success = False
            except Exception as e:
                print(f"[Rebrandly OTEL] Error during force flush: {e}")
                success = False

        # Add a small delay to ensure network operations complete
        time.sleep(0.1)

        return success

    def shutdown(self):
        """
        Shutdown all OTEL components gracefully.
        Call this at the end of your Lambda handler if you want to ensure clean shutdown.

        Errors are printed, not raised; each component is shut down even if
        an earlier one failed.
        """
        for component in (self._tracer, self._meter, self._logger):
            if not component:
                continue
            try:
                component.shutdown()
            except Exception as e:
                print(f"[Rebrandly OTEL] Error during shutdown: {e}")

    def _detect_lambda_trigger(self, event: Any) -> str:
        """Detect Lambda trigger type from event.

        Returns 'direct' for an empty or non-dict event, a record-based
        trigger name when the first record's source is recognised, one of
        the event-shape based names otherwise, and 'unknown' as a fallback.
        """
        if not event or not isinstance(event, dict):
            return 'direct'

        first_record = self._first_record(event)
        if first_record is not None:
            # SQS/S3/Kinesis/DynamoDB records use 'eventSource'; SNS records
            # spell the key 'EventSource'.
            event_source = first_record.get('eventSource') or first_record.get('EventSource')
            if isinstance(event_source, str) and event_source in self._RECORD_SOURCE_TRIGGERS:
                return self._RECORD_SOURCE_TRIGGERS[event_source]

        if 'httpMethod' in event:
            return 'api_gateway'
        request_context = event.get('requestContext')
        if isinstance(request_context, dict) and 'http' in request_context:
            return 'api_gateway_v2'
        if event.get('source') == 'aws.events':
            return 'eventbridge'
        if event.get('source') == 'aws.scheduler':
            return 'eventbridge_scheduler'
        if 'jobName' in event:
            return 'batch'

        return 'unknown'

    def set_baggage(self, key: str, value: str):
        """Set baggage item.

        Returns whatever ``opentelemetry.baggage.set_baggage`` returns; this
        method does not attach it to the current context.
        """
        return baggage.set_baggage(key, value)

    def get_baggage(self, key: str) -> Optional[str]:
        """Get baggage item."""
        return baggage.get_baggage(key)

    def inject_context(self, carrier: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Inject trace context into headers for outbound requests.

        A new dict is created when ``carrier`` is None; otherwise the given
        dict is filled in place and returned.
        """
        if carrier is None:
            carrier = {}
        propagate.inject(carrier)
        return carrier

    def extract_context(self, carrier: Dict[str, Any]) -> otel_context.Context:
        """Extract trace context from incoming request headers."""
        return propagate.extract(carrier)

    def attach_context(self, carrier: Dict[str, Any]) -> object:
        """Extract and attach context, returning a token for cleanup."""
        ctx = self.extract_context(carrier)
        return otel_context.attach(ctx)

    def detach_context(self, token):
        """Detach a previously attached context."""
        otel_context.detach(token)

    @contextmanager
    def aws_message_span(self,
                         name: str,
                         message: Dict[str, Any] = None,
                         attributes: Optional[Dict[str, Any]] = None,
                         kind: SpanKind = SpanKind.CONSUMER):
        """Create a span for processing one AWS (SQS/SNS) message.

        Args:
            name: Span name.
            message: The raw record; messaging attributes are derived from it.
            attributes: Extra span attributes (copied, never mutated).
            kind: Span kind.

        Yields:
            The span; error and OK handling is the same as in ``span``.
        """
        # Copy so the caller's dict is not modified by the additions below.
        combined_attributes = dict(attributes) if attributes else {}
        combined_attributes['messaging.operation'] = 'process'

        # Extract message attributes for linking/attributes
        if message and isinstance(message, dict):
            sns_msg = message.get('Sns')
            if 'Sns' in message:
                if isinstance(sns_msg, dict):
                    if 'MessageId' in sns_msg:
                        combined_attributes['messaging.message_id'] = sns_msg['MessageId']
                    if 'Subject' in sns_msg:
                        combined_attributes['messaging.sns.subject'] = sns_msg['Subject']
                    if 'TopicArn' in sns_msg:
                        combined_attributes['messaging.destination'] = sns_msg['TopicArn']
                combined_attributes['messaging.system'] = 'aws_sns'

            elif 'messageId' in message:
                # SQS message
                combined_attributes['messaging.message_id'] = message['messageId']
                event_source = message.get('eventSource')
                if isinstance(event_source, str):
                    # Convert AWS eventSource format (aws:sqs) to OTel format (aws_sqs)
                    combined_attributes['messaging.system'] = event_source.replace(':', '_')

            if 'awsRegion' in message:
                combined_attributes['cloud.region'] = message['awsRegion']

            evt = extract_event_from(message)
            if evt:
                combined_attributes['event.type'] = evt

        # Delegate span lifecycle (exception recording, final status) to
        # ``span`` so both context managers behave the same way.
        with self.span(name, attributes=combined_attributes, kind=kind) as active_span:
            yield active_span
582
+
583
+
584
# Process-wide singleton backing every module-level shortcut below.
otel = RebrandlyOTEL()

# Component handles. Reading these properties creates the components, so the
# tracer -> meter -> logger order is kept.
tracer = otel.tracer
meter = otel.meter
logger = otel.logger.logger

# Bound-method shortcuts so callers can import plain functions.
span = otel.span
aws_message_span = otel.aws_message_span
traces = otel.trace_decorator
lambda_handler = otel.lambda_handler
aws_message_handler = otel.aws_message_handler
initialize = otel.initialize
inject_context = otel.inject_context
extract_context = otel.extract_context
attach_context = otel.attach_context
detach_context = otel.detach_context
force_flush = otel.force_flush
shutdown = otel.shutdown

# Expose the standard logging levels on the exported logger, which allows:
#   from rebrandly_otel import logger; logger.setLevel(logger.INFO)
import logging as _logging
for _level_name in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "NOTSET"):
    setattr(logger, _level_name, getattr(_logging, _level_name))
del _level_name

# Route level changes and logger lookups through the RebrandlyLogger wrapper.
logger.setLevel = otel.logger.setLevel
logger.getLogger = otel.logger.getLogger