rebrandly-otel 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,212 @@
+ # metrics.py
+ """Metrics implementation for Rebrandly OTEL SDK."""
+ from typing import Optional, Dict, List
+ from dataclasses import dataclass
+ from enum import Enum
+ from opentelemetry import metrics
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+ from opentelemetry.metrics import Meter, Histogram, Instrument, Counter
+ from opentelemetry.metrics._internal import Gauge
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.metrics.export import (PeriodicExportingMetricReader, ConsoleMetricExporter)
+ from opentelemetry.sdk.metrics.view import View, ExplicitBucketHistogramAggregation
+
+ from .otel_utils import *
+
+ class MetricType(Enum):
+     """Supported metric types."""
+     COUNTER = "counter"
+     GAUGE = "gauge"
+     HISTOGRAM = "histogram"
+     UP_DOWN_COUNTER = "up_down_counter"
+
+ @dataclass
+ class MetricDefinition:
+     """Definition of a metric."""
+     name: str
+     description: str
+     unit: str = "1"
+     type: MetricType = MetricType.COUNTER
+
+ class RebrandlyMeter:
+     """Wrapper for OpenTelemetry metrics with Rebrandly-specific features."""
+
+     # Standardized metric definitions aligned with Node.js
+     DEFAULT_METRICS = {
+         ## PROCESS
+         'cpu_usage_percentage': MetricDefinition(
+             name='process.cpu.utilization',
+             description='Difference in process.cpu.time since the last measurement, divided by the elapsed time and number of CPUs available to the process.',
+             unit='1',
+             type=MetricType.GAUGE
+         ),
+         'memory_usage_bytes': MetricDefinition(
+             name='process.memory.used',
+             description='The amount of physical memory in use.',
+             unit='By',
+             type=MetricType.GAUGE
+         )
+     }
+
+     class GlobalMetrics:
+         def __init__(self, rebrandly_meter):
+             self.__rebrandly_meter = rebrandly_meter
+             self.cpu_usage_percentage: Gauge = self.__rebrandly_meter.get_metric('cpu_usage_percentage')
+             self.memory_usage_bytes: Gauge = self.__rebrandly_meter.get_metric('memory_usage_bytes')
+
+
+     def __init__(self):
+         self._meter: Optional[Meter] = None
+         self._provider: Optional[MeterProvider] = None
+         self._metrics: Dict[str, Instrument] = {}
+         self.__setup_metrics()
+         self.__register_default_metrics()
+         self.GlobalMetrics = RebrandlyMeter.GlobalMetrics(self)
+
+     def __setup_metrics(self):
+         """Initialize metrics with configured exporters."""
+
+         readers = []
+
+         # Add console exporter for local debugging
+         if is_otel_debug():
+             console_reader = PeriodicExportingMetricReader(
+                 ConsoleMetricExporter(),
+                 export_interval_millis=1000  # 1 second for debugging
+             )
+             readers.append(console_reader)
+
+         # Add OTLP exporter if configured
+         otel_endpoint = get_otlp_endpoint()
+         if otel_endpoint is not None:
+             otlp_exporter = OTLPMetricExporter(
+                 endpoint=otel_endpoint,
+                 timeout=5
+             )
+             otlp_reader = PeriodicExportingMetricReader(otlp_exporter, export_interval_millis=get_millis_batch_time())
+             readers.append(otlp_reader)
+
+         # Create views
+         views = self.__create_views()
+
+         # Create provider
+         self._provider = MeterProvider(
+             resource=create_resource(),
+             metric_readers=readers,
+             views=views
+         )
+
+         # Set as global provider
+         metrics.set_meter_provider(self._provider)
+
+         # Get meter
+         self._meter = metrics.get_meter(get_service_name(), get_service_version())
+
+     def __create_views(self) -> List[View]:
+         """Create metric views for customization."""
+         views = []
+
+         # Histogram view with custom buckets
+         histogram_view = View(
+             instrument_type=Histogram,
+             instrument_name="*",
+             aggregation=ExplicitBucketHistogramAggregation((0.001, 0.004, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5))  # todo <-- define buckets
+         )
+         views.append(histogram_view)
+
+         return views
+
+     def __register_default_metrics(self):
+         """Register default metrics."""
+         for name, definition in self.DEFAULT_METRICS.items():
+             self.register_metric(definition, key=name)
+
+     @property
+     def meter(self) -> Meter:
+         """Get the underlying OpenTelemetry meter."""
+         if not self._meter:
+             # Return no-op meter if metrics are disabled
+             return metrics.get_meter(__name__)
+         return self._meter
+
+     def force_flush(self, timeout_millis: int = 5000) -> bool:
+         """
+         Force flush all pending metrics.
+
+         Args:
+             timeout_millis: Maximum time to wait for flush in milliseconds
+
+         Returns:
+             True if flush succeeded, False otherwise
+         """
+         if not hasattr(self, '_provider') or not self._provider:
+             return True
+
+         try:
+             # MeterProvider.force_flush flushes through the registered metric readers
+             success = self._provider.force_flush(timeout_millis)
+             return success
+         except Exception as e:
+             print(f"[Meter] Error during force flush: {e}")
+             # Treat a failed metrics flush as non-fatal
+             return True
+
+     def shutdown(self):
+         """Shutdown the meter provider."""
+         if hasattr(self, '_provider') and self._provider:
+             try:
+                 self._provider.shutdown()
+                 print("[Meter] Shutdown completed")
+             except Exception as e:
+                 print(f"[Meter] Error during shutdown: {e}")
+
+     def register_metric(self, definition: MetricDefinition, key: Optional[str] = None) -> Instrument:
+         """Register a new metric."""
+         # Use the full name as primary key
+         if definition.name in self._metrics:
+             return self._metrics[definition.name]
+
+         metric = self.__create_metric(definition)
+         self._metrics[definition.name] = metric
+
+         # Also store by key name if provided (for easy lookup)
+         if key:
+             self._metrics[key] = metric
+
+         return metric
+
+     def __create_metric(self, definition: MetricDefinition) -> Instrument:
+         """Create a metric instrument based on definition."""
+         if definition.type == MetricType.COUNTER:
+             return self.meter.create_counter(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         elif definition.type == MetricType.HISTOGRAM:
+             return self.meter.create_histogram(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         elif definition.type == MetricType.UP_DOWN_COUNTER:
+             return self.meter.create_up_down_counter(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         elif definition.type == MetricType.GAUGE:
+             # Synchronous gauge instrument (no observable callback)
+             return self.meter.create_gauge(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         else:
+             raise ValueError(f"Unknown metric type: {definition.type}")
+
+     def get_metric(self, name: str) -> Optional[Instrument]:
+         """Get a registered metric by name."""
+         return self._metrics.get(name)
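A minimal usage sketch (not part of the package diff above): registering and recording a custom histogram through RebrandlyMeter. The import path `rebrandly_otel.metrics` and the instrument name, unit, and attributes are assumptions for illustration only.

from rebrandly_otel.metrics import RebrandlyMeter, MetricDefinition, MetricType

meter = RebrandlyMeter()  # sets up readers, views and the global MeterProvider

# Hypothetical instrument definition; name and attributes are illustrative
latency = meter.register_metric(
    MetricDefinition(
        name='http.server.request.duration',
        description='Duration of inbound HTTP requests.',
        unit='s',
        type=MetricType.HISTOGRAM,
    ),
    key='request_latency',
)

latency.record(0.042, {'http.route': '/v1/links'})  # record via the returned instrument
meter.get_metric('request_latency').record(0.015)   # or look it up by its key
meter.force_flush()
meter.shutdown()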
@@ -0,0 +1,169 @@
+
+ # otel_utils.py
+
+ import os
+ import sys
+ import grpc
+ import json
+
+ from opentelemetry.sdk.resources import Resource, SERVICE_NAMESPACE, DEPLOYMENT_ENVIRONMENT
+ from opentelemetry.semconv.attributes import service_attributes, telemetry_attributes
+ from opentelemetry.semconv.resource import ResourceAttributes
+ from opentelemetry.semconv._incubating.attributes import process_attributes, deployment_attributes
+
+ # Cache for endpoint validation results
+ _ENDPOINT_CACHE = {}
+
+ def create_resource(name: str = None, version: str = None) -> Resource:
+
+     if name is None:
+         name = get_service_name()
+     if version is None:
+         version = get_service_version()
+
+     env = os.environ.get('ENV', os.environ.get('ENVIRONMENT', os.environ.get('NODE_ENV', 'local')))
+
+     resources_attributes = {
+         service_attributes.SERVICE_NAME: name,
+         "application.name": name,
+         service_attributes.SERVICE_VERSION: version,
+         process_attributes.PROCESS_RUNTIME_VERSION: f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
+         SERVICE_NAMESPACE: get_application_name(),
+         DEPLOYMENT_ENVIRONMENT: env,
+         telemetry_attributes.TELEMETRY_SDK_LANGUAGE: "python",
+         telemetry_attributes.TELEMETRY_SDK_NAME: "rebrandly-otel-sdk",
+         telemetry_attributes.TELEMETRY_SDK_VERSION: version
+     }
+
+     ora = os.environ.get('OTEL_RESOURCE_ATTRIBUTES', '')
+     if ora.strip() != "":
+         try:
+             for attr in ora.split(','):
+                 attr = attr.strip()
+                 if attr != "" and '=' in attr:
+                     # Split on first '=' only, in case value contains '='
+                     k, v = attr.split('=', 1)
+                     resources_attributes[k.strip()] = v.strip()
+         except Exception as e:
+             print(f"[OTEL Utils] Warning: Invalid OTEL_RESOURCE_ATTRIBUTES value: {e}")
+
+     if os.environ.get('OTEL_REPO_NAME', None) is not None:
+         resources_attributes['repository.name'] = os.environ.get('OTEL_REPO_NAME')
+
+     if os.environ.get('OTEL_COMMIT_ID', None) is not None:
+         resources_attributes[service_attributes.SERVICE_VERSION] = os.environ.get('OTEL_COMMIT_ID')
+
+     resource = Resource.create(
+         resources_attributes
+     )
+     return resource
+
+ def get_package_version():
+     try:
+         from importlib.metadata import version, PackageNotFoundError  # Python 3.8+
+         return version('rebrandly_otel')
+     except ImportError:
+         # PackageNotFoundError subclasses ImportError, so a missing distribution also lands here
+         try:
+             from importlib_metadata import version, PackageNotFoundError
+             return version('rebrandly_otel')
+         except Exception as e:
+             print(f"[OTEL Utils] Warning: Could not get package version: {e}")
+             return '0.1.0'
+
+
+ def get_service_name(service_name: str = None) -> str:
+     if service_name is None:
+         serv = os.environ.get('OTEL_SERVICE_NAME', 'default-service-python')
+         if serv.strip() == "":
+             return 'default-service-python'
+         return serv
+     return service_name
+
+
+ def get_service_version(service_version: str = None) -> str:
+     if service_version is None:
+         return os.environ.get('OTEL_SERVICE_VERSION', get_package_version())
+     return service_version
+
+
+ def get_application_name() -> str:
+     return os.environ.get('OTEL_SERVICE_APPLICATION', get_service_name())
+
+
+ def get_otlp_endpoint(otlp_endpoint: str = None) -> str | None:
+     endpoint = otlp_endpoint or os.environ.get('OTEL_EXPORTER_OTLP_ENDPOINT', None)
+
+     # Return cached result if available
+     cache_key = endpoint if endpoint else '__none__'
+     if cache_key in _ENDPOINT_CACHE:
+         return _ENDPOINT_CACHE[cache_key]
+
+     # Store the result to cache
+     result = None
+
+     if endpoint is not None:
+
+         if endpoint.strip() == "":
+             result = None
+         else:
+             try:
+                 from urllib.parse import urlparse
+
+                 # Parse the endpoint
+                 parsed = urlparse(endpoint if '://' in endpoint else f'http://{endpoint}')
+                 host = parsed.hostname
+                 port = parsed.port or 4317  # fall back to the default OTLP gRPC port when none is given
+
+                 # Test gRPC connection
+                 channel = grpc.insecure_channel(f'{host}:{port}')
+                 try:
+                     # Wait for the channel to be ready
+                     grpc.channel_ready_future(channel).result(timeout=3)
+                     result = endpoint
+                 finally:
+                     channel.close()
+
+             except grpc.FutureTimeoutError:
+                 print(f"[OTEL] Error: Connection timeout to OTLP endpoint {endpoint}. Check if the collector is running and accessible.")
+                 result = None
+             except Exception as e:
+                 print(f"[OTEL] Error: Failed to connect to OTLP endpoint {endpoint}: {type(e).__name__}: {e}")
+                 print("[OTEL] Telemetry data will not be exported. Verify endpoint configuration and network connectivity.")
+                 result = None
+     else:
+         result = None
+
+     # Cache the result
+     _ENDPOINT_CACHE[cache_key] = result
+     return result
+
+ def is_otel_debug() -> bool:
+     return os.environ.get('OTEL_DEBUG', 'false').lower() == 'true'
+
+
+ def get_millis_batch_time():
+     try:
+         return int(os.environ.get('BATCH_EXPORT_TIME_MILLIS', 100))
+     except Exception as e:
+         print(f"[OTEL Utils] Warning: Invalid BATCH_EXPORT_TIME_MILLIS value, using default 5000ms: {e}")
+         return 5000
+
+ def extract_event_from(message) -> str | None:
+     body = None
+     # Later keys intentionally take precedence (the 'Sns' envelope wins over a raw body)
+     if 'body' in message:
+         body = message['body']
+     if 'Body' in message:
+         body = message['Body']
+     if 'Message' in message:
+         body = message['Message']
+     if 'Sns' in message and 'Message' in message['Sns']:
+         body = message['Sns']['Message']
+     if body is not None:
+         try:
+             jbody = json.loads(body)
+             if 'event' in jbody:
+                 return jbody['event']
+         except Exception:
+             pass
+     return None
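A small sketch of how extract_event_from resolves the event name from SQS/SNS-style payloads (not part of the package; the record contents are invented and the import path is an assumption):

import json
from rebrandly_otel.otel_utils import extract_event_from

sqs_record = {'Body': json.dumps({'event': 'link.created', 'id': 'abc123'})}
sns_record = {'Sns': {'Message': json.dumps({'event': 'link.deleted'})}}

print(extract_event_from(sqs_record))        # 'link.created'
print(extract_event_from(sns_record))        # 'link.deleted' -- the 'Sns' envelope wins
print(extract_event_from({'Body': 'oops'}))  # None (body is not valid JSON)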
@@ -0,0 +1,219 @@
+ """
+ PyMySQL instrumentation for Rebrandly OTEL SDK
+ Provides query tracing and slow query detection
+ """
+
+ import os
+ import time
+ import functools
+ from opentelemetry.trace import Status, StatusCode, SpanKind
+
+ # Environment configuration
+ SLOW_QUERY_THRESHOLD_MS = int(os.getenv('PYMYSQL_SLOW_QUERY_THRESHOLD_MS', '1500'))
+ MAX_QUERY_LENGTH = 2000  # Truncate long queries
+
+
+ def instrument_pymysql(otel_instance, connection, options=None):
+     """
+     Instrument a PyMySQL connection for OpenTelemetry tracing
+
+     Args:
+         otel_instance: The RebrandlyOTEL instance
+         connection: The PyMySQL connection to instrument
+         options: Configuration options dict with:
+             - slow_query_threshold_ms: Threshold for slow query detection (default: 1500ms)
+             - capture_bindings: Include query bindings in spans (default: False for security)
+
+     Returns:
+         The instrumented connection
+     """
+     if options is None:
+         options = {}
+
+     slow_query_threshold_ms = options.get('slow_query_threshold_ms', SLOW_QUERY_THRESHOLD_MS)
+     capture_bindings = options.get('capture_bindings', False)
+
+     if not connection:
+         print('[Rebrandly OTEL PyMySQL] No connection provided for instrumentation')
+         return connection
+
+     if not otel_instance or not hasattr(otel_instance, 'tracer'):
+         print('[Rebrandly OTEL PyMySQL] No valid OTEL instance provided for instrumentation')
+         return connection
+
+     # Get the underlying OpenTelemetry tracer from RebrandlyOTEL instance
+     tracer = otel_instance.tracer.tracer
+
+     # Extract database name from connection
+     db_name = getattr(connection, 'db', None) or getattr(connection, 'database', None)
+     if db_name and isinstance(db_name, bytes):
+         db_name = db_name.decode('utf-8')
+
+     # Wrap the cursor method to return instrumented cursors
+     original_cursor = connection.cursor
+
+     def instrumented_cursor(*args, **kwargs):
+         cursor = original_cursor(*args, **kwargs)
+         return _instrument_cursor(cursor, tracer, slow_query_threshold_ms, capture_bindings, db_name)
+
+     connection.cursor = instrumented_cursor
+
+     return connection
+
+
+ def _instrument_cursor(cursor, tracer, slow_query_threshold_ms, capture_bindings, db_name=None):
+     """
+     Instrument a cursor's execute methods
+     """
+     original_execute = cursor.execute
+     original_executemany = cursor.executemany
+
+     @functools.wraps(original_execute)
+     def instrumented_execute(query, args=None):
+         return _trace_query(
+             original_execute,
+             tracer,
+             slow_query_threshold_ms,
+             capture_bindings,
+             db_name,
+             query,
+             args,
+             many=False
+         )
+
+     @functools.wraps(original_executemany)
+     def instrumented_executemany(query, args):
+         return _trace_query(
+             original_executemany,
+             tracer,
+             slow_query_threshold_ms,
+             capture_bindings,
+             db_name,
+             query,
+             args,
+             many=True
+         )
+
+     cursor.execute = instrumented_execute
+     cursor.executemany = instrumented_executemany
+
+     return cursor
+
+
+ def _trace_query(func, tracer, slow_query_threshold_ms, capture_bindings, db_name, query, args, many=False):
+     """
+     Trace a query execution with OpenTelemetry
+     """
+     operation = _extract_operation(query)
+     truncated_query = _truncate_query(query)
+
+     # Start span
+     span_name = f"pymysql.{'executemany' if many else 'execute'}"
+
+     with tracer.start_as_current_span(
+         name=span_name,
+         kind=SpanKind.CLIENT
+     ) as span:
+         # Set database attributes
+         span.set_attribute('db.system', 'mysql')
+         span.set_attribute('db.operation.name', operation)
+         span.set_attribute('db.statement', truncated_query)
+
+         # Set database name if available
+         if db_name:
+             span.set_attribute('db.name', db_name)
+         else:
+             span.set_attribute('db.name', 'unknown')
+
+         # Add bindings if enabled (be cautious with sensitive data)
+         if capture_bindings and args:
+             if many:
+                 span.set_attribute('db.bindings_count', len(args))
+             else:
+                 span.set_attribute('db.bindings', str(args))
+
+         start_time = time.time()
+
+         try:
+             # Execute the query
+             result = func(query, args)
+
+             # Calculate duration
+             duration_ms = (time.time() - start_time) * 1000
+             span.set_attribute('db.duration_ms', duration_ms)
+
+             # Check for slow query
+             if duration_ms >= slow_query_threshold_ms:
+                 span.set_attribute('db.slow_query', True)
+                 span.add_event('slow_query_detected', {
+                     'db.duration_ms': duration_ms,
+                     'db.threshold_ms': slow_query_threshold_ms
+                 })
+
+             # Set success status
+             span.set_status(Status(StatusCode.OK))
+
+             return result
+
+         except Exception as error:
+             # Calculate duration even on error
+             duration_ms = (time.time() - start_time) * 1000
+             span.set_attribute('db.duration_ms', duration_ms)
+
+             # Record exception
+             span.record_exception(error)
+             span.set_status(Status(StatusCode.ERROR, str(error)))
+
+             raise
+
+
+ def _extract_operation(sql):
+     """
+     Extract operation type from SQL statement
+
+     Args:
+         sql: SQL query string
+
+     Returns:
+         Operation type (SELECT, INSERT, UPDATE, etc.)
+     """
+     if not sql:
+         return 'unknown'
+
+     normalized = sql.strip().upper()
+
+     if normalized.startswith('SELECT'):
+         return 'SELECT'
+     if normalized.startswith('INSERT'):
+         return 'INSERT'
+     if normalized.startswith('UPDATE'):
+         return 'UPDATE'
+     if normalized.startswith('DELETE'):
+         return 'DELETE'
+     if normalized.startswith('CREATE'):
+         return 'CREATE'
+     if normalized.startswith('DROP'):
+         return 'DROP'
+     if normalized.startswith('ALTER'):
+         return 'ALTER'
+     if normalized.startswith('TRUNCATE'):
+         return 'TRUNCATE'
+
+     return 'unknown'
+
+
+ def _truncate_query(sql):
+     """
+     Truncate long queries for span attributes
+
+     Args:
+         sql: SQL query string
+
+     Returns:
+         Truncated query
+     """
+     if not sql:
+         return ''
+     if len(sql) <= MAX_QUERY_LENGTH:
+         return sql
+     return sql[:MAX_QUERY_LENGTH] + '... [truncated]'
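A hypothetical wiring sketch for the PyMySQL instrumentation (not part of the package). The module path is an assumption, and the shape of the OTEL instance is only known from the code above, which requires it to expose `.tracer.tracer`:

import pymysql
from rebrandly_otel.pymysql_instrumentation import instrument_pymysql  # assumed module name

def connect_instrumented(otel_instance, **mysql_kwargs):
    # Open a PyMySQL connection and wrap its cursor factory with tracing
    connection = pymysql.connect(**mysql_kwargs)
    return instrument_pymysql(
        otel_instance,
        connection,
        options={'slow_query_threshold_ms': 500, 'capture_bindings': False},
    )

# conn = connect_instrumented(otel, host='localhost', user='app', password='...', database='links')
# with conn.cursor() as cursor:
#     # Each execute() now runs inside a CLIENT span carrying db.* attributes
#     cursor.execute('SELECT id, destination FROM links WHERE id = %s', ('abc123',))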