rebrandly-otel 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,212 @@
+ # metrics.py
+ """Metrics implementation for Rebrandly OTEL SDK."""
+ from typing import Optional, Dict, List
+ from dataclasses import dataclass
+ from enum import Enum
+ from opentelemetry import metrics
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+ from opentelemetry.metrics import Meter, Histogram, Instrument, Counter
+ from opentelemetry.metrics._internal import Gauge
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.metrics.export import (PeriodicExportingMetricReader, ConsoleMetricExporter)
+ from opentelemetry.sdk.metrics.view import View, ExplicitBucketHistogramAggregation
+
+ from .otel_utils import *
+
+ class MetricType(Enum):
+     """Supported metric types."""
+     COUNTER = "counter"
+     GAUGE = "gauge"
+     HISTOGRAM = "histogram"
+     UP_DOWN_COUNTER = "up_down_counter"
+
+ @dataclass
+ class MetricDefinition:
+     """Definition of a metric."""
+     name: str
+     description: str
+     unit: str = "1"
+     type: MetricType = MetricType.COUNTER
+
+ class RebrandlyMeter:
+     """Wrapper for OpenTelemetry metrics with Rebrandly-specific features."""
+
+     # Standardized metric definitions aligned with Node.js
+     DEFAULT_METRICS = {
+         ## PROCESS
+         'cpu_usage_percentage': MetricDefinition(
+             name='process.cpu.utilization',
+             description='Difference in process.cpu.time since the last measurement, divided by the elapsed time and number of CPUs available to the process.',
+             unit='1',
+             type=MetricType.GAUGE
+         ),
+         'memory_usage_bytes': MetricDefinition(
+             name='process.memory.used',
+             description='The amount of physical memory in use.',
+             unit='By',
+             type=MetricType.GAUGE
+         )
+     }
+
+     class GlobalMetrics:
+         def __init__(self, rebrandly_meter):
+             self.__rebrandly_meter = rebrandly_meter
+             self.cpu_usage_percentage: Gauge = self.__rebrandly_meter.get_metric('cpu_usage_percentage')
+             self.memory_usage_bytes: Gauge = self.__rebrandly_meter.get_metric('memory_usage_bytes')
+
+
+     def __init__(self):
+         self._meter: Optional[Meter] = None
+         self._provider: Optional[MeterProvider] = None
+         self._metrics: Dict[str, Instrument] = {}
+         self.__setup_metrics()
+         self.__register_default_metrics()
+         self.GlobalMetrics = RebrandlyMeter.GlobalMetrics(self)
+
+     def __setup_metrics(self):
+         """Initialize metrics with configured exporters."""
+
+         readers = []
+
+         # Add console exporter for local debugging
+         if is_otel_debug():
+             console_reader = PeriodicExportingMetricReader(
+                 ConsoleMetricExporter(),
+                 export_interval_millis=1000  # 1 second for debugging
+             )
+             readers.append(console_reader)
+
+         # Add OTLP exporter if configured
+         otel_endpoint = get_otlp_endpoint()
+         if otel_endpoint is not None:
+             otlp_exporter = OTLPMetricExporter(
+                 endpoint=otel_endpoint,
+                 timeout=5
+             )
+             otlp_reader = PeriodicExportingMetricReader(otlp_exporter, export_interval_millis=get_millis_batch_time())
+             readers.append(otlp_reader)
+
+         # Create views
+         views = self.__create_views()
+
+         # Create provider
+         self._provider = MeterProvider(
+             resource=create_resource(),
+             metric_readers=readers,
+             views=views
+         )
+
+         # Set as global provider
+         metrics.set_meter_provider(self._provider)
+
+         # Get meter
+         self._meter = metrics.get_meter(get_service_name(), get_service_version())
+
+     def __create_views(self) -> List[View]:
+         """Create metric views for customization."""
+         views = []
+
+         # Histogram view with custom buckets
+         histogram_view = View(
+             instrument_type=Histogram,
+             instrument_name="*",
+             aggregation=ExplicitBucketHistogramAggregation((0.001, 0.004, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5))  # todo <-- define buckets
+         )
+         views.append(histogram_view)
+
+         return views
+
+     def __register_default_metrics(self):
+         """Register default metrics."""
+         for name, definition in self.DEFAULT_METRICS.items():
+             self.register_metric(definition, key=name)
+
+     @property
+     def meter(self) -> Meter:
+         """Get the underlying OpenTelemetry meter."""
+         if not self._meter:
+             # Return no-op meter if metrics are disabled
+             return metrics.get_meter(__name__)
+         return self._meter
+
+     def force_flush(self, timeout_millis: int = 5000) -> bool:
+         """
+         Force flush all pending metrics.
+
+         Args:
+             timeout_millis: Maximum time to wait for flush in milliseconds
+
+         Returns:
+             True if flush succeeded, False otherwise
+         """
+         if not hasattr(self, '_provider') or not self._provider:
+             return True
+
+         try:
+             # MeterProvider.force_flush flushes through the registered metric readers
+             success = self._provider.force_flush(timeout_millis)
+             return success
+         except Exception as e:
+             print(f"[Meter] Error during force flush: {e}")
+             # Treat a failed metrics flush as non-fatal
+             return True
+
+     def shutdown(self):
+         """Shutdown the meter provider."""
+         if hasattr(self, '_provider') and self._provider:
+             try:
+                 self._provider.shutdown()
+                 print("[Meter] Shutdown completed")
+             except Exception as e:
+                 print(f"[Meter] Error during shutdown: {e}")
+
+     def register_metric(self, definition: MetricDefinition, key: Optional[str] = None) -> Instrument:
+         """Register a new metric."""
+         # Use the full name as primary key
+         if definition.name in self._metrics:
+             return self._metrics[definition.name]
+
+         metric = self.__create_metric(definition)
+         self._metrics[definition.name] = metric
+
+         # Also store by key name if provided (for easy lookup)
+         if key:
+             self._metrics[key] = metric
+
+         return metric
+
+     def __create_metric(self, definition: MetricDefinition) -> Instrument:
+         """Create a metric instrument based on definition."""
+         if definition.type == MetricType.COUNTER:
+             return self.meter.create_counter(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         elif definition.type == MetricType.HISTOGRAM:
+             return self.meter.create_histogram(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         elif definition.type == MetricType.UP_DOWN_COUNTER:
+             return self.meter.create_up_down_counter(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         elif definition.type == MetricType.GAUGE:
+             # Synchronous gauge instrument (no observable callback)
+             return self.meter.create_gauge(
+                 name=definition.name,
+                 unit=definition.unit,
+                 description=definition.description
+             )
+         else:
+             raise ValueError(f"Unknown metric type: {definition.type}")
+
+     def get_metric(self, name: str) -> Optional[Instrument]:
+         """Get a registered metric by name."""
+         return self._metrics.get(name)
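A minimal usage sketch (not part of the package diff above): registering and recording a custom histogram through RebrandlyMeter. The import path `rebrandly_otel.metrics` and the instrument name, unit, and attributes are assumptions for illustration only.

from rebrandly_otel.metrics import RebrandlyMeter, MetricDefinition, MetricType

meter = RebrandlyMeter()  # sets up readers, views and the global MeterProvider

# Hypothetical instrument definition; name and attributes are illustrative
latency = meter.register_metric(
    MetricDefinition(
        name='http.server.request.duration',
        description='Duration of inbound HTTP requests.',
        unit='s',
        type=MetricType.HISTOGRAM,
    ),
    key='request_latency',
)

latency.record(0.042, {'http.route': '/v1/links'})  # record via the returned instrument
meter.get_metric('request_latency').record(0.015)   # or look it up by its key
meter.force_flush()
meter.shutdown()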
@@ -0,0 +1,169 @@
+
+ # otel_utils.py
+
+ import os
+ import sys
+ import grpc
+ import json
+
+ from opentelemetry.sdk.resources import Resource, SERVICE_NAMESPACE, DEPLOYMENT_ENVIRONMENT
+ from opentelemetry.semconv.attributes import service_attributes, telemetry_attributes
+ from opentelemetry.semconv.resource import ResourceAttributes
+ from opentelemetry.semconv._incubating.attributes import process_attributes, deployment_attributes
+
+ # Cache for endpoint validation results
+ _ENDPOINT_CACHE = {}
+
+ def create_resource(name: str = None, version: str = None) -> Resource:
+
+     if name is None:
+         name = get_service_name()
+     if version is None:
+         version = get_service_version()
+
+     env = os.environ.get('ENV', os.environ.get('ENVIRONMENT', os.environ.get('NODE_ENV', 'local')))
+
+     resources_attributes = {
+         service_attributes.SERVICE_NAME: name,
+         "application.name": name,
+         service_attributes.SERVICE_VERSION: version,
+         process_attributes.PROCESS_RUNTIME_VERSION: f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
+         SERVICE_NAMESPACE: get_application_name(),
+         DEPLOYMENT_ENVIRONMENT: env,
+         telemetry_attributes.TELEMETRY_SDK_LANGUAGE: "python",
+         telemetry_attributes.TELEMETRY_SDK_NAME: "rebrandly-otel-sdk",
+         telemetry_attributes.TELEMETRY_SDK_VERSION: version
+     }
+
+     ora = os.environ.get('OTEL_RESOURCE_ATTRIBUTES', '')
+     if ora.strip() != "":
+         try:
+             for attr in ora.split(','):
+                 attr = attr.strip()
+                 if attr != "" and '=' in attr:
+                     # Split on first '=' only, in case value contains '='
+                     k, v = attr.split('=', 1)
+                     resources_attributes[k.strip()] = v.strip()
+         except Exception as e:
+             print(f"[OTEL Utils] Warning: Invalid OTEL_RESOURCE_ATTRIBUTES value: {e}")
+
+     if os.environ.get('OTEL_REPO_NAME', None) is not None:
+         resources_attributes['repository.name'] = os.environ.get('OTEL_REPO_NAME')
+
+     if os.environ.get('OTEL_COMMIT_ID', None) is not None:
+         resources_attributes[service_attributes.SERVICE_VERSION] = os.environ.get('OTEL_COMMIT_ID')
+
+     resource = Resource.create(
+         resources_attributes
+     )
+     return resource
+
+ def get_package_version():
+     try:
+         from importlib.metadata import version, PackageNotFoundError  # Python 3.8+
+         return version('rebrandly_otel')
+     except ImportError:
+         # PackageNotFoundError subclasses ImportError, so a missing distribution also lands here
+         try:
+             from importlib_metadata import version, PackageNotFoundError
+             return version('rebrandly_otel')
+         except Exception as e:
+             print(f"[OTEL Utils] Warning: Could not get package version: {e}")
+             return '0.1.0'
+
+
+ def get_service_name(service_name: str = None) -> str:
+     if service_name is None:
+         serv = os.environ.get('OTEL_SERVICE_NAME', 'default-service-python')
+         if serv.strip() == "":
+             return 'default-service-python'
+         return serv
+     return service_name
+
+
+ def get_service_version(service_version: str = None) -> str:
+     if service_version is None:
+         return os.environ.get('OTEL_SERVICE_VERSION', get_package_version())
+     return service_version
+
+
+ def get_application_name() -> str:
+     return os.environ.get('OTEL_SERVICE_APPLICATION', get_service_name())
+
+
+ def get_otlp_endpoint(otlp_endpoint: str = None) -> str | None:
+     endpoint = otlp_endpoint or os.environ.get('OTEL_EXPORTER_OTLP_ENDPOINT', None)
+
+     # Return cached result if available
+     cache_key = endpoint if endpoint else '__none__'
+     if cache_key in _ENDPOINT_CACHE:
+         return _ENDPOINT_CACHE[cache_key]
+
+     # Store the result to cache
+     result = None
+
+     if endpoint is not None:
+
+         if endpoint.strip() == "":
+             result = None
+         else:
+             try:
+                 from urllib.parse import urlparse
+
+                 # Parse the endpoint
+                 parsed = urlparse(endpoint if '://' in endpoint else f'http://{endpoint}')
+                 host = parsed.hostname
+                 port = parsed.port or 4317  # fall back to the default OTLP gRPC port when none is given
+
+                 # Test gRPC connection
+                 channel = grpc.insecure_channel(f'{host}:{port}')
+                 try:
+                     # Wait for the channel to be ready
+                     grpc.channel_ready_future(channel).result(timeout=3)
+                     result = endpoint
+                 finally:
+                     channel.close()
+
+             except grpc.FutureTimeoutError:
+                 print(f"[OTEL] Error: Connection timeout to OTLP endpoint {endpoint}. Check if the collector is running and accessible.")
+                 result = None
+             except Exception as e:
+                 print(f"[OTEL] Error: Failed to connect to OTLP endpoint {endpoint}: {type(e).__name__}: {e}")
+                 print("[OTEL] Telemetry data will not be exported. Verify endpoint configuration and network connectivity.")
+                 result = None
+     else:
+         result = None
+
+     # Cache the result
+     _ENDPOINT_CACHE[cache_key] = result
+     return result
+
+ def is_otel_debug() -> bool:
+     return os.environ.get('OTEL_DEBUG', 'false').lower() == 'true'
+
+
+ def get_millis_batch_time():
+     try:
+         return int(os.environ.get('BATCH_EXPORT_TIME_MILLIS', 100))
+     except Exception as e:
+         print(f"[OTEL Utils] Warning: Invalid BATCH_EXPORT_TIME_MILLIS value, using default 5000ms: {e}")
+         return 5000
+
+ def extract_event_from(message) -> str | None:
+     body = None
+     # Later keys intentionally take precedence (the 'Sns' envelope wins over a raw body)
+     if 'body' in message:
+         body = message['body']
+     if 'Body' in message:
+         body = message['Body']
+     if 'Message' in message:
+         body = message['Message']
+     if 'Sns' in message and 'Message' in message['Sns']:
+         body = message['Sns']['Message']
+     if body is not None:
+         try:
+             jbody = json.loads(body)
+             if 'event' in jbody:
+                 return jbody['event']
+         except Exception:
+             pass
+     return None
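A small sketch of how extract_event_from resolves the event name from SQS/SNS-style payloads (not part of the package; the record contents are invented and the import path is an assumption):

import json
from rebrandly_otel.otel_utils import extract_event_from

sqs_record = {'Body': json.dumps({'event': 'link.created', 'id': 'abc123'})}
sns_record = {'Sns': {'Message': json.dumps({'event': 'link.deleted'})}}

print(extract_event_from(sqs_record))        # 'link.created'
print(extract_event_from(sns_record))        # 'link.deleted' -- the 'Sns' envelope wins
print(extract_event_from({'Body': 'oops'}))  # None (body is not valid JSON)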
@@ -0,0 +1,219 @@
+ """
+ PyMySQL instrumentation for Rebrandly OTEL SDK
+ Provides query tracing and slow query detection
+ """
+
+ import os
+ import time
+ import functools
+ from opentelemetry.trace import Status, StatusCode, SpanKind
+
+ # Environment configuration
+ SLOW_QUERY_THRESHOLD_MS = int(os.getenv('PYMYSQL_SLOW_QUERY_THRESHOLD_MS', '1500'))
+ MAX_QUERY_LENGTH = 2000  # Truncate long queries
+
+
+ def instrument_pymysql(otel_instance, connection, options=None):
+     """
+     Instrument a PyMySQL connection for OpenTelemetry tracing
+
+     Args:
+         otel_instance: The RebrandlyOTEL instance
+         connection: The PyMySQL connection to instrument
+         options: Configuration options dict with:
+             - slow_query_threshold_ms: Threshold for slow query detection (default: 1500ms)
+             - capture_bindings: Include query bindings in spans (default: False for security)
+
+     Returns:
+         The instrumented connection
+     """
+     if options is None:
+         options = {}
+
+     slow_query_threshold_ms = options.get('slow_query_threshold_ms', SLOW_QUERY_THRESHOLD_MS)
+     capture_bindings = options.get('capture_bindings', False)
+
+     if not connection:
+         print('[Rebrandly OTEL PyMySQL] No connection provided for instrumentation')
+         return connection
+
+     if not otel_instance or not hasattr(otel_instance, 'tracer'):
+         print('[Rebrandly OTEL PyMySQL] No valid OTEL instance provided for instrumentation')
+         return connection
+
+     # Get the underlying OpenTelemetry tracer from RebrandlyOTEL instance
+     tracer = otel_instance.tracer.tracer
+
+     # Extract database name from connection
+     db_name = getattr(connection, 'db', None) or getattr(connection, 'database', None)
+     if db_name and isinstance(db_name, bytes):
+         db_name = db_name.decode('utf-8')
+
+     # Wrap the cursor method to return instrumented cursors
+     original_cursor = connection.cursor
+
+     def instrumented_cursor(*args, **kwargs):
+         cursor = original_cursor(*args, **kwargs)
+         return _instrument_cursor(cursor, tracer, slow_query_threshold_ms, capture_bindings, db_name)
+
+     connection.cursor = instrumented_cursor
+
+     return connection
+
+
+ def _instrument_cursor(cursor, tracer, slow_query_threshold_ms, capture_bindings, db_name=None):
+     """
+     Instrument a cursor's execute methods
+     """
+     original_execute = cursor.execute
+     original_executemany = cursor.executemany
+
+     @functools.wraps(original_execute)
+     def instrumented_execute(query, args=None):
+         return _trace_query(
+             original_execute,
+             tracer,
+             slow_query_threshold_ms,
+             capture_bindings,
+             db_name,
+             query,
+             args,
+             many=False
+         )
+
+     @functools.wraps(original_executemany)
+     def instrumented_executemany(query, args):
+         return _trace_query(
+             original_executemany,
+             tracer,
+             slow_query_threshold_ms,
+             capture_bindings,
+             db_name,
+             query,
+             args,
+             many=True
+         )
+
+     cursor.execute = instrumented_execute
+     cursor.executemany = instrumented_executemany
+
+     return cursor
+
+
+ def _trace_query(func, tracer, slow_query_threshold_ms, capture_bindings, db_name, query, args, many=False):
+     """
+     Trace a query execution with OpenTelemetry
+     """
+     operation = _extract_operation(query)
+     truncated_query = _truncate_query(query)
+
+     # Start span
+     span_name = f"pymysql.{'executemany' if many else 'execute'}"
+
+     with tracer.start_as_current_span(
+         name=span_name,
+         kind=SpanKind.CLIENT
+     ) as span:
+         # Set database attributes
+         span.set_attribute('db.system', 'mysql')
+         span.set_attribute('db.operation.name', operation)
+         span.set_attribute('db.statement', truncated_query)
+
+         # Set database name if available
+         if db_name:
+             span.set_attribute('db.name', db_name)
+         else:
+             span.set_attribute('db.name', 'unknown')
+
+         # Add bindings if enabled (be cautious with sensitive data)
+         if capture_bindings and args:
+             if many:
+                 span.set_attribute('db.bindings_count', len(args))
+             else:
+                 span.set_attribute('db.bindings', str(args))
+
+         start_time = time.time()
+
+         try:
+             # Execute the query
+             result = func(query, args)
+
+             # Calculate duration
+             duration_ms = (time.time() - start_time) * 1000
+             span.set_attribute('db.duration_ms', duration_ms)
+
+             # Check for slow query
+             if duration_ms >= slow_query_threshold_ms:
+                 span.set_attribute('db.slow_query', True)
+                 span.add_event('slow_query_detected', {
+                     'db.duration_ms': duration_ms,
+                     'db.threshold_ms': slow_query_threshold_ms
+                 })
+
+             # Set success status
+             span.set_status(Status(StatusCode.OK))
+
+             return result
+
+         except Exception as error:
+             # Calculate duration even on error
+             duration_ms = (time.time() - start_time) * 1000
+             span.set_attribute('db.duration_ms', duration_ms)
+
+             # Record exception
+             span.record_exception(error)
+             span.set_status(Status(StatusCode.ERROR, str(error)))
+
+             raise
+
+
+ def _extract_operation(sql):
+     """
+     Extract operation type from SQL statement
+
+     Args:
+         sql: SQL query string
+
+     Returns:
+         Operation type (SELECT, INSERT, UPDATE, etc.)
+     """
+     if not sql:
+         return 'unknown'
+
+     normalized = sql.strip().upper()
+
+     if normalized.startswith('SELECT'):
+         return 'SELECT'
+     if normalized.startswith('INSERT'):
+         return 'INSERT'
+     if normalized.startswith('UPDATE'):
+         return 'UPDATE'
+     if normalized.startswith('DELETE'):
+         return 'DELETE'
+     if normalized.startswith('CREATE'):
+         return 'CREATE'
+     if normalized.startswith('DROP'):
+         return 'DROP'
+     if normalized.startswith('ALTER'):
+         return 'ALTER'
+     if normalized.startswith('TRUNCATE'):
+         return 'TRUNCATE'
+
+     return 'unknown'
+
+
+ def _truncate_query(sql):
+     """
+     Truncate long queries for span attributes
+
+     Args:
+         sql: SQL query string
+
+     Returns:
+         Truncated query
+     """
+     if not sql:
+         return ''
+     if len(sql) <= MAX_QUERY_LENGTH:
+         return sql
+     return sql[:MAX_QUERY_LENGTH] + '... [truncated]'
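A hypothetical wiring sketch for the PyMySQL instrumentation (not part of the package). The module path is an assumption, and the shape of the OTEL instance is only known from the code above, which requires it to expose `.tracer.tracer`:

import pymysql
from rebrandly_otel.pymysql_instrumentation import instrument_pymysql  # assumed module name

def connect_instrumented(otel_instance, **mysql_kwargs):
    # Open a PyMySQL connection and wrap its cursor factory with tracing
    connection = pymysql.connect(**mysql_kwargs)
    return instrument_pymysql(
        otel_instance,
        connection,
        options={'slow_query_threshold_ms': 500, 'capture_bindings': False},
    )

# conn = connect_instrumented(otel, host='localhost', user='app', password='...', database='links')
# with conn.cursor() as cursor:
#     # Each execute() now runs inside a CLIENT span carrying db.* attributes
#     cursor.execute('SELECT id, destination FROM links WHERE id = %s', ('abc123',))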