genai-otel-instrument 0.1.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of genai-otel-instrument might be problematic. Click here for more details.
- genai_otel/__init__.py +129 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +413 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +187 -0
- genai_otel/cost_calculator.py +276 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +240 -0
- genai_otel/instrumentors/__init__.py +47 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +528 -0
- genai_otel/instrumentors/cohere_instrumentor.py +76 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +87 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +97 -0
- genai_otel/instrumentors/langchain_instrumentor.py +75 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +119 -0
- genai_otel/instrumentors/ollama_instrumentor.py +83 -0
- genai_otel/instrumentors/openai_instrumentor.py +241 -0
- genai_otel/instrumentors/replicate_instrumentor.py +42 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +42 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +42 -0
- genai_otel/llm_pricing.json +589 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/METADATA +463 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/RECORD +44 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/licenses/LICENSE +201 -0
- genai_otel_instrument-0.1.1.dev0.dist-info/top_level.txt +1 -0
genai_otel/__init__.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Top-level package for GenAI OpenTelemetry Auto-Instrumentation.
|
|
2
|
+
|
|
3
|
+
This package provides a comprehensive solution for automatically instrumenting
|
|
4
|
+
Generative AI (GenAI) and Large Language Model (LLM) applications with OpenTelemetry.
|
|
5
|
+
It supports various LLM providers, frameworks, and common data stores (MCP tools).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
|
|
14
|
+
# Suppress known third-party library warnings that we cannot control
|
|
15
|
+
warnings.filterwarnings("ignore", category=DeprecationWarning, module="pydantic")
|
|
16
|
+
warnings.filterwarnings("ignore", message=".*validate_default.*", module="pydantic")
|
|
17
|
+
warnings.filterwarnings("ignore", message=".*NumPy module was reloaded.*", module="replicate")
|
|
18
|
+
|
|
19
|
+
from .__version__ import __version__
|
|
20
|
+
|
|
21
|
+
# Package metadata (from pyproject.toml)
|
|
22
|
+
__author__ = "Kshitij Thakkar"
|
|
23
|
+
__email__ = "kshitijthakkar@rocketmail.com"
|
|
24
|
+
__license__ = "Apache-2.0"
|
|
25
|
+
|
|
26
|
+
# Re-exporting key components for easier access
|
|
27
|
+
from .auto_instrument import setup_auto_instrumentation # Restoring direct import
|
|
28
|
+
from .config import OTelConfig
|
|
29
|
+
from .cost_calculator import CostCalculator
|
|
30
|
+
from .gpu_metrics import GPUMetricsCollector
|
|
31
|
+
|
|
32
|
+
# Import instrumentors conditionally to avoid errors if dependencies aren't installed
|
|
33
|
+
from .instrumentors import (
|
|
34
|
+
AnthropicInstrumentor,
|
|
35
|
+
AnyscaleInstrumentor,
|
|
36
|
+
AWSBedrockInstrumentor,
|
|
37
|
+
AzureOpenAIInstrumentor,
|
|
38
|
+
CohereInstrumentor,
|
|
39
|
+
GoogleAIInstrumentor,
|
|
40
|
+
GroqInstrumentor,
|
|
41
|
+
HuggingFaceInstrumentor,
|
|
42
|
+
LangChainInstrumentor,
|
|
43
|
+
LlamaIndexInstrumentor,
|
|
44
|
+
MistralAIInstrumentor,
|
|
45
|
+
OllamaInstrumentor,
|
|
46
|
+
OpenAIInstrumentor,
|
|
47
|
+
ReplicateInstrumentor,
|
|
48
|
+
TogetherAIInstrumentor,
|
|
49
|
+
VertexAIInstrumentor,
|
|
50
|
+
)
|
|
51
|
+
from .mcp_instrumentors.manager import MCPInstrumentorManager
|
|
52
|
+
|
|
53
|
+
logger = logging.getLogger(__name__)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def instrument(**kwargs):
    """Initialize GenAI OpenTelemetry auto-instrumentation.

    Builds an ``OTelConfig`` from the given keyword arguments (which take
    precedence over environment variables) and hands it to
    ``setup_auto_instrumentation``. Any exception raised along the way is
    logged; it is re-raised only when failing on error has been requested.

    Args:
        **kwargs: Configuration overrides forwarded to ``OTelConfig``
            (e.g. ``service_name``, ``endpoint``). A ``fail_on_error`` entry,
            when present, also decides whether setup errors propagate.

    Raises:
        Exception: Whatever configuration or setup raised, but only when
            ``fail_on_error`` is truthy (keyword argument, or else the
            ``GENAI_FAIL_ON_ERROR`` environment variable equal to "true").

    Example:
        >>> from genai_otel import instrument
        >>> instrument(service_name="my-app", endpoint="http://localhost:4318")

    Environment Variables:
        OTEL_SERVICE_NAME: Name of the service (default: "genai-app")
        OTEL_EXPORTER_OTLP_ENDPOINT: OTLP endpoint (default: "http://localhost:4318")
        GENAI_ENABLE_GPU_METRICS: Enable GPU metrics (default: "true")
        GENAI_ENABLE_COST_TRACKING: Enable cost tracking (default: "true")
        GENAI_ENABLE_MCP_INSTRUMENTATION: Enable MCP instrumentation (default: "true")
        GENAI_FAIL_ON_ERROR: Fail if instrumentation errors occur (default: "false")
        OTEL_EXPORTER_OTLP_HEADERS: OTLP headers in format "key1=val1,key2=val2"
        GENAI_LOG_LEVEL: Logging level (default: "INFO")
        GENAI_LOG_FILE: Log file path (optional)
    """
    try:
        # kwargs win over environment variables inside OTelConfig.
        setup_auto_instrumentation(OTelConfig(**kwargs))
        logger.info("GenAI OpenTelemetry instrumentation initialized successfully")
    except Exception as exc:
        logger.error("Failed to initialize instrumentation: %s", exc, exc_info=True)
        # The config object may not exist if its construction failed, so the
        # decision is taken from the raw kwargs with the env var as fallback.
        env_wants_failure = os.getenv("GENAI_FAIL_ON_ERROR", "false").lower() == "true"
        if kwargs.get("fail_on_error", env_wants_failure):
            raise
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
__all__ = [
|
|
97
|
+
# Version info
|
|
98
|
+
"__version__",
|
|
99
|
+
"__author__",
|
|
100
|
+
"__email__",
|
|
101
|
+
"__license__",
|
|
102
|
+
# Core functions
|
|
103
|
+
"instrument",
|
|
104
|
+
"setup_auto_instrumentation", # Re-added to __all__
|
|
105
|
+
# Configuration
|
|
106
|
+
"OTelConfig",
|
|
107
|
+
# Utilities
|
|
108
|
+
"CostCalculator",
|
|
109
|
+
"GPUMetricsCollector",
|
|
110
|
+
# Instrumentors
|
|
111
|
+
"OpenAIInstrumentor",
|
|
112
|
+
"AnthropicInstrumentor",
|
|
113
|
+
"GoogleAIInstrumentor",
|
|
114
|
+
"AWSBedrockInstrumentor",
|
|
115
|
+
"AzureOpenAIInstrumentor",
|
|
116
|
+
"CohereInstrumentor",
|
|
117
|
+
"MistralAIInstrumentor",
|
|
118
|
+
"TogetherAIInstrumentor",
|
|
119
|
+
"GroqInstrumentor",
|
|
120
|
+
"LangChainInstrumentor",
|
|
121
|
+
"LlamaIndexInstrumentor",
|
|
122
|
+
"HuggingFaceInstrumentor",
|
|
123
|
+
"OllamaInstrumentor",
|
|
124
|
+
"VertexAIInstrumentor",
|
|
125
|
+
"ReplicateInstrumentor",
|
|
126
|
+
"AnyscaleInstrumentor",
|
|
127
|
+
# MCP Manager
|
|
128
|
+
"MCPInstrumentorManager",
|
|
129
|
+
]
|
|
genai_otel/__version__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.1.1.dev0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 1, 'dev0')
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
genai_otel/auto_instrument.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""Module for setting up OpenTelemetry auto-instrumentation for GenAI applications."""
|
|
2
|
+
|
|
3
|
+
# isort: skip_file
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from opentelemetry import metrics, trace
|
|
9
|
+
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
|
10
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
11
|
+
from opentelemetry.sdk.metrics import MeterProvider
|
|
12
|
+
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
|
|
13
|
+
from opentelemetry.sdk.metrics.view import View
|
|
14
|
+
from opentelemetry.sdk.metrics._internal.aggregation import ExplicitBucketHistogramAggregation
|
|
15
|
+
from opentelemetry.sdk.resources import Resource
|
|
16
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
17
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
|
|
18
|
+
|
|
19
|
+
from .config import OTelConfig
|
|
20
|
+
from .gpu_metrics import GPUMetricsCollector
|
|
21
|
+
from .mcp_instrumentors import MCPInstrumentorManager
|
|
22
|
+
from .metrics import (
|
|
23
|
+
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
|
|
24
|
+
_GEN_AI_SERVER_TBT,
|
|
25
|
+
_GEN_AI_SERVER_TFTT,
|
|
26
|
+
_MCP_CLIENT_OPERATION_DURATION_BUCKETS,
|
|
27
|
+
_MCP_PAYLOAD_SIZE_BUCKETS,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Import semantic conventions
|
|
31
|
+
try:
    # Prefer the shared semantic-convention constants when openlit is installed.
    from openlit.semcov import SemanticConvention as SC
except ImportError:
    # Fallback if openlit not available: define only the three metric names
    # that this module reads from SC when it builds its histogram views.
    class SC:
        GEN_AI_CLIENT_OPERATION_DURATION = "gen_ai.client.operation.duration"
        GEN_AI_SERVER_TTFT = "gen_ai.server.ttft"
        GEN_AI_SERVER_TBT = "gen_ai.server.tbt"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Import instrumentors - fix the import path based on your actual structure
|
|
42
|
+
try:
|
|
43
|
+
from .instrumentors import (
|
|
44
|
+
AnthropicInstrumentor,
|
|
45
|
+
AnyscaleInstrumentor,
|
|
46
|
+
AWSBedrockInstrumentor,
|
|
47
|
+
AzureOpenAIInstrumentor,
|
|
48
|
+
CohereInstrumentor,
|
|
49
|
+
GoogleAIInstrumentor,
|
|
50
|
+
GroqInstrumentor,
|
|
51
|
+
HuggingFaceInstrumentor,
|
|
52
|
+
LangChainInstrumentor,
|
|
53
|
+
LlamaIndexInstrumentor,
|
|
54
|
+
MistralAIInstrumentor,
|
|
55
|
+
OllamaInstrumentor,
|
|
56
|
+
OpenAIInstrumentor,
|
|
57
|
+
ReplicateInstrumentor,
|
|
58
|
+
TogetherAIInstrumentor,
|
|
59
|
+
VertexAIInstrumentor,
|
|
60
|
+
)
|
|
61
|
+
except ImportError:
|
|
62
|
+
# Fallback for testing or if instrumentors are in different structure
|
|
63
|
+
from genai_otel.instrumentors import (
|
|
64
|
+
AnthropicInstrumentor,
|
|
65
|
+
AnyscaleInstrumentor,
|
|
66
|
+
AWSBedrockInstrumentor,
|
|
67
|
+
AzureOpenAIInstrumentor,
|
|
68
|
+
CohereInstrumentor,
|
|
69
|
+
GoogleAIInstrumentor,
|
|
70
|
+
GroqInstrumentor,
|
|
71
|
+
HuggingFaceInstrumentor,
|
|
72
|
+
LangChainInstrumentor,
|
|
73
|
+
LlamaIndexInstrumentor,
|
|
74
|
+
MistralAIInstrumentor,
|
|
75
|
+
OllamaInstrumentor,
|
|
76
|
+
OpenAIInstrumentor,
|
|
77
|
+
ReplicateInstrumentor,
|
|
78
|
+
TogetherAIInstrumentor,
|
|
79
|
+
VertexAIInstrumentor,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
logger = logging.getLogger(__name__)
|
|
83
|
+
|
|
84
|
+
# Optional OpenInference instrumentors (requires Python >= 3.10)
|
|
85
|
+
try:
|
|
86
|
+
from openinference.instrumentation.litellm import LiteLLMInstrumentor # noqa: E402
|
|
87
|
+
from openinference.instrumentation.mcp import MCPInstrumentor # noqa: E402
|
|
88
|
+
from openinference.instrumentation.smolagents import ( # noqa: E402
|
|
89
|
+
SmolagentsInstrumentor,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
OPENINFERENCE_AVAILABLE = True
|
|
93
|
+
except ImportError:
|
|
94
|
+
LiteLLMInstrumentor = None
|
|
95
|
+
MCPInstrumentor = None
|
|
96
|
+
SmolagentsInstrumentor = None
|
|
97
|
+
OPENINFERENCE_AVAILABLE = False
|
|
98
|
+
|
|
99
|
+
# Defines the available instrumentors. This is now at the module level for easier mocking in tests.
|
|
100
|
+
# Registry mapping an instrumentor name (as listed in
# ``config.enabled_instrumentors``) to the class that instruments it.
INSTRUMENTORS = {
    "openai": OpenAIInstrumentor,
    "anthropic": AnthropicInstrumentor,
    "google.generativeai": GoogleAIInstrumentor,
    "boto3": AWSBedrockInstrumentor,
    "azure.ai.openai": AzureOpenAIInstrumentor,
    "cohere": CohereInstrumentor,
    "mistralai": MistralAIInstrumentor,
    "together": TogetherAIInstrumentor,
    "groq": GroqInstrumentor,
    "ollama": OllamaInstrumentor,
    "vertexai": VertexAIInstrumentor,
    "replicate": ReplicateInstrumentor,
    "anyscale": AnyscaleInstrumentor,
    "langchain": LangChainInstrumentor,
    "llama_index": LlamaIndexInstrumentor,
    "transformers": HuggingFaceInstrumentor,
}

# Register the OpenInference instrumentors only when their imports succeeded
# (requires Python >= 3.10); otherwise the names are bound to None above.
if OPENINFERENCE_AVAILABLE:
    INSTRUMENTORS["smolagents"] = SmolagentsInstrumentor
    INSTRUMENTORS["mcp"] = MCPInstrumentor
    INSTRUMENTORS["litellm"] = LiteLLMInstrumentor
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Global list to store OTLP exporter sessions that should not be instrumented
|
|
131
|
+
_OTLP_EXPORTER_SESSIONS = []
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _build_metric_views():
    """Return the histogram Views shared by the OTLP and console metric pipelines."""
    # (instrument name, explicit bucket boundaries), in registration order.
    histogram_buckets = (
        (SC.GEN_AI_CLIENT_OPERATION_DURATION, _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS),
        ("mcp.client.operation.duration", _MCP_CLIENT_OPERATION_DURATION_BUCKETS),
        ("mcp.request.size", _MCP_PAYLOAD_SIZE_BUCKETS),
        ("mcp.response.size", _MCP_PAYLOAD_SIZE_BUCKETS),
        # Streaming metrics (Phase 3.4)
        (SC.GEN_AI_SERVER_TTFT, _GEN_AI_SERVER_TFTT),
        (SC.GEN_AI_SERVER_TBT, _GEN_AI_SERVER_TBT),
    )
    return [
        View(
            instrument_name=instrument_name,
            aggregation=ExplicitBucketHistogramAggregation(boundaries=boundaries),
        )
        for instrument_name, boundaries in histogram_buckets
    ]


def _install_meter_provider(resource, metric_exporter):
    """Register a global MeterProvider that periodically exports via *metric_exporter*."""
    meter_provider = MeterProvider(
        resource=resource,
        metric_readers=[PeriodicExportingMetricReader(exporter=metric_exporter)],
        views=_build_metric_views(),
    )
    metrics.set_meter_provider(meter_provider)


def setup_auto_instrumentation(config: OTelConfig):
    """
    Set up OpenTelemetry with auto-instrumentation for LLM frameworks and MCP tools.

    The function installs a global TracerProvider and MeterProvider, sets the
    W3C trace-context propagator, then enables every instrumentor named in
    ``config.enabled_instrumentors``, the MCP tool instrumentation and GPU
    metrics collection (the latter two only when enabled in ``config``).

    When ``config.endpoint`` is set, spans and metrics are exported over
    OTLP/HTTP and the following environment variables are written as a side
    effect: ``OTEL_EXPORTER_OTLP_ENDPOINT``, ``OTEL_EXPORTER_OTLP_TIMEOUT`` and
    ``OTEL_PYTHON_REQUESTS_EXCLUDED_URLS``. Without an endpoint, console
    exporters are used instead.

    Args:
        config: OTelConfig instance with configuration parameters.

    Raises:
        Exception: Errors from an instrumentor, the MCP manager or the GPU
            collector are logged and re-raised only when
            ``config.fail_on_error`` is truthy. Errors while configuring the
            providers/exporters themselves are not caught here.
    """
    logger.info("Starting auto-instrumentation setup...")

    # Configure OpenTelemetry SDK (TracerProvider, MeterProvider, etc.)
    import os

    service_instance_id = os.getenv("OTEL_SERVICE_INSTANCE_ID")
    environment = os.getenv("OTEL_ENVIRONMENT")
    resource_attributes = {"service.name": config.service_name}
    if service_instance_id:
        resource_attributes["service.instance.id"] = service_instance_id
    if environment:
        resource_attributes["environment"] = environment
    resource = Resource.create(resource_attributes)

    # Configure Tracing
    tracer_provider = TracerProvider(resource=resource)
    trace.set_tracer_provider(tracer_provider)
    from opentelemetry.propagate import set_global_textmap
    from opentelemetry.trace.propagation.tracecontext import (
        TraceContextTextMapPropagator,
    )

    set_global_textmap(TraceContextTextMapPropagator())

    logger.debug(f"OTelConfig endpoint: {config.endpoint}")
    if config.endpoint:
        # Convert timeout to float safely
        timeout_str = os.getenv("OTEL_EXPORTER_OTLP_TIMEOUT", "10.0")
        try:
            timeout = float(timeout_str)
        except (ValueError, TypeError):
            logger.warning(f"Invalid timeout value '{timeout_str}', using default 10.0")
            timeout = 10.0

        # CRITICAL FIX: Set endpoint in environment variable so exporters can append correct paths
        # The exporters only call _append_trace_path() when reading from env vars
        from urllib.parse import urlparse

        # Set the base endpoint in environment variable
        os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = config.endpoint

        parsed = urlparse(config.endpoint)
        base_url = f"{parsed.scheme}://{parsed.netloc}"

        # Build list of URLs to exclude from instrumentation, so that the
        # exporters' own HTTP traffic is not traced.
        excluded_urls = [
            base_url,
            config.endpoint,
            f"{base_url}/v1/traces",
            f"{base_url}/v1/metrics",
            config.endpoint.rstrip("/") + "/v1/traces",
            config.endpoint.rstrip("/") + "/v1/metrics",
        ]

        # Add to environment variable (comma-separated), keeping any
        # exclusions the user already configured.
        existing = os.environ.get("OTEL_PYTHON_REQUESTS_EXCLUDED_URLS", "")
        if existing:
            excluded_urls.append(existing)
        os.environ["OTEL_PYTHON_REQUESTS_EXCLUDED_URLS"] = ",".join(excluded_urls)
        logger.info(f"Excluded OTLP endpoints from instrumentation: {base_url}")

        # Set timeout in environment variable
        os.environ["OTEL_EXPORTER_OTLP_TIMEOUT"] = str(timeout)

        # Create exporters WITHOUT passing endpoint (let them read from env vars)
        # This ensures they call _append_trace_path() correctly
        span_exporter = OTLPSpanExporter(
            headers=config.headers,
        )
        tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))
        logger.info(
            f"OpenTelemetry tracing configured with OTLP endpoint: {span_exporter._endpoint}"
        )

        # Configure Metrics with Views for histogram buckets
        metric_exporter = OTLPMetricExporter(
            headers=config.headers,
        )
        _install_meter_provider(resource, metric_exporter)
        logger.info(
            f"OpenTelemetry metrics configured with OTLP endpoint: {metric_exporter._endpoint}"
        )
    else:
        # Configure Console Exporters if no OTLP endpoint is set
        span_exporter = ConsoleSpanExporter()
        tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))
        logger.info("No OTLP endpoint configured, traces will be exported to console.")

        # Same histogram views as the OTLP path, only the exporter differs.
        _install_meter_provider(resource, ConsoleMetricExporter())
        logger.info("No OTLP endpoint configured, metrics will be exported to console.")

    # OpenInference instrumentors that use different API (no config parameter)
    # Only include if OpenInference is available (Python >= 3.10)
    openinference_names = {"smolagents", "mcp", "litellm"} if OPENINFERENCE_AVAILABLE else set()

    # Auto-instrument LLM libraries based on the configuration
    for name in config.enabled_instrumentors:
        if name not in INSTRUMENTORS:
            logger.warning(f"Unknown instrumentor '{name}' requested.")
            continue
        try:
            instrumentor = INSTRUMENTORS[name]()

            # OpenInference instrumentors don't take config parameter
            if name in openinference_names:
                instrumentor.instrument()
            else:
                instrumentor.instrument(config=config)

            logger.info(f"{name} instrumentation enabled")
        except Exception as e:
            logger.error(f"Failed to instrument {name}: {e}", exc_info=True)
            if config.fail_on_error:
                raise

    # Auto-instrument MCP tools (databases, APIs, etc.)
    # NOTE: OTLP endpoints are excluded via OTEL_PYTHON_REQUESTS_EXCLUDED_URLS set above
    if config.enable_mcp_instrumentation:
        try:
            mcp_manager = MCPInstrumentorManager(config)
            mcp_manager.instrument_all(config.fail_on_error)
            logger.info("MCP tools instrumentation enabled and set up.")
        except Exception as e:
            logger.error(f"Failed to set up MCP tools instrumentation: {e}", exc_info=True)
            if config.fail_on_error:
                raise

    # Start GPU metrics collection if enabled
    if config.enable_gpu_metrics:
        try:
            meter_provider = metrics.get_meter_provider()
            gpu_collector = GPUMetricsCollector(
                meter_provider.get_meter("genai.gpu"),
                config,
                interval=config.gpu_collection_interval,
            )
            gpu_collector.start()
            logger.info(
                f"GPU metrics collection started (interval: {config.gpu_collection_interval}s)."
            )
        except Exception as e:
            logger.error(f"Failed to start GPU metrics collection: {e}", exc_info=True)
            if config.fail_on_error:
                raise

    logger.info("Auto-instrumentation setup complete")
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def instrument(**kwargs):
    """
    Keyword-argument front end for setup_auto_instrumentation.

    Builds an OTelConfig from the supplied keyword arguments and runs the full
    OpenTelemetry setup for LLM frameworks and MCP tools with it. Exceptions
    from either step propagate to the caller.

    Args:
        **kwargs: Keyword arguments to configure OTelConfig. These will override
                  environment variables.

    Example:
        >>> instrument(service_name="my-app", endpoint="http://localhost:4318")
    """
    # Config resolution (kwargs over environment) happens inside OTelConfig.
    setup_auto_instrumentation(OTelConfig(**kwargs))
|
genai_otel/cli.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""CLI tool for running instrumented applications"""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import runpy
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from genai_otel import instrument
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """Main entry point for the genai-instrument CLI tool.

    Parses command-line arguments, initializes OpenTelemetry instrumentation,
    and then executes the specified command/script with its arguments.

    Supports two usage patterns:
        1. genai-instrument python script.py [args...]
        2. genai-instrument script.py [args...]

    In both cases, the Python script is executed in the same process to ensure
    instrumentation hooks are active. ``sys.argv`` is replaced with the
    script path followed by its arguments before the script runs.

    The process exits with status 1 when no command is given, when
    instrumentation setup raises, when the command format is not recognized,
    or when the script is missing or raises an exception.
    """
    parser = argparse.ArgumentParser(
        description=("Run a Python script with GenAI OpenTelemetry instrumentation.")
    )
    parser.add_argument(
        "command",
        nargs=argparse.REMAINDER,
        help="The command to run (python script.py or script.py)",
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    # Load configuration from environment variables
    # The `instrument` function will handle loading config.
    try:
        # Initialize instrumentation. This reads env vars like OTEL_SERVICE_NAME, etc.
        # If GENAI_FAIL_ON_ERROR is true and setup fails, it will raise an exception.
        instrument()
    except Exception as e:
        logger.error(f"Failed to initialize instrumentation: {e}", exc_info=True)
        sys.exit(1)  # Exit if instrumentation setup fails and fail_on_error is true

    # Parse the command to extract the Python script and its arguments
    script_path = None
    script_args = []

    launcher = args.command[0]
    if launcher.endswith(".py"):
        # Format: genai-instrument script.py [args...]
        # Checked first so that a script whose file name merely begins with
        # "python" (e.g. python_app.py) is not mistaken for the interpreter.
        script_path = launcher
        script_args = args.command[1:]
    elif os.path.basename(launcher).lower().startswith("python"):
        # Format: genai-instrument python script.py [args...]
        # Covers python, python3, python3.11, python.exe, /usr/bin/python3, ...
        if len(args.command) < 2:
            logger.error("No Python script specified after 'python' command")
            sys.exit(1)
        script_path = args.command[1]
        script_args = args.command[2:]
    else:
        logger.error(
            f"Invalid command format. Expected 'python script.py' or 'script.py', got: {' '.join(args.command)}"
        )
        sys.exit(1)

    # Set sys.argv to simulate running the script directly
    # This ensures the target script receives the correct arguments
    sys.argv = [script_path] + script_args

    # Run the target script in the same process using runpy
    # This ensures instrumentation hooks are active in the script
    try:
        runpy.run_path(script_path, run_name="__main__")
    except FileNotFoundError:
        logger.error(f"Script not found: {script_path}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error running script {script_path}: {e}", exc_info=True)
        sys.exit(1)
|