llm-otel-kit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ # Environment / secrets
2
+ .env
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.pyc
7
+ *.pyo
8
+ .venv/
9
+
10
+ # IDE
11
+ .vscode/
12
+
13
+ # OS
14
+ .DS_Store
15
+ Thumbs.db
16
+
17
+ # Docker
18
+ open-webui-data/
@@ -0,0 +1,13 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2025-07-12)
4
+
5
+ ### Added
6
+
7
+ - Initial release
8
+ - Provider abstraction: `LLMProvider` ABC with `complete()`, `stream()`, `list_models()`
9
+ - Providers: Ollama, OpenAI-compatible (OpenAI, vLLM, llama.cpp, LM Studio, Groq, Together, Fireworks, Azure OpenAI, LiteLLM), Anthropic
10
+ - OTel bootstrap: `init_observability()` with Dynatrace-compatible temporality
11
+ - GenAI metrics: 10 instruments following OTel GenAI semantic conventions
12
+ - Span helpers: `set_genai_span()`, `set_genai_response()`, `classify_request()`
13
+ - Config: `AppConfig.from_env()` with legacy `OLLAMA_BASE_URL` fallback
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 theharithsa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-otel-kit
3
+ Version: 0.1.0
4
+ Summary: Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud.
5
+ Project-URL: Homepage, https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry
6
+ Project-URL: Issues, https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry/issues
7
+ Author: Vishruth Harithsa
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: dynatrace,genai,llm,observability,ollama,openai,opentelemetry
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Framework :: FastAPI
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: System :: Monitoring
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: httpx>=0.27
20
+ Requires-Dist: opentelemetry-api>=1.25
21
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.25
22
+ Requires-Dist: opentelemetry-sdk>=1.25
23
+ Requires-Dist: traceloop-sdk>=0.59
24
+ Provides-Extra: anthropic
25
+ Requires-Dist: anthropic>=0.25; extra == 'anthropic'
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio; extra == 'dev'
29
+ Requires-Dist: ruff; extra == 'dev'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # llm-otel-kit
33
+
34
+ Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud.
35
+
36
+ ## What it does
37
+
38
+ `llm-otel-kit` gives you **full OTel GenAI semantic convention coverage** for any LLM provider in ~10 lines of code:
39
+
40
+ - **Traces** with `gen_ai.*` span attributes (model, tokens, latency, streaming mode)
41
+ - **Metrics** — 10 instruments: operation duration, token usage, TTFT, TPOT, throughput, error rate, active requests
42
+ - **Logs** exported via OTLP with structured context (model, duration, token counts)
43
+ - **Dynatrace-ready** — correct temporality (DELTA for counters/histograms, CUMULATIVE for UpDownCounters)
44
+
45
+ ## Supported Providers
46
+
47
+ | Provider | Type | Config name |
48
+ |----------|------|-------------|
49
+ | Ollama | Local | `ollama` |
50
+ | OpenAI | Cloud | `openai` |
51
+ | Anthropic | Cloud | `anthropic` |
52
+ | vLLM | Local | `vllm` |
53
+ | llama.cpp | Local | `llamacpp` |
54
+ | LM Studio | Local | `lmstudio` |
55
+ | Groq | Cloud | `groq` |
56
+ | Together | Cloud | `together` |
57
+ | Fireworks | Cloud | `fireworks` |
58
+ | Azure OpenAI | Cloud | `azure_openai` |
59
+ | LiteLLM | Proxy | `litellm` |
60
+
61
+ ## Quick Start
62
+
63
+ ```python
64
+ from llm_otel_kit import AppConfig, GenAIMetrics, init_observability, create_provider
65
+
66
+ config = AppConfig.from_env()
67
+ otel = init_observability(config.app_name, config.otlp_endpoint, config.otlp_token)
68
+ provider = create_provider(config.provider)
69
+ m = GenAIMetrics(otel.meter)
70
+
71
+ # Use provider.complete() / provider.stream() for instrumented LLM calls
72
+ ```
73
+
74
+ ### Environment Variables
75
+
76
+ | Variable | Default | Description |
77
+ |----------|---------|-------------|
78
+ | `LLM_PROVIDER` | `ollama` | Provider name (see table above) |
79
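+ | `LLM_BASE_URL` | provider-dependent: Ollama → `OLLAMA_BASE_URL` or `http://host.docker.internal:11434`; OpenAI → `https://api.openai.com`; Anthropic → `https://api.anthropic.com`; others → (empty) | Provider API base URL |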
80
+ | `LLM_API_KEY` | (empty) | API key for cloud providers |
81
+ | `DEFAULT_MODEL` | (empty) | Fallback model name |
82
+ | `APP_NAME` | `llm-backend` | OTel service name |
83
+ | `TRACELOOP_BASE_URL` | (empty) | OTLP base URL (`/v1/metrics` and `/v1/logs` are appended) |
84
+ | `DT_OTLP_TOKEN` | (empty) | Dynatrace API token |
85
+
86
+ ## Install
87
+
88
+ ```bash
89
+ pip install llm-otel-kit
90
+ ```
91
+
92
+ For Anthropic support:
93
+
94
+ ```bash
95
+ pip install llm-otel-kit[anthropic]
96
+ ```
97
+
98
+ ## License
99
+
100
+ MIT
@@ -0,0 +1,69 @@
1
+ # llm-otel-kit
2
+
3
+ Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud.
4
+
5
+ ## What it does
6
+
7
+ `llm-otel-kit` gives you **full OTel GenAI semantic convention coverage** for any LLM provider in ~10 lines of code:
8
+
9
+ - **Traces** with `gen_ai.*` span attributes (model, tokens, latency, streaming mode)
10
+ - **Metrics** — 10 instruments: operation duration, token usage, TTFT, TPOT, throughput, error rate, active requests
11
+ - **Logs** exported via OTLP with structured context (model, duration, token counts)
12
+ - **Dynatrace-ready** — correct temporality (DELTA for counters/histograms, CUMULATIVE for UpDownCounters)
13
+
14
+ ## Supported Providers
15
+
16
+ | Provider | Type | Config name |
17
+ |----------|------|-------------|
18
+ | Ollama | Local | `ollama` |
19
+ | OpenAI | Cloud | `openai` |
20
+ | Anthropic | Cloud | `anthropic` |
21
+ | vLLM | Local | `vllm` |
22
+ | llama.cpp | Local | `llamacpp` |
23
+ | LM Studio | Local | `lmstudio` |
24
+ | Groq | Cloud | `groq` |
25
+ | Together | Cloud | `together` |
26
+ | Fireworks | Cloud | `fireworks` |
27
+ | Azure OpenAI | Cloud | `azure_openai` |
28
+ | LiteLLM | Proxy | `litellm` |
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ from llm_otel_kit import AppConfig, GenAIMetrics, init_observability, create_provider
34
+
35
+ config = AppConfig.from_env()
36
+ otel = init_observability(config.app_name, config.otlp_endpoint, config.otlp_token)
37
+ provider = create_provider(config.provider)
38
+ m = GenAIMetrics(otel.meter)
39
+
40
+ # Use provider.complete() / provider.stream() for instrumented LLM calls
41
+ ```
42
+
43
+ ### Environment Variables
44
+
45
+ | Variable | Default | Description |
46
+ |----------|---------|-------------|
47
+ | `LLM_PROVIDER` | `ollama` | Provider name (see table above) |
48
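+ | `LLM_BASE_URL` | provider-dependent: Ollama → `OLLAMA_BASE_URL` or `http://host.docker.internal:11434`; OpenAI → `https://api.openai.com`; Anthropic → `https://api.anthropic.com`; others → (empty) | Provider API base URL |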
49
+ | `LLM_API_KEY` | (empty) | API key for cloud providers |
50
+ | `DEFAULT_MODEL` | (empty) | Fallback model name |
51
+ | `APP_NAME` | `llm-backend` | OTel service name |
52
+ | `TRACELOOP_BASE_URL` | (empty) | OTLP base URL (`/v1/metrics` and `/v1/logs` are appended) |
53
+ | `DT_OTLP_TOKEN` | (empty) | Dynatrace API token |
54
+
55
+ ## Install
56
+
57
+ ```bash
58
+ pip install llm-otel-kit
59
+ ```
60
+
61
+ For Anthropic support:
62
+
63
+ ```bash
64
+ pip install llm-otel-kit[anthropic]
65
+ ```
66
+
67
+ ## License
68
+
69
+ MIT
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "llm-otel-kit"
7
+ version = "0.1.0"
8
+ description = "Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.11"
12
+ authors = [{ name = "Vishruth Harithsa" }]
13
+ keywords = ["opentelemetry", "llm", "observability", "genai", "dynatrace", "ollama", "openai"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Framework :: FastAPI",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Topic :: System :: Monitoring",
22
+ ]
23
+ dependencies = [
24
+ "httpx>=0.27",
25
+ "opentelemetry-api>=1.25",
26
+ "opentelemetry-sdk>=1.25",
27
+ "opentelemetry-exporter-otlp-proto-http>=1.25",
28
+ "traceloop-sdk>=0.59",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ anthropic = ["anthropic>=0.25"]
33
+ dev = ["pytest", "pytest-asyncio", "ruff"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry"
37
+ Issues = "https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry/issues"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/llm_otel_kit"]
@@ -0,0 +1,27 @@
1
+ """llm-otel-kit — Drop-in OTel GenAI observability for any LLM backend."""
2
+
3
+ from llm_otel_kit.bootstrap import OTelComponents, init_observability
4
+ from llm_otel_kit.config import AppConfig, ProviderConfig
5
+ from llm_otel_kit.metrics import GenAIMetrics
6
+ from llm_otel_kit.providers import create_provider
7
+ from llm_otel_kit.spans import (
8
+ classify_request,
9
+ detect_provider,
10
+ record_metrics,
11
+ set_genai_response,
12
+ set_genai_span,
13
+ )
14
+
15
+ __all__ = [
16
+ "AppConfig",
17
+ "GenAIMetrics",
18
+ "OTelComponents",
19
+ "ProviderConfig",
20
+ "classify_request",
21
+ "create_provider",
22
+ "detect_provider",
23
+ "init_observability",
24
+ "record_metrics",
25
+ "set_genai_response",
26
+ "set_genai_span",
27
+ ]
@@ -0,0 +1,137 @@
1
+ """
2
+ Dynatrace-compatible OpenTelemetry bootstrap for GenAI applications.
3
+
4
+ Handles the critical init order: MeterProvider → Logs → Traceloop.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from typing import NamedTuple
10
+
11
+ from opentelemetry import metrics, trace
12
+ from opentelemetry._logs import set_logger_provider
13
+ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
14
+ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
15
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
16
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
17
+ from opentelemetry.sdk.metrics import (
18
+ Counter,
19
+ Histogram,
20
+ MeterProvider,
21
+ UpDownCounter,
22
+ )
23
+ from opentelemetry.sdk.metrics.export import (
24
+ AggregationTemporality,
25
+ PeriodicExportingMetricReader,
26
+ )
27
+ from opentelemetry.sdk.metrics.view import (
28
+ ExplicitBucketHistogramAggregation,
29
+ View,
30
+ )
31
+ from traceloop.sdk import Traceloop
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # OTel GenAI semantic-convention histogram bucket boundaries
35
+ # ---------------------------------------------------------------------------
36
+ DURATION_BUCKETS = [
37
+ 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56,
38
+ 5.12, 10.24, 20.48, 40.96, 81.92,
39
+ ]
40
+ TOKEN_BUCKETS = [
41
+ 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144,
42
+ 1048576, 4194304, 16777216, 67108864,
43
+ ]
44
+ TTFT_BUCKETS = [
45
+ 0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25,
46
+ 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0,
47
+ ]
48
+ TPOT_BUCKETS = [
49
+ 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5,
50
+ 0.75, 1.0, 2.5,
51
+ ]
52
+
53
+
54
class OTelComponents(NamedTuple):
    """Bundle of telemetry handles produced by ``init_observability()``.

    Being a ``NamedTuple`` it can be unpacked positionally
    (``meter, tracer, logger = ...``) or accessed by field name.
    """

    # Meter used to create metric instruments.
    meter: metrics.Meter
    # Tracer used to start spans.
    tracer: trace.Tracer
    # Standard-library logger named after the application.
    logger: logging.Logger
59
+
60
+
61
def _init_metrics(app_name: str, otlp_endpoint: str, otlp_token: str) -> metrics.Meter:
    """Install a global ``MeterProvider`` that exports over OTLP/HTTP.

    Args:
        app_name: Name handed to ``metrics.get_meter``.
        otlp_endpoint: OTLP base URL; ``/v1/metrics`` is appended. A trailing
            slash is tolerated.
        otlp_token: Optional token. When non-empty it is sent as
            ``Authorization: Api-Token <token>``.

    Returns:
        A meter named *app_name* with version ``"1.0.0"``.
    """
    headers: dict[str, str] = {}
    if otlp_token:
        headers["Authorization"] = f"Api-Token {otlp_token}"

    # Strip a trailing slash so "https://host/otlp/" does not turn into
    # "https://host/otlp//v1/metrics".
    base_url = otlp_endpoint.rstrip("/")

    exporter = OTLPMetricExporter(
        endpoint=f"{base_url}/v1/metrics",
        headers=headers,
        # DELTA for counters/histograms, CUMULATIVE for up-down counters.
        preferred_temporality={
            Counter: AggregationTemporality.DELTA,
            Histogram: AggregationTemporality.DELTA,
            UpDownCounter: AggregationTemporality.CUMULATIVE,
        },
    )

    # Pair each GenAI histogram with its explicit bucket boundaries.
    bucket_views = [
        View(
            instrument_name=instrument_name,
            aggregation=ExplicitBucketHistogramAggregation(boundaries=boundaries),
        )
        for instrument_name, boundaries in (
            ("gen_ai.client.operation.duration", DURATION_BUCKETS),
            ("gen_ai.client.token.usage", TOKEN_BUCKETS),
            ("gen_ai.server.time_to_first_token", TTFT_BUCKETS),
            ("gen_ai.server.time_per_output_token", TPOT_BUCKETS),
        )
    ]

    provider = MeterProvider(
        metric_readers=[
            PeriodicExportingMetricReader(exporter, export_interval_millis=30_000),
        ],
        views=bucket_views,
    )
    metrics.set_meter_provider(provider)
    return metrics.get_meter(app_name, "1.0.0")
92
+
93
+
94
def _init_logs(otlp_endpoint: str, otlp_token: str) -> None:
    """Route stdlib logging records to an OTLP/HTTP log exporter.

    Installs a global ``LoggerProvider``, attaches a ``LoggingHandler`` to the
    root logger and sets the root logger level to ``INFO``.

    Args:
        otlp_endpoint: OTLP base URL; ``/v1/logs`` is appended. A trailing
            slash is tolerated.
        otlp_token: Optional token. When non-empty it is sent as
            ``Authorization: Api-Token <token>``.
    """
    headers: dict[str, str] = {}
    if otlp_token:
        headers["Authorization"] = f"Api-Token {otlp_token}"

    # Strip a trailing slash so the joined URL never contains "//v1/logs".
    base_url = otlp_endpoint.rstrip("/")

    exporter = OTLPLogExporter(
        endpoint=f"{base_url}/v1/logs",
        headers=headers,
    )
    provider = LoggerProvider()
    provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
    set_logger_provider(provider)

    # Every record at INFO or above that reaches the root logger is exported.
    root_logger = logging.getLogger()
    root_logger.addHandler(LoggingHandler(level=logging.INFO, logger_provider=provider))
    root_logger.setLevel(logging.INFO)
109
+
110
+
111
def init_observability(
    app_name: str,
    otlp_endpoint: str = "",
    otlp_token: str = "",
) -> OTelComponents:
    """Bootstrap metrics, logs and tracing in one call and return the handles.

    Initialisation order is metrics, then logs, then Traceloop.

    Args:
        app_name: Name used for the meter, the tracer, the logger and
            ``Traceloop.init``.
        otlp_endpoint: OTLP base URL. When empty, the ``TRACELOOP_BASE_URL``
            environment variable is used instead.
        otlp_token: OTLP auth token. When empty, the ``DT_OTLP_TOKEN``
            environment variable is used instead.

    Returns:
        An ``OTelComponents`` tuple of ``(meter, tracer, logger)``.
    """
    endpoint = otlp_endpoint if otlp_endpoint else os.getenv("TRACELOOP_BASE_URL", "")
    token = otlp_token if otlp_token else os.getenv("DT_OTLP_TOKEN", "")

    if not endpoint:
        # No endpoint: no metric or log exporter is configured here; the meter
        # comes from whichever meter provider is currently global.
        meter = metrics.get_meter(app_name, "1.0.0")
    else:
        meter = _init_metrics(app_name, endpoint, token)
        _init_logs(endpoint, token)

    # Traceloop MUST be initialised after the MeterProvider to avoid conflicts.
    Traceloop.init(app_name=app_name, disable_batch=False)

    return OTelComponents(
        meter=meter,
        tracer=trace.get_tracer(app_name, "1.0.0"),
        logger=logging.getLogger(app_name),
    )
@@ -0,0 +1,80 @@
1
+ """Configuration models for llm-otel-kit."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+
8
+
9
@dataclass
class ProviderConfig:
    """LLM provider connection settings.

    Attributes:
        name: Provider identifier as accepted by ``create_provider`` —
            ``"ollama"``, ``"anthropic"``, ``"openai"``, or one of the
            OpenAI-compatible aliases ``"vllm"``, ``"llamacpp"``,
            ``"lmstudio"``, ``"groq"``, ``"together"``, ``"fireworks"``,
            ``"azure_openai"``, ``"litellm"``.
        base_url: API base URL (e.g. ``http://localhost:11434``,
            ``https://api.openai.com``).
        api_key: API key for cloud providers. Leave empty for local providers.
        default_model: Fallback model when the request doesn't specify one.
    """

    name: str = "ollama"
    base_url: str = "http://localhost:11434"
    api_key: str = ""
    default_model: str = ""


@dataclass
class AppConfig:
    """Full application configuration — provider + observability.

    Attributes:
        app_name: OTel service name.
        provider: LLM provider settings.
        otlp_endpoint: OTLP base URL (e.g. Dynatrace OTLP endpoint).
        otlp_token: Auth token for the OTLP exporter.
    """

    app_name: str = "llm-backend"
    provider: ProviderConfig = field(default_factory=ProviderConfig)
    otlp_endpoint: str = ""
    otlp_token: str = ""

    @classmethod
    def from_env(cls) -> AppConfig:
        """Build config from environment variables.

        A variable that is set but empty (e.g. ``LLM_PROVIDER=`` in an env
        file) is treated the same as an unset one.

        Env vars:
            ``APP_NAME``            — OTel service name (default: ``llm-backend``)
            ``LLM_PROVIDER``        — provider name (default: ``ollama``);
                                      surrounding whitespace and case are ignored
            ``LLM_BASE_URL``        — Provider API base URL
            ``LLM_API_KEY``         — API key for cloud providers
            ``DEFAULT_MODEL``       — Fallback model name
            ``TRACELOOP_BASE_URL``  — OTLP endpoint
            ``DT_OTLP_TOKEN``       — Dynatrace API token

        When ``LLM_BASE_URL`` is empty the base URL falls back to:
            ``ollama``    — legacy ``OLLAMA_BASE_URL``, else
                            ``http://host.docker.internal:11434``
            ``openai``    — ``https://api.openai.com``
            ``anthropic`` — ``https://api.anthropic.com``
            any other provider — empty string (``LLM_BASE_URL`` must be set)

        Returns:
            A fully populated ``AppConfig``.
        """
        # ``or`` (rather than a getenv default) also covers the "set but
        # empty" case, which would otherwise yield an unknown provider ''.
        provider_name = os.getenv("LLM_PROVIDER", "").strip().lower() or "ollama"

        # Resolve base URL with legacy fallback.
        base_url = os.getenv("LLM_BASE_URL", "")
        if not base_url:
            if provider_name == "ollama":
                base_url = (
                    os.getenv("OLLAMA_BASE_URL")
                    or "http://host.docker.internal:11434"
                )
            else:
                base_url = {
                    "openai": "https://api.openai.com",
                    "anthropic": "https://api.anthropic.com",
                }.get(provider_name, "")

        return cls(
            app_name=os.getenv("APP_NAME") or "llm-backend",
            provider=ProviderConfig(
                name=provider_name,
                base_url=base_url,
                api_key=os.getenv("LLM_API_KEY", ""),
                default_model=os.getenv("DEFAULT_MODEL", ""),
            ),
            otlp_endpoint=os.getenv("TRACELOOP_BASE_URL", ""),
            otlp_token=os.getenv("DT_OTLP_TOKEN", ""),
        )
@@ -0,0 +1,58 @@
1
+ """GenAI semantic-convention + operational metric instruments."""
2
+
3
+ from dataclasses import dataclass, field
4
+
5
+ from opentelemetry.metrics import Counter, Histogram, Meter, UpDownCounter
6
+
7
+
8
@dataclass
class GenAIMetrics:
    """Pre-created OTel metric instruments for LLM observability.

    All ten instruments are created once, in ``__post_init__``, from the
    meter passed to the constructor.

    Usage::

        from llm_otel_kit import GenAIMetrics, init_observability

        otel = init_observability("my-app")
        m = GenAIMetrics(otel.meter)
        m.request_count.add(1, {"model": "gpt-4o"})
    """

    # Meter the instruments are created from (first positional argument).
    _meter: Meter = field(repr=False)

    # GenAI semconv
    operation_duration: Histogram = field(init=False)
    token_usage: Histogram = field(init=False)
    ttft: Histogram = field(init=False)
    tpot: Histogram = field(init=False)

    # Operational
    request_count: Counter = field(init=False)
    error_count: Counter = field(init=False)
    active_requests: UpDownCounter = field(init=False)
    stream_chunks: Counter = field(init=False)
    token_throughput: Histogram = field(init=False)
    message_count: Histogram = field(init=False)

    def __post_init__(self) -> None:
        """Create every instrument on the supplied meter."""
        meter = self._meter
        # ``unit`` and ``description`` are passed by keyword: the Meter API's
        # positional order is (name, unit, description), so passing
        # (name, description, unit) positionally swaps the two.
        self.operation_duration = meter.create_histogram(
            "gen_ai.client.operation.duration",
            unit="s", description="GenAI operation duration")
        self.token_usage = meter.create_histogram(
            "gen_ai.client.token.usage",
            unit="{token}", description="Input and output token counts")
        self.ttft = meter.create_histogram(
            "gen_ai.server.time_to_first_token",
            unit="s", description="Time to first token")
        self.tpot = meter.create_histogram(
            "gen_ai.server.time_per_output_token",
            unit="s", description="Time per output token")
        self.request_count = meter.create_counter(
            "llm.request.count",
            unit="1", description="Total LLM requests")
        self.error_count = meter.create_counter(
            "llm.request.errors",
            unit="1", description="Failed LLM requests")
        self.active_requests = meter.create_up_down_counter(
            "llm.request.active",
            unit="1", description="In-flight LLM requests")
        self.stream_chunks = meter.create_counter(
            "llm.stream.chunks",
            unit="1", description="Streaming chunks sent")
        self.token_throughput = meter.create_histogram(
            "llm.token.throughput",
            unit="{token}/s", description="Output token throughput")
        self.message_count = meter.create_histogram(
            "llm.request.message_count",
            unit="1", description="Messages in prompt")
@@ -0,0 +1,36 @@
1
+ """Provider registry — factory for LLM backend providers."""
2
+
3
+ from llm_otel_kit.config import ProviderConfig
4
+ from llm_otel_kit.providers.base import LLMProvider
5
+
6
+
7
def create_provider(config: ProviderConfig) -> LLMProvider:
    """Build the provider implementation selected by ``config.name``.

    The name is matched case-insensitively. Provider modules are imported
    only when their branch is taken.

    Supported providers:
        - ``ollama`` — Ollama native API (``/api/chat``)
        - ``openai`` and the aliases ``vllm``, ``llamacpp``, ``lmstudio``,
          ``groq``, ``together``, ``fireworks``, ``azure_openai``,
          ``litellm`` — OpenAI-compatible API
        - ``anthropic`` — Anthropic Messages API (``/v1/messages``)

    Raises:
        ValueError: If ``config.name`` is not one of the names above.
    """
    openai_compatible = (
        "openai", "vllm", "llamacpp", "lmstudio", "groq", "together",
        "fireworks", "azure_openai", "litellm",
    )
    key = config.name.lower()

    if key == "ollama":
        from llm_otel_kit.providers.ollama import OllamaProvider
        return OllamaProvider(config)

    if key in openai_compatible:
        from llm_otel_kit.providers.openai_compat import OpenAICompatProvider
        return OpenAICompatProvider(config)

    if key == "anthropic":
        from llm_otel_kit.providers.anthropic import AnthropicProvider
        return AnthropicProvider(config)

    # The error echoes the name exactly as configured (not lower-cased).
    message = (
        f"Unknown provider '{config.name}'. "
        "Supported: ollama, openai, anthropic, vllm, llamacpp, lmstudio, "
        "groq, together, fireworks, azure_openai, litellm"
    )
    raise ValueError(message)
34
+
35
+
36
+ __all__ = ["LLMProvider", "create_provider"]
@@ -0,0 +1,142 @@
1
+ """Anthropic Messages API provider (``/v1/messages``)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from typing import AsyncIterator
8
+
9
+ import httpx
10
+
11
+ from llm_otel_kit.providers.base import (
12
+ CompletionResult,
13
+ LLMProvider,
14
+ StreamChunk,
15
+ TimingInfo,
16
+ )
17
+
18
+
19
class AnthropicProvider(LLMProvider):
    """Provider for the Anthropic Claude API (``POST {base_url}/v1/messages``)."""

    # Used when the caller gives no ``max_tokens`` (missing or ``None``).
    _DEFAULT_MAX_TOKENS = 4096
    # Reported when the API response carries no (or a null) ``stop_reason``.
    _DEFAULT_STOP_REASON = "end_turn"

    @property
    def system_name(self) -> str:
        """OTel ``gen_ai.system`` value for this provider."""
        return "anthropic"

    def _headers(self) -> dict[str, str]:
        """Return the HTTP headers sent with every request."""
        return {
            "Content-Type": "application/json",
            "x-api-key": self.config.api_key,
            "anthropic-version": "2023-06-01",
        }

    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Translate an OpenAI-style request into an Anthropic payload.

        Args:
            model: Model identifier.
            messages: Chat messages with ``role`` and ``content`` keys.
                ``system`` messages are lifted out of the list and joined
                with newlines into the top-level ``system`` field.
            stream: Whether a streaming response is requested.
            **kwargs: Optional ``max_tokens``, ``temperature`` and ``top_p``.
                A value of ``None`` is treated as "not supplied".

        Returns:
            The JSON-serialisable request body.
        """
        # Anthropic separates system message from the messages array.
        system_parts: list[str] = []
        chat_messages: list[dict] = []
        for msg in messages:
            if msg["role"] == "system":
                system_parts.append(msg["content"])
            else:
                chat_messages.append({"role": msg["role"], "content": msg["content"]})

        # An explicit ``max_tokens=None`` must not be forwarded as JSON null;
        # fall back to the default just as for a missing argument.
        max_tokens = kwargs.get("max_tokens")
        if max_tokens is None:
            max_tokens = self._DEFAULT_MAX_TOKENS

        payload: dict = {
            "model": model,
            "messages": chat_messages,
            "max_tokens": max_tokens,
            "stream": stream,
        }
        if system_parts:
            payload["system"] = "\n".join(system_parts)
        if kwargs.get("temperature") is not None:
            payload["temperature"] = kwargs["temperature"]
        if kwargs.get("top_p") is not None:
            payload["top_p"] = kwargs["top_p"]
        return payload

    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """Run a non-streaming completion.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response
                (via ``raise_for_status``).
        """
        response = await client.post(
            f"{self.base_url}/v1/messages",
            json=payload,
            headers=self._headers(),
        )
        response.raise_for_status()
        data = response.json()

        # Only ``text`` content blocks contribute to the returned string.
        content_blocks = data.get("content", [])
        text = "".join(b.get("text", "") for b in content_blocks if b.get("type") == "text")
        usage = data.get("usage", {})

        return CompletionResult(
            content=text,
            model=data.get("model", payload.get("model", "")),
            prompt_tokens=usage.get("input_tokens", 0),
            completion_tokens=usage.get("output_tokens", 0),
            # ``or`` also covers a present-but-null stop_reason.
            finish_reason=data.get("stop_reason") or self._DEFAULT_STOP_REASON,
            response_id=data.get("id", ""),
            timing=TimingInfo(),
        )

    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Run a streaming completion, yielding one chunk per relevant event.

        Only lines starting with ``data: `` are parsed; lines that are not
        valid JSON are skipped. ``content_block_delta`` events yield content
        chunks; a ``message_delta`` event yields the final ``done`` chunk
        carrying token counts and the finish reason.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response
                (via ``raise_for_status``).
        """
        prompt_tokens = 0
        completion_tokens = 0

        async with client.stream(
            "POST",
            f"{self.base_url}/v1/messages",
            json=payload,
            headers=self._headers(),
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                try:
                    event = json.loads(line[6:])  # drop the "data: " prefix
                except json.JSONDecodeError:
                    continue

                event_type = event.get("type", "")

                if event_type == "message_start":
                    # Input token count arrives up front; keep it for the
                    # final chunk.
                    usage = event.get("message", {}).get("usage", {})
                    prompt_tokens = usage.get("input_tokens", 0)

                elif event_type == "content_block_delta":
                    delta = event.get("delta", {})
                    yield StreamChunk(content=delta.get("text", ""))

                elif event_type == "message_delta":
                    usage = event.get("usage", {})
                    completion_tokens = usage.get("output_tokens", 0)
                    stop_reason = (
                        event.get("delta", {}).get("stop_reason")
                        or self._DEFAULT_STOP_REASON
                    )
                    yield StreamChunk(
                        done=True,
                        prompt_tokens=prompt_tokens,
                        completion_tokens=completion_tokens,
                        finish_reason=stop_reason,
                        timing=TimingInfo(),
                    )

    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return a static model list in OpenAI list format.

        No request is made; *client* is unused. ``created`` is set to the
        current time on every call.
        """
        models = ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022", "claude-3-opus-20240229"]
        return [
            {"id": m, "object": "model",
             "created": int(time.time()), "owned_by": "anthropic"}
            for m in models
        ]
@@ -0,0 +1,88 @@
1
+ """Abstract base class for LLM providers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass, field
7
+ from typing import AsyncIterator
8
+ from urllib.parse import urlparse
9
+
10
+ import httpx
11
+
12
+ from llm_otel_kit.config import ProviderConfig
13
+
14
+
15
@dataclass
class TimingInfo:
    """Latency figures extracted from a provider response.

    Both values default to ``None``, meaning "not reported".
    """

    # Time to first token.
    ttft: float | None = None
    # Time per output token.
    tpot: float | None = None


@dataclass
class CompletionResult:
    """Provider-independent outcome of a non-streaming chat completion."""

    # Generated text.
    content: str = ""
    # Model identifier.
    model: str = ""
    # Token counts for the prompt and the generated completion.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    # Why generation ended; defaults to "stop".
    finish_reason: str = "stop"
    # Provider-assigned response identifier; empty when none is available.
    response_id: str = ""
    # A fresh TimingInfo per instance.
    timing: TimingInfo = field(default_factory=TimingInfo)


@dataclass
class StreamChunk:
    """A single piece of a streaming completion."""

    # Text carried by this chunk.
    content: str = ""
    # True on the final chunk of a stream.
    done: bool = False
    # Token counts; providers in this package fill them on the final chunk.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    # Why generation ended; empty by default.
    finish_reason: str = ""
    # A fresh TimingInfo per instance.
    timing: TimingInfo = field(default_factory=TimingInfo)
43
+
44
+
45
class LLMProvider(ABC):
    """Contract every LLM backend implements.

    Instance attributes set by ``__init__``:
        config:   the ``ProviderConfig`` passed in.
        base_url: ``config.base_url`` without trailing slashes.
        host:     hostname parsed from ``base_url`` (``"localhost"`` if none).
        port:     port parsed from ``base_url``; otherwise 443 for ``https``
                  and 80 for everything else.
    """

    def __init__(self, config: ProviderConfig) -> None:
        """Store *config* and derive ``base_url``, ``host`` and ``port``."""
        self.config = config
        self.base_url = config.base_url.rstrip("/")

        url = urlparse(self.base_url)
        self.host = url.hostname if url.hostname else "localhost"
        if url.port:
            self.port = url.port
        elif url.scheme == "https":
            self.port = 443
        else:
            self.port = 80

    @property
    @abstractmethod
    def system_name(self) -> str:
        """OTel ``gen_ai.system`` value (e.g. ``"ollama"``, ``"openai"``)."""

    @abstractmethod
    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Translate an OpenAI-format request into the provider-native payload."""

    @abstractmethod
    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """Perform a non-streaming chat completion."""

    @abstractmethod
    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Perform a streaming chat completion — yields ``StreamChunk``s."""

    @abstractmethod
    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return models in OpenAI list format ``[{"id": ..., "object": "model", ...}]``."""
@@ -0,0 +1,113 @@
1
+ """Ollama native API provider (``/api/chat``)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from typing import AsyncIterator
8
+
9
+ import httpx
10
+
11
+ from llm_otel_kit.providers.base import (
12
+ CompletionResult,
13
+ LLMProvider,
14
+ StreamChunk,
15
+ TimingInfo,
16
+ )
17
+
18
+
19
class OllamaProvider(LLMProvider):
    """Provider for Ollama running locally or on a remote host.

    Chat requests go to ``{base_url}/api/chat`` and model listing to
    ``{base_url}/api/tags``. No auth headers are sent.
    """

    @property
    def system_name(self) -> str:
        """OTel ``gen_ai.system`` value for this provider."""
        return "ollama"

    @staticmethod
    def _timing(data: dict, completion_tokens: int) -> TimingInfo:
        """Derive TTFT/TPOT in seconds from Ollama's nanosecond durations.

        TTFT is ``prompt_eval_duration``. TPOT is ``eval_duration`` divided
        by ``completion_tokens - 1``, so it needs at least two output tokens.
        A missing or zero duration leaves the matching value as ``None``.
        """
        prompt_eval_ns = data.get("prompt_eval_duration", 0)
        eval_ns = data.get("eval_duration", 0)

        ttft = (prompt_eval_ns / 1e9) if prompt_eval_ns > 0 else None
        tpot = None
        if eval_ns > 0 and completion_tokens > 1:
            tpot = (eval_ns / 1e9) / (completion_tokens - 1)
        return TimingInfo(ttft=ttft, tpot=tpot)

    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Translate an OpenAI-style request into an Ollama ``/api/chat`` payload.

        ``temperature`` and ``top_p`` are copied into ``options`` when not
        ``None``; ``max_tokens`` is copied as ``options.num_predict``.
        ``options`` is always present, possibly empty.
        """
        options: dict = {}
        if kwargs.get("temperature") is not None:
            options["temperature"] = kwargs["temperature"]
        if kwargs.get("top_p") is not None:
            options["top_p"] = kwargs["top_p"]
        if kwargs.get("max_tokens") is not None:
            options["num_predict"] = kwargs["max_tokens"]

        return {
            "model": model,
            "messages": messages,
            "stream": stream,
            "options": options,
        }

    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """Run a non-streaming chat completion.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response
                (via ``raise_for_status``).
        """
        response = await client.post(f"{self.base_url}/api/chat", json=payload)
        response.raise_for_status()
        data = response.json()

        completion_tokens = data.get("eval_count", 0)

        return CompletionResult(
            content=data.get("message", {}).get("content", ""),
            model=data.get("model", payload.get("model", "")),
            prompt_tokens=data.get("prompt_eval_count", 0),
            completion_tokens=completion_tokens,
            # Pass through Ollama's own reason (e.g. "length") when reported;
            # "stop" is the fallback when the field is absent.
            finish_reason=data.get("done_reason") or "stop",
            timing=self._timing(data, completion_tokens),
        )

    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Run a streaming chat completion, yielding one chunk per JSON line.

        Empty lines and lines that are not valid JSON are skipped. The chunk
        with ``done`` set also carries token counts, finish reason and timing.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response
                (via ``raise_for_status``).
        """
        async with client.stream("POST", f"{self.base_url}/api/chat", json=payload) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if not line:
                    continue
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue

                sc = StreamChunk(
                    content=chunk.get("message", {}).get("content", ""),
                    done=chunk.get("done", False),
                )
                if sc.done:
                    sc.prompt_tokens = chunk.get("prompt_eval_count", 0)
                    sc.completion_tokens = chunk.get("eval_count", 0)
                    sc.finish_reason = chunk.get("done_reason") or "stop"
                    sc.timing = self._timing(chunk, sc.completion_tokens)
                yield sc

    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return the models from ``/api/tags`` in OpenAI list format.

        ``created`` is set to the current time on every call.

        Raises:
            httpx.HTTPStatusError: On a 4xx/5xx response
                (via ``raise_for_status``).
        """
        response = await client.get(f"{self.base_url}/api/tags")
        response.raise_for_status()
        return [
            {"id": m["name"], "object": "model",
             "created": int(time.time()), "owned_by": "ollama"}
            for m in response.json().get("models", [])
        ]
@@ -0,0 +1,137 @@
1
+ """OpenAI-compatible API provider.
2
+
3
+ Works with: OpenAI, Azure OpenAI, vLLM, llama.cpp (server mode),
4
+ LM Studio, Groq, Together.ai, Fireworks.ai, LiteLLM, LocalAI, etc.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import time
11
+ from typing import AsyncIterator
12
+
13
+ import httpx
14
+
15
+ from llm_otel_kit.providers.base import (
16
+ CompletionResult,
17
+ LLMProvider,
18
+ StreamChunk,
19
+ TimingInfo,
20
+ )
21
+
22
+
23
class OpenAICompatProvider(LLMProvider):
    """Provider for any OpenAI-compatible API endpoint."""

    @property
    def system_name(self) -> str:
        """Return the configured provider name."""
        return self.config.name

    def _headers(self) -> dict[str, str]:
        """Build request headers, adding a bearer token when an API key is configured."""
        h: dict[str, str] = {"Content-Type": "application/json"}
        if self.config.api_key:
            h["Authorization"] = f"Bearer {self.config.api_key}"
        return h

    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Assemble the ``/v1/chat/completions`` request body.

        Args:
            model: Model identifier to request.
            messages: OpenAI-format message list.
            stream: Whether a streaming response is requested.
            **kwargs: Optional ``temperature``, ``top_p`` and ``max_tokens``;
                each is forwarded only when it is not ``None``.

        Returns:
            The request body as a dict.
        """
        payload: dict = {
            "model": model,
            "messages": messages,
            "stream": stream,
        }
        for option in ("temperature", "top_p", "max_tokens"):
            value = kwargs.get(option)
            if value is not None:
                payload[option] = value
        if stream:
            # Ask the server to append a usage chunk so token counts are
            # available for streamed responses as well.
            payload["stream_options"] = {"include_usage": True}
        return payload

    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """Run a non-streaming chat completion.

        Args:
            client: HTTP client used to issue the POST.
            payload: Request body, typically produced by ``build_payload``.

        Returns:
            The first choice's content together with usage counts. Missing or
            ``null`` fields fall back to empty/zero values.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        response = await client.post(
            f"{self.base_url}/v1/chat/completions",
            json=payload,
            headers=self._headers(),
        )
        response.raise_for_status()
        data = response.json()

        # ``or`` (rather than a ``.get`` default) also covers explicit JSON
        # nulls and an empty ``choices`` list, both of which would otherwise
        # raise on the lookups below.
        choices = data.get("choices") or [{}]
        choice = choices[0] or {}
        message = choice.get("message") or {}
        usage = data.get("usage") or {}

        return CompletionResult(
            # ``content`` is null for e.g. tool-call responses.
            content=message.get("content") or "",
            model=data.get("model") or payload.get("model", ""),
            prompt_tokens=usage.get("prompt_tokens") or 0,
            completion_tokens=usage.get("completion_tokens") or 0,
            finish_reason=choice.get("finish_reason") or "stop",
            response_id=data.get("id") or "",
            timing=TimingInfo(),  # OpenAI API doesn't expose server-side timing
        )

    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Stream a chat completion as server-sent events.

        Args:
            client: HTTP client used to issue the streaming POST.
            payload: Request body, typically produced by ``build_payload``.

        Yields:
            One ``StreamChunk`` per ``data:`` event. ``done`` is set on the
            chunk that carries a finish reason. The usage-only chunk sent when
            ``stream_options.include_usage`` is enabled is yielded with empty
            content, ``done=False`` and the token counts filled in.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        async with client.stream(
            "POST",
            f"{self.base_url}/v1/chat/completions",
            json=payload,
            headers=self._headers(),
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                # The space after "data:" is optional in the SSE format.
                if not line.startswith("data:"):
                    continue
                data_str = line[5:].strip()
                if data_str == "[DONE]":
                    return
                try:
                    chunk = json.loads(data_str)
                except json.JSONDecodeError:
                    # Best effort: one malformed event must not abort the stream.
                    continue
                if not isinstance(chunk, dict):
                    continue

                # The trailing usage chunk has ``"choices": []``; indexing it
                # directly would raise IndexError and lose the token counts.
                choices = chunk.get("choices") or [{}]
                choice = choices[0] or {}
                delta = choice.get("delta") or {}
                finish_reason = choice.get("finish_reason")

                # Usage arrives in the final chunk when stream_options.include_usage is set
                usage = chunk.get("usage") or {}

                yield StreamChunk(
                    # ``content`` may be null (role-only or tool-call deltas).
                    content=delta.get("content") or "",
                    done=finish_reason is not None,
                    finish_reason=finish_reason or "",
                    prompt_tokens=usage.get("prompt_tokens") or 0,
                    completion_tokens=usage.get("completion_tokens") or 0,
                    timing=TimingInfo(),
                )

    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return the models listed by ``/v1/models``.

        ``created`` defaults to the time of this call and ``owned_by`` to
        ``system_name`` when the server omits them.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        response = await client.get(
            f"{self.base_url}/v1/models",
            headers=self._headers(),
        )
        response.raise_for_status()
        data = response.json()
        return [
            {
                "id": m["id"],
                "object": "model",
                "created": m.get("created", int(time.time())),
                "owned_by": m.get("owned_by", self.system_name),
            }
            for m in data.get("data") or []
        ]
File without changes
@@ -0,0 +1,185 @@
1
+ """GenAI span attribute helpers and request classification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+
8
+ from opentelemetry.trace import Span
9
+
10
+ from llm_otel_kit.metrics import GenAIMetrics
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # Provider detection from model name
14
+ # ---------------------------------------------------------------------------
15
+ _PROVIDER_PATTERNS: list[tuple[list[str], str]] = [
16
+ (["gpt-", "o1-", "o3-", "o4-", "dall-e", "text-embedding"], "openai"),
17
+ (["claude-"], "anthropic"),
18
+ (["gemini-"], "google"),
19
+ (["copilot-", "github/"], "github.copilot"),
20
+ (["mistral-", "mixtral-", "codestral-"], "mistral"),
21
+ (["command-", "embed-"], "cohere"),
22
+ (["deepseek-"], "deepseek"),
23
+ ]
24
+
25
+ _MAX_CONTENT_LEN = 500
26
+
27
+
28
+ def detect_provider(model: str) -> str:
29
+ """Infer ``gen_ai.system`` from model name prefix."""
30
+ model_lower = model.lower()
31
+ for prefixes, provider in _PROVIDER_PATTERNS:
32
+ if any(model_lower.startswith(p) for p in prefixes):
33
+ return provider
34
+ return "ollama"
35
+
36
+
37
def classify_request(messages: list[dict]) -> str:
    """Classify an OpenAI-format message list into a purpose label.

    The decision is based on keywords in the last message's text. If no
    keyword matches, a list made up solely of system messages is labelled
    ``"System Prompt"``; everything else is ``"User Chat"``.

    Args:
        messages: OpenAI-format messages. ``content`` may be a string, a
            list of content parts, or ``None``.

    Returns:
        One of ``"Title Generation"``, ``"Tag Generation"``,
        ``"Suggestion Generation"``, ``"System Prompt"`` or ``"User Chat"``.
    """
    last_raw = messages[-1].get("content") if messages else ""
    last_content = _content_text(last_raw).lower()

    if "generate a concise" in last_content and "title" in last_content:
        return "Title Generation"
    if ("generate tags" in last_content or "categorize" in last_content
            or "tag the conversation" in last_content):
        return "Tag Generation"
    if "follow-up" in last_content or ("suggest" in last_content and "question" in last_content):
        return "Suggestion Generation"
    if messages and all(msg.get("role") == "system" for msg in messages):
        return "System Prompt"
    return "User Chat"


def _content_text(content: object) -> str:
    """Flatten a message ``content`` value (str, list of parts, or None) to text."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces: list[str] = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                # Non-text parts (images, audio, ...) have no "text" and are skipped.
                pieces.append(part["text"])
        return " ".join(pieces)
    return ""
50
+
51
+
52
def _truncate(text: str) -> str:
    """Cap *text* at ``_MAX_CONTENT_LEN`` characters, appending "..." when cut."""
    if len(text) <= _MAX_CONTENT_LEN:
        return text
    return text[:_MAX_CONTENT_LEN] + "..."
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Span attribute setters (OTel GenAI semconv)
58
+ # ---------------------------------------------------------------------------
59
+
60
def semconv_attrs(model: str, server_host: str, server_port: int) -> dict:
    """Return the attribute dict attached to GenAI metric recordings.

    The operation is always ``"chat"``, the system is inferred from the model
    name via ``detect_provider``, and *model* is used for both the request
    and the response model attribute.
    """
    attributes: dict = {}
    attributes["gen_ai.operation.name"] = "chat"
    attributes["gen_ai.system"] = detect_provider(model)
    attributes["gen_ai.request.model"] = model
    attributes["gen_ai.response.model"] = model
    attributes["server.address"] = server_host
    attributes["server.port"] = server_port
    return attributes
70
+
71
+
72
def set_genai_span(
    span: Span,
    model: str,
    request_type: str,
    stream: bool,
    messages: list[dict],
    server_host: str,
    server_port: int,
    *,
    temperature: float | None = None,
    top_p: float | None = None,
    max_tokens: int | None = None,
    auth_header: str = "",
) -> None:
    """Set all gen_ai.* request attributes and the input span event.

    Args:
        span: Span to rename and annotate.
        model: Requested model; also used to infer the provider.
        request_type: Purpose label, stored as ``llm.request.purpose`` and
            used in the span name.
        stream: Whether the request is streamed.
        messages: OpenAI-format messages. ``content`` may be a string, a list
            of content parts, or ``None``.
        server_host: Value for ``server.address``.
        server_port: Value for ``server.port``.
        temperature: Recorded only when not ``None``.
        top_p: Recorded only when not ``None``.
        max_tokens: Recorded only when not ``None``.
        auth_header: When non-empty, a truncated SHA-256 of it is stored as
            ``enduser.id``; the raw header is never recorded.
    """
    provider = detect_provider(model)

    span.update_name(f"{request_type} · {model}")

    span.set_attribute("gen_ai.system", provider)
    span.set_attribute("gen_ai.provider.name", provider)
    span.set_attribute("gen_ai.operation.name", "chat")
    span.set_attribute("gen_ai.request.model", model)
    span.set_attribute("llm.request.type", "chat")
    span.set_attribute("llm.is_streaming", stream)
    span.set_attribute("llm.request.purpose", request_type)
    span.set_attribute("server.address", server_host)
    span.set_attribute("server.port", server_port)

    if temperature is not None:
        span.set_attribute("gen_ai.request.temperature", temperature)
    if top_p is not None:
        span.set_attribute("gen_ai.request.top_p", top_p)
    if max_tokens is not None:
        span.set_attribute("gen_ai.request.max_tokens", max_tokens)

    # Text of every user message, in order. ``.get`` keeps a message without
    # "role"/"content" from raising, and the helper turns list/None content
    # into a plain string so hashing and truncation below cannot fail.
    user_msgs = [_message_text(msg) for msg in messages if msg.get("role") == "user"]

    if user_msgs:
        last_user_text = user_msgs[-1]

        # Indexed prompt attribute (last user message)
        span.set_attribute("gen_ai.prompt.0.role", "user")
        span.set_attribute("gen_ai.prompt.0.content", last_user_text)

        # Span event
        span.add_event("gen_ai.user.message", {
            "gen_ai.prompt.role": "user",
            "gen_ai.prompt.content": _truncate(last_user_text),
        })

    # Conversation fingerprint: for non-chat requests only the first 200
    # characters of the first user message are hashed; for chats, all user
    # messages joined with "|".
    if request_type != "User Chat" and user_msgs:
        fp_input = user_msgs[0][:200]
    else:
        fp_input = "|".join(user_msgs)
    span.set_attribute("conversation.fingerprint",
                       hashlib.sha256(fp_input.encode()).hexdigest()[:12])

    if auth_header:
        span.set_attribute("enduser.id",
                           hashlib.sha256(auth_header.encode()).hexdigest()[:8])


def _message_text(msg: dict) -> str:
    """Return a message's ``content`` as text (str, list of parts, or None)."""
    content = msg.get("content", "")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces: list[str] = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                # Non-text parts (images, audio, ...) have no "text" and are skipped.
                pieces.append(part["text"])
        return " ".join(pieces)
    return ""
133
+
134
+
135
def set_genai_response(
    span: Span,
    content: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    finish_reason: str = "stop",
    response_id: str = "",
) -> None:
    """Set response attributes and the assistant span event.

    Args:
        span: Span to annotate.
        content: Assistant reply text; ``None`` is treated as an empty string.
        model: Model name reported for the response.
        prompt_tokens: Input token count; recorded under both the
            ``input_tokens`` and ``prompt_tokens`` attribute names.
        completion_tokens: Output token count; recorded under both the
            ``output_tokens`` and ``completion_tokens`` attribute names.
        finish_reason: Reason generation ended.
        response_id: Recorded as ``gen_ai.response.id`` when non-empty.
    """
    # A response without text (e.g. a tool call) can arrive as None; the
    # truncation helper below calls len() on it and would raise.
    if content is None:
        content = ""

    span.set_attribute("gen_ai.completion.0.role", "assistant")
    span.set_attribute("gen_ai.completion.0.content", content)
    span.set_attribute("gen_ai.completion.0.finish_reason", finish_reason)

    span.set_attribute("gen_ai.response.model", model)
    # NOTE(review): stored as a JSON-encoded string, not a string array;
    # kept as-is because existing consumers may depend on this form.
    span.set_attribute("gen_ai.response.finish_reasons", json.dumps([finish_reason]))
    span.set_attribute("gen_ai.usage.input_tokens", prompt_tokens)
    span.set_attribute("gen_ai.usage.output_tokens", completion_tokens)
    span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
    span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
    if response_id:
        span.set_attribute("gen_ai.response.id", response_id)

    span.add_event("gen_ai.assistant.message", {
        "gen_ai.completion.role": "assistant",
        "gen_ai.completion.content": _truncate(content),
        "gen_ai.completion.finish_reason": finish_reason,
    })
163
+
164
+
165
def record_metrics(
    m: GenAIMetrics,
    attrs: dict,
    model: str,
    duration: float,
    prompt_tokens: int,
    completion_tokens: int,
    ttft: float | None = None,
    tpot: float | None = None,
) -> None:
    """Record the GenAI and operational metrics for one finished request.

    Always records the operation duration and both token counts, and
    decrements the active-request counter for *model*. ``ttft`` and ``tpot``
    are recorded only when given; throughput only when both the duration and
    the completion token count are positive.
    """
    input_attrs = {**attrs, "gen_ai.token.type": "input"}
    output_attrs = {**attrs, "gen_ai.token.type": "output"}

    m.operation_duration.record(duration, attrs)
    m.token_usage.record(prompt_tokens, input_attrs)
    m.token_usage.record(completion_tokens, output_attrs)

    if ttft is not None:
        m.ttft.record(ttft, attrs)

    if tpot is not None:
        m.tpot.record(tpot, attrs)

    if duration > 0 and completion_tokens > 0:
        tokens_per_second = completion_tokens / duration
        m.token_throughput.record(tokens_per_second, attrs)

    # The request is finished, so it no longer counts as active.
    m.active_requests.add(-1, {"model": model})