llm-otel-kit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_otel_kit-0.1.0/.gitignore +18 -0
- llm_otel_kit-0.1.0/CHANGELOG.md +13 -0
- llm_otel_kit-0.1.0/LICENSE +21 -0
- llm_otel_kit-0.1.0/PKG-INFO +100 -0
- llm_otel_kit-0.1.0/README.md +69 -0
- llm_otel_kit-0.1.0/pyproject.toml +40 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/__init__.py +27 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/bootstrap.py +137 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/config.py +80 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/metrics.py +58 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/providers/__init__.py +36 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/providers/anthropic.py +142 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/providers/base.py +88 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/providers/ollama.py +113 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/providers/openai_compat.py +137 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/py.typed +0 -0
- llm_otel_kit-0.1.0/src/llm_otel_kit/spans.py +185 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 (2025-07-12)
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Initial release
|
|
8
|
+
- Provider abstraction: `LLMProvider` ABC with `complete()`, `stream()`, `list_models()`
|
|
9
|
+
- Providers: Ollama, OpenAI-compatible (OpenAI, vLLM, llama.cpp, LM Studio, Groq, Together, Fireworks, Azure OpenAI, LiteLLM), Anthropic
|
|
10
|
+
- OTel bootstrap: `init_observability()` with Dynatrace-compatible temporality
|
|
11
|
+
- GenAI metrics: 10 instruments following OTel GenAI semantic conventions
|
|
12
|
+
- Span helpers: `set_genai_span()`, `set_genai_response()`, `classify_request()`
|
|
13
|
+
- Config: `AppConfig.from_env()` with legacy `OLLAMA_BASE_URL` fallback
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 theharithsa
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-otel-kit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud.
|
|
5
|
+
Project-URL: Homepage, https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry
|
|
6
|
+
Project-URL: Issues, https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry/issues
|
|
7
|
+
Author: Vishruth Harithsa
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: dynatrace,genai,llm,observability,ollama,openai,opentelemetry
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Framework :: FastAPI
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: System :: Monitoring
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: httpx>=0.27
|
|
20
|
+
Requires-Dist: opentelemetry-api>=1.25
|
|
21
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.25
|
|
22
|
+
Requires-Dist: opentelemetry-sdk>=1.25
|
|
23
|
+
Requires-Dist: traceloop-sdk>=0.59
|
|
24
|
+
Provides-Extra: anthropic
|
|
25
|
+
Requires-Dist: anthropic>=0.25; extra == 'anthropic'
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# llm-otel-kit
|
|
33
|
+
|
|
34
|
+
Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud.
|
|
35
|
+
|
|
36
|
+
## What it does
|
|
37
|
+
|
|
38
|
+
`llm-otel-kit` gives you **full OTel GenAI semantic convention coverage** for any LLM provider in ~10 lines of code:
|
|
39
|
+
|
|
40
|
+
- **Traces** with `gen_ai.*` span attributes (model, tokens, latency, streaming mode)
|
|
41
|
+
- **Metrics** — 10 instruments: operation duration, token usage, TTFT, TPOT, throughput, error rate, active requests
|
|
42
|
+
- **Logs** exported via OTLP with structured context (model, duration, token counts)
|
|
43
|
+
- **Dynatrace-ready** — correct temporality (DELTA for counters/histograms, CUMULATIVE for UpDownCounters)
|
|
44
|
+
|
|
45
|
+
## Supported Providers
|
|
46
|
+
|
|
47
|
+
| Provider | Type | Config name |
|
|
48
|
+
|----------|------|-------------|
|
|
49
|
+
| Ollama | Local | `ollama` |
|
|
50
|
+
| OpenAI | Cloud | `openai` |
|
|
51
|
+
| Anthropic | Cloud | `anthropic` |
|
|
52
|
+
| vLLM | Local | `vllm` |
|
|
53
|
+
| llama.cpp | Local | `llamacpp` |
|
|
54
|
+
| LM Studio | Local | `lmstudio` |
|
|
55
|
+
| Groq | Cloud | `groq` |
|
|
56
|
+
| Together | Cloud | `together` |
|
|
57
|
+
| Fireworks | Cloud | `fireworks` |
|
|
58
|
+
| Azure OpenAI | Cloud | `azure_openai` |
|
|
59
|
+
| LiteLLM | Proxy | `litellm` |
|
|
60
|
+
|
|
61
|
+
## Quick Start
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from llm_otel_kit import AppConfig, GenAIMetrics, init_observability, create_provider
|
|
65
|
+
|
|
66
|
+
config = AppConfig.from_env()
|
|
67
|
+
otel = init_observability(config.app_name, config.otlp_endpoint, config.otlp_token)
|
|
68
|
+
provider = create_provider(config.provider)
|
|
69
|
+
m = GenAIMetrics(otel.meter)
|
|
70
|
+
|
|
71
|
+
# Use provider.complete() / provider.stream() for instrumented LLM calls
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Environment Variables
|
|
75
|
+
|
|
76
|
+
| Variable | Default | Description |
|
|
77
|
+
|----------|---------|-------------|
|
|
78
|
+
| `LLM_PROVIDER` | `ollama` | Provider name (see table above) |
|
|
79
|
+
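| `LLM_BASE_URL` | provider-specific (`ollama`: `OLLAMA_BASE_URL` or `http://host.docker.internal:11434`; `openai`: `https://api.openai.com`; `anthropic`: `https://api.anthropic.com`; others: empty) | Provider API base URL |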
|
|
80
|
+
| `LLM_API_KEY` | (empty) | API key for cloud providers |
|
|
81
|
+
| `DEFAULT_MODEL` | (empty) | Fallback model name |
|
|
82
|
+
| `APP_NAME` | `llm-backend` | OTel service name |
|
|
83
|
+
| `TRACELOOP_BASE_URL` | (empty) | OTLP endpoint URL |
|
|
84
|
+
| `DT_OTLP_TOKEN` | (empty) | Dynatrace API token |
|
|
85
|
+
|
|
86
|
+
## Install
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install llm-otel-kit
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
For Anthropic support:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pip install llm-otel-kit[anthropic]
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## License
|
|
99
|
+
|
|
100
|
+
MIT
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# llm-otel-kit
|
|
2
|
+
|
|
3
|
+
Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud.
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
`llm-otel-kit` gives you **full OTel GenAI semantic convention coverage** for any LLM provider in ~10 lines of code:
|
|
8
|
+
|
|
9
|
+
- **Traces** with `gen_ai.*` span attributes (model, tokens, latency, streaming mode)
|
|
10
|
+
- **Metrics** — 10 instruments: operation duration, token usage, TTFT, TPOT, throughput, error rate, active requests
|
|
11
|
+
- **Logs** exported via OTLP with structured context (model, duration, token counts)
|
|
12
|
+
- **Dynatrace-ready** — correct temporality (DELTA for counters/histograms, CUMULATIVE for UpDownCounters)
|
|
13
|
+
|
|
14
|
+
## Supported Providers
|
|
15
|
+
|
|
16
|
+
| Provider | Type | Config name |
|
|
17
|
+
|----------|------|-------------|
|
|
18
|
+
| Ollama | Local | `ollama` |
|
|
19
|
+
| OpenAI | Cloud | `openai` |
|
|
20
|
+
| Anthropic | Cloud | `anthropic` |
|
|
21
|
+
| vLLM | Local | `vllm` |
|
|
22
|
+
| llama.cpp | Local | `llamacpp` |
|
|
23
|
+
| LM Studio | Local | `lmstudio` |
|
|
24
|
+
| Groq | Cloud | `groq` |
|
|
25
|
+
| Together | Cloud | `together` |
|
|
26
|
+
| Fireworks | Cloud | `fireworks` |
|
|
27
|
+
| Azure OpenAI | Cloud | `azure_openai` |
|
|
28
|
+
| LiteLLM | Proxy | `litellm` |
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from llm_otel_kit import AppConfig, GenAIMetrics, init_observability, create_provider
|
|
34
|
+
|
|
35
|
+
config = AppConfig.from_env()
|
|
36
|
+
otel = init_observability(config.app_name, config.otlp_endpoint, config.otlp_token)
|
|
37
|
+
provider = create_provider(config.provider)
|
|
38
|
+
m = GenAIMetrics(otel.meter)
|
|
39
|
+
|
|
40
|
+
# Use provider.complete() / provider.stream() for instrumented LLM calls
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Environment Variables
|
|
44
|
+
|
|
45
|
+
| Variable | Default | Description |
|
|
46
|
+
|----------|---------|-------------|
|
|
47
|
+
| `LLM_PROVIDER` | `ollama` | Provider name (see table above) |
|
|
48
|
+
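| `LLM_BASE_URL` | provider-specific (`ollama`: `OLLAMA_BASE_URL` or `http://host.docker.internal:11434`; `openai`: `https://api.openai.com`; `anthropic`: `https://api.anthropic.com`; others: empty) | Provider API base URL |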
|
|
49
|
+
| `LLM_API_KEY` | (empty) | API key for cloud providers |
|
|
50
|
+
| `DEFAULT_MODEL` | (empty) | Fallback model name |
|
|
51
|
+
| `APP_NAME` | `llm-backend` | OTel service name |
|
|
52
|
+
| `TRACELOOP_BASE_URL` | (empty) | OTLP endpoint URL |
|
|
53
|
+
| `DT_OTLP_TOKEN` | (empty) | Dynatrace API token |
|
|
54
|
+
|
|
55
|
+
## Install
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install llm-otel-kit
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
For Anthropic support:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install llm-otel-kit[anthropic]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## License
|
|
68
|
+
|
|
69
|
+
MIT
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "llm-otel-kit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Drop-in OpenTelemetry GenAI observability for any LLM backend — local or cloud."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [{ name = "Vishruth Harithsa" }]
|
|
13
|
+
keywords = ["opentelemetry", "llm", "observability", "genai", "dynatrace", "ollama", "openai"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Framework :: FastAPI",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: System :: Monitoring",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"httpx>=0.27",
|
|
25
|
+
"opentelemetry-api>=1.25",
|
|
26
|
+
"opentelemetry-sdk>=1.25",
|
|
27
|
+
"opentelemetry-exporter-otlp-proto-http>=1.25",
|
|
28
|
+
"traceloop-sdk>=0.59",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
anthropic = ["anthropic>=0.25"]
|
|
33
|
+
dev = ["pytest", "pytest-asyncio", "ruff"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry"
|
|
37
|
+
Issues = "https://github.com/theharithsa/Local-LLM-Application-with-OpenLLMetry/issues"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/llm_otel_kit"]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""llm-otel-kit — Drop-in OTel GenAI observability for any LLM backend."""
|
|
2
|
+
|
|
3
|
+
from llm_otel_kit.bootstrap import OTelComponents, init_observability
|
|
4
|
+
from llm_otel_kit.config import AppConfig, ProviderConfig
|
|
5
|
+
from llm_otel_kit.metrics import GenAIMetrics
|
|
6
|
+
from llm_otel_kit.providers import create_provider
|
|
7
|
+
from llm_otel_kit.spans import (
|
|
8
|
+
classify_request,
|
|
9
|
+
detect_provider,
|
|
10
|
+
record_metrics,
|
|
11
|
+
set_genai_response,
|
|
12
|
+
set_genai_span,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"AppConfig",
|
|
17
|
+
"GenAIMetrics",
|
|
18
|
+
"OTelComponents",
|
|
19
|
+
"ProviderConfig",
|
|
20
|
+
"classify_request",
|
|
21
|
+
"create_provider",
|
|
22
|
+
"detect_provider",
|
|
23
|
+
"init_observability",
|
|
24
|
+
"record_metrics",
|
|
25
|
+
"set_genai_response",
|
|
26
|
+
"set_genai_span",
|
|
27
|
+
]
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynatrace-compatible OpenTelemetry bootstrap for GenAI applications.
|
|
3
|
+
|
|
4
|
+
Handles the critical init order: MeterProvider → Logs → Traceloop.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from typing import NamedTuple
|
|
10
|
+
|
|
11
|
+
from opentelemetry import metrics, trace
|
|
12
|
+
from opentelemetry._logs import set_logger_provider
|
|
13
|
+
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
|
|
14
|
+
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
|
15
|
+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
|
16
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
|
17
|
+
from opentelemetry.sdk.metrics import (
|
|
18
|
+
Counter,
|
|
19
|
+
Histogram,
|
|
20
|
+
MeterProvider,
|
|
21
|
+
UpDownCounter,
|
|
22
|
+
)
|
|
23
|
+
from opentelemetry.sdk.metrics.export import (
|
|
24
|
+
AggregationTemporality,
|
|
25
|
+
PeriodicExportingMetricReader,
|
|
26
|
+
)
|
|
27
|
+
from opentelemetry.sdk.metrics.view import (
|
|
28
|
+
ExplicitBucketHistogramAggregation,
|
|
29
|
+
View,
|
|
30
|
+
)
|
|
31
|
+
from traceloop.sdk import Traceloop
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# OTel GenAI semantic-convention histogram bucket boundaries
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Operation duration in seconds: 0.01 doubled at each step (0.01 ... 81.92).
# Scaling a float by a power of two is exact, so these equal the literals.
DURATION_BUCKETS = [0.01 * 2**exponent for exponent in range(14)]

# Token counts: successive powers of four (1 ... 67108864).
TOKEN_BUCKETS = [4**exponent for exponent in range(14)]

# Time to first token, in seconds.
TTFT_BUCKETS = [
    0.001, 0.005,
    0.01, 0.02, 0.04, 0.06, 0.08,
    0.1, 0.25, 0.5, 0.75,
    1.0, 2.5, 5.0, 7.5, 10.0,
]

# Time per output token, in seconds.
TPOT_BUCKETS = [
    0.01, 0.025, 0.05, 0.075,
    0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75,
    1.0, 2.5,
]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class OTelComponents(NamedTuple):
    """Tuple returned by init_observability().

    Groups the meter, tracer and logger created for one application name so
    callers can unpack them or access them by field name.
    """
    # Meter used to create metric instruments.
    meter: metrics.Meter
    # Tracer used to start spans.
    tracer: trace.Tracer
    # Standard-library logger named after the application.
    logger: logging.Logger
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _init_metrics(app_name: str, otlp_endpoint: str, otlp_token: str) -> metrics.Meter:
    """Configure OTLP/HTTP metric export and return a meter for *app_name*.

    Args:
        app_name: Name passed to ``metrics.get_meter``.
        otlp_endpoint: OTLP base URL; ``/v1/metrics`` is appended to it.
            Trailing slashes are ignored.
        otlp_token: Token sent as ``Authorization: Api-Token <token>``.
            No auth header is sent when it is empty.

    Returns:
        The meter obtained via ``metrics.get_meter(app_name, "1.0.0")`` after
        the provider has been registered with ``metrics.set_meter_provider``.
    """
    headers: dict[str, str] = {}
    if otlp_token:
        headers["Authorization"] = f"Api-Token {otlp_token}"

    # Normalise the base URL so "https://host/otlp/" does not turn into
    # "https://host/otlp//v1/metrics".
    base_url = otlp_endpoint.rstrip("/")

    exporter = OTLPMetricExporter(
        endpoint=f"{base_url}/v1/metrics",
        headers=headers,
        # DELTA for counters/histograms, CUMULATIVE for up-down counters.
        preferred_temporality={
            Counter: AggregationTemporality.DELTA,
            Histogram: AggregationTemporality.DELTA,
            UpDownCounter: AggregationTemporality.CUMULATIVE,
        },
    )

    # One explicit-bucket view per GenAI histogram instrument.
    histogram_buckets = (
        ("gen_ai.client.operation.duration", DURATION_BUCKETS),
        ("gen_ai.client.token.usage", TOKEN_BUCKETS),
        ("gen_ai.server.time_to_first_token", TTFT_BUCKETS),
        ("gen_ai.server.time_per_output_token", TPOT_BUCKETS),
    )
    views = [
        View(
            instrument_name=instrument_name,
            aggregation=ExplicitBucketHistogramAggregation(boundaries=boundaries),
        )
        for instrument_name, boundaries in histogram_buckets
    ]

    provider = MeterProvider(
        metric_readers=[
            PeriodicExportingMetricReader(exporter, export_interval_millis=30_000),
        ],
        views=views,
    )
    metrics.set_meter_provider(provider)
    return metrics.get_meter(app_name, "1.0.0")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _init_logs(otlp_endpoint: str, otlp_token: str) -> None:
    """Configure OTLP/HTTP log export and attach it to the root logger.

    Args:
        otlp_endpoint: OTLP base URL; ``/v1/logs`` is appended to it.
            Trailing slashes are ignored.
        otlp_token: Token sent as ``Authorization: Api-Token <token>``.
            No auth header is sent when it is empty.

    Side effects:
        Registers a ``LoggerProvider`` via ``set_logger_provider``, adds a
        ``LoggingHandler`` at INFO level to the root logger, and sets the root
        logger's level to INFO.
    """
    headers: dict[str, str] = {}
    if otlp_token:
        headers["Authorization"] = f"Api-Token {otlp_token}"

    # Normalise the base URL so "https://host/otlp/" does not turn into
    # "https://host/otlp//v1/logs".
    base_url = otlp_endpoint.rstrip("/")
    exporter = OTLPLogExporter(
        endpoint=f"{base_url}/v1/logs",
        headers=headers,
    )

    provider = LoggerProvider()
    provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
    set_logger_provider(provider)

    root_logger = logging.getLogger()
    root_logger.addHandler(LoggingHandler(level=logging.INFO, logger_provider=provider))
    root_logger.setLevel(logging.INFO)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def init_observability(
    app_name: str,
    otlp_endpoint: str = "",
    otlp_token: str = "",
) -> OTelComponents:
    """Bootstrap metrics, logs and tracing in one call (in that order).

    Args:
        app_name: Name used for the meter, tracer, logger and Traceloop app.
        otlp_endpoint: OTLP base URL. When empty, the ``TRACELOOP_BASE_URL``
            environment variable is used instead.
        otlp_token: OTLP auth token. When empty, the ``DT_OTLP_TOKEN``
            environment variable is used instead.

    Returns:
        An ``OTelComponents`` tuple of ``(meter, tracer, logger)``.
    """
    endpoint = otlp_endpoint if otlp_endpoint else os.getenv("TRACELOOP_BASE_URL", "")
    token = otlp_token if otlp_token else os.getenv("DT_OTLP_TOKEN", "")

    if not endpoint:
        # No endpoint configured: skip exporter setup and just fetch a meter.
        meter = metrics.get_meter(app_name, "1.0.0")
    else:
        meter = _init_metrics(app_name, endpoint, token)
        _init_logs(endpoint, token)

    # Traceloop MUST init after MeterProvider to avoid conflicts.
    # NOTE(review): the resolved endpoint/token are not passed to Traceloop
    # here; presumably it reads TRACELOOP_BASE_URL itself — confirm that
    # explicitly passed arguments also reach the trace exporter.
    Traceloop.init(app_name=app_name, disable_batch=False)

    return OTelComponents(
        meter=meter,
        tracer=trace.get_tracer(app_name, "1.0.0"),
        logger=logging.getLogger(app_name),
    )
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Configuration models for llm-otel-kit."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ProviderConfig:
    """LLM provider connection settings.

    Attributes:
        name: Provider identifier, e.g. ``"ollama"``, ``"openai"`` or
            ``"anthropic"``.
        base_url: API base URL (e.g. ``http://localhost:11434``,
            ``https://api.openai.com``).
        api_key: API key for cloud providers. Leave empty for local providers.
        default_model: Fallback model when the request doesn't specify one.
    """

    name: str = "ollama"
    base_url: str = "http://localhost:11434"
    api_key: str = ""
    default_model: str = ""


@dataclass
class AppConfig:
    """Full application configuration — provider + observability.

    Attributes:
        app_name: OTel service name.
        provider: LLM provider settings.
        otlp_endpoint: OTLP base URL (e.g. Dynatrace OTLP endpoint).
        otlp_token: Auth token for the OTLP exporter.
    """

    app_name: str = "llm-backend"
    provider: ProviderConfig = field(default_factory=ProviderConfig)
    otlp_endpoint: str = ""
    otlp_token: str = ""

    @classmethod
    def from_env(cls) -> "AppConfig":
        """Build config from environment variables.

        Env vars:
            ``APP_NAME``            — OTel service name (default: ``llm-backend``)
            ``LLM_PROVIDER``        — provider name (default: ``ollama``)
            ``LLM_BASE_URL``        — Provider API base URL
            ``LLM_API_KEY``         — API key for cloud providers
            ``DEFAULT_MODEL``       — Fallback model name
            ``TRACELOOP_BASE_URL``  — OTLP endpoint
            ``DT_OTLP_TOKEN``       — Dynatrace API token

        ``APP_NAME``, ``LLM_PROVIDER``, ``LLM_BASE_URL`` and the legacy
        ``OLLAMA_BASE_URL`` are stripped of surrounding whitespace, and a
        value that is set but empty is treated as unset so the default
        applies.

        When ``LLM_BASE_URL`` is not set, the base URL defaults to
        ``OLLAMA_BASE_URL`` (or ``http://host.docker.internal:11434``) for
        ``ollama``, ``https://api.openai.com`` for ``openai`` and
        ``https://api.anthropic.com`` for ``anthropic``. Every other provider
        gets an empty base URL.

        Returns:
            A populated ``AppConfig``.
        """

        def _env(var: str, default: str = "") -> str:
            # Unset, empty and whitespace-only values all yield *default*.
            return os.getenv(var, "").strip() or default

        provider_name = _env("LLM_PROVIDER", "ollama").lower()

        # Built-in defaults, used only when LLM_BASE_URL is not provided.
        default_base_urls = {
            "ollama": _env("OLLAMA_BASE_URL", "http://host.docker.internal:11434"),
            "openai": "https://api.openai.com",
            "anthropic": "https://api.anthropic.com",
        }
        base_url = _env("LLM_BASE_URL") or default_base_urls.get(provider_name, "")

        return cls(
            app_name=_env("APP_NAME", "llm-backend"),
            provider=ProviderConfig(
                name=provider_name,
                base_url=base_url,
                api_key=os.getenv("LLM_API_KEY", ""),
                default_model=os.getenv("DEFAULT_MODEL", ""),
            ),
            otlp_endpoint=os.getenv("TRACELOOP_BASE_URL", ""),
            otlp_token=os.getenv("DT_OTLP_TOKEN", ""),
        )
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""GenAI semantic-convention + operational metric instruments."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
from opentelemetry.metrics import Counter, Histogram, Meter, UpDownCounter
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class GenAIMetrics:
    """Pre-created OTel metric instruments for LLM observability.

    All ten instruments are created from the supplied meter in
    ``__post_init__``. ``unit`` and ``description`` are passed by keyword
    because the OTel ``Meter.create_*`` methods take ``unit`` before
    ``description`` positionally.

    Usage::

        from llm_otel_kit import GenAIMetrics, init_observability

        otel = init_observability("my-app")
        m = GenAIMetrics(otel.meter)
        m.request_count.add(1, {"model": "gpt-4o"})
    """

    # Meter the instruments are created from; hidden from repr().
    _meter: Meter = field(repr=False)

    # GenAI semconv
    operation_duration: Histogram = field(init=False)
    token_usage: Histogram = field(init=False)
    ttft: Histogram = field(init=False)
    tpot: Histogram = field(init=False)

    # Operational
    request_count: Counter = field(init=False)
    error_count: Counter = field(init=False)
    active_requests: UpDownCounter = field(init=False)
    stream_chunks: Counter = field(init=False)
    token_throughput: Histogram = field(init=False)
    message_count: Histogram = field(init=False)

    def __post_init__(self) -> None:
        """Create every instrument on the meter given at construction."""
        m = self._meter

        # GenAI semantic-convention histograms.
        self.operation_duration = m.create_histogram(
            "gen_ai.client.operation.duration",
            unit="s", description="GenAI operation duration")
        self.token_usage = m.create_histogram(
            "gen_ai.client.token.usage",
            unit="{token}", description="Input and output token counts")
        self.ttft = m.create_histogram(
            "gen_ai.server.time_to_first_token",
            unit="s", description="Time to first token")
        self.tpot = m.create_histogram(
            "gen_ai.server.time_per_output_token",
            unit="s", description="Time per output token")

        # Operational instruments.
        self.request_count = m.create_counter(
            "llm.request.count",
            unit="1", description="Total LLM requests")
        self.error_count = m.create_counter(
            "llm.request.errors",
            unit="1", description="Failed LLM requests")
        self.active_requests = m.create_up_down_counter(
            "llm.request.active",
            unit="1", description="In-flight LLM requests")
        self.stream_chunks = m.create_counter(
            "llm.stream.chunks",
            unit="1", description="Streaming chunks sent")
        self.token_throughput = m.create_histogram(
            "llm.token.throughput",
            unit="{token}/s", description="Output token throughput")
        self.message_count = m.create_histogram(
            "llm.request.message_count",
            unit="1", description="Messages in prompt")
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Provider registry — factory for LLM backend providers."""
|
|
2
|
+
|
|
3
|
+
from llm_otel_kit.config import ProviderConfig
|
|
4
|
+
from llm_otel_kit.providers.base import LLMProvider
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def create_provider(config: ProviderConfig) -> LLMProvider:
    """Instantiate the correct provider from config.

    The provider name is matched case-insensitively and with surrounding
    whitespace ignored. Provider modules are imported lazily, so only the
    selected backend's module is loaded.

    Supported providers:
      - ``ollama``     — Ollama native API (``/api/chat``)
      - ``openai``     — OpenAI-compatible (works with OpenAI, vLLM, llama.cpp,
                          LM Studio, Groq, Together, Fireworks, Azure OpenAI,
                          LiteLLM, etc.)
      - ``anthropic``  — Anthropic Messages API (``/v1/messages``)

    Args:
        config: Provider settings; ``config.name`` selects the backend.

    Returns:
        A provider instance constructed with *config*.

    Raises:
        ValueError: If ``config.name`` is not a supported provider.
    """
    # Strip as well as lower-case so values such as "openai " are accepted.
    name = config.name.strip().lower()

    if name == "ollama":
        from llm_otel_kit.providers.ollama import OllamaProvider
        return OllamaProvider(config)

    openai_compatible = (
        "openai", "vllm", "llamacpp", "lmstudio", "groq", "together",
        "fireworks", "azure_openai", "litellm",
    )
    if name in openai_compatible:
        from llm_otel_kit.providers.openai_compat import OpenAICompatProvider
        return OpenAICompatProvider(config)

    if name == "anthropic":
        from llm_otel_kit.providers.anthropic import AnthropicProvider
        return AnthropicProvider(config)

    raise ValueError(
        f"Unknown provider '{config.name}'. "
        "Supported: ollama, openai, anthropic, vllm, llamacpp, lmstudio, "
        "groq, together, fireworks, azure_openai, litellm"
    )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
__all__ = ["LLMProvider", "create_provider"]
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Anthropic Messages API provider (``/v1/messages``)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from typing import AsyncIterator
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from llm_otel_kit.providers.base import (
|
|
12
|
+
CompletionResult,
|
|
13
|
+
LLMProvider,
|
|
14
|
+
StreamChunk,
|
|
15
|
+
TimingInfo,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AnthropicProvider(LLMProvider):
    """Provider for the Anthropic Claude API (``POST /v1/messages``)."""

    @property
    def system_name(self) -> str:
        """Return the provider's system name, ``"anthropic"``."""
        return "anthropic"

    def _headers(self) -> dict[str, str]:
        """Return the request headers: JSON content type, API key, API version."""
        return {
            "Content-Type": "application/json",
            "x-api-key": self.config.api_key,
            "anthropic-version": "2023-06-01",
        }

    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Translate an OpenAI-format request into an Anthropic payload.

        Args:
            model: Model identifier to request.
            messages: Chat messages, each with ``"role"`` and ``"content"``.
            stream: Whether a streaming response is requested.
            **kwargs: Optional ``max_tokens``, ``temperature`` and ``top_p``.
                A value of ``None`` is treated the same as omitting it.

        Returns:
            The JSON-serialisable request body.
        """
        # Anthropic separates system messages from the messages array.
        system_parts = [msg["content"] for msg in messages if msg["role"] == "system"]
        chat_messages = [
            {"role": msg["role"], "content": msg["content"]}
            for msg in messages
            if msg["role"] != "system"
        ]

        # An explicit max_tokens=None must fall back to the default rather
        # than being sent as JSON null.
        max_tokens = kwargs.get("max_tokens")
        payload: dict = {
            "model": model,
            "messages": chat_messages,
            "max_tokens": 4096 if max_tokens is None else max_tokens,
            "stream": stream,
        }
        if system_parts:
            payload["system"] = "\n".join(system_parts)
        for option in ("temperature", "top_p"):
            if kwargs.get(option) is not None:
                payload[option] = kwargs[option]
        return payload

    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """Send a non-streaming request and normalise the response.

        Args:
            client: HTTP client used to send the request.
            payload: Request body, e.g. as produced by ``build_payload``.

        Returns:
            A ``CompletionResult`` with the concatenated text blocks, token
            usage, finish reason and response id. ``timing`` is left empty.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        response = await client.post(
            f"{self.base_url}/v1/messages",
            json=payload,
            headers=self._headers(),
        )
        response.raise_for_status()
        data = response.json()

        # Only "text" content blocks contribute to the returned content.
        text = "".join(
            block.get("text", "")
            for block in data.get("content", [])
            if block.get("type") == "text"
        )
        usage = data.get("usage", {})

        return CompletionResult(
            content=text,
            model=data.get("model", payload.get("model", "")),
            prompt_tokens=usage.get("input_tokens", 0),
            completion_tokens=usage.get("output_tokens", 0),
            # A missing or null stop_reason becomes "end_turn".
            finish_reason=data.get("stop_reason") or "end_turn",
            response_id=data.get("id", ""),
            timing=TimingInfo(),
        )

    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Send a streaming request and yield normalised chunks.

        Lines that do not start with ``"data: "`` or whose payload is not
        valid JSON are skipped. One chunk is yielded per
        ``content_block_delta`` event, and a final ``done=True`` chunk
        carrying token counts is yielded per ``message_delta`` event.

        Args:
            client: HTTP client used to send the request.
            payload: Request body, e.g. as produced by ``build_payload``.

        Yields:
            ``StreamChunk`` objects in the order events arrive.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        prompt_tokens = 0

        async with client.stream(
            "POST",
            f"{self.base_url}/v1/messages",
            json=payload,
            headers=self._headers(),
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                try:
                    event = json.loads(line[6:])
                except json.JSONDecodeError:
                    continue

                event_type = event.get("type", "")

                if event_type == "message_start":
                    # Input token usage is read from the opening event.
                    usage = event.get("message", {}).get("usage", {})
                    prompt_tokens = usage.get("input_tokens", 0)

                elif event_type == "content_block_delta":
                    delta = event.get("delta", {})
                    yield StreamChunk(content=delta.get("text", ""))

                elif event_type == "message_delta":
                    usage = event.get("usage", {})
                    # A missing or null stop_reason becomes "end_turn".
                    stop_reason = event.get("delta", {}).get("stop_reason") or "end_turn"
                    yield StreamChunk(
                        done=True,
                        prompt_tokens=prompt_tokens,
                        completion_tokens=usage.get("output_tokens", 0),
                        finish_reason=stop_reason,
                        timing=TimingInfo(),
                    )

    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return a static, OpenAI-style model list.

        No request is made; *client* is accepted only to match the provider
        interface. ``created`` is set to the current time for every entry.
        """
        models = ["claude-sonnet-4-20250514", "claude-3-5-haiku-20241022", "claude-3-opus-20240229"]
        created = int(time.time())
        return [
            {"id": model_id, "object": "model",
             "created": created, "owned_by": "anthropic"}
            for model_id in models
        ]
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Abstract base class for LLM providers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import AsyncIterator
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from llm_otel_kit.config import ProviderConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class TimingInfo:
    """TTFT / TPOT extracted from the provider response.

    Both fields default to ``None``, which a default-constructed instance
    keeps when a provider supplies no timing data.
    """
    # Time to first token; units presumably seconds — TODO confirm.
    ttft: float | None = None
    # Time per output token; units presumably seconds — TODO confirm.
    tpot: float | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class CompletionResult:
    """Provider-independent result of a non-streaming chat completion.

    Every field has a default, so providers only set what their backend
    actually returns.
    """
    # Text of the assistant reply.
    content: str = ""
    # Model name reported by the backend (providers fall back to the
    # requested model when the response omits it).
    model: str = ""
    # Input-side token count.
    prompt_tokens: int = 0
    # Output-side token count.
    completion_tokens: int = 0
    # Why generation ended; "stop" unless the provider says otherwise.
    finish_reason: str = "stop"
    # Backend-assigned response identifier, empty when none is available.
    response_id: str = ""
    # Server-side latency figures; a fresh empty TimingInfo per instance.
    timing: TimingInfo = field(default_factory=TimingInfo)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class StreamChunk:
    """A single increment yielded by a provider's ``stream()``.

    Intermediate chunks normally carry only ``content``; the providers in
    this package attach token counts, ``finish_reason`` and ``timing`` to
    the chunk they mark ``done``.
    """
    # Text delta carried by this chunk (may be empty).
    content: str = ""
    # True on the chunk that closes the stream.
    done: bool = False
    # Input-side token count; 0 until the provider reports it.
    prompt_tokens: int = 0
    # Output-side token count; 0 until the provider reports it.
    completion_tokens: int = 0
    # Why generation ended; empty on intermediate chunks.
    finish_reason: str = ""
    # Server-side latency figures; a fresh empty TimingInfo per instance.
    timing: TimingInfo = field(default_factory=TimingInfo)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class LLMProvider(ABC):
    """Contract shared by every LLM backend.

    Subclasses supply the backend name, the request translation, and the
    three network operations (complete, stream, list models).  The base
    class only records the configuration and the server address derived
    from it.
    """

    def __init__(self, config: ProviderConfig) -> None:
        """Keep ``config`` and derive ``base_url``, ``host`` and ``port``.

        ``base_url`` is the configured URL without trailing slashes.
        ``host`` falls back to ``"localhost"`` when the URL has no
        hostname.  ``port`` is the explicit port when present, otherwise
        443 for ``https`` and 80 for anything else.
        """
        self.config = config
        self.base_url = config.base_url.rstrip("/")

        url = urlparse(self.base_url)

        hostname = url.hostname
        self.host = hostname if hostname else "localhost"

        explicit_port = url.port
        if explicit_port:
            self.port = explicit_port
        elif url.scheme == "https":
            self.port = 443
        else:
            self.port = 80

    @property
    @abstractmethod
    def system_name(self) -> str:
        """Value reported as OTel ``gen_ai.system`` (e.g. ``"ollama"``, ``"openai"``)."""

    @abstractmethod
    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Convert an OpenAI-format request into this backend's native payload."""

    @abstractmethod
    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """Run a non-streaming chat completion and return the normalised result."""

    @abstractmethod
    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Run a streaming chat completion, yielding ``StreamChunk`` objects."""

    @abstractmethod
    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """List available models in OpenAI format ``[{"id": ..., "object": "model", ...}]``."""
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Ollama native API provider (``/api/chat``)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from typing import AsyncIterator
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from llm_otel_kit.providers.base import (
|
|
12
|
+
CompletionResult,
|
|
13
|
+
LLMProvider,
|
|
14
|
+
StreamChunk,
|
|
15
|
+
TimingInfo,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class OllamaProvider(LLMProvider):
    """Provider for Ollama's native chat API (``/api/chat``, ``/api/tags``)."""

    @property
    def system_name(self) -> str:
        """Value reported as OTel ``gen_ai.system``."""
        return "ollama"

    @staticmethod
    def _timing(data: dict) -> TimingInfo:
        """Derive TTFT/TPOT (seconds) from a final Ollama response object.

        ``prompt_eval_duration`` and ``eval_duration`` are nanoseconds.
        TPOT spreads the eval duration over ``eval_count - 1`` tokens, so
        it is only defined when more than one token was generated.
        """
        prompt_eval_ns = data.get("prompt_eval_duration") or 0
        eval_ns = data.get("eval_duration") or 0
        completion_tokens = data.get("eval_count") or 0

        ttft = (prompt_eval_ns / 1e9) if prompt_eval_ns > 0 else None
        tpot = None
        if eval_ns > 0 and completion_tokens > 1:
            tpot = (eval_ns / 1e9) / (completion_tokens - 1)
        return TimingInfo(ttft=ttft, tpot=tpot)

    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Translate an OpenAI-format request into an ``/api/chat`` payload.

        Sampling parameters go under ``options``; ``max_tokens`` is sent as
        Ollama's ``num_predict``.  Parameters that are absent or ``None``
        are left out.
        """
        options: dict = {}
        # OpenAI parameter name -> Ollama option name.
        for openai_key, ollama_key in (
            ("temperature", "temperature"),
            ("top_p", "top_p"),
            ("max_tokens", "num_predict"),
        ):
            value = kwargs.get(openai_key)
            if value is not None:
                options[ollama_key] = value

        return {
            "model": model,
            "messages": messages,
            "stream": stream,
            "options": options,
        }

    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """POST ``payload`` to ``/api/chat`` and normalise the reply.

        Raises:
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        response = await client.post(f"{self.base_url}/api/chat", json=payload)
        response.raise_for_status()
        data = response.json()

        message = data.get("message") or {}
        return CompletionResult(
            content=message.get("content") or "",
            model=data.get("model", payload.get("model", "")),
            prompt_tokens=data.get("prompt_eval_count", 0),
            completion_tokens=data.get("eval_count", 0),
            # Propagate the server's reason (e.g. "length" when the
            # num_predict limit is hit) instead of always claiming "stop".
            finish_reason=data.get("done_reason") or "stop",
            timing=self._timing(data),
        )

    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Stream ``/api/chat`` and yield one ``StreamChunk`` per JSON line.

        Blank lines and lines that are not valid JSON are skipped.  The
        chunk flagged ``done`` also carries token counts, the finish reason
        and timing.

        Raises:
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        async with client.stream("POST", f"{self.base_url}/api/chat", json=payload) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if not line:
                    continue
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue

                message = chunk.get("message") or {}
                done = chunk.get("done", False)

                sc = StreamChunk(content=message.get("content") or "", done=done)
                if done:
                    sc.prompt_tokens = chunk.get("prompt_eval_count", 0)
                    sc.completion_tokens = chunk.get("eval_count", 0)
                    # Same rule as complete(): keep the server's reason.
                    sc.finish_reason = chunk.get("done_reason") or "stop"
                    sc.timing = self._timing(chunk)
                yield sc

    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return the models from ``/api/tags`` in OpenAI list format.

        ``created`` is the current Unix time; the listing's own timestamps
        are not used.

        Raises:
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        response = await client.get(f"{self.base_url}/api/tags")
        response.raise_for_status()
        return [
            {"id": m["name"], "object": "model",
             "created": int(time.time()), "owned_by": "ollama"}
            for m in response.json().get("models", [])
        ]
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""OpenAI-compatible API provider.
|
|
2
|
+
|
|
3
|
+
Works with: OpenAI, Azure OpenAI, vLLM, llama.cpp (server mode),
|
|
4
|
+
LM Studio, Groq, Together.ai, Fireworks.ai, LiteLLM, LocalAI, etc.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import time
|
|
11
|
+
from typing import AsyncIterator
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from llm_otel_kit.providers.base import (
|
|
16
|
+
CompletionResult,
|
|
17
|
+
LLMProvider,
|
|
18
|
+
StreamChunk,
|
|
19
|
+
TimingInfo,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class OpenAICompatProvider(LLMProvider):
    """Provider for any endpoint that speaks the OpenAI chat-completions API."""

    @property
    def system_name(self) -> str:
        """Value reported as OTel ``gen_ai.system``: the configured provider name."""
        return self.config.name

    def _headers(self) -> dict[str, str]:
        """Build request headers, adding a bearer token when an API key is set."""
        h: dict[str, str] = {"Content-Type": "application/json"}
        if self.config.api_key:
            h["Authorization"] = f"Bearer {self.config.api_key}"
        return h

    def build_payload(
        self,
        model: str,
        messages: list[dict],
        stream: bool,
        **kwargs,
    ) -> dict:
        """Build a ``/v1/chat/completions`` payload.

        ``temperature``, ``top_p`` and ``max_tokens`` are copied when given
        and not ``None``.  Streaming requests also ask the server to append
        a usage chunk (``stream_options.include_usage``).
        """
        payload: dict = {
            "model": model,
            "messages": messages,
            "stream": stream,
        }
        for key in ("temperature", "top_p", "max_tokens"):
            value = kwargs.get(key)
            if value is not None:
                payload[key] = value
        if stream:
            payload["stream_options"] = {"include_usage": True}
        return payload

    async def complete(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> CompletionResult:
        """POST ``payload`` to ``/v1/chat/completions`` and normalise the reply.

        Raises:
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        response = await client.post(
            f"{self.base_url}/v1/chat/completions",
            json=payload,
            headers=self._headers(),
        )
        response.raise_for_status()
        data = response.json()

        # `or` rather than a .get() default: a present-but-empty
        # "choices": [] would otherwise make the [0] lookup raise IndexError.
        choice = (data.get("choices") or [{}])[0]
        usage = data.get("usage") or {}
        message = choice.get("message") or {}

        return CompletionResult(
            # "content" is null for e.g. tool-call replies; keep the field a str.
            content=message.get("content") or "",
            model=data.get("model", payload.get("model", "")),
            prompt_tokens=usage.get("prompt_tokens", 0),
            completion_tokens=usage.get("completion_tokens", 0),
            finish_reason=choice.get("finish_reason") or "stop",
            response_id=data.get("id", ""),
            timing=TimingInfo(),  # OpenAI API doesn't expose server-side timing
        )

    async def stream(
        self,
        client: httpx.AsyncClient,
        payload: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Stream ``/v1/chat/completions`` (SSE) and yield ``StreamChunk`` objects.

        Content deltas are yielded as they arrive.  The ``done`` chunk is
        emitted once, after the stream has ended, so that it can carry the
        token counts: with ``stream_options.include_usage`` the totals come
        in a separate trailing chunk whose ``choices`` list is empty, i.e.
        after the chunk that holds ``finish_reason``.  No ``done`` chunk is
        produced when the server never sent a ``finish_reason``.

        Raises:
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        finish_reason: str | None = None
        prompt_tokens = 0
        completion_tokens = 0

        async with client.stream(
            "POST",
            f"{self.base_url}/v1/chat/completions",
            json=payload,
            headers=self._headers(),
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data_str = line[6:]
                if data_str.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(data_str)
                except json.JSONDecodeError:
                    continue

                # Remember the latest usage figures wherever they appear.
                usage = chunk.get("usage") or {}
                if usage:
                    prompt_tokens = usage.get("prompt_tokens", 0)
                    completion_tokens = usage.get("completion_tokens", 0)

                # The usage-only chunk has "choices": []; indexing it used
                # to raise IndexError.
                choices = chunk.get("choices") or []
                if not choices:
                    continue

                choice = choices[0]
                content = (choice.get("delta") or {}).get("content") or ""
                reason = choice.get("finish_reason")
                if reason is not None:
                    finish_reason = reason
                    if not content:
                        # Nothing to forward; the done chunk follows later.
                        continue
                yield StreamChunk(content=content)

        if finish_reason is not None:
            yield StreamChunk(
                done=True,
                finish_reason=finish_reason,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                timing=TimingInfo(),
            )

    async def list_models(self, client: httpx.AsyncClient) -> list[dict]:
        """Return the models from ``/v1/models`` in OpenAI list format.

        Missing ``created`` / ``owned_by`` fields are filled with the
        current Unix time and this provider's ``system_name``.

        Raises:
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        response = await client.get(
            f"{self.base_url}/v1/models",
            headers=self._headers(),
        )
        response.raise_for_status()
        data = response.json()
        return [
            {"id": m["id"], "object": "model",
             "created": m.get("created", int(time.time())),
             "owned_by": m.get("owned_by", self.system_name)}
            for m in data.get("data", [])
        ]
|
|
File without changes
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""GenAI span attribute helpers and request classification."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
from opentelemetry.trace import Span
|
|
9
|
+
|
|
10
|
+
from llm_otel_kit.metrics import GenAIMetrics
|
|
11
|
+
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
# Provider detection from model name
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Each row is (model-name prefixes, gen_ai.system value).  Rows are tried in
# order and the first row with a matching prefix wins.
_PROVIDER_PATTERNS: list[tuple[list[str], str]] = [
    (["gpt-", "o1-", "o3-", "o4-", "dall-e", "text-embedding"], "openai"),
    (["claude-"], "anthropic"),
    (["gemini-"], "google"),
    (["copilot-", "github/"], "github.copilot"),
    (["mistral-", "mixtral-", "codestral-"], "mistral"),
    (["command-", "embed-"], "cohere"),
    (["deepseek-"], "deepseek"),
]

# Maximum number of characters of message text kept by ``_truncate``.
_MAX_CONTENT_LEN = 500


def detect_provider(model: str) -> str:
    """Guess the ``gen_ai.system`` value from the start of a model name.

    The comparison is case-insensitive.  Names that match none of the
    known prefixes are reported as ``"ollama"``.
    """
    name = model.lower()
    matches = (
        system
        for prefixes, system in _PROVIDER_PATTERNS
        if name.startswith(tuple(prefixes))
    )
    return next(matches, "ollama")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _message_text(content: object) -> str:
    """Flatten an OpenAI-format ``content`` value into plain text.

    ``content`` may be a string, ``None`` (e.g. tool-call messages) or a
    list of content parts; only string parts and the ``text`` of dict
    parts are kept.  Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return " ".join(pieces)
    return ""


def classify_request(messages: list[dict]) -> str:
    """Classify an OpenAI-format message list into a purpose label.

    The decision is based on key phrases in the last message
    (case-insensitive), checked in this order: title generation, tag
    generation, follow-up suggestion.  A conversation made only of system
    messages is ``"System Prompt"``; everything else, including an empty
    list, is ``"User Chat"``.

    Non-string ``content`` (``None`` or a list of parts) is reduced to its
    text instead of raising ``AttributeError`` on ``.lower()``.
    """
    last_content = _message_text(messages[-1].get("content") if messages else "").lower()

    if "generate a concise" in last_content and "title" in last_content:
        return "Title Generation"
    if ("generate tags" in last_content or "categorize" in last_content
            or "tag the conversation" in last_content):
        return "Tag Generation"
    if "follow-up" in last_content or ("suggest" in last_content and "question" in last_content):
        return "Suggestion Generation"
    if messages and all(msg.get("role") == "system" for msg in messages):
        return "System Prompt"
    return "User Chat"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _truncate(text: str) -> str:
    """Cap ``text`` at ``_MAX_CONTENT_LEN`` characters, appending ``...`` when cut."""
    if len(text) <= _MAX_CONTENT_LEN:
        return text
    return text[:_MAX_CONTENT_LEN] + "..."
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
# Span attribute setters (OTel GenAI semconv)
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
def semconv_attrs(model: str, server_host: str, server_port: int) -> dict:
    """Return the attribute dict attached to GenAI metrics.

    The operation is always ``"chat"``, the system is inferred from the
    model name, and the same ``model`` is used for both the request and
    the response model attributes.
    """
    attrs: dict = {"gen_ai.operation.name": "chat"}
    attrs["gen_ai.system"] = detect_provider(model)
    attrs["gen_ai.request.model"] = model
    attrs["gen_ai.response.model"] = model
    attrs["server.address"] = server_host
    attrs["server.port"] = server_port
    return attrs
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _span_text(content: object) -> str:
    """Flatten an OpenAI-format ``content`` value into plain text.

    Strings pass through; a list of content parts is reduced to its
    string parts and the ``text`` of dict parts; anything else (including
    ``None``) becomes ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return " ".join(pieces)
    return ""


def set_genai_span(
    span: Span,
    model: str,
    request_type: str,
    stream: bool,
    messages: list[dict],
    server_host: str,
    server_port: int,
    *,
    temperature: float | None = None,
    top_p: float | None = None,
    max_tokens: int | None = None,
    auth_header: str = "",
) -> None:
    """Set all gen_ai.* request attributes and the input span event.

    Args:
        span: Span to rename and annotate.
        model: Requested model; also used to infer the provider.
        request_type: Purpose label (as produced by ``classify_request``).
        stream: Whether the request is streaming.
        messages: OpenAI-format messages.  Entries without ``role`` or
            ``content``, or with non-string content, are tolerated.
        server_host: Backend host, recorded as ``server.address``.
        server_port: Backend port, recorded as ``server.port``.
        temperature: Recorded when not ``None``.
        top_p: Recorded when not ``None``.
        max_tokens: Recorded when not ``None``.
        auth_header: When non-empty, only a short SHA-256 prefix of it is
            recorded as ``enduser.id``.
    """
    provider = detect_provider(model)

    span.update_name(f"{request_type} · {model}")

    span.set_attribute("gen_ai.system", provider)
    span.set_attribute("gen_ai.provider.name", provider)
    span.set_attribute("gen_ai.operation.name", "chat")
    span.set_attribute("gen_ai.request.model", model)
    span.set_attribute("llm.request.type", "chat")
    span.set_attribute("llm.is_streaming", stream)
    span.set_attribute("llm.request.purpose", request_type)
    span.set_attribute("server.address", server_host)
    span.set_attribute("server.port", server_port)

    if temperature is not None:
        span.set_attribute("gen_ai.request.temperature", temperature)
    if top_p is not None:
        span.set_attribute("gen_ai.request.top_p", top_p)
    if max_tokens is not None:
        span.set_attribute("gen_ai.request.max_tokens", max_tokens)

    # Text of every user message, in order.  .get() plus _span_text keep
    # malformed or multi-part messages from raising KeyError/TypeError.
    user_msgs = [
        _span_text(msg.get("content"))
        for msg in messages
        if msg.get("role") == "user"
    ]

    if user_msgs:
        last_user = user_msgs[-1]

        # Indexed prompt attribute (last user message, untruncated)
        span.set_attribute("gen_ai.prompt.0.role", "user")
        span.set_attribute("gen_ai.prompt.0.content", last_user)

        # Span event (truncated copy of the same message)
        span.add_event("gen_ai.user.message", {
            "gen_ai.prompt.role": "user",
            "gen_ai.prompt.content": _truncate(last_user),
        })

    # Conversation fingerprint: auxiliary requests hash the start of the
    # first user message; ordinary chats hash all user messages joined.
    if request_type != "User Chat" and user_msgs:
        fp_input = user_msgs[0][:200]
    else:
        fp_input = "|".join(user_msgs)
    span.set_attribute("conversation.fingerprint",
                       hashlib.sha256(fp_input.encode()).hexdigest()[:12])

    if auth_header:
        span.set_attribute("enduser.id",
                           hashlib.sha256(auth_header.encode()).hexdigest()[:8])
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def set_genai_response(
    span: Span,
    content: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    finish_reason: str = "stop",
    response_id: str = "",
) -> None:
    """Record the completion on ``span`` as attributes plus one event.

    The full ``content`` goes into ``gen_ai.completion.0.content``; the
    ``gen_ai.assistant.message`` event carries a truncated copy.  Token
    counts are written under both the ``input/output_tokens`` and the
    ``prompt/completion_tokens`` attribute names.  ``gen_ai.response.id``
    is only set for a non-empty ``response_id``.
    """
    # Insertion order of this dict is the order the attributes are set in.
    response_attrs = {
        "gen_ai.completion.0.role": "assistant",
        "gen_ai.completion.0.content": content,
        "gen_ai.completion.0.finish_reason": finish_reason,
        "gen_ai.response.model": model,
        "gen_ai.response.finish_reasons": json.dumps([finish_reason]),
        "gen_ai.usage.input_tokens": prompt_tokens,
        "gen_ai.usage.output_tokens": completion_tokens,
        "gen_ai.usage.prompt_tokens": prompt_tokens,
        "gen_ai.usage.completion_tokens": completion_tokens,
    }
    if response_id:
        response_attrs["gen_ai.response.id"] = response_id

    for key, value in response_attrs.items():
        span.set_attribute(key, value)

    event_attrs = {
        "gen_ai.completion.role": "assistant",
        "gen_ai.completion.content": _truncate(content),
        "gen_ai.completion.finish_reason": finish_reason,
    }
    span.add_event("gen_ai.assistant.message", event_attrs)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def record_metrics(
    m: GenAIMetrics,
    attrs: dict,
    model: str,
    duration: float,
    prompt_tokens: int,
    completion_tokens: int,
    ttft: float | None = None,
    tpot: float | None = None,
) -> None:
    """Record the metrics for one finished request on the instruments in ``m``.

    Always records the operation duration and both token counts (tagged
    ``gen_ai.token.type`` = ``input`` / ``output``).  TTFT and TPOT are
    recorded only when given.  Throughput (output tokens / duration) is
    recorded only when both values are positive.  Finally the active
    request counter for ``model`` is decremented by one.
    """
    m.operation_duration.record(duration, attrs)

    token_counts = (("input", prompt_tokens), ("output", completion_tokens))
    for token_type, count in token_counts:
        m.token_usage.record(count, {**attrs, "gen_ai.token.type": token_type})

    optional_timings = ((m.ttft, ttft), (m.tpot, tpot))
    for instrument, value in optional_timings:
        if value is not None:
            instrument.record(value, attrs)

    if duration > 0 and completion_tokens > 0:
        m.token_throughput.record(completion_tokens / duration, attrs)

    m.active_requests.add(-1, {"model": model})
|