kyber-runtime 0.1.0.tar.gz → 0.1.2.tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/PKG-INFO +2 -2
- kyber_runtime-0.1.2/kyber_runtime/__init__.py +4 -0
- kyber_runtime-0.1.2/kyber_runtime/config.py +4 -0
- kyber_runtime-0.1.2/kyber_runtime/llm.py +110 -0
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/kyber_runtime.egg-info/PKG-INFO +2 -2
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/kyber_runtime.egg-info/SOURCES.txt +3 -0
- kyber_runtime-0.1.2/kyber_runtime.egg-info/top_level.txt +1 -0
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/setup.py +2 -2
- kyber_runtime-0.1.0/kyber_runtime.egg-info/top_level.txt +0 -1
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/kyber_runtime.egg-info/dependency_links.txt +0 -0
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/kyber_runtime.egg-info/requires.txt +0 -0
- {kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/setup.cfg +0 -0
kyber_runtime-0.1.2/kyber_runtime/llm.py
@@ -0,0 +1,110 @@
+import time
+import uuid
+import json
+import logging
+from typing import Any, Dict, Optional
+from openai import OpenAI
+from pydantic import BaseModel, Field
+
+from .config import LLM_pricing
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("kyber.telemetry")
+
+class LLMCallContext(BaseModel):
+    execution_id: str
+    agent_id: str
+    tenant_id: str
+    circuit_id: Optional[str] = None
+
+def _calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
+    if model not in LLM_pricing:
+        return 0.0
+    pricing = LLM_pricing[model]
+    input_cost = (prompt_tokens / 1000) * pricing["input"]
+    output_cost = (completion_tokens / 1000) * pricing["output"]
+    return round(input_cost + output_cost, 6)
+
+def llm_call(
+    *,
+    model: str,
+    messages: list[Dict[str, str]],
+    purpose: str,
+    context: LLMCallContext,
+    temperature: float = 0.7,
+    max_retries: int = 3,
+    client: Optional[OpenAI] = None
+) -> Dict[str, Any]:
+
+    if not purpose:
+        raise ValueError("Purpose must be provided for LLM call telemetry.")
+
+    call_id = str(uuid.uuid4())
+    start_time = time.time()
+    retry_count = 0
+    client = client or OpenAI()
+
+    while retry_count <= max_retries:
+        try:
+            response = client.chat.completions.create(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                timeout=30.0
+            )
+
+
+            usage = response.usage
+            latency_ms = int((time.time() - start_time) * 1000)
+            cost_usd = _calculate_cost(model, usage.prompt_tokens, usage.completion_tokens)
+
+            telemetry = {
+                "call_id": call_id,
+                "execution_id": context.execution_id,
+                "agent_id": context.agent_id,
+                "tenant_id": context.tenant_id,
+                "circuit_id": context.circuit_id,
+                "model": model,
+                "purpose": purpose,
+                "latency_ms": latency_ms,
+                "prompt_tokens": usage.prompt_tokens,
+                "completion_tokens": usage.completion_tokens,
+                "total_tokens": usage.total_tokens,
+                "llm_cost_usd": cost_usd,
+                "success": True,
+                "error_message": None,
+                "temperature": temperature,
+                "retry_count": retry_count,
+                "timestamp": time.time()
+            }
+
+            logger.info("LLM Call Telemetry:", extra={"telemetry": telemetry})
+
+            return json.loads(response.model_dump_json())
+        except Exception as e:
+            retry_count += 1
+            if retry_count > max_retries:
+                latency_ms = int((time.time() - start_time) * 1000)
+                telemetry = {
+                    "call_id": call_id,
+                    "execution_id": context.execution_id,
+                    "agent_id": context.agent_id,
+                    "tenant_id": context.tenant_id,
+                    "circuit_id": context.circuit_id,
+                    "model": model,
+                    "purpose": purpose,
+                    "latency_ms": latency_ms,
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0,
+                    "llm_cost_usd": 0.0,
+                    "success": False,
+                    "error_message": str(e),
+                    "temperature": temperature,
+                    "retry_count": retry_count,
+                    "timestamp": time.time()
+                }
+
+                logger.error("LLM Call Telemetry:", extra={"telemetry": telemetry})
+                raise e
+            time.sleep(0.5 * (2 ** (retry_count - 1)))  # Exponential backoff
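
A note on the cost helper: _calculate_cost looks up per-model rates in LLM_pricing from config.py, whose four added lines are not shown in this diff. A minimal sketch of the shape that table would need, with hypothetical model names and rates, given the (tokens / 1000) * rate arithmetic above:

    # config.py (sketch only; the actual 4-line config.py body is not shown in this diff)
    # Maps model name -> USD rate per 1,000 tokens, keyed "input" and "output".
    LLM_pricing = {
        "gpt-4o": {"input": 0.005, "output": 0.015},           # hypothetical rates
        "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},   # hypothetical rates
    }

Any model missing from the table is costed at 0.0 rather than raising, per the early return in _calculate_cost.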
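
For orientation, a call into the new module might look like the sketch below. The identifiers and model name are illustrative, not taken from the package, and since the four added lines of __init__.py are not shown here, the import is written against kyber_runtime.llm directly:

    from kyber_runtime.llm import llm_call, LLMCallContext

    ctx = LLMCallContext(
        execution_id="exec-123",   # illustrative values
        agent_id="agent-7",
        tenant_id="acme",
    )
    result = llm_call(
        model="gpt-4o-mini",                               # any model priced in LLM_pricing
        messages=[{"role": "user", "content": "Hello"}],
        purpose="smoke-test",                              # empty purpose raises ValueError
        context=ctx,
    )

On success, llm_call returns the completion as a plain dict (json.loads(response.model_dump_json())); after max_retries failures it logs the error telemetry and re-raises the last exception.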
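
One operational caveat: passing extra={"telemetry": telemetry} attaches the dict as a telemetry attribute on the LogRecord, which the default formatter installed by logging.basicConfig never prints. A consumer that wants the telemetry in its log output would need something like this hypothetical formatter (not part of the package):

    import logging

    class TelemetryFormatter(logging.Formatter):
        # Appends the telemetry dict, when present, to the formatted line.
        def format(self, record):
            base = super().format(record)
            telemetry = getattr(record, "telemetry", None)
            return f"{base} {telemetry}" if telemetry else base

    handler = logging.StreamHandler()
    handler.setFormatter(TelemetryFormatter("%(levelname)s %(name)s %(message)s"))
    logging.getLogger("kyber.telemetry").addHandler(handler)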
kyber_runtime-0.1.2/kyber_runtime.egg-info/top_level.txt
@@ -0,0 +1 @@
+kyber_runtime
{kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/setup.py
@@ -1,8 +1,8 @@
 from setuptools import setup, find_packages
 
 setup(
-    name="
-    version="0.1.
+    name="kyber_runtime",
+    version="0.1.2",
     author="Your Team",
     description="Kyber Agent Runtime SDK for instrumented LLM calls",
     long_description_content_type="text/markdown",
kyber_runtime-0.1.0/kyber_runtime.egg-info/top_level.txt
@@ -1 +0,0 @@
-
{kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/kyber_runtime.egg-info/dependency_links.txt: File without changes
{kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/kyber_runtime.egg-info/requires.txt: File without changes
{kyber_runtime-0.1.0 → kyber_runtime-0.1.2}/setup.cfg: File without changes