llumo 0.2.24__py3-none-any.whl → 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +5 -2
- llumo/callback.py +480 -0
- llumo/callbacks-0.py +258 -0
- llumo/client.py +36 -8
- llumo/llumoLogger.py +57 -0
- llumo/llumoSessionContext.py +364 -0
- llumo/openai.py +196 -50
- {llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/METADATA +1 -1
- llumo-0.2.25.dist-info/RECORD +20 -0
- llumo-0.2.24.dist-info/RECORD +0 -16
- {llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/WHEEL +0 -0
- {llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/top_level.txt +0 -0
llumo/llumoSessionContext.py
ADDED
@@ -0,0 +1,364 @@
+import contextvars
+import uuid
+from typing import Optional, List, Dict, Any
+from datetime import datetime, timezone
+import requests
+
+_ctxLogger = contextvars.ContextVar("ctxLogger")
+_ctxSessionID = contextvars.ContextVar("ctxSessionID")
+_ctxLlumoRun = contextvars.ContextVar("ctxLlumoRun")
+
+
+def getLogger():
+    return _ctxLogger.get()
+
+
+def getSessionID():
+    return _ctxSessionID.get()
+
+
+def getLlumoRun():
+    return _ctxLlumoRun.get()
+
+
+class LlumoSessionContext:
+    def __init__(self, logger, sessionID: Optional[str] = None):
+        self.sessionID = sessionID or str(uuid.uuid4().hex[:14])
+        self.logger = logger
+        self.apiKey = logger.apiKey
+        self.threadLogger = None
+        self.threadSessionID = None
+        self.threadLlumoRun = None
+
+    def start(self):
+        self.threadLogger = _ctxLogger.set(self.logger)
+        self.threadSessionID = _ctxSessionID.set(self.sessionID)
+
+    def end(self):
+        if self.threadLogger:
+            _ctxLogger.reset(self.threadLogger)
+        if self.threadSessionID:
+            _ctxSessionID.reset(self.threadSessionID)
+        if self.threadLlumoRun:
+            _ctxLlumoRun.reset(self.threadLlumoRun)
+
+    def __enter__(self):
+        self.start()
+        return self
+
+    def __exit__(self, excType, excVal, excTb):
+        self.end()
+
+    def startLlumoRun(self, runName: str):
+        LlumoRunID = str(uuid.uuid4().hex[:16])
+        currentTime = datetime(2025, 8, 2, 10, 20, 15, tzinfo=timezone.utc)
+        createdAt = currentTime.strftime("%Y-%m-%dT%H:%M:%S.000Z")
+        llumoRun = {
+            "logID": LlumoRunID,
+            "runName": runName,
+            "sessionID": self.sessionID,
+            "playgroundID": self.logger.getPlaygroundID(),
+            "workspaceID": self.logger.getWorkspaceID(),
+            "source": "SDK",
+            "rowID": "",
+            "columnID": "",
+            "email": self.logger.getUserEmailID(),
+            "createdAt": createdAt,
+            "createdBy": self.logger.getUserEmailID(),
+            "status": "SUCCESS",
+            "flow": [],
+            "latency": 4200,
+            "feedback": "",
+            "dump": "",
+            "steps": [],
+        }
+        self.threadLlumoRun = _ctxLlumoRun.set(llumoRun)
+
+    def endLlumoRun(self):
+        run = getLlumoRun()
+        if run is None:
+            return
+
+        # STEP 1: Sort steps by timestamp
+        steps = run.get("steps", [])
+        sorted_steps = sorted(steps, key=lambda s: s.get("timestamp", 0))
+
+        # STEP 2: Remove timestamp from each step before sending
+        clean_steps = [
+            {k: v for k, v in step.items() if k != "timestamp"} for step in sorted_steps
+        ]
+        run["steps"] = clean_steps
+
+        print(run["runName"])  # optional debug log
+
+        # STEP 3: Send the payload
+        url = "https://app.llumo.ai/api/create-debug-log"
+        headers = {
+            "Authorization": f"Bearer {self.apiKey}",
+            "Content-Type": "application/json",
+        }
+
+        try:
+            response = requests.post(url, headers=headers, json=run, timeout=10)
+            response.raise_for_status()
+            # print(response.json())
+        except requests.exceptions.Timeout:
+            print("Request timed out.")
+        except requests.exceptions.RequestException as e:
+            print(f"Request failed: {e}")
+
+        # Cleanup
+        if self.threadLlumoRun:
+            _ctxLlumoRun.reset(self.threadLlumoRun)
+            self.threadLlumoRun = None
+
+    def logStep(
+        self,
+        stepType: str,
+        stepName: str,
+        metadata: Optional[dict] = None,
+    ):
+        print(f"logged: {stepType}")
+        run = getLlumoRun()
+        if run is None:
+            raise RuntimeError("No active run to log steps.")
+
+        # add step
+        stepData = {
+            "stepType": stepType,
+            "stepName": stepName,
+            "status": metadata.get("status", "SUCCESS"),
+            "message": metadata.get("message", ""),
+            "metadata": metadata or {},
+            "timestamp": datetime.now(timezone.utc).timestamp(),  # OPTIONAL
+        }
+        run["steps"].append(stepData)
+        # set to context vars again in llumo run
+        self.threadLlumoRun = _ctxLlumoRun.set(run)
+
+    def logLlmStep(
+        self,
+        stepName: str,
+        model: str,
+        provider: str,
+        inputTokens: int,
+        outputTokens: int,
+        temperature: float,
+        promptTruncated: bool,
+        latencyMs: int,
+        query: str,
+        output: str,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "model": model,
+            "provider": provider,
+            "inputTokens": inputTokens,
+            "outputTokens": outputTokens,
+            "temperature": temperature,
+            "promptTruncated": promptTruncated,
+            "latencyMs": latencyMs,
+            "query": query,
+            "output": output,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("LLM", stepName, metadata)
+
+    def logRetrieverStep(
+        self,
+        stepName: str,
+        retrieverSource: str,
+        queryVectorType: str,
+        topK: int,
+        matchedIDs: List[str],
+        query: str,
+        latencyMs: int,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "retrieverSource": retrieverSource,
+            "queryVectorType": queryVectorType,
+            "topK": topK,
+            "matchedIDs": matchedIDs,
+            "query": query,
+            "latencyMs": latencyMs,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("RETRIEVER", stepName, metadata)
+
+    def logAgentStep(
+        self,
+        stepName: str,
+        agentType: str,
+        agentName: str,
+        numStepsTaken: int,
+        tools: List[str],
+        query: str,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "agentType": agentType,
+            "agentName": agentName,
+            "numStepsTaken": numStepsTaken,
+            "tools": tools,
+            "query": query,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("AGENT", stepName, metadata)
+
+    def logToolSelectorStep(
+        self,
+        stepName: str,
+        selectorType: str,
+        toolsRanked: List[Dict[str, Any]],
+        selectedTool: str,
+        reasoning: str,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "selectorType": selectorType,
+            "toolsRanked": toolsRanked,
+            "selectedTool": selectedTool,
+            "reasoning": reasoning,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("TOOL_SELECTOR", stepName, metadata)
+
+    def logToolStep(
+        self,
+        stepName: str,
+        toolName: str,
+        input: Dict[str, Any],
+        output: str,
+        latencyMs: int,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "toolName": toolName,
+            "input": input,
+            "output": output,
+            "latencyMs": latencyMs,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("TOOL", stepName, metadata)
+
+    def logEvalStep(
+        self,
+        stepName: str,
+        output: str,
+        context: str,
+        query: str,
+        # total 7 keys add 4 more
+        messageHistory: str,
+        tools: str,
+        intermediateSteps: str,
+        groundTruth: str,
+        analyticsScore: Dict[str, float],
+        reasoning: Dict[str, str],
+        classification: Dict[str, str],
+        evalLabel: Dict[str, str],
+        latencyMs: int,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "output": output,
+            "context": context,
+            "query": query,
+            "messageHistory": messageHistory,
+            "tools": tools,
+            "intermediateSteps": intermediateSteps,
+            "groundTruth": groundTruth,
+            "analyticsScore": analyticsScore,
+            "reasoning": reasoning,
+            "classification": classification,
+            "evalLabel": evalLabel,
+            "latencyMs": latencyMs,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("EVAL", stepName, metadata)
+
+    def logFunctionCallStep(
+        self,
+        stepName: str,
+        functionName: str,
+        argsPassed: Dict[str, Any],
+        output: Dict[str, Any],
+        callMode: str,
+        latencyMs: int,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "functionName": functionName,
+            "argsPassed": argsPassed,
+            "output": output,
+            "callMode": callMode,
+            "latencyMs": latencyMs,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("FUNCTION_CALL", stepName, metadata)
+
+    def logCompressionStep(
+        self,
+        stepName: str,
+        prompt: str,
+        promptTemplate: str,
+        inputs: Dict[str, Any],
+        compressedPrompt: str,
+        inputToken: int,
+        compressedToken: int,
+        outputToken: int,
+        output: str,
+        compressedOutput: str,
+        latencyMs: int,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "prompt": prompt,
+            "promptTemplate": promptTemplate,
+            "inputs": inputs,
+            "compressedPrompt": compressedPrompt,
+            "inputToken": inputToken,
+            "compressedToken": compressedToken,
+            "outputToken": outputToken,
+            "output": output,
+            "compressedOutput": compressedOutput,
+            "latencyMs": latencyMs,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("COMPRESSION", stepName, metadata)
+
+    def logCustomScriptStep(
+        self,
+        stepName: str,
+        inputs: Dict[str, Any],
+        script: str,
+        output: str,
+        latencyMs: int,
+        status: str,
+        message: str,
+    ):
+        metadata = {
+            "inputs": inputs,
+            "script": script,
+            "output": output,
+            "latencyMs": latencyMs,
+            "status": status,
+            "message": message,
+        }
+        self.logStep("CUSTOM_SCRIPT", stepName, metadata)
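llumo/llumoSessionContext.py is the core addition in 0.2.25: it parks a logger, a session ID, and a mutable run dict in contextvars, accumulates steps through logStep and the typed logLlmStep/logRetrieverStep/logAgentStep/... wrappers, and endLlumoRun sorts the steps by timestamp, strips the timestamps, and POSTs the run to https://app.llumo.ai/api/create-debug-log. Two quirks are visible in the source: startLlumoRun hardcodes createdAt (2025-08-02) and latency (4200) rather than measuring them, and logStep calls metadata.get(...) before its `metadata or {}` fallback, so calling it without a metadata dict raises AttributeError. A minimal usage sketch follows; the stub logger is hypothetical and implements only the attributes and methods the class actually touches:

from llumo.llumoSessionContext import LlumoSessionContext

class StubLogger:
    # Hypothetical stand-in for the real llumo logger object (not part of the SDK).
    apiKey = "llumo-api-key"
    def getPlaygroundID(self): return "pg-demo"
    def getWorkspaceID(self): return "ws-demo"
    def getUserEmailID(self): return "dev@example.com"

with LlumoSessionContext(StubLogger(), sessionID="demo-session") as session:
    session.startLlumoRun("checkout-flow")
    # metadata must be a dict: logStep dereferences it before the `or {}` fallback
    session.logStep("TOOL", "order-lookup", metadata={"status": "SUCCESS"})
    session.endLlumoRun()  # sorts steps, strips timestamps, POSTs the run payload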
llumo/openai.py
CHANGED
@@ -1,11 +1,27 @@
+import time
 from openai import OpenAI as OpenAIClient
 from .client import LlumoClient
+from .llumoSessionContext import LlumoSessionContext
+from .llumoLogger import LLUMOLogger
+
+
+# evaluation function that uses LlumoClient
+def performEvaluation(data, api_key=None, evals=["Response Correctness"], **kwargs):
+    try:
+        client = LlumoClient(api_key=api_key)
+        results = client.evaluateMultiple(
+            data,
+            evals=evals,
+            createExperiment=kwargs.get("createExperiment", False),
+            playgroundID=kwargs.get("playgroundID"),
+            prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
+            getDataFrame=False,
+        )
+        return results
+    except Exception as e:
+        print(f"Error in perform_evaluation: {e}")
+        raise
 
-# Dummy evaluation function that uses LlumoClient
-def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
-    client = LlumoClient(api_key=api_key)
-    results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
-    return results
 
 # Wrapper around ChatCompletion to allow custom fields like `.evaluation`
 class ChatCompletionWithEval:
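Relative to the old evaluate_multiple, performEvaluation forwards createExperiment and playgroundID out of **kwargs and re-raises on failure rather than returning silently; note that the workspaceID kwarg passed by the wrapper below lands in **kwargs but is never forwarded to evaluateMultiple. A standalone call could look like this sketch (placeholder data, shaped like the eval_input_* rows built in the wrapper):

rows = [
    {
        "query": "What is the capital of France?",
        "context": "France is a European country; its capital is Paris.",
        "output": "Paris",
    }
]
results = performEvaluation(rows, api_key="llumo-api-key", evals=["Response Correctness"])
print(results[0])  # first eval-result object, exactly how the wrapper indexes it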
@@ -22,57 +38,187 @@ class ChatCompletionWithEval:
     def __repr__(self):
         return repr(self._response)
 
-
-
+
+class OpenAI(OpenAIClient):
+    def __init__(self, api_key: str, session):
         super().__init__(api_key=api_key)
+        self.session = session
+        self.llumo_key = session.apiKey
 
         original_create = self.chat.completions.create
 
-
-
-
-
-
-
-
-
-
-
-        ""
+        def create_wrapper(*args, **kwargs):
+            context = kwargs.pop("context", None)
+            model = kwargs["model"]
+            create_experiment = kwargs.pop("createExperiment", False)
+
+            messages = kwargs.get("messages", [])
+            user_message = next(
+                (
+                    m.get("content")
+                    for m in reversed(messages)
+                    if m.get("role") == "user"
+                ),
+                "",
+            )
+
+            if not context or context.strip() == "":
+                context = user_message
+
+            # Get IDs from the session logger
+            playground_id = self.session.logger.getPlaygroundID()
+            workspace_id = self.session.logger.getWorkspaceID()
+
+            # Input Bias Evaluation
+            eval_input_bias = [
+                {
+                    "query": user_message,
+                    "context": context,
+                    "output": "",  # No output yet
+                }
+            ]
+            try:
+                start_time = time.time()
+                bias_evaluation_result = performEvaluation(
+                    eval_input_bias,
+                    api_key=self.llumo_key,
+                    evals=["Input Bias"],
+                    playgroundID=playground_id,
+                    workspaceID=workspace_id,
+                    createExperiment=create_experiment,
+                )
+                latency = int((time.time() - start_time) * 1000)
+                # Access the first result object
+                bias_evaluation = bias_evaluation_result[0]
+                # message = "-".join(
+                #     getattr(bias_evaluation, "edgeCases", {}).get("value", [])
+                # )
+                # self.session.logEvalStep(
+                #     stepName=f"EVAL-Input Bias",
+                #     output="",
+                #     context=context,
+                #     query=user_message,
+                #     messageHistory="",
+                #     tools="",
+                #     intermediateSteps="",
+                #     groundTruth="",
+                #     analyticsScore=getattr(bias_evaluation, "analyticsScore", {}),
+                #     reasoning=getattr(bias_evaluation, "reasoning", {}),
+                #     classification=getattr(bias_evaluation, "classification", {}),
+                #     evalLabel=getattr(bias_evaluation, "evalLabel", {}),
+                #     latencyMs=latency,
+                #     status="SUCCESS",
+                #     message=message,
+                # )
+            except Exception as e:
+                print(f"Input Bias evaluation failed: {e}")
+                self.session.logEvalStep(
+                    stepName=f"EVAL-FAILURE",
+                    output="",
+                    context=context,
+                    query=user_message,
+                    messageHistory="",
+                    tools="",
+                    intermediateSteps="",
+                    groundTruth="",
+                    analyticsScore={},
+                    reasoning={},
+                    classification={},
+                    evalLabel={},
+                    latencyMs=0,
+                    status="FAILURE",
+                    message="EVAL_ERROR",
                 )
 
-
-
-
-
-
-
-
-
-
-
-
-
+            start_time = time.time()
+            response = original_create(*args, **kwargs)
+            latency = int((time.time() - start_time) * 1000)
+            output_text = response.choices[0].message.content
+
+            self.session.logLlmStep(
+                stepName=f"LLM-{user_message[:30]}",
+                model=model,
+                provider="openai",
+                inputTokens=response.usage.prompt_tokens,
+                outputTokens=response.usage.completion_tokens,
+                temperature=kwargs.get("temperature", 0.0),
+                promptTruncated=False,
+                latencyMs=latency,
+                query=user_message,
+                output=output_text,
+                status="SUCCESS",
+                message="",
+            )
+
+            # Response Correctness Evaluation
+            eval_input_correctness = [
+                {
                     "query": user_message,
                     "context": context,
                     "output": output_text,
-                }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        #
-
-
-
+                }
+            ]
+            try:
+                start_time = time.time()
+                correctness_evaluation_result = performEvaluation(
+                    eval_input_correctness,
+                    api_key=self.llumo_key,
+                    evals=["Response Correctness"],
+                    playgroundID=playground_id,
+                    workspaceID=workspace_id,
+                    createExperiment=create_experiment,
+                )
+                latency = int((time.time() - start_time) * 1000)
+                # Access the first result object
+                correctness_evaluation = correctness_evaluation_result[0]
+                # message = "-".join(
+                #     getattr(correctness_evaluation, "edgeCases", {}).get("value", [])
+                # )
+                # self.session.logEvalStep(
+                #     stepName=f"EVAL-Response Correctness",
+                #     output=output_text,
+                #     context=context,
+                #     query=user_message,
+                #     messageHistory="",
+                #     tools="",
+                #     intermediateSteps="",
+                #     groundTruth="",
+                #     analyticsScore=getattr(
+                #         correctness_evaluation, "analyticsScore", {}
+                #     ),
+                #     reasoning=getattr(correctness_evaluation, "reasoning", {}),
+                #     classification=getattr(
+                #         correctness_evaluation, "classification", {}
+                #     ),
+                #     evalLabel=getattr(correctness_evaluation, "evalLabel", {}),
+                #     latencyMs=latency,
+                #     status="SUCCESS",
+                #     message=message,
+                # )
+            except Exception as e:
+                print(f"Response Correctness evaluation failed: {e}")
+                correctness_evaluation = None
+                self.session.logEvalStep(
+                    stepName=f"EVAL-FAILURE",
+                    output=output_text,
+                    context=context,
+                    query=user_message,
+                    messageHistory="",
+                    tools="",
+                    intermediateSteps="",
+                    groundTruth="",
+                    analyticsScore={},
+                    reasoning={},
+                    classification={},
+                    evalLabel={},
+                    latencyMs=0,
+                    status="FAILURE",
+                    message="EVAL_ERROR",
+                )
+
+            if correctness_evaluation is None:
+                return response
+
+            return ChatCompletionWithEval(response, correctness_evaluation)
+
+        self.chat.completions.create = create_wrapper
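The new OpenAI subclass leaves the client intact except for monkey-patching chat.completions.create. The wrapper pops the non-OpenAI kwargs (context, createExperiment), runs an Input Bias eval on the prompt, times the real completion and records it via logLlmStep, runs a Response Correctness eval on the output, and returns the response wrapped in ChatCompletionWithEval (or the bare response if that eval failed). A driving sketch, assuming LLUMOLogger can be constructed from an API key; its constructor lives in llumo/llumoLogger.py, which this diff does not show:

from llumo.llumoLogger import LLUMOLogger
from llumo.llumoSessionContext import LlumoSessionContext
from llumo.openai import OpenAI

logger = LLUMOLogger("llumo-api-key")  # constructor signature assumed, not shown in this diff
with LlumoSessionContext(logger) as session:
    session.startLlumoRun("qa-run")
    client = OpenAI(api_key="sk-...", session=session)
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "What does LLUMO do?"}],
        context="LLUMO is an LLM evaluation and observability platform.",  # popped by create_wrapper
    )
    print(resp.choices[0].message.content)  # delegated to the wrapped response object
    session.endLlumoRun()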
llumo-0.2.25.dist-info/RECORD
ADDED
@@ -0,0 +1,20 @@
+llumo/__init__.py,sha256=ZdFeOT5aDM1iA4VzQ8ryc0rxF3ihjhPO8aCRuw8t0zk,342
+llumo/callback.py,sha256=Pzg9Smqsu5G900YZjoFwqMY0TTP4jUizxllaP0TjKgk,20439
+llumo/callbacks-0.py,sha256=TEIOCWRvk2UYsTmBMBsnlgpqWvr-2y3a6d0w_e96NRM,8958
+llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+llumo/client.py,sha256=RKI8XIIafzMWX42gXBXAcMjtOzZngx1ebgGfXmNDa-w,69064
+llumo/exceptions.py,sha256=1OyhN9YL9LcyUPUsqYHq6Rret0udATZAwMVJaio2_Ec,2123
+llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
+llumo/helpingFuntions.py,sha256=B6FwUQ5f1v4FKrWCbYoGWMFdscOV_liuuhTgNQ3cdrk,27275
+llumo/llumoLogger.py,sha256=UW3eIQb5txneilx8FQnGf6t4LgP85NoIf5YECFDZShk,1912
+llumo/llumoSessionContext.py,sha256=J4oFCWcpksZ2sWgGJqzq2LjUwIG1OAQva_EAvNy9ACs,10373
+llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+llumo/openai.py,sha256=QyNMXiYTppaU_YjU6vU5UB0At3OiNntoDTQ0dszLN0g,8538
+llumo/sockets.py,sha256=pBDo-U65hMIMwKMwZQl3iBkEjISEt-9BkXxZTWfSHF4,6116
+llumo-0.2.25.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.25.dist-info/METADATA,sha256=HkIJH54gwQ-rAgxzgpiCWVvvn6tD0jNPYJqnTAoZOfA,1558
+llumo-0.2.25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.25.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.25.dist-info/RECORD,,
llumo-0.2.24.dist-info/RECORD
DELETED
@@ -1,16 +0,0 @@
-llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
-llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
-llumo/client.py,sha256=pCocD7v5dDuHKlWE6OqZrrEk4e_LWYuI_LgWaGT3E-g,67498
-llumo/exceptions.py,sha256=1OyhN9YL9LcyUPUsqYHq6Rret0udATZAwMVJaio2_Ec,2123
-llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
-llumo/helpingFuntions.py,sha256=B6FwUQ5f1v4FKrWCbYoGWMFdscOV_liuuhTgNQ3cdrk,27275
-llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
-llumo/openai.py,sha256=DGhEwQIJIIycGpw3hYQnyxdj6RFVpZ-gay-fZGqtkhU,3013
-llumo/sockets.py,sha256=pBDo-U65hMIMwKMwZQl3iBkEjISEt-9BkXxZTWfSHF4,6116
-llumo-0.2.24.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.24.dist-info/METADATA,sha256=IfYBa5UYiXuF595u13Qyi5MoQorHzxpfpUSPmciS7rM,1558
-llumo-0.2.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.24.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.24.dist-info/RECORD,,
{llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/WHEEL
File without changes
{llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/licenses/LICENSE
File without changes
{llumo-0.2.24.dist-info → llumo-0.2.25.dist-info}/top_level.txt
File without changes