llumo 0.2.29__tar.gz → 0.2.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llumo-0.2.29/llumo.egg-info → llumo-0.2.31}/PKG-INFO +1 -1
- {llumo-0.2.29 → llumo-0.2.31}/llumo/callback.py +123 -31
- {llumo-0.2.29 → llumo-0.2.31}/llumo/client.py +213 -228
- {llumo-0.2.29 → llumo-0.2.31}/llumo/helpingFuntions.py +2 -2
- {llumo-0.2.29 → llumo-0.2.31}/llumo/llumoSessionContext.py +99 -30
- {llumo-0.2.29 → llumo-0.2.31}/llumo/openai.py +11 -6
- {llumo-0.2.29 → llumo-0.2.31/llumo.egg-info}/PKG-INFO +1 -1
- {llumo-0.2.29 → llumo-0.2.31}/LICENSE +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/MANIFEST.in +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/README.md +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/__init__.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/callbacks-0.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/chains.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/exceptions.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/execution.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/functionCalling.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/google.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/llumoLogger.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/models.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo/sockets.py +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/SOURCES.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/dependency_links.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/requires.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/llumo.egg-info/top_level.txt +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/setup.cfg +0 -0
- {llumo-0.2.29 → llumo-0.2.31}/setup.py +0 -0
{llumo-0.2.29 → llumo-0.2.31}/llumo/callback.py

@@ -11,11 +11,12 @@ import re
 
 
 class LlumoCallbackHandler(BaseCallbackHandler):
-    def __init__(self, session: LlumoSessionContext = None):
+    def __init__(self, session: LlumoSessionContext = None,agentType = "react_agent"):
         if session is None:
             raise ValueError("LlumoSessionContext is required")
 
         self.sessionLogger = session
+        self.agentType = agentType
 
         # Initialize timing and state variables
         self.llmStartTime = None
@@ -26,6 +27,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
 
         # Initialize tracking variables
         self.prompt = ""
+        self.searchQuery = ""
         self.currentToolName = None
         self.currentToolInput = None
         self.currentAgentName = None
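The new `agentType` argument defaults to `"react_agent"` and is echoed into every agent step the handler logs. A minimal construction sketch; the `LlumoSessionContext` setup shown here is an assumption, not a documented call:

```python
# Hypothetical wiring: only LlumoCallbackHandler's signature is taken from the
# diff; the session constructor arguments are illustrative placeholders.
from llumo.callback import LlumoCallbackHandler
from llumo.llumoSessionContext import LlumoSessionContext

session = LlumoSessionContext()  # assumed setup; consult the package docs
handler = LlumoCallbackHandler(session=session, agentType="react_agent")

# Omitting the session fails fast, per the guard in __init__:
# LlumoCallbackHandler()  ->  ValueError: LlumoSessionContext is required
```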
@@ -45,8 +47,14 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         self.currentObservation = ""
         self.isAgentExecution = False
 
+
+
     def on_chain_start(self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any) -> None:
         """Called when a chain starts - this includes agent execution"""
+        # print("ON CHAIN START: ",inputs)
+        # print("ON CHAIN START: serialized",serialized)
+        # print("ON CHAIN START: kwargs",kwargs)
+
         try:
             self.prompt = inputs.get("input", "")
             self.chainStartTime = time.time()
@@ -62,6 +70,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             self.currentToolInput = None
             self.hasErrors = False
             self.lastError = None
+            self.toolDescription = {}
 
             # Dynamically detect agent name from serialized data
             if serialized is not None:
@@ -91,6 +100,26 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         except Exception as e:
             print(f"[ERROR] in on_chain_start: {e}")
 
+        try:
+            self.sessionLogger.logQueryStep(
+                stepName = "Query Invocation",
+                model = "unknown",
+                provider = "unknown",
+                inputTokens = round(len(self.prompt.split()) * 1.5),
+                query = self.prompt,
+                status = "SUCCESS"
+            )
+        except Exception as e:
+            self.sessionLogger.logQueryStep(
+                stepName="Query Invocation",
+                model="unknown",
+                provider="unknown",
+                inputTokens=0,
+                query="",
+                status="FAILURE"
+            )
+            print(f"[ERROR] Failed to log user input: {e}")
+
     def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
         """Called when a chain ends"""
         try:
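Note that `inputTokens` here is not a tokenizer count but a rough heuristic of about 1.5 tokens per whitespace-separated word. The estimate in isolation:

```python
# The handler's token heuristic: ~1.5 tokens per word, rounded.
def estimate_tokens(prompt: str) -> int:
    return round(len(prompt.split()) * 1.5)

print(estimate_tokens("What is LangChain?"))  # 4 (3 words * 1.5 = 4.5, banker's rounding)
print(estimate_tokens(""))                    # 0
```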
@@ -98,15 +127,16 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             # Use logAgentStep for final completion
             self.sessionLogger.logAgentStep(
                 stepName="Agent Execution Completed",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=self.toolsUsed,
                 query=self.prompt,
                 status="SUCCESS",
-                message=f"Final output: {outputs['output']}. ReAct steps: {json.dumps(self.reactSteps)}",
+                # message=f"Final output: {outputs['output']}. ReAct steps: {json.dumps(self.reactSteps)}",
             )
 
+
             # Reset execution state after chain ends
             self.isAgentExecution = False
 
@@ -118,6 +148,14 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         self.llmStartTime = time.time()
         self.stepTime = time.time()
 
+        if self.prompt == "":
+            match = re.search(r"Human:\s*(.*)",prompts[0], re.DOTALL)
+            if match:
+                user_question = match.group(1).strip()
+                self.prompt = user_question  # 👉 What is LangChain?
+            else:
+                self.prompt = ""
+
         # Dynamically get model info
         model = "unknown"
         if serialized and "kwargs" in serialized:
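When `on_llm_start` fires without a captured prompt, the handler recovers the user question from the rendered chat prompt via the `Human:` marker. The same regex, standalone:

```python
import re

# Typical rendered prompt passed to on_llm_start as prompts[0].
rendered = "System: You are a helpful assistant.\nHuman: What is LangChain?"

match = re.search(r"Human:\s*(.*)", rendered, re.DOTALL)
print(match.group(1).strip() if match else "")  # -> What is LangChain?
```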
@@ -204,7 +242,6 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         # Parse ReAct reasoning from LLM output if we're in agent execution
         if self.isAgentExecution and output:
             self._parse_react_reasoning(output)
-
         try:
             self.sessionLogger.logLlmStep(
                 stepName="LLM Call Completed",
@@ -212,17 +249,62 @@ class LlumoCallbackHandler(BaseCallbackHandler):
                 provider=self.llmProvider,
                 inputTokens=int(input_tokens),
                 outputTokens=int(output_tokens),
-                temperature=float(kwargs.get("temperature", 0.7)),
-                promptTruncated=False,
+                # temperature=float(kwargs.get("temperature", 0.7)),
+                # promptTruncated=False,
                 latencyMs=duration_ms,
-
+                prompt=str(self.prompt),
                 output=output,
                 status=status,
-                message=error_message if status == "ERROR" else "",
+                # message=error_message if status == "ERROR" else "",
             )
+
         except Exception as e:
             print(f"[ERROR] Failed to log LLM end: {e}")
 
+    def on_retriever_start(self, serialized, query, run_id, parent_run_id=None, **kwargs):
+        self.prompt = query
+        self.searchQuery = query
+
+    def on_retriever_end(self, documents, run_id, parent_run_id=None, **kwargs):
+
+        try:
+            chunkSize = len(documents[0].page_content) if documents and documents[0].page_content else 0
+        except Exception:
+            chunkSize = 0
+
+        source = ( kwargs.get("metadata", {}).get("source") or kwargs.get("tags") or "unknown")
+
+        try:
+            self.sessionLogger.logRetrieverStep(
+                stepName="Context Retrieval Complete",
+                retrieverSource = str(source),
+                topK = len(documents),
+                chunkSize = chunkSize,
+                context = [doc.page_content for doc in documents],
+                searchQuery = self.prompt if self.prompt != "" else self.searchQuery,
+                latencyMs = 120,  # mock latency, replace with real timing if needed
+                status = "SUCCESS"
+            )
+        except Exception as e:
+            print(f"[ERROR] Failed to log chain output: {e}")
+
+    def on_retriever_error(self, error, run_id, parent_run_id=None, **kwargs):
+
+        try:
+            self.sessionLogger.logRetrieverStep(
+                stepName="Context Retrieval Error",
+                retrieverSource = kwargs.get("metadata", {}).get("source", "unknown"),
+                topK = 0,
+                chunkSize = 0,
+                context = [],
+                searchQuery = self.prompt if self.prompt != "" else self.searchQuery,
+                latencyMs = 0,  # mock latency, replace with real timing if needed
+                status = "FAILURE"
+            )
+        except Exception as e:
+            print(f"[ERROR] Failed to log chain output: {e}")
+
+
     def _parse_react_reasoning(self, llm_output: str):
         """Parse ReAct reasoning pattern from LLM output"""
         try:
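The new retriever hooks take `chunkSize` from the first document and fall back to the last captured query. The same probe against stub documents (`Document` here is a stand-in for LangChain's class):

```python
# Stand-in for langchain's Document; only page_content matters here.
class Document:
    def __init__(self, page_content: str):
        self.page_content = page_content

documents = [Document("LangChain is a framework..."), Document("It chains LLM calls...")]

# Mirrors on_retriever_end's chunk-size probe and logged fields.
try:
    chunkSize = len(documents[0].page_content) if documents and documents[0].page_content else 0
except Exception:
    chunkSize = 0

print(chunkSize)                                 # length of the first chunk
print(len(documents))                            # logged as topK
print([doc.page_content for doc in documents])   # logged as context
```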
@@ -265,6 +347,9 @@ class LlumoCallbackHandler(BaseCallbackHandler):
 
     def on_tool_start(self, serialized: Dict[str, Any], input_str: str, **kwargs: Any) -> None:
         """Called when a tool starts executing"""
+        # print("ON TOOL START: ",serialized)
+        # print("ON TOOL START: ",kwargs)
+
         self.toolStartTime = time.time()
         self.stepTime = time.time()
 
@@ -272,6 +357,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
         self.currentToolName = (serialized.get("name") or
                                 serialized.get("_type") or
                                 "unknown")
+        self.currentToolDescription = serialized.get("description","No description found")
 
         # Handle the case where input_str is "None" or None
         if input_str == "None" or input_str is None:
@@ -316,13 +402,13 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             self.sessionLogger.logToolStep(
                 stepName="Tool Execution Completed",
                 toolName=self.currentToolName or "unknown",
+                description = self.currentToolDescription,
                 input=self.currentToolInput or {"input": ""},
                 output=output_str,
                 latencyMs=duration_ms,
                 status="SUCCESS",
-                message="",
+                # message="",
             )
-
             print(f"[DEBUG] Tool completed: {self.currentToolName} -> {output_str}")
 
         except Exception as e:
@@ -331,7 +417,7 @@ class LlumoCallbackHandler(BaseCallbackHandler):
     def on_agent_action(self, action: AgentAction, **kwargs: Any) -> None:
         """Called when an agent takes an action"""
         self.agentsSteps += 1
-        print("ON AGENT ACTION: ", action)
+        # print("ON AGENT ACTION: ", action)
 
         try:
             # Dynamically extract information from action
@@ -352,52 +438,54 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             })
 
             # Log the agent action step using logAgentStep
-            current_status = "
+            current_status = "FAILURE" if self.hasErrors else "SUCCESS"
             reasoning_text = self.currentThought if self.currentThought else "No reasoning captured"
 
             self.sessionLogger.logAgentStep(
                 stepName=f"Agent Action Step {self.agentsSteps}",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=[tool_name],
                 query=self.prompt,
                 status=current_status,
-                message=f"Executing {tool_name} with input: {tool_input}. Reasoning: {reasoning_text}",
+                # message=f"Executing {tool_name} with input: {tool_input}. Reasoning: {reasoning_text}",
             )
 
+
+
         except Exception as e:
             print(f"[ERROR] Failed to log agent action: {e}")
 
     def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
         """Called when an agent completes execution"""
-        print("ON AGENT FINISH:", finish)
+        # print("ON AGENT FINISH:", finish)
         # We don't need to log anything here since the final result is already logged in on_chain_end
         pass
 
     def on_agent_error(self, error: Exception, **kwargs: Any) -> None:
         """Called when an agent encounters an error"""
-        print("ITS A AGENT ERROR:", error)
+        # print("ITS A AGENT ERROR:", error)
         self.hasErrors = True
         self.lastError = str(error)
 
         try:
             self.sessionLogger.logAgentStep(
                 stepName="Agent Execution Error",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=self.toolsUsed,
                 query=self.prompt,
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
         except Exception as e:
             print(f"[ERROR] Failed to log agent error: {e}")
 
     def on_tool_error(self, error: Exception, **kwargs: Any) -> None:
         """Called when a tool encounters an error"""
-
+
         self.hasErrors = True
         self.lastError = str(error)
 
@@ -410,18 +498,19 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             self.sessionLogger.logToolStep(
                 stepName="Tool Execution Failed",
                 toolName=self.currentToolName or "unknown",
+                description=self.currentToolDescription,
                 input=self.currentToolInput or {"input": ""},
                 output="",
                 latencyMs=0,
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
         except Exception as e:
             print(f"[ERROR] Failed to log tool error: {e}")
 
     def on_chain_error(self, error: Exception, **kwargs: Any) -> None:
         """Called when a chain encounters an error"""
-        print("ITS A CHAIN ERROR:", error)
+        # print("ITS A CHAIN ERROR:", error)
         self.hasErrors = True
         self.lastError = str(error)
 
@@ -430,14 +519,17 @@ class LlumoCallbackHandler(BaseCallbackHandler):
             # Use logAgentStep for agent-related chain errors
             self.sessionLogger.logAgentStep(
                 stepName="Agent Chain Error",
-                agentType=
+                agentType=self.agentType,
                 agentName=self.currentAgentName or "unknown",
                 numStepsTaken=self.agentsSteps,
                 tools=self.toolsUsed,
                 query=self.prompt,
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
+
+
+
         else:
             # Use logLlmStep for general chain errors
             self.sessionLogger.logLlmStep(
@@ -446,13 +538,13 @@ class LlumoCallbackHandler(BaseCallbackHandler):
                 provider=self.llmProvider,
                 inputTokens=0,
                 outputTokens=0,
-                temperature=0.0,
-                promptTruncated=False,
+                # temperature=0.0,
+                # promptTruncated=False,
                 latencyMs=0,
-
+                prompt=self.prompt,
                 output="",
-                status="
-                message=str(error),
+                status="FAILURE",
+                # message=str(error),
             )
         except Exception as e:
             print(f"[ERROR] Failed to log chain error: {e}")
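Together these hooks trace chain, LLM, tool, retriever, and agent events into one Llumo session. A hedged end-to-end sketch; `config={"callbacks": [...]}` is standard LangChain callback plumbing, while the agent itself is a placeholder:

```python
# Sketch: reuse the session/handler from the construction example above.
# agent_executor is whatever LangChain agent you already run; any runnable
# or AgentExecutor accepts the handler through its invoke config.
result = agent_executor.invoke(
    {"input": "What is LangChain?"},
    config={"callbacks": [handler]},   # handler: LlumoCallbackHandler
)
# Each run then logs: Query Invocation -> LLM/tool/retriever steps ->
# "Agent Execution Completed" (or FAILURE steps on error).
```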
{llumo-0.2.29 → llumo-0.2.31}/llumo/client.py

@@ -29,10 +29,7 @@ fetchUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
 )
 socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
-
-# "workspaceID":"c9191fdf33bdd7838328c1a0",
-# "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
-# }
+
 validateUrl = "https://app.llumo.ai/api/workspace-details"
 socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
 
@@ -79,6 +76,7 @@ class LlumoClient:
         # Try to parse JSON
         try:
             data = response.json()
+            # print(data)
         except ValueError as e:
             print(f"JSON parsing error: {str(e)}")
             # print(f"Response content that could not be parsed: {response.text[:1000]}...")
@@ -93,7 +91,9 @@ class LlumoClient:
         self.workspaceID = data["data"]["data"].get("workspaceID")
         self.evalDefinition = data["data"]["data"]["analyticsMapping"]
         self.socketToken = data["data"]["data"].get("token")
-        self.
+        # print(self.socketToken)
+        self.hasSubscribed = data["data"]["data"].get("hasSubscr"
+                                                      "ibed", False)
         self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
         self.subscriptionEndDate = data["data"]["data"].get(
             "subscriptionEndDate", None
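The oddly split `get("hasSubscr" "ibed", False)` is valid Python: adjacent string literals are concatenated at compile time, so the key is just `"hasSubscribed"`. A quick check:

```python
# Adjacent string literals fuse into one at compile time.
key = "hasSubscr" "ibed"
assert key == "hasSubscribed"

data = {"hasSubscribed": True}
print(data.get("hasSubscr" "ibed", False))  # True
```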
@@ -191,7 +191,7 @@ class LlumoClient:
     def postBatch(self, batch, workspaceID):
         payload = {
             "batch": json.dumps(batch),
-            "runType": "
+            "runType": "FULL_EVAL_RUN",
             "workspaceID": workspaceID,
         }
         # socketToken here if the "JWD" token
@@ -204,6 +204,7 @@ class LlumoClient:
             response = requests.post(postUrl, json=payload, headers=headers)
             # print(f"Post API Status Code: {response.status_code}")
             # print(response.text)
+            # print(response.status_code)
 
         except Exception as e:
             print(f"Error in posting batch: {e}")
@@ -644,7 +645,7 @@ class LlumoClient:
         self.socket.listenForResults(
             min_wait=20,
             max_wait=timeout,
-            inactivity_timeout=
+            inactivity_timeout=50,
             expected_results=None,
         )
 
@@ -701,13 +702,11 @@ class LlumoClient:
         data,
         evals: list,
         prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
-        outputColName="output",
-        createExperiment: bool = False,
         getDataFrame: bool = False,
         _tocheck=True,
     ):
-        if hasattr(self, "startLlumoRun"):
-
+        # if hasattr(self, "startLlumoRun"):
+        #     self.startLlumoRun(runName="evaluateMultiple")
         if isinstance(data, dict):
             data = [data]
         elif not isinstance(data, list):
@@ -743,9 +742,9 @@ class LlumoClient:
         listener_thread = threading.Thread(
             target=self.socket.listenForResults,
             kwargs={
-                "min_wait":
+                "min_wait": 20,
                 "max_wait": timeout,
-                "inactivity_timeout":
+                "inactivity_timeout": 35,
                 "expected_results": expectedResults,
             },
             daemon=True,
@@ -753,148 +752,152 @@ class LlumoClient:
         listener_thread.start()
         self.validateApiKey(evalName=evals[0])
         activePlayground = self.playgroundID
-        for evalName
-
+        # print(f"\n======= Running evaluation for: {evalName} =======")
+
+        # Validate API and dependencies
+        # self.validateApiKey(evalName=evals[0])
+        customAnalytics = getCustomAnalytics(self.workspaceID)
+        # metricDependencies = checkDependency(
+        #     evalName,
+        #     list(dataframe.columns),
+        #     tocheck=_tocheck,
+        #     customevals=customAnalytics,
+        # )
+        # if not metricDependencies["status"]:
+        #     raise LlumoAIError.dependencyError(metricDependencies["message"])
 
-
-
-
-
-
-
-
-
-
-
-
+        # evalDefinition = self.evalDefinition[evalName]["definition"]
+        model = "GPT_4"
+        provider = "OPENAI"
+        evalType = "LLM"
+        workspaceID = self.workspaceID
+        email = self.email
+        # categories = self.categories
+        # evaluationStrictness = self.evaluationStrictness
+        # grammarCheckOutput = self.grammarCheckOutput
+        # insightLength = self.insightsLength
+        # numJudges = self.numJudges
+        # penaltyBonusInstructions = self.penaltyBonusInstructions
+        # probableEdgeCases = self.probableEdgeCases
+        # fieldMapping = self.fieldMapping
 
-
-
-
-
-
-
-
-
-
-
-        numJudges = self.numJudges
-        penaltyBonusInstructions = self.penaltyBonusInstructions
-        probableEdgeCases = self.probableEdgeCases
-        fieldMapping = self.fieldMapping
+        userHits = checkUserHits(
+            self.workspaceID,
+            self.hasSubscribed,
+            self.trialEndDate,
+            self.subscriptionEndDate,
+            self.hitsAvailable,
+            len(dataframe),
+        )
+        if not userHits["success"]:
+            raise LlumoAIError.InsufficientCredits(userHits["message"])
 
-
-
-
-
-
-
-
+        currentBatch = []
+        for index, row in dataframe.iterrows():
+            tools = [row["tools"]] if "tools" in dataframe.columns else []
+            groundTruth = row.get("groundTruth", "")
+            messageHistory = (
+                [row["messageHistory"]]
+                if "messageHistory" in dataframe.columns
+                else []
             )
-
-
+            promptTemplate = prompt_template
+            keys = re.findall(r"{{(.*?)}}", promptTemplate)
 
-
-
-            tools = [row["tools"]] if "tools" in dataframe.columns else []
-            groundTruth = row.get("groundTruth", "")
-            messageHistory = (
-                [row["messageHistory"]]
-                if "messageHistory" in dataframe.columns
-                else []
-            )
-            promptTemplate = prompt_template
-            keys = re.findall(r"{{(.*?)}}", promptTemplate)
+            if not all([ky in dataframe.columns for ky in keys]):
+                raise LlumoAIError.InvalidPromptTemplate()
 
-
-
+            inputDict = {key: row[key] for key in keys if key in row}
+            # output = row.get(outputColName, "")
+            output = row.get("output","")
+            intermediateSteps = row.get("intermediateSteps", "")
 
-
-
-            intermediateSteps = row.get("intermediateSteps", "")
+            rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+            columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
 
-
-
+            compoundKey = f"{rowID}-{columnID}-{columnID}"
+            # rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
+            rowIdMapping[compoundKey] = {"index": index}
 
-            compoundKey = f"{rowID}-{columnID}-{columnID}"
-            rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                },
-                "categories": categories,
-                "evaluationStrictness": evaluationStrictness,
-                "grammarCheckOutput": grammarCheckOutput,
-                "insightLength": insightLength,
-                "numJudges": numJudges,
-                "penaltyBonusInstructions": penaltyBonusInstructions,
-                "probableEdgeCases": probableEdgeCases,
-                "model": model,
-                "provider": provider,
+            templateData = {
+                "processID": getProcessID(),
+                "socketID": socketID,
+                "rowID": rowID,
+                "columnID": columnID,
+                "processType": "FULL_EVAL_RUN",
+                "evalType": "LLM",
+                "workspaceID": workspaceID,
+                "email": email,
+                "playgroundID": activePlayground,
+                "source": "SDK",
+                "processData": {
+                    # "analyticsName": evalName,
+                    # "definition": evalDefinition,
+                    "executionDependency": {
+                        "query": "",
+                        "context": "",
+                        "output": output,
+                        "tools": tools,
+                        "groundTruth": groundTruth,
+                        "messageHistory": messageHistory,
+                        "intermediateSteps": intermediateSteps,
                     },
-                "
-                "
-                "
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    promptTemplate, tempObj
-                )
-            else:
-                query += f" {key}: {value}, "
+                    "evallist":evals,
+                    # "model": model,
+                    # "provider": provider,
+                    "sessionID":self.sessionID
+                    # "categories": categories,
+                    # "evaluationStrictness": evaluationStrictness,
+                    # "grammarCheckOutput": grammarCheckOutput,
+                    # "insightLength": insightLength,
+                    # "numJudges": numJudges,
+                    # "penaltyBonusInstructions": penaltyBonusInstructions,
+                    # "probableEdgeCases": probableEdgeCases,
+                },
+                "type": "FULL_EVAL_RUN",
+                # "kpi": evalName,
+                # "fieldMappig": fieldMapping,
+            }
 
-
-
+            query = ""
+            context = ""
+            for key, value in inputDict.items():
+                if isinstance(value, str):
+                    length = len(value.split()) * 1.5
+                    if length > 50:
                         context += f" {key}: {value}, "
+                    else:
+                        if promptTemplate:
+                            tempObj = {key: value}
+                            promptTemplate = getInputPopulatedPrompt(
+                                promptTemplate, tempObj
+                            )
+                        else:
+                            query += f" {key}: {value}, "
 
-
-
-
+            if not context.strip():
+                for key, value in inputDict.items():
+                    context += f" {key}: {value}, "
+
+            templateData["processData"]["executionDependency"][
+                "context"
+            ] = context.strip()
+            templateData["processData"]["executionDependency"][
+                "query"
+            ] = query.strip()
+            if promptTemplate and not query.strip():
                 templateData["processData"]["executionDependency"][
                     "query"
-            ] =
-            if promptTemplate and not query.strip():
-                templateData["processData"]["executionDependency"][
-                    "query"
-                ] = promptTemplate
-
-            currentBatch.append(templateData)
-            if len(currentBatch) == 10:
-                self.allBatches.append(currentBatch)
-                currentBatch = []
+                ] = promptTemplate
 
-
+            currentBatch.append(templateData)
+            if len(currentBatch) == 10:
                 self.allBatches.append(currentBatch)
+                currentBatch = []
+
+        if currentBatch:
+            self.allBatches.append(currentBatch)
 
         for batch in tqdm(
             self.allBatches,
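Rows are flushed to `postBatch` in groups of ten, with a final partial batch appended after the loop. The same chunking pattern in isolation:

```python
# Mirrors the batching loop: flush every 10 items, then the remainder.
items = list(range(23))
allBatches, currentBatch = [], []

for item in items:
    currentBatch.append(item)
    if len(currentBatch) == 10:
        allBatches.append(currentBatch)
        currentBatch = []

if currentBatch:
    allBatches.append(currentBatch)

print([len(b) for b in allBatches])  # [10, 10, 3]
```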
@@ -905,7 +908,8 @@ class LlumoClient:
     ):
         try:
             self.postBatch(batch=batch, workspaceID=workspaceID)
-            time.sleep(
+            time.sleep(2)
+            # print(batch)
         except Exception as e:
             print(f"Error posting batch: {e}")
             raise
@@ -921,8 +925,8 @@ class LlumoClient:
         receivedRowIDs = {key for item in rawResults for key in item.keys()}
         expectedRowIDs = set(rowIdMapping.keys())
         missingRowIDs = expectedRowIDs - receivedRowIDs
-        # print("All expected keys:",
-        # print("All received keys:",
+        # print("All expected keys:", expectedRowIDs)
+        # print("All received keys:", receivedRowIDs)
         # print("Missing keys:", len(missingRowIDs))
         missingRowIDs = list(missingRowIDs)
         if len(missingRowIDs) > 0:
@@ -930,102 +934,83 @@ class LlumoClient:
             rawResults.extend(dataFromDb)
 
         self.evalData = rawResults
+        # print("RAW RESULTS: ", self.evalData)
 
         # Initialize dataframe columns for each eval
-        for
-        dataframe[
-        dataframe[f"{
+        for ev_name in evals:
+            dataframe[ev_name] = ""
+            dataframe[f"{ev_name} Reason"] = ""
+            # dataframe[f"{ev_name} EdgeCase"] = None
 
         # Map results to dataframe rows
         for item in rawResults:
             for compound_key, value in item.items():
-                if compound_key in rowIdMapping:
-
-
-
-                    dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
-
-                    # Log the evaluation step
-                    if hasattr(self, "logEvalStep"):
-                        try:
-                            start_time = time.time()
-                            self.logEvalStep(
-                                stepName=f"EVAL-{evalName}",
-                                output=value.get("value"),
-                                context=row.get("context", ""),
-                                query=row.get("query", ""),
-                                messageHistory=row.get("messageHistory", ""),
-                                tools=row.get("tools", ""),
-                                intermediateSteps=row.get("intermediateSteps", ""),
-                                groundTruth=row.get("groundTruth", ""),
-                                analyticsScore=value.get("analyticsScore", {}),
-                                reasoning=value.get("reasoning", {}),
-                                classification=value.get("classification", {}),
-                                evalLabel=value.get("evalLabel", {}),
-                                latencyMs=int((time.time() - start_time) * 1000),
-                                status="SUCCESS",
-                                message="",
-                            )
-                        except Exception as e:
-                            print(f"Error logging eval step: {e}")
+                if compound_key not in rowIdMapping:
+                    continue
+                index = rowIdMapping[compound_key]["index"]
+                rowID, columnID, _ = compound_key.split("-", 2)
 
-
-
-        self.endLlumoRun()
+                if hasattr(self, "startLlumoRun"):
+                    self.startLlumoRun(runName="evaluateMultiple",rowID = rowID, columnID = columnID)
 
-
-
-        # df = dataframe.fillna("Some error occured").astype(object)
-        with warnings.catch_warnings():
-            warnings.simplefilter(action="ignore", category=FutureWarning)
-            df = dataframe.fillna("Some error occurred").astype(str)
+                # get the dataframe row at this index
+                row = dataframe.iloc[index].to_dict()
 
-
-
-            email,
-            workspaceID,
-            df,
-            promptText=prompt_template,
-            definationMapping=self.definationMapping,
-            outputColName=outputColName,
-            activePlayground=activePlayground,
-            customAnalytics=customAnalytics,
-        ):
-            print(
-                "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
-            )
-        if getDataFrame:
-            return LlumoDataFrameResults(
-                dataframe,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
-        else:
-            data = dataframe.to_dict(orient="records")
-            return LlumoDictResults(
-                data,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
+                if not value:
+                    continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                # Handle fullEval block
+                fullEval = value.get("fullEval") if isinstance(value, dict) else None
+                if fullEval:
+                    if "evalMetrics" in fullEval and isinstance(fullEval["evalMetrics"], list):
+                        for eval_item in fullEval["evalMetrics"]:
+                            evalName = eval_item.get("evalName") or eval_item.get("kpiName")
+                            score = str(eval_item.get("score")) or eval_item.get("value")
+                            reasoning = eval_item.get("reasoning")
+                            # edgeCase = eval_item.get("edgeCase")
+
+                            if evalName:
+                                dataframe.at[index, evalName] = score
+                                dataframe.at[index, f"{evalName} Reason"] = reasoning
+                                # dataframe.at[index, f"{evalName} EdgeCase"] = edgeCase
+
+                            # logEvalStep if available
+                            if hasattr(self, "logEvalStep"):
+                                try:
+                                    start_time = time.time()
+                                    self.logEvalStep(
+                                        stepName=f"EVAL-{evalName}",
+                                        output=row.get("output", ""),
+                                        context=row.get("context", ""),
+                                        query=row.get("query", ""),
+                                        messageHistory=row.get("messageHistory", ""),
+                                        tools=row.get("tools", ""),
+                                        intermediateSteps=row.get("intermediateSteps", ""),
+                                        groundTruth=row.get("groundTruth", ""),
+                                        analyticsScore=score,
+                                        reasoning=reasoning,
+                                        classification=eval_item.get("classification", {}),
+                                        evalLabel=eval_item.get("evalLabel", {}),
+                                        latencyMs=int((time.time() - start_time) * 1000),
+                                        status="SUCCESS",
+                                        message="",
+                                    )
+                                except Exception as e:
+                                    print(f"⚠️ logEvalStep failed: {e}")
+                if hasattr(self, "endLlumoRun"):
+                    self.endEvalRun()
+
+        # Clean up and finish
+        try:
+            self.socket.disconnect()
+        except Exception:
+            pass
+
+        # if hasattr(self, "endLlumoRun"):
+        #     self.endEvalRun()
+        #
+        return dataframe
 
     def promptSweep(
         self,
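Result rows now arrive wrapped in a `fullEval` block whose `evalMetrics` list carries one entry per metric. A sketch of unpacking a payload of the shape the new loop expects; field values beyond those read in the diff are illustrative:

```python
# Shape inferred from the parsing loop above; values are made up.
value = {
    "fullEval": {
        "evalMetrics": [
            {"evalName": "Response Correctness", "score": 92, "reasoning": "Grounded in context."},
            {"kpiName": "Faithfulness", "score": 88, "reasoning": "No hallucinated claims."},
        ]
    }
}

fullEval = value.get("fullEval") if isinstance(value, dict) else None
if fullEval and isinstance(fullEval.get("evalMetrics"), list):
    for eval_item in fullEval["evalMetrics"]:
        evalName = eval_item.get("evalName") or eval_item.get("kpiName")
        score = str(eval_item.get("score")) or eval_item.get("value")
        print(evalName, score, eval_item.get("reasoning"))
```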
@@ -1806,8 +1791,8 @@ class LlumoClient:
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
         self.validateApiKey(evalName=evals[0])
         if createExperiment:
-            if playgroundID:
-                activePlayground = playgroundID
+            if self.playgroundID:
+                activePlayground = self.playgroundID
             else:
                 activePlayground = str(
                     createEvalPlayground(email=self.email, workspaceID=self.workspaceID)
{llumo-0.2.29 → llumo-0.2.31}/llumo/helpingFuntions.py

@@ -130,7 +130,7 @@ def checkUserHits(
     response = json.loads(responseBody.text)
 
     proceed = response.get("execution", "")
-    print(proceed)
+    # print(proceed)
 
     if proceed:
         return {"success": True, "message": "Hits added and access granted."}
@@ -234,7 +234,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     except Exception as e:
         pass
     evalDependencies = checkDependency(_returnDepMapping=True,customevals=customAnalytics)
-    print(allEvals)
+    # print(allEvals)
     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
 
{llumo-0.2.29 → llumo-0.2.31}/llumo/llumoSessionContext.py

@@ -51,8 +51,21 @@ class LlumoSessionContext(LlumoClient):
     def __exit__(self, excType, excVal, excTb):
         self.end()
 
-
-
+
+    def startLlumoRun(self, runName: str, rowID: str = "", columnID: str = "", runID: str = None):
+
+        if runID is None:
+            LlumoRunID = str(uuid.uuid4().hex[:16])
+        else:
+            LlumoRunID = runID
+
+
+        # Proceed with using LlumoRunID, rowID, columnID...
+        # if rowID =="":
+        #     rowID = str(uuid.uuid4().hex[:16])
+        # if columnID == "":
+        #     columnID = str(uuid.uuid4().hex[:16])
+
         currentTime = datetime(2025, 8, 2, 10, 20, 15, tzinfo=timezone.utc)
         createdAt = currentTime.strftime("%Y-%m-%dT%H:%M:%S.000Z")
         llumoRun = {
@@ -62,8 +75,8 @@ class LlumoSessionContext(LlumoClient):
             "playgroundID": self.logger.getPlaygroundID(),
             "workspaceID": self.logger.getWorkspaceID(),
             "source": "SDK",
-            "rowID":
-            "columnID":
+            "rowID": rowID,
+            "columnID": columnID,
             "email": self.logger.getUserEmailID(),
             "createdAt": createdAt,
             "createdBy": self.logger.getUserEmailID(),
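`startLlumoRun` now threads `rowID` and `columnID` straight into the run payload, so a run can be pinned to a specific evaluation cell; when `runID` is omitted a fresh 16-hex-char ID is minted. A brief sketch; the session object is assumed to be an active `LlumoSessionContext`:

```python
import uuid

# How evaluateMultiple pins a run to a cell (session setup assumed):
# session.startLlumoRun(runName="evaluateMultiple", rowID=rowID, columnID=columnID)

# The default-runID branch in isolation:
runID = None
LlumoRunID = str(uuid.uuid4().hex[:16]) if runID is None else runID
print(len(LlumoRunID))  # 16
```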
@@ -96,12 +109,52 @@ class LlumoSessionContext(LlumoClient):
         # STEP 3: Send the payload
         url = "https://app.llumo.ai/api/create-debug-log"
         headers = {
-            "Authorization": f"Bearer {self.
+            "Authorization": f"Bearer {self.logger.getWorkspaceID()}",
             "Content-Type": "application/json",
         }
 
         try:
-
+            print(run)
+            response = requests.post(url, headers=headers, json=run, timeout=20)
+            response.raise_for_status()
+            # print(response.json())
+        except requests.exceptions.Timeout:
+            # print("Request timed out.")
+            pass
+        except requests.exceptions.RequestException as e:
+            pass
+
+        # Cleanup
+        if self.threadLlumoRun:
+            _ctxLlumoRun.reset(self.threadLlumoRun)
+            self.threadLlumoRun = None
+
+    def endEvalRun(self):
+        run = getLlumoRun()
+        if run is None:
+            return
+
+        # STEP 1: Sort steps by timestamp
+        steps = run.get("steps", [])
+        # sorted_steps = sorted(steps, key=lambda s: s.get("timestamp", 0))
+
+        # # STEP 2: Remove timestamp from each step before sending
+        # clean_steps = [
+        #     {k: v for k, v in step.items() if k != "timestamp"} for step in sorted_steps
+        # ]
+        # run["steps"] = clean_steps
+
+        # print(run["runName"])  # optional debug log
+
+        # STEP 3: Send the payload
+        url = "https://backend-api.llumo.ai/api/v1/create-debug-log-for-sdk"
+        headers = {
+            "Authorization": f"Bearer {self.logger.getWorkspaceID()}",
+            "Content-Type": "application/json",
+        }
+        # print(run)
+        try:
+            response = requests.post(url, headers=headers, json={"log":run}, timeout=20)
             response.raise_for_status()
             # print(response.json())
         except requests.exceptions.Timeout:
@@ -145,51 +198,52 @@ class LlumoSessionContext(LlumoClient):
         provider: str,
         inputTokens: int,
         outputTokens: int,
-        temperature: float,
-        promptTruncated: bool,
+        # temperature: float,
+        # promptTruncated: bool,
         latencyMs: int,
-
+        prompt: str,
         output: str,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "model": model,
             "provider": provider,
             "inputTokens": inputTokens,
             "outputTokens": outputTokens,
-            "temperature": temperature,
-            "promptTruncated": promptTruncated,
+            # "temperature": temperature,
+            # "promptTruncated": promptTruncated,
             "latencyMs": latencyMs,
-            "
+            "prompt": prompt,
             "output": output,
             "status": status,
-            "message": message,
+            # "message": message,
         }
+
         self.logStep("LLM", stepName, metadata)
 
     def logRetrieverStep(
         self,
         stepName: str,
         retrieverSource: str,
-        queryVectorType: str,
         topK: int,
-
-
+        chunkSize,
+        context : str,
+        searchQuery: str,
         latencyMs: int,
-        status: str
-        message: str,
+        status: str
     ):
         metadata = {
             "retrieverSource": retrieverSource,
-            "queryVectorType": queryVectorType,
             "topK": topK,
-            "
-            "
+            "chunkSize":chunkSize,
+            "context": context,
+            "searchQuery": searchQuery,
             "latencyMs": latencyMs,
             "status": status,
-            "message": message,
+            # "message": message,
         }
+
         self.logStep("RETRIEVER", stepName, metadata)
 
     def logAgentStep(
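`logLlmStep` trades the old `temperature`, `promptTruncated`, and `message` parameters for a required `prompt` field, so 0.2.29-style call sites need updating. A sketch of the new call shape; `session` is an active `LlumoSessionContext` and the values are illustrative:

```python
# New 0.2.31 signature: prompt replaces temperature/promptTruncated/message.
session.logLlmStep(
    stepName="LLM Call Completed",
    model="gpt-4o",                      # illustrative
    provider="openai",
    inputTokens=120,
    outputTokens=48,
    latencyMs=850,
    prompt="What is LangChain?",
    output="LangChain is a framework...",
    status="SUCCESS",
)
```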
@@ -201,7 +255,7 @@ class LlumoSessionContext(LlumoClient):
         tools: List[str],
         query: str,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "agentType": agentType,
@@ -210,8 +264,8 @@ class LlumoSessionContext(LlumoClient):
             "tools": tools,
             "query": query,
             "status": status,
-
-
+            # "message": message,
+        }
         self.logStep("AGENT", stepName, metadata)
 
     def logToolSelectorStep(
@@ -222,7 +276,7 @@ class LlumoSessionContext(LlumoClient):
         selectedTool: str,
         reasoning: str,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "selectorType": selectorType,
@@ -230,7 +284,7 @@ class LlumoSessionContext(LlumoClient):
             "selectedTool": selectedTool,
             "reasoning": reasoning,
             "status": status,
-            "message": message,
+            # "message": message,
         }
         self.logStep("TOOL_SELECTOR", stepName, metadata)
 
@@ -238,19 +292,21 @@ class LlumoSessionContext(LlumoClient):
         self,
         stepName: str,
         toolName: str,
+        description: str,
         input: Dict[str, Any],
         output: str,
         latencyMs: int,
         status: str,
-        message: str,
+        # message: str,
     ):
         metadata = {
             "toolName": toolName,
+            "description":description,
             "input": input,
             "output": output,
             "latencyMs": latencyMs,
             "status": status,
-            "message": message,
+            # "message": message,
         }
         self.logStep("TOOL", stepName, metadata)
 
@@ -364,3 +420,16 @@ class LlumoSessionContext(LlumoClient):
             "message": message,
         }
         self.logStep("CUSTOM_SCRIPT", stepName, metadata)
+
+
+    def logQueryStep(self,stepName,model,provider,inputTokens,query,status):
+        metadata = {
+            "model": model,
+            "provider": provider,
+            "inputTokens": inputTokens,
+            "query": query,
+            "status":status
+        }
+        self.logStep("QUERY", stepName, metadata)
+
+
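The new `logQueryStep` records the incoming user question as its own QUERY step, which both the LangChain callback and the patched OpenAI client now emit. A minimal call sketch; `session` is again an assumed active `LlumoSessionContext`:

```python
# QUERY steps capture the raw user question before any LLM/tool work.
session.logQueryStep(
    stepName="Query Invocation",
    model="unknown",
    provider="unknown",
    inputTokens=5,                 # a heuristic estimate is acceptable here
    query="What is LangChain?",
    status="SUCCESS",
)
```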
{llumo-0.2.29 → llumo-0.2.31}/llumo/openai.py

@@ -12,8 +12,6 @@ def performEvaluation(data, api_key=None, evals=["Response Correctness"], **kwargs):
     results = client.evaluateMultiple(
         data,
         evals=evals,
-        createExperiment=kwargs.get("createExperiment", False),
-        playgroundID=kwargs.get("playgroundID"),
         prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
         getDataFrame=False,
     )
@@ -138,6 +136,13 @@ class OpenAI(OpenAIClient):
             response = original_create(*args, **kwargs)
             latency = int((time.time() - start_time) * 1000)
             output_text = response.choices[0].message.content
+            self.session.logQueryStep(
+                stepName="Query Invocation",
+                model=model,
+                provider="openai",
+                inputTokens=response.usage.prompt_tokens,
+                query=user_message,
+                status = "SUCCESS")
 
             self.session.logLlmStep(
                 stepName=f"LLM-{user_message[:30]}",
@@ -145,13 +150,13 @@ class OpenAI(OpenAIClient):
                 provider="openai",
                 inputTokens=response.usage.prompt_tokens,
                 outputTokens=response.usage.completion_tokens,
-                temperature=kwargs.get("temperature", 0.0),
-                promptTruncated=False,
+                # temperature=kwargs.get("temperature", 0.0),
+                # promptTruncated=False,
                 latencyMs=latency,
-
+                prompt=user_message,
                 output=output_text,
                 status="SUCCESS",
-                message="",
+                # message="",
             )
 
             # Response Correctness Evaluation
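With these hooks, every `chat.completions.create` call through the wrapped client produces a QUERY step followed by an LLM step, using real token counts from `response.usage`. A hedged usage sketch; the wrapper's constructor arguments are an assumption, not a documented API:

```python
# Sketch: llumo.openai.OpenAI subclasses the official client; how it binds
# its Llumo session is not shown in this diff, so treat setup as assumed.
from llumo.openai import OpenAI

client = OpenAI(api_key="YOUR_OPENAI_KEY")  # assumed to accept the usual key
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is LangChain?"}],
)
# Behind the scenes: logQueryStep("Query Invocation", ...) then
# logLlmStep(..., prompt=user_message, output=output_text, status="SUCCESS").
```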
The remaining 19 files listed above are unchanged between 0.2.29 and 0.2.31.