llumo 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/client.py +458 -140
- llumo/exceptions.py +4 -0
- llumo/helpingFuntions.py +127 -12
- llumo/sockets.py +25 -12
- {llumo-0.2.13.dist-info → llumo-0.2.14.dist-info}/METADATA +2 -1
- llumo-0.2.14.dist-info/RECORD +13 -0
- llumo-0.2.13.dist-info/RECORD +0 -13
- {llumo-0.2.13.dist-info → llumo-0.2.14.dist-info}/WHEEL +0 -0
- {llumo-0.2.13.dist-info → llumo-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.13.dist-info → llumo-0.2.14.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -16,10 +16,20 @@ from .exceptions import LlumoAIError
|
|
16
16
|
from .helpingFuntions import *
|
17
17
|
from .sockets import LlumoSocketClient
|
18
18
|
from .functionCalling import LlumoAgentExecutor
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
import threading
|
20
|
+
from tqdm import tqdm
|
21
|
+
|
22
|
+
postUrl = (
|
23
|
+
"https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
|
24
|
+
)
|
25
|
+
fetchUrl = (
|
26
|
+
"https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
|
27
|
+
)
|
28
|
+
socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
|
29
|
+
# {
|
30
|
+
# "workspaceID":"c9191fdf33bdd7838328c1a0",
|
31
|
+
# "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
|
32
|
+
# }
|
23
33
|
validateUrl = "https://app.llumo.ai/api/workspace-details"
|
24
34
|
socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
|
25
35
|
|
@@ -32,7 +42,7 @@ class LlumoClient:
|
|
32
42
|
self.processMapping = {}
|
33
43
|
self.definationMapping = {}
|
34
44
|
|
35
|
-
def validateApiKey(self, evalName=" "):
|
45
|
+
def validateApiKey(self, evalName="Input Bias"):
|
36
46
|
headers = {
|
37
47
|
"Authorization": f"Bearer {self.apiKey}",
|
38
48
|
"Content-Type": "application/json",
|
@@ -41,13 +51,6 @@ class LlumoClient:
|
|
41
51
|
|
42
52
|
try:
|
43
53
|
response = requests.post(url=validateUrl, json=reqBody, headers=headers)
|
44
|
-
|
45
|
-
|
46
|
-
try:
|
47
|
-
response_preview = response.text[:500] # First 500 chars
|
48
|
-
# print(f"Response preview: {response_preview}")
|
49
|
-
except Exception as e:
|
50
|
-
print(f"Could not get response preview: {e}")
|
51
54
|
|
52
55
|
except requests.exceptions.RequestException as e:
|
53
56
|
print(f"Request exception: {str(e)}")
|
@@ -82,14 +85,16 @@ class LlumoClient:
|
|
82
85
|
try:
|
83
86
|
self.hitsAvailable = data["data"]["data"].get("remainingHits", 0)
|
84
87
|
self.workspaceID = data["data"]["data"].get("workspaceID")
|
85
|
-
self.evalDefinition = data["data"]["data"]
|
88
|
+
self.evalDefinition = data["data"]["data"]["analyticsMapping"]
|
86
89
|
self.socketToken = data["data"]["data"].get("token")
|
87
90
|
self.hasSubscribed = data["data"]["data"].get("hasSubscribed", False)
|
88
91
|
self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
|
89
|
-
self.subscriptionEndDate = data["data"]["data"].get(
|
92
|
+
self.subscriptionEndDate = data["data"]["data"].get(
|
93
|
+
"subscriptionEndDate", None
|
94
|
+
)
|
90
95
|
self.email = data["data"]["data"].get("email", None)
|
91
|
-
|
92
|
-
self.definationMapping[evalName] =
|
96
|
+
|
97
|
+
self.definationMapping[evalName] = data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, None)
|
93
98
|
|
94
99
|
except Exception as e:
|
95
100
|
# print(f"Error extracting data from response: {str(e)}")
|
@@ -159,13 +164,24 @@ class LlumoClient:
|
|
159
164
|
# this function allows the users to run exactl one eval at a time
|
160
165
|
def evaluate(
|
161
166
|
self,
|
162
|
-
|
167
|
+
data,
|
163
168
|
eval="Response Completeness",
|
164
169
|
prompt_template="",
|
165
170
|
outputColName="output",
|
166
171
|
createExperiment: bool = False,
|
172
|
+
_tocheck=True,
|
167
173
|
):
|
168
174
|
|
175
|
+
# converting it into a pandas dataframe object
|
176
|
+
dataframe = pd.DataFrame(data)
|
177
|
+
|
178
|
+
# check for dependencies for the selected eval metric
|
179
|
+
metricDependencies = checkDependency(
|
180
|
+
eval, columns=list(dataframe.columns), tocheck=_tocheck
|
181
|
+
)
|
182
|
+
if metricDependencies["status"] == False:
|
183
|
+
raise LlumoAIError.dependencyError(metricDependencies["message"])
|
184
|
+
|
169
185
|
results = {}
|
170
186
|
try:
|
171
187
|
socketID = self.socket.connect(timeout=150)
|
@@ -206,7 +222,7 @@ class LlumoClient:
|
|
206
222
|
# if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
207
223
|
# raise LlumoAIError.InsufficientCredits()
|
208
224
|
|
209
|
-
evalDefinition = self.evalDefinition[eval]
|
225
|
+
evalDefinition = self.evalDefinition[eval].get("definition")
|
210
226
|
model = "GPT_4"
|
211
227
|
provider = "OPENAI"
|
212
228
|
evalType = "LLM"
|
@@ -318,13 +334,13 @@ class LlumoClient:
|
|
318
334
|
|
319
335
|
for cnt, batch in enumerate(self.allBatches):
|
320
336
|
try:
|
321
|
-
|
337
|
+
|
322
338
|
self.postBatch(batch=batch, workspaceID=workspaceID)
|
323
|
-
|
339
|
+
print("Betch Posted with item len: ", len(batch))
|
324
340
|
except Exception as e:
|
325
341
|
continue
|
326
342
|
|
327
|
-
time.sleep(
|
343
|
+
# time.sleep(3)
|
328
344
|
|
329
345
|
timeout = max(50, min(600, totalItems * 10))
|
330
346
|
|
@@ -367,7 +383,14 @@ class LlumoClient:
|
|
367
383
|
pd.set_option("future.no_silent_downcasting", True)
|
368
384
|
df = dataframe.fillna("Some error occured").astype(object)
|
369
385
|
|
370
|
-
if createPlayground(
|
386
|
+
if createPlayground(
|
387
|
+
email,
|
388
|
+
workspaceID,
|
389
|
+
df,
|
390
|
+
promptText=prompt_template,
|
391
|
+
definationMapping=self.definationMapping,
|
392
|
+
outputColName=outputColName,
|
393
|
+
):
|
371
394
|
print(
|
372
395
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
|
373
396
|
)
|
@@ -375,33 +398,10 @@ class LlumoClient:
|
|
375
398
|
return dataframe
|
376
399
|
|
377
400
|
# this function allows the users to run multiple evals at once
|
378
|
-
def evaluateMultiple(
|
379
|
-
self,
|
380
|
-
dataframe,
|
381
|
-
eval=["Response Completeness"],
|
382
|
-
prompt_template="",
|
383
|
-
outputColName="output",
|
384
|
-
createExperiment: bool = False,
|
385
|
-
):
|
386
|
-
resultdf = dataframe.copy()
|
387
|
-
for evalName in eval:
|
388
|
-
resultdf = self.evaluate(dataframe = resultdf,eval=evalName,prompt_template=prompt_template,outputColName=outputColName,createExperiment = False)
|
389
401
|
|
390
|
-
|
391
|
-
pd.set_option("future.no_silent_downcasting", True)
|
392
|
-
df = resultdf.fillna("Some error occured").astype(object)
|
393
|
-
|
394
|
-
if createPlayground(self.email, self.workspaceID, df,definationMapping=self.definationMapping,outputColName=outputColName,promptText=prompt_template):
|
395
|
-
print(
|
396
|
-
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
|
397
|
-
)
|
398
|
-
else:
|
399
|
-
return resultdf
|
400
|
-
|
401
|
-
|
402
|
-
def evaluateCompressor(self, dataframe, prompt_template):
|
402
|
+
def evaluateCompressor(self, data, prompt_template):
|
403
403
|
results = []
|
404
|
-
|
404
|
+
dataframe = pd.DataFrame(data)
|
405
405
|
try:
|
406
406
|
socketID = self.socket.connect(timeout=150)
|
407
407
|
max_wait_secs = 20
|
@@ -410,7 +410,9 @@ class LlumoClient:
|
|
410
410
|
time.sleep(0.1)
|
411
411
|
waited_secs += 0.1
|
412
412
|
if waited_secs >= max_wait_secs:
|
413
|
-
raise RuntimeError(
|
413
|
+
raise RuntimeError(
|
414
|
+
"Timeout waiting for server 'connection-established' event."
|
415
|
+
)
|
414
416
|
|
415
417
|
try:
|
416
418
|
self.validateApiKey()
|
@@ -421,8 +423,14 @@ class LlumoClient:
|
|
421
423
|
print(f"Response content: {e.response.text[:500]}...")
|
422
424
|
raise
|
423
425
|
|
424
|
-
userHits = checkUserHits(
|
425
|
-
|
426
|
+
userHits = checkUserHits(
|
427
|
+
self.workspaceID,
|
428
|
+
self.hasSubscribed,
|
429
|
+
self.trialEndDate,
|
430
|
+
self.subscriptionEndDate,
|
431
|
+
self.hitsAvailable,
|
432
|
+
len(dataframe),
|
433
|
+
)
|
426
434
|
|
427
435
|
if not userHits["success"]:
|
428
436
|
raise LlumoAIError.InsufficientCredits(userHits["message"])
|
@@ -443,14 +451,22 @@ class LlumoClient:
|
|
443
451
|
if not all([ky in dataframe.columns for ky in keys]):
|
444
452
|
raise LlumoAIError.InvalidPromptTemplate()
|
445
453
|
|
446
|
-
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
454
|
+
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
455
|
+
"-", ""
|
456
|
+
)
|
447
457
|
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
448
458
|
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
449
459
|
|
450
|
-
compressed_prompt_id =
|
451
|
-
|
460
|
+
compressed_prompt_id = (
|
461
|
+
f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
462
|
+
)
|
463
|
+
compressed_prompt_output_id = (
|
464
|
+
f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
465
|
+
)
|
452
466
|
cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
453
|
-
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
467
|
+
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
468
|
+
"-", ""
|
469
|
+
)
|
454
470
|
|
455
471
|
rowDataDict = {}
|
456
472
|
for col in dataframe.columns:
|
@@ -470,7 +486,7 @@ class LlumoClient:
|
|
470
486
|
"compressed_prompt": compressed_prompt_id,
|
471
487
|
"compressed_prompt_output": compressed_prompt_output_id,
|
472
488
|
"cost": cost_id,
|
473
|
-
"cost_saving": cost_saving_id
|
489
|
+
"cost_saving": cost_saving_id,
|
474
490
|
},
|
475
491
|
"processData": {
|
476
492
|
"rowData": rowDataDict,
|
@@ -483,12 +499,12 @@ class LlumoClient:
|
|
483
499
|
"compressed_prompt": compressed_prompt_id,
|
484
500
|
"compressed_prompt_output": compressed_prompt_output_id,
|
485
501
|
"cost": cost_id,
|
486
|
-
"cost_saving": cost_saving_id
|
487
|
-
}
|
502
|
+
"cost_saving": cost_saving_id,
|
503
|
+
},
|
488
504
|
},
|
489
505
|
"workspaceID": workspaceID,
|
490
506
|
"email": email,
|
491
|
-
"playgroundID": activePlayground
|
507
|
+
"playgroundID": activePlayground,
|
492
508
|
}
|
493
509
|
|
494
510
|
rowIdMapping[rowID] = index
|
@@ -513,7 +529,12 @@ class LlumoClient:
|
|
513
529
|
|
514
530
|
self.AllProcessMapping()
|
515
531
|
timeout = max(60, min(600, total_items * 10))
|
516
|
-
self.socket.listenForResults(
|
532
|
+
self.socket.listenForResults(
|
533
|
+
min_wait=20,
|
534
|
+
max_wait=timeout,
|
535
|
+
inactivity_timeout=30,
|
536
|
+
expected_results=None,
|
537
|
+
)
|
517
538
|
|
518
539
|
results = self.socket.getReceivedData()
|
519
540
|
# results = self.finalResp(eval_results)
|
@@ -532,7 +553,7 @@ class LlumoClient:
|
|
532
553
|
for records in results:
|
533
554
|
for compound_key, value in records.items():
|
534
555
|
# for compound_key, value in item['data'].items():
|
535
|
-
rowID = compound_key.split(
|
556
|
+
rowID = compound_key.split("-")[0]
|
536
557
|
# looking for the index of each rowID , in the original dataframe
|
537
558
|
if rowID in rowIdMapping:
|
538
559
|
index = rowIdMapping[rowID]
|
@@ -550,22 +571,259 @@ class LlumoClient:
|
|
550
571
|
# dataframe["cost_saving"] = cost_saving
|
551
572
|
|
552
573
|
return dataframe
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
)
|
574
|
+
|
575
|
+
def evaluateMultiple(
|
576
|
+
self,
|
577
|
+
data,
|
578
|
+
evals: list, # list of eval metric names
|
579
|
+
prompt_template="",
|
580
|
+
outputColName="output",
|
581
|
+
createExperiment: bool = False,
|
582
|
+
_tocheck=True,
|
583
|
+
):
|
584
|
+
dataframe = pd.DataFrame(data)
|
585
|
+
workspaceID = None
|
586
|
+
email = None
|
587
|
+
socketID = self.socket.connect(timeout=250)
|
588
|
+
self.allBatches = []
|
589
|
+
rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))
|
590
|
+
|
591
|
+
# Wait for socket connection
|
592
|
+
max_wait_secs = 20
|
593
|
+
waited_secs = 0
|
594
|
+
while not self.socket._connection_established.is_set():
|
595
|
+
time.sleep(0.1)
|
596
|
+
waited_secs += 0.1
|
597
|
+
if waited_secs >= max_wait_secs:
|
598
|
+
raise RuntimeError("Timeout waiting for server connection")
|
599
|
+
|
600
|
+
# Start listener thread
|
601
|
+
expectedResults = len(dataframe) * len(evals)
|
602
|
+
# print("expected result" ,expectedResults)
|
603
|
+
timeout = max(100, min(150, expectedResults * 10))
|
604
|
+
listener_thread = threading.Thread(
|
605
|
+
target=self.socket.listenForResults,
|
606
|
+
kwargs={
|
607
|
+
"min_wait": 40,
|
608
|
+
"max_wait": timeout,
|
609
|
+
"inactivity_timeout": 10,
|
610
|
+
"expected_results": expectedResults,
|
611
|
+
},
|
612
|
+
daemon=True,
|
613
|
+
)
|
614
|
+
listener_thread.start()
|
563
615
|
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
616
|
+
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
617
|
+
"-", ""
|
618
|
+
)
|
619
|
+
for evalName in evals:
|
620
|
+
# print(f"\n======= Running evaluation for: {evalName} =======")
|
621
|
+
|
622
|
+
# Validate API and dependencies
|
623
|
+
self.validateApiKey(evalName=evalName)
|
624
|
+
metricDependencies = checkDependency(
|
625
|
+
evalName, list(dataframe.columns), tocheck=_tocheck
|
626
|
+
)
|
627
|
+
if not metricDependencies["status"]:
|
628
|
+
raise LlumoAIError.dependencyError(metricDependencies["message"])
|
629
|
+
|
630
|
+
evalDefinition = self.evalDefinition[evalName]["definition"]
|
631
|
+
model = "GPT_4"
|
632
|
+
provider = "OPENAI"
|
633
|
+
evalType = "LLM"
|
634
|
+
workspaceID = self.workspaceID
|
635
|
+
email = self.email
|
636
|
+
|
637
|
+
userHits = checkUserHits(
|
638
|
+
self.workspaceID,
|
639
|
+
self.hasSubscribed,
|
640
|
+
self.trialEndDate,
|
641
|
+
self.subscriptionEndDate,
|
642
|
+
self.hitsAvailable,
|
643
|
+
len(dataframe),
|
644
|
+
)
|
645
|
+
if not userHits["success"]:
|
646
|
+
raise LlumoAIError.InsufficientCredits(userHits["message"])
|
647
|
+
|
648
|
+
currentBatch = []
|
649
|
+
for index, row in dataframe.iterrows():
|
650
|
+
tools = [row["tools"]] if "tools" in dataframe.columns else []
|
651
|
+
groundTruth = row.get("groundTruth", "")
|
652
|
+
messageHistory = (
|
653
|
+
[row["messageHistory"]]
|
654
|
+
if "messageHistory" in dataframe.columns
|
655
|
+
else []
|
656
|
+
)
|
657
|
+
promptTemplate = prompt_template
|
658
|
+
keys = re.findall(r"{{(.*?)}}", promptTemplate)
|
659
|
+
|
660
|
+
if not all([ky in dataframe.columns for ky in keys]):
|
661
|
+
raise LlumoAIError.InvalidPromptTemplate()
|
662
|
+
|
663
|
+
inputDict = {key: row[key] for key in keys if key in row}
|
664
|
+
output = row.get(outputColName, "")
|
665
|
+
|
666
|
+
|
667
|
+
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
668
|
+
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
669
|
+
|
670
|
+
compoundKey = f"{rowID}-{columnID}-{columnID}"
|
671
|
+
rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
|
672
|
+
|
673
|
+
templateData = {
|
674
|
+
"processID": getProcessID(),
|
675
|
+
"socketID": socketID,
|
676
|
+
"source": "SDK",
|
677
|
+
"processData": {
|
678
|
+
"executionDependency": {
|
679
|
+
"query": "",
|
680
|
+
"context": "",
|
681
|
+
"output": output,
|
682
|
+
"tools": tools,
|
683
|
+
"groundTruth": groundTruth,
|
684
|
+
"messageHistory": messageHistory,
|
685
|
+
},
|
686
|
+
"definition": evalDefinition,
|
687
|
+
"model": model,
|
688
|
+
"provider": provider,
|
689
|
+
"analytics": evalName,
|
690
|
+
},
|
691
|
+
"workspaceID": workspaceID,
|
692
|
+
"type": "EVAL",
|
693
|
+
"evalType": evalType,
|
694
|
+
"kpi": evalName,
|
695
|
+
"columnID": columnID,
|
696
|
+
"rowID": rowID,
|
697
|
+
"playgroundID": activePlayground,
|
698
|
+
"processType": "EVAL",
|
699
|
+
"email": email,
|
700
|
+
}
|
701
|
+
|
702
|
+
query = ""
|
703
|
+
context = ""
|
704
|
+
for key, value in inputDict.items():
|
705
|
+
if isinstance(value, str):
|
706
|
+
length = len(value.split()) * 1.5
|
707
|
+
if length > 50:
|
708
|
+
context += f" {key}: {value}, "
|
709
|
+
else:
|
710
|
+
if promptTemplate:
|
711
|
+
tempObj = {key: value}
|
712
|
+
promptTemplate = getInputPopulatedPrompt(
|
713
|
+
promptTemplate, tempObj
|
714
|
+
)
|
715
|
+
else:
|
716
|
+
query += f" {key}: {value}, "
|
717
|
+
|
718
|
+
if not context.strip():
|
719
|
+
for key, value in inputDict.items():
|
720
|
+
context += f" {key}: {value}, "
|
721
|
+
|
722
|
+
templateData["processData"]["executionDependency"][
|
723
|
+
"context"
|
724
|
+
] = context.strip()
|
725
|
+
templateData["processData"]["executionDependency"][
|
726
|
+
"query"
|
727
|
+
] = query.strip()
|
728
|
+
if promptTemplate and not query.strip():
|
729
|
+
templateData["processData"]["executionDependency"][
|
730
|
+
"query"
|
731
|
+
] = promptTemplate
|
732
|
+
|
733
|
+
currentBatch.append(templateData)
|
734
|
+
if len(currentBatch) == 10:
|
735
|
+
self.allBatches.append(currentBatch)
|
736
|
+
currentBatch = []
|
737
|
+
|
738
|
+
if currentBatch:
|
739
|
+
self.allBatches.append(currentBatch)
|
740
|
+
|
741
|
+
for batch in tqdm(
|
742
|
+
self.allBatches,
|
743
|
+
desc="Processing Batches",
|
744
|
+
unit="batch",
|
745
|
+
colour="magenta",
|
746
|
+
ascii=False,
|
747
|
+
):
|
748
|
+
try:
|
749
|
+
self.postBatch(batch=batch, workspaceID=workspaceID)
|
750
|
+
time.sleep(3)
|
751
|
+
except Exception as e:
|
752
|
+
print(f"Error posting batch: {e}")
|
753
|
+
raise
|
754
|
+
|
755
|
+
# Wait for results
|
756
|
+
time.sleep(3)
|
757
|
+
listener_thread.join()
|
758
|
+
|
759
|
+
|
760
|
+
rawResults = self.socket.getReceivedData()
|
761
|
+
|
762
|
+
# print("data from db #####################",dataFromDb)
|
763
|
+
# Fix here: keep full keys, do not split keys
|
764
|
+
receivedRowIDs = {key for item in rawResults for key in item.keys()}
|
765
|
+
expectedRowIDs = set(rowIdMapping.keys())
|
766
|
+
missingRowIDs = expectedRowIDs - receivedRowIDs
|
767
|
+
# print("All expected keys:", expected_rowIDs)
|
768
|
+
# print("All received keys:", received_rowIDs)
|
769
|
+
# print("Missing keys:", len(missingRowIDs))
|
770
|
+
missingRowIDs=list(missingRowIDs)
|
771
|
+
|
772
|
+
if len(missingRowIDs) > 0:
|
773
|
+
dataFromDb=fetchData(workspaceID,activePlayground,missingRowIDs)
|
774
|
+
rawResults.extend(dataFromDb)
|
775
|
+
|
776
|
+
|
777
|
+
|
778
|
+
|
779
|
+
|
780
|
+
|
781
|
+
# Initialize dataframe columns for each eval
|
782
|
+
for eval in evals:
|
783
|
+
dataframe[eval] = None
|
784
|
+
dataframe[f"{eval} Reason"] = None
|
785
|
+
|
786
|
+
# Map results to dataframe rows
|
787
|
+
for item in rawResults:
|
788
|
+
for compound_key, value in item.items():
|
789
|
+
if compound_key in rowIdMapping:
|
790
|
+
index = rowIdMapping[compound_key]["index"]
|
791
|
+
evalName = rowIdMapping[compound_key]["eval"]
|
792
|
+
dataframe.at[index, evalName] = value.get("value")
|
793
|
+
dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
|
794
|
+
|
795
|
+
self.socket.disconnect()
|
796
|
+
|
797
|
+
|
798
|
+
if createExperiment:
|
799
|
+
pd.set_option("future.no_silent_downcasting", True)
|
800
|
+
df = dataframe.fillna("Some error occured").astype(object)
|
801
|
+
if createPlayground(
|
802
|
+
email,
|
803
|
+
workspaceID,
|
804
|
+
df,
|
805
|
+
promptText=prompt_template,
|
806
|
+
definationMapping=self.definationMapping,
|
807
|
+
outputColName=outputColName,
|
808
|
+
):
|
809
|
+
print(
|
810
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
811
|
+
)
|
812
|
+
else:
|
813
|
+
return dataframe
|
568
814
|
|
815
|
+
def run_sweep(
|
816
|
+
self,
|
817
|
+
templates: List[str],
|
818
|
+
dataset: Dict[str, List[str]],
|
819
|
+
model_aliases: List[AVAILABLEMODELS],
|
820
|
+
apiKey: str,
|
821
|
+
evals=["Response Correctness"],
|
822
|
+
toEvaluate: bool = False,
|
823
|
+
createExperiment: bool = False,
|
824
|
+
) -> pd.DataFrame:
|
825
|
+
|
826
|
+
self.validateApiKey(evalName=" ")
|
569
827
|
workspaceID = self.workspaceID
|
570
828
|
email = self.email
|
571
829
|
executor = ModelExecutor(apiKey)
|
@@ -589,7 +847,9 @@ class LlumoClient:
|
|
589
847
|
for i, model in enumerate(model_aliases, 1):
|
590
848
|
try:
|
591
849
|
provider = getProviderFromModel(model)
|
592
|
-
response = executor.execute(
|
850
|
+
response = executor.execute(
|
851
|
+
provider, model.value, prompt, apiKey
|
852
|
+
)
|
593
853
|
outputKey = f"output_{i}"
|
594
854
|
row[outputKey] = response
|
595
855
|
except Exception as e:
|
@@ -597,25 +857,23 @@ class LlumoClient:
|
|
597
857
|
|
598
858
|
results.append(row)
|
599
859
|
|
600
|
-
|
601
|
-
|
602
860
|
df = pd.DataFrame(results)
|
603
861
|
|
604
|
-
|
605
|
-
if toEvaluate==True:
|
862
|
+
if toEvaluate == True:
|
606
863
|
dfWithEvals = df.copy()
|
607
|
-
for i, model in enumerate(model_aliases,1):
|
864
|
+
for i, model in enumerate(model_aliases, 1):
|
608
865
|
outputColName = f"output_{i}"
|
609
866
|
try:
|
610
867
|
res = self.evaluateMultiple(
|
611
|
-
df,
|
612
|
-
|
868
|
+
df.to_dict("records"),
|
869
|
+
evals=evals,
|
613
870
|
prompt_template=str(templates[0]),
|
614
871
|
outputColName=outputColName,
|
872
|
+
_tocheck=False,
|
615
873
|
)
|
616
874
|
|
617
875
|
# Rename all new columns with _i+1 (e.g., _1, _2)
|
618
|
-
for evalMetric in
|
876
|
+
for evalMetric in evals:
|
619
877
|
scoreCol = f"{evalMetric}"
|
620
878
|
reasonCol = f"{evalMetric} Reason"
|
621
879
|
if scoreCol in res.columns:
|
@@ -624,7 +882,9 @@ class LlumoClient:
|
|
624
882
|
res = res.rename(columns={reasonCol: f"{reasonCol}_{i}"})
|
625
883
|
|
626
884
|
# Drop duplicated columns from df (like prompt, variables, etc.)
|
627
|
-
newCols = [
|
885
|
+
newCols = [
|
886
|
+
col for col in res.columns if col not in dfWithEvals.columns
|
887
|
+
]
|
628
888
|
dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
|
629
889
|
|
630
890
|
except Exception as e:
|
@@ -633,60 +893,72 @@ class LlumoClient:
|
|
633
893
|
if createExperiment:
|
634
894
|
pd.set_option("future.no_silent_downcasting", True)
|
635
895
|
dfWithEvals = dfWithEvals.fillna("Some error occurred")
|
636
|
-
if createPlayground(
|
637
|
-
|
638
|
-
|
896
|
+
if createPlayground(
|
897
|
+
email,
|
898
|
+
workspaceID,
|
899
|
+
dfWithEvals,
|
900
|
+
promptText=templates[0],
|
901
|
+
definationMapping=self.definationMapping,
|
902
|
+
):
|
903
|
+
|
904
|
+
print(
|
905
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
906
|
+
)
|
639
907
|
else:
|
640
908
|
return dfWithEvals
|
641
909
|
else:
|
642
|
-
if createExperiment==True:
|
910
|
+
if createExperiment == True:
|
643
911
|
pd.set_option("future.no_silent_downcasting", True)
|
644
912
|
df = df.fillna("Some error occurred")
|
645
913
|
|
646
914
|
if createPlayground(email, workspaceID, df, promptText=templates[0]):
|
647
|
-
print(
|
648
|
-
|
915
|
+
print(
|
916
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
917
|
+
)
|
918
|
+
else:
|
649
919
|
return df
|
650
920
|
|
651
|
-
|
652
921
|
# this function generates an output using llm and tools and evaluate that output
|
653
922
|
def evaluateAgents(
|
654
923
|
self,
|
655
|
-
|
924
|
+
data,
|
656
925
|
model,
|
657
926
|
agents,
|
658
927
|
model_api_key=None,
|
928
|
+
evals=["Final Task Alignment"],
|
659
929
|
prompt_template="Give answer for the given query: {{query}}",
|
660
930
|
createExperiment: bool = False,
|
661
931
|
):
|
662
932
|
if model.lower() not in ["openai", "google"]:
|
663
933
|
raise ValueError("Model must be 'openai' or 'google'")
|
664
934
|
|
935
|
+
# converting into pandas dataframe object
|
936
|
+
dataframe = pd.DataFrame(data)
|
937
|
+
|
665
938
|
# Run unified agent execution
|
666
939
|
toolResponseDf = LlumoAgentExecutor.run(
|
667
940
|
dataframe, agents, model=model, model_api_key=model_api_key
|
668
941
|
)
|
669
|
-
|
670
|
-
|
671
|
-
evals = [
|
672
|
-
"Tool Reliability",
|
673
|
-
"Stepwise Progression",
|
674
|
-
"Tool Selection Accuracy",
|
675
|
-
"Final Task Alignment",
|
676
|
-
]
|
677
942
|
|
678
|
-
for eval in evals:
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
943
|
+
# for eval in evals:
|
944
|
+
# Perform evaluation
|
945
|
+
# toolResponseDf = self.evaluate(
|
946
|
+
# toolResponseDf.to_dict(orient = "records"),
|
947
|
+
# eval=eval,
|
948
|
+
# prompt_template=prompt_template,
|
949
|
+
# createExperiment=False,
|
950
|
+
# )
|
951
|
+
toolResponseDf = self.evaluateMultiple(
|
952
|
+
toolResponseDf.to_dict(orient="records"),
|
953
|
+
evals=evals,
|
954
|
+
prompt_template=prompt_template,
|
955
|
+
createExperiment=False,
|
956
|
+
)
|
957
|
+
|
686
958
|
if createExperiment:
|
687
959
|
pd.set_option("future.no_silent_downcasting", True)
|
688
960
|
df = toolResponseDf.fillna("Some error occured")
|
689
|
-
if createPlayground(self.email, self.workspaceID, df):
|
961
|
+
if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
|
690
962
|
print(
|
691
963
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
692
964
|
)
|
@@ -696,47 +968,49 @@ class LlumoClient:
|
|
696
968
|
# this function evaluate that tools output given by the user
|
697
969
|
def evaluateAgentResponses(
|
698
970
|
self,
|
699
|
-
|
971
|
+
data,
|
700
972
|
evals=["Final Task Alignment"],
|
701
973
|
outputColName="output",
|
702
974
|
createExperiment: bool = False,
|
703
975
|
):
|
976
|
+
dataframe = pd.DataFrame(data)
|
977
|
+
|
704
978
|
try:
|
705
979
|
if "query" and "messageHistory" and "tools" not in dataframe.columns:
|
706
980
|
raise ValueError(
|
707
981
|
"DataFrame must contain 'query', 'messageHistory','output' ,and 'tools' columns. Make sure the columns names are same as mentioned here."
|
708
982
|
)
|
709
|
-
|
710
|
-
|
711
|
-
# evals = [
|
712
|
-
# "Tool Reliability",
|
713
|
-
# "Stepwise Progression",
|
714
|
-
# "Tool Selection Accuracy",
|
715
|
-
# "Final Task Alignment",
|
716
|
-
# ]
|
983
|
+
|
717
984
|
toolResponseDf = dataframe.copy()
|
718
|
-
for eval in evals:
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
985
|
+
# for eval in evals:
|
986
|
+
# # Perform evaluation
|
987
|
+
# toolResponseDf = self.evaluate(
|
988
|
+
# toolResponseDf.to_dict(orient = "records"), eval=eval, prompt_template="Give answer for the given query: {{query}}",outputColName=outputColName
|
989
|
+
# )
|
990
|
+
toolResponseDf = self.evaluateMultiple(
|
991
|
+
toolResponseDf.to_dict(orient="records"),
|
992
|
+
evals=evals,
|
993
|
+
prompt_template="Give answer for the given query: {{query}}",
|
994
|
+
outputColName=outputColName,
|
995
|
+
createExperiment=createExperiment
|
996
|
+
)
|
997
|
+
if createExperiment:
|
998
|
+
pass
|
999
|
+
else:
|
1000
|
+
return toolResponseDf
|
726
1001
|
|
727
1002
|
except Exception as e:
|
728
1003
|
raise e
|
729
1004
|
|
730
|
-
|
731
1005
|
def runDataStream(
|
732
1006
|
self,
|
733
|
-
|
1007
|
+
data,
|
734
1008
|
streamName: str,
|
735
1009
|
queryColName: str = "query",
|
736
1010
|
createExperiment: bool = False,
|
737
1011
|
):
|
738
1012
|
results = {}
|
739
|
-
|
1013
|
+
dataframe = pd.DataFrame(data)
|
740
1014
|
try:
|
741
1015
|
socketID = self.socket.connect(timeout=150)
|
742
1016
|
# Ensure full connection before proceeding
|
@@ -751,16 +1025,11 @@ class LlumoClient:
|
|
751
1025
|
)
|
752
1026
|
# print(f"Connected with socket ID: {socketID}")
|
753
1027
|
rowIdMapping = {}
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
print(f"Error during API key validation: {str(e)}")
|
760
|
-
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
761
|
-
print(f"Status code: {e.response.status_code}")
|
762
|
-
print(f"Response content: {e.response.text[:500]}...")
|
763
|
-
raise
|
1028
|
+
|
1029
|
+
# print(f"Validating API key...")
|
1030
|
+
self.validateApiKey()
|
1031
|
+
# print(f"API key validation successful. Hits available: {self.hitsAvailable}")
|
1032
|
+
|
764
1033
|
# check for available hits and trial limit
|
765
1034
|
userHits = checkUserHits(
|
766
1035
|
self.workspaceID,
|
@@ -888,7 +1157,13 @@ class LlumoClient:
|
|
888
1157
|
pd.set_option("future.no_silent_downcasting", True)
|
889
1158
|
df = dataframe.fillna("Some error occured").astype(object)
|
890
1159
|
|
891
|
-
if createPlayground(
|
1160
|
+
if createPlayground(
|
1161
|
+
email,
|
1162
|
+
workspaceID,
|
1163
|
+
df,
|
1164
|
+
queryColName=queryColName,
|
1165
|
+
dataStreamName=streamId,
|
1166
|
+
):
|
892
1167
|
print(
|
893
1168
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
894
1169
|
)
|
@@ -908,6 +1183,49 @@ class LlumoClient:
|
|
908
1183
|
except Exception as e:
|
909
1184
|
raise "Some error ocuured please check your API key"
|
910
1185
|
|
1186
|
+
def upload(self, file_path):
|
1187
|
+
|
1188
|
+
workspaceID = None
|
1189
|
+
email = None
|
1190
|
+
|
1191
|
+
|
1192
|
+
try:
|
1193
|
+
self.validateApiKey()
|
1194
|
+
except Exception as e:
|
1195
|
+
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
1196
|
+
pass
|
1197
|
+
raise
|
1198
|
+
|
1199
|
+
# Get file extension
|
1200
|
+
_, ext = os.path.splitext(file_path)
|
1201
|
+
ext = ext.lower()
|
1202
|
+
|
1203
|
+
# Supported formats
|
1204
|
+
try:
|
1205
|
+
if ext == ".csv":
|
1206
|
+
df = pd.read_csv(file_path)
|
1207
|
+
elif ext in [".xlsx", ".xls"]:
|
1208
|
+
df = pd.read_excel(file_path)
|
1209
|
+
elif ext == ".json":
|
1210
|
+
df = pd.read_json(file_path, orient="records")
|
1211
|
+
elif ext == ".parquet":
|
1212
|
+
df = pd.read_parquet(file_path)
|
1213
|
+
else:
|
1214
|
+
raise ValueError(f"Unsupported file format: {ext}")
|
1215
|
+
|
1216
|
+
# If successfully loaded, call createPlayground
|
1217
|
+
df = df.astype(str)
|
1218
|
+
if createPlayground(self.email, self.workspaceID, df):
|
1219
|
+
|
1220
|
+
print(
|
1221
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
1222
|
+
)
|
1223
|
+
|
1224
|
+
return True
|
1225
|
+
|
1226
|
+
except Exception as e:
|
1227
|
+
print(f"Error: {e}")
|
1228
|
+
|
911
1229
|
|
912
1230
|
class SafeDict(dict):
|
913
1231
|
def __missing__(self, key):
|
llumo/exceptions.py
CHANGED
@@ -46,6 +46,10 @@ class LlumoAIError(Exception):
|
|
46
46
|
def modelHitsExhausted(details = "Your credits for the selected model exhausted."):
|
47
47
|
return LlumoAIError(details)
|
48
48
|
|
49
|
+
@staticmethod
|
50
|
+
def dependencyError(details):
|
51
|
+
return LlumoAIError(details)
|
52
|
+
|
49
53
|
# @staticmethod
|
50
54
|
# def dateNotFound():
|
51
55
|
# return LlumoAIError("Trial end date or subscription end date not found for the given user.")
|
llumo/helpingFuntions.py
CHANGED
@@ -212,7 +212,6 @@ def deleteColumnListInPlayground(workspaceID: str, playgroundID: str):
|
|
212
212
|
print("❌ Error:", response.status_code, response.text)
|
213
213
|
return None
|
214
214
|
|
215
|
-
|
216
215
|
def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColName=None,outputColName= "output",dataStreamName=None,definationMapping=None):
|
217
216
|
if len(dataframe) > 100:
|
218
217
|
dataframe = dataframe.head(100)
|
@@ -238,7 +237,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
238
237
|
columnIDMapping[col] = columnID
|
239
238
|
|
240
239
|
|
241
|
-
if col.startswith('output'):
|
240
|
+
if col.startswith('output') and promptText!=None:
|
242
241
|
# For output columns, create the prompt template with promptText
|
243
242
|
if promptText:
|
244
243
|
# Extract variables from promptText and set them as dependencies
|
@@ -277,7 +276,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
277
276
|
"order": indx,
|
278
277
|
}
|
279
278
|
|
280
|
-
elif col.startswith('Data '):
|
279
|
+
elif col.startswith('Data ') :
|
281
280
|
if queryColName and dataStreamName:
|
282
281
|
dependencies = []
|
283
282
|
dependencies.append(columnIDMapping[queryColName])
|
@@ -291,7 +290,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
291
290
|
"type": "DATA_STREAM",
|
292
291
|
"order": indx}
|
293
292
|
|
294
|
-
elif col in allEvals:
|
293
|
+
elif col in allEvals and promptText!=None:
|
295
294
|
|
296
295
|
dependencies = []
|
297
296
|
variables = re.findall(r'{{(.*?)}}', promptText)
|
@@ -303,8 +302,8 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
303
302
|
dependencies.append(columnIDMapping[varName])
|
304
303
|
|
305
304
|
dependencies.append(columnIDMapping[outputColName]) # Add the output column ID
|
306
|
-
longDef = definationMapping.get(col, {}).get(
|
307
|
-
shortDef =
|
305
|
+
longDef = definationMapping.get(col, {}).get('definition', "")
|
306
|
+
shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
|
308
307
|
enum = col.upper().replace(" ","_")
|
309
308
|
|
310
309
|
template = {
|
@@ -312,7 +311,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
312
311
|
col.lower().replace(" ","_")
|
313
312
|
],
|
314
313
|
"evaluationMetric": "ALL",
|
315
|
-
"evaluationModel": "
|
314
|
+
"evaluationModel": "LLUMO_EVALLM",
|
316
315
|
"selectPrompt": columnIDMapping[outputColName],
|
317
316
|
"scoreCondition": "GREATER_THAN",
|
318
317
|
"scoreValue": "50",
|
@@ -336,12 +335,13 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
336
335
|
"analyticsENUM": enum,
|
337
336
|
"prompt": shortDef,
|
338
337
|
"analyticsName": col,
|
339
|
-
"columnID":
|
338
|
+
"columnID": columnID,
|
340
339
|
"label": col,
|
341
340
|
"order": indx
|
342
341
|
}
|
343
342
|
|
344
|
-
|
343
|
+
elif col.endswith(' Reason') and promptText!=None:
|
344
|
+
continue
|
345
345
|
|
346
346
|
|
347
347
|
else:
|
@@ -371,9 +371,25 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
371
371
|
|
372
372
|
# For each column, we need to map the column ID to the corresponding value in the row
|
373
373
|
for col in dataframe.columns:
|
374
|
-
columnID = columnIDMapping[col]
|
375
|
-
|
376
|
-
|
374
|
+
columnID = columnIDMapping[col]
|
375
|
+
|
376
|
+
if col in allEvals and promptText!=None:
|
377
|
+
row_dict[columnID] = {
|
378
|
+
|
379
|
+
"value": row[col],
|
380
|
+
"type": "EVAL",
|
381
|
+
"isValid": True,
|
382
|
+
"reasoning": row[col+" Reason"],
|
383
|
+
"edgeCase": "minorHallucinationDetailNotInContext",
|
384
|
+
"kpi": col
|
385
|
+
|
386
|
+
}
|
387
|
+
elif col.endswith(' Reason') and promptText!=None:
|
388
|
+
continue
|
389
|
+
else:# Get the columnID from the mapping
|
390
|
+
row_dict[columnID] = row[col]
|
391
|
+
|
392
|
+
# row_dict[columnID] = row[col] # Directly map the column ID to the row value
|
377
393
|
# Add the row index (if necessary)
|
378
394
|
row_dict["pIndex"] = indx
|
379
395
|
rowTemplate["dataToUploadList"].append(row_dict)
|
@@ -430,6 +446,8 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
|
|
430
446
|
payload1, payload2 = createColumn(
|
431
447
|
workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName
|
432
448
|
)
|
449
|
+
|
450
|
+
# Debugging line to check the payload2 structure
|
433
451
|
deleteExistingRows = deleteColumnListInPlayground(
|
434
452
|
workspaceID=workspaceID, playgroundID=playgroundId
|
435
453
|
)
|
@@ -441,6 +459,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
|
|
441
459
|
|
442
460
|
|
443
461
|
|
462
|
+
|
444
463
|
def getPlaygroundInsights(workspaceID: str, activePlayground: str):
|
445
464
|
headers = {
|
446
465
|
|
@@ -490,4 +509,100 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
|
|
490
509
|
else:
|
491
510
|
print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
|
492
511
|
return None
|
512
|
+
def checkDependency(selectedEval, columns, tocheck=True):
    """
    Check that every input column required by an evaluation metric is present.

    Parameters:
    - selectedEval (str): The name of the selected evaluation metric.
    - columns (list): Column names present in the dataset.
    - tocheck (bool): When falsy, the check is skipped and success is reported.

    Returns:
    - dict: {"status": bool, "message": str}. "status" is False when the metric
      is unknown or when one or more required columns are missing; "message"
      then explains the problem. Otherwise "status" is True and "message" is
      "success".

    This function does not raise on a failed check; callers are expected to
    inspect "status" and decide how to react.
    """
    if not tocheck:
        return {"status": True, "message": "success"}

    # Required input columns for each supported evaluation metric.
    metricDependencies = {
        'Response Completeness': ['context', 'query', 'output'],
        'Response Bias': ['output'],
        'Response Harmfulness': ['output'],
        'Input Toxicity': ['query'],
        'Input Harmfulness': ['query'],
        'Context Utilization': ['output', 'context'],
        'Relevance Retention': ['context', 'query'],
        'Semantic Cohesion': ['context'],
        'Final Task Alignment': ['messageHistory'],
        'Tool Reliability': ['messageHistory'],
        'Response Correctness': ['output', 'query', 'context'],
        'Response Toxicity': ['output'],
        'Input Bias': ['query'],
        'Input Relevancy': ['context', 'query'],
        'Redundancy Reduction': ['context'],
        'Response Sentiment': ['output'],
        'Tool Selection Accuracy': ['tools', 'messageHistory'],
        'Stepwise Progression': ['tools', 'messageHistory'],
        'Hallucination': ['query', 'context', 'output'],
        'Groundedness': ['groundTruth', 'output'],
        'Memory Utilization': ['context', 'messageHistory'],
        'Input Relevancy (Multi-turn)': ['context', 'query'],
    }

    if selectedEval not in metricDependencies:
        return {"status": False, "message": f"Unknown evaluation metric: {selectedEval}"}

    columnsRequired = metricDependencies[selectedEval]

    # Collect every missing column so the user can fix them all in one pass
    # instead of discovering them one failed call at a time.
    missing = [requirement for requirement in columnsRequired if requirement not in columns]
    if missing:
        missingText = ", ".join(f"'{name}'" for name in missing)
        return {
            "status": False,
            "message": f"'{selectedEval}' requires columns: {columnsRequired}. "
                       f"Missing: {missingText}. Please ensure your data includes all required columns.",
        }

    return {"status": True, "message": "success"}
|
567
|
+
|
568
|
+
|
569
|
+
def fetchData(workspaceID, playgroundID, missingList: list, timeout=30):
    """
    Fetch awaited evaluation results from the Llumo "get-awaited" endpoint.

    Parameters:
    - workspaceID: Workspace identifier sent in the request payload.
    - playgroundID: Playground identifier sent in the request payload.
    - missingList (list): Sent as "missingList" in the request payload.
    - timeout: Seconds to wait for the HTTP request before giving up.
      Without a timeout, `requests.post` can block indefinitely.

    Returns:
    - list: One single-key dict per entry of the response's "data" object,
      shaped as {key: {"value", "reasoning", "edgeCase", "kpi"}}. An empty list
      is returned on a non-200 status or on any error; this function reports
      failures by printing and does not raise.
    """
    socket_data_url = "https://app.llumo.ai/api/eval/get-awaited"
    payload = {
        "workspaceID": workspaceID,
        "playgroundID": playgroundID,
        "missingList": missingList,
    }

    try:
        response = requests.post(socket_data_url, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"Failed to fetch data. Status Code: {response.status_code}")
            return []

        # Treat a missing or null "data" field as "no results".
        data = response.json().get("data") or {}

        return [
            {
                key: {
                    "value": value.get("value"),
                    "reasoning": value.get("reasoning"),
                    "edgeCase": value.get("edgeCase"),
                    "kpi": value.get("kpi"),
                }
            }
            for key, value in data.items()
        ]

    except Exception as e:
        # Deliberately broad: callers rely on always getting a list back.
        print(f"An error occurred: {e}")
        return []
|
llumo/sockets.py
CHANGED
@@ -17,15 +17,16 @@ class LlumoSocketClient:
|
|
17
17
|
|
18
18
|
# Initialize client
|
19
19
|
self.sio = socketio.Client(
|
20
|
-
|
21
|
-
|
20
|
+
logger=False,
|
21
|
+
engineio_logger=False,
|
22
22
|
reconnection=True,
|
23
|
-
reconnection_attempts=
|
23
|
+
reconnection_attempts=1,
|
24
24
|
reconnection_delay=1,
|
25
25
|
)
|
26
26
|
|
27
27
|
@self.sio.on("connect")
|
28
28
|
def on_connect():
|
29
|
+
self.sio.emit("ready")
|
29
30
|
# print("Socket connection established")
|
30
31
|
self._connected = True
|
31
32
|
# Don't set connection_established yet - wait for server confirmation
|
@@ -37,21 +38,27 @@ class LlumoSocketClient:
|
|
37
38
|
# f"Server acknowledged connection with 'connection-established' event: {data}"
|
38
39
|
# )
|
39
40
|
if isinstance(data, dict) and "socketId" in data:
|
41
|
+
self.sio.emit("ready")
|
40
42
|
self.server_socket_id = data["socketId"]
|
41
43
|
# print(f"Received server socket ID: {self.server_socket_id}")
|
42
44
|
self._connection_established.set()
|
43
45
|
|
44
46
|
@self.sio.on("result-update")
|
45
|
-
def on_result_update(data):
|
47
|
+
def on_result_update(data, callback=None):
|
46
48
|
with self._lock:
|
47
49
|
# print(f"Received result-update event: {data}")
|
48
50
|
self._received_data.append(data)
|
49
51
|
self._last_update_time = time.time()
|
50
52
|
|
51
53
|
# ✅ Stop if all expected results are received
|
52
|
-
if
|
54
|
+
if (
|
55
|
+
self._expected_results
|
56
|
+
and len(self._received_data) >= self._expected_results
|
57
|
+
):
|
53
58
|
# print("✅ All expected results received.")
|
54
59
|
self._listening_done.set()
|
60
|
+
if callback:
|
61
|
+
callback(True)
|
55
62
|
|
56
63
|
@self.sio.on("disconnect")
|
57
64
|
def on_disconnect():
|
@@ -81,13 +88,17 @@ class LlumoSocketClient:
|
|
81
88
|
start = time.time()
|
82
89
|
while not self.sio.connected:
|
83
90
|
if time.time() - start > timeout:
|
84
|
-
raise RuntimeError(
|
91
|
+
raise RuntimeError(
|
92
|
+
"Timed out waiting for low-level socket connection."
|
93
|
+
)
|
85
94
|
time.sleep(0.1)
|
86
95
|
# print("[DEBUG] SocketIO low-level connection established.")
|
87
96
|
|
88
97
|
# Wait for server "connection-established" event
|
89
98
|
if not self._connection_established.wait(timeout):
|
90
|
-
raise RuntimeError(
|
99
|
+
raise RuntimeError(
|
100
|
+
"Timed out waiting for connection-established event."
|
101
|
+
)
|
91
102
|
|
92
103
|
self._connected = True
|
93
104
|
self._last_update_time = time.time()
|
@@ -100,10 +111,13 @@ class LlumoSocketClient:
|
|
100
111
|
self._connected = False
|
101
112
|
raise RuntimeError(f"WebSocket connection failed: {e}")
|
102
113
|
|
103
|
-
def listenForResults(
|
104
|
-
|
105
|
-
|
114
|
+
def listenForResults(
|
115
|
+
self, min_wait=30, max_wait=300, inactivity_timeout=50, expected_results=None
|
116
|
+
):
|
117
|
+
# if not self._connected:
|
118
|
+
# raise RuntimeError("WebSocket is not connected. Call connect() first.")
|
106
119
|
|
120
|
+
# total records
|
107
121
|
self._expected_results = expected_results # NEW
|
108
122
|
start_time = time.time()
|
109
123
|
self._last_update_time = time.time()
|
@@ -128,14 +142,13 @@ class LlumoSocketClient:
|
|
128
142
|
self._listening_done.set()
|
129
143
|
break
|
130
144
|
|
131
|
-
time.sleep(3)
|
132
|
-
|
133
145
|
timeout_thread = threading.Thread(target=timeout_watcher, daemon=True)
|
134
146
|
timeout_thread.start()
|
135
147
|
self._listening_done.wait()
|
136
148
|
|
137
149
|
def getReceivedData(self):
    """Return a copy of the results received so far, taken while holding the lock."""
    with self._lock:
        snapshot = self._received_data.copy()
    return snapshot
|
140
153
|
|
141
154
|
def disconnect(self):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: llumo
|
3
|
-
Version: 0.2.
|
3
|
+
Version: 0.2.14
|
4
4
|
Summary: Python SDK for interacting with the Llumo ai API.
|
5
5
|
Home-page: https://www.llumo.ai/
|
6
6
|
Author: Llumo
|
@@ -21,6 +21,7 @@ Requires-Dist: requests>=2.0.0
|
|
21
21
|
Requires-Dist: python-socketio
|
22
22
|
Requires-Dist: python-dotenv
|
23
23
|
Requires-Dist: openai==1.75.0
|
24
|
+
Requires-Dist: tqdm==4.67.1
|
24
25
|
Requires-Dist: google-generativeai==0.8.5
|
25
26
|
Dynamic: author
|
26
27
|
Dynamic: author-email
|
@@ -0,0 +1,13 @@
|
|
1
|
+
llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
|
2
|
+
llumo/client.py,sha256=HpvUyucrGPbcPQMz_cTRDcEsBFpmNt8jfW1zJU4Nyss,46781
|
3
|
+
llumo/exceptions.py,sha256=i3Qv4_g7XjRuho7-b7ybjw2bwSh_NhvICR6ZAgiLQX8,1944
|
4
|
+
llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
|
5
|
+
llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
|
6
|
+
llumo/helpingFuntions.py,sha256=RgWok8DoE1R-Tc0kJ9B5En6LEUEk5EvQU8iJiGPbUsw,21911
|
7
|
+
llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
|
8
|
+
llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
|
9
|
+
llumo-0.2.14.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
|
10
|
+
llumo-0.2.14.dist-info/METADATA,sha256=B7NLMFRj8018jkWxEzKSSGlb2CS3d45rDtnywDh_4kc,1519
|
11
|
+
llumo-0.2.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
+
llumo-0.2.14.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
|
13
|
+
llumo-0.2.14.dist-info/RECORD,,
|
llumo-0.2.13.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
|
2
|
-
llumo/client.py,sha256=mVqxNyYMbHlL9lnkpw_H6_n98WYO38ZcLZNnQX1TjUE,35948
|
3
|
-
llumo/exceptions.py,sha256=iCj7HhtO_ckC2EaVBdXbAudNpuMDsYmmMEV5lwynZ-E,1854
|
4
|
-
llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
|
5
|
-
llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
|
6
|
-
llumo/helpingFuntions.py,sha256=ah0FUQcRV3gfguvjQQ_aZzq59hpJttqAPJdjJVNYdFc,17110
|
7
|
-
llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
|
8
|
-
llumo/sockets.py,sha256=i90l2rr08paa-ifKy2E5YMIS8r3yRBmu2CUOjhFKork,5579
|
9
|
-
llumo-0.2.13.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
|
10
|
-
llumo-0.2.13.dist-info/METADATA,sha256=W-ZVsTKhiPw6strlNQEqyufgOLfV9ZCjCpY6Dj3Qc94,1491
|
11
|
-
llumo-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
-
llumo-0.2.13.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
|
13
|
-
llumo-0.2.13.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|