llumo 0.2.13b1__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/client.py +457 -141
- llumo/exceptions.py +4 -0
- llumo/helpingFuntions.py +127 -12
- llumo/sockets.py +23 -10
- {llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/METADATA +2 -1
- llumo-0.2.14.dist-info/RECORD +13 -0
- llumo-0.2.13b1.dist-info/RECORD +0 -13
- {llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/WHEEL +0 -0
- {llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.13b1.dist-info → llumo-0.2.14.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -16,10 +16,20 @@ from .exceptions import LlumoAIError
|
|
16
16
|
from .helpingFuntions import *
|
17
17
|
from .sockets import LlumoSocketClient
|
18
18
|
from .functionCalling import LlumoAgentExecutor
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
import threading
|
20
|
+
from tqdm import tqdm
|
21
|
+
|
22
|
+
postUrl = (
|
23
|
+
"https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
|
24
|
+
)
|
25
|
+
fetchUrl = (
|
26
|
+
"https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
|
27
|
+
)
|
28
|
+
socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
|
29
|
+
# {
|
30
|
+
# "workspaceID":"c9191fdf33bdd7838328c1a0",
|
31
|
+
# "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
|
32
|
+
# }
|
23
33
|
validateUrl = "https://app.llumo.ai/api/workspace-details"
|
24
34
|
socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
|
25
35
|
|
@@ -32,7 +42,7 @@ class LlumoClient:
|
|
32
42
|
self.processMapping = {}
|
33
43
|
self.definationMapping = {}
|
34
44
|
|
35
|
-
def validateApiKey(self, evalName=" "):
|
45
|
+
def validateApiKey(self, evalName="Input Bias"):
|
36
46
|
headers = {
|
37
47
|
"Authorization": f"Bearer {self.apiKey}",
|
38
48
|
"Content-Type": "application/json",
|
@@ -41,13 +51,6 @@ class LlumoClient:
|
|
41
51
|
|
42
52
|
try:
|
43
53
|
response = requests.post(url=validateUrl, json=reqBody, headers=headers)
|
44
|
-
|
45
|
-
|
46
|
-
try:
|
47
|
-
response_preview = response.text[:500] # First 500 chars
|
48
|
-
# print(f"Response preview: {response_preview}")
|
49
|
-
except Exception as e:
|
50
|
-
print(f"Could not get response preview: {e}")
|
51
54
|
|
52
55
|
except requests.exceptions.RequestException as e:
|
53
56
|
print(f"Request exception: {str(e)}")
|
@@ -82,14 +85,16 @@ class LlumoClient:
|
|
82
85
|
try:
|
83
86
|
self.hitsAvailable = data["data"]["data"].get("remainingHits", 0)
|
84
87
|
self.workspaceID = data["data"]["data"].get("workspaceID")
|
85
|
-
self.evalDefinition = data["data"]["data"]
|
88
|
+
self.evalDefinition = data["data"]["data"]["analyticsMapping"]
|
86
89
|
self.socketToken = data["data"]["data"].get("token")
|
87
90
|
self.hasSubscribed = data["data"]["data"].get("hasSubscribed", False)
|
88
91
|
self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
|
89
|
-
self.subscriptionEndDate = data["data"]["data"].get(
|
92
|
+
self.subscriptionEndDate = data["data"]["data"].get(
|
93
|
+
"subscriptionEndDate", None
|
94
|
+
)
|
90
95
|
self.email = data["data"]["data"].get("email", None)
|
91
|
-
|
92
|
-
self.definationMapping[evalName] =
|
96
|
+
|
97
|
+
self.definationMapping[evalName] = data.get("data", {}).get("data", {}).get("analyticsMapping", {}).get(evalName, None)
|
93
98
|
|
94
99
|
except Exception as e:
|
95
100
|
# print(f"Error extracting data from response: {str(e)}")
|
@@ -159,13 +164,24 @@ class LlumoClient:
|
|
159
164
|
# this function allows the users to run exactl one eval at a time
|
160
165
|
def evaluate(
|
161
166
|
self,
|
162
|
-
|
167
|
+
data,
|
163
168
|
eval="Response Completeness",
|
164
169
|
prompt_template="",
|
165
170
|
outputColName="output",
|
166
171
|
createExperiment: bool = False,
|
172
|
+
_tocheck=True,
|
167
173
|
):
|
168
174
|
|
175
|
+
# converting it into a pandas dataframe object
|
176
|
+
dataframe = pd.DataFrame(data)
|
177
|
+
|
178
|
+
# check for dependencies for the selected eval metric
|
179
|
+
metricDependencies = checkDependency(
|
180
|
+
eval, columns=list(dataframe.columns), tocheck=_tocheck
|
181
|
+
)
|
182
|
+
if metricDependencies["status"] == False:
|
183
|
+
raise LlumoAIError.dependencyError(metricDependencies["message"])
|
184
|
+
|
169
185
|
results = {}
|
170
186
|
try:
|
171
187
|
socketID = self.socket.connect(timeout=150)
|
@@ -206,7 +222,7 @@ class LlumoClient:
|
|
206
222
|
# if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
|
207
223
|
# raise LlumoAIError.InsufficientCredits()
|
208
224
|
|
209
|
-
evalDefinition = self.evalDefinition[eval]
|
225
|
+
evalDefinition = self.evalDefinition[eval].get("definition")
|
210
226
|
model = "GPT_4"
|
211
227
|
provider = "OPENAI"
|
212
228
|
evalType = "LLM"
|
@@ -318,13 +334,13 @@ class LlumoClient:
|
|
318
334
|
|
319
335
|
for cnt, batch in enumerate(self.allBatches):
|
320
336
|
try:
|
321
|
-
|
337
|
+
|
322
338
|
self.postBatch(batch=batch, workspaceID=workspaceID)
|
323
|
-
|
339
|
+
print("Betch Posted with item len: ", len(batch))
|
324
340
|
except Exception as e:
|
325
341
|
continue
|
326
342
|
|
327
|
-
time.sleep(3)
|
343
|
+
# time.sleep(3)
|
328
344
|
|
329
345
|
timeout = max(50, min(600, totalItems * 10))
|
330
346
|
|
@@ -367,7 +383,14 @@ class LlumoClient:
|
|
367
383
|
pd.set_option("future.no_silent_downcasting", True)
|
368
384
|
df = dataframe.fillna("Some error occured").astype(object)
|
369
385
|
|
370
|
-
if createPlayground(
|
386
|
+
if createPlayground(
|
387
|
+
email,
|
388
|
+
workspaceID,
|
389
|
+
df,
|
390
|
+
promptText=prompt_template,
|
391
|
+
definationMapping=self.definationMapping,
|
392
|
+
outputColName=outputColName,
|
393
|
+
):
|
371
394
|
print(
|
372
395
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
|
373
396
|
)
|
@@ -375,34 +398,10 @@ class LlumoClient:
|
|
375
398
|
return dataframe
|
376
399
|
|
377
400
|
# this function allows the users to run multiple evals at once
|
378
|
-
def evaluateMultiple(
|
379
|
-
self,
|
380
|
-
dataframe,
|
381
|
-
eval=["Response Completeness"],
|
382
|
-
prompt_template="",
|
383
|
-
outputColName="output",
|
384
|
-
createExperiment: bool = False,
|
385
|
-
):
|
386
|
-
resultdf = dataframe.copy()
|
387
|
-
for evalName in eval:
|
388
|
-
time.sleep(2)
|
389
|
-
resultdf = self.evaluate(dataframe = resultdf,eval=evalName,prompt_template=prompt_template,outputColName=outputColName,createExperiment = False)
|
390
401
|
|
391
|
-
|
392
|
-
pd.set_option("future.no_silent_downcasting", True)
|
393
|
-
df = resultdf.fillna("Some error occured").astype(object)
|
394
|
-
|
395
|
-
if createPlayground(self.email, self.workspaceID, df,definationMapping=self.definationMapping,outputColName=outputColName,promptText=prompt_template):
|
396
|
-
print(
|
397
|
-
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
|
398
|
-
)
|
399
|
-
else:
|
400
|
-
return resultdf
|
401
|
-
|
402
|
-
|
403
|
-
def evaluateCompressor(self, dataframe, prompt_template):
|
402
|
+
def evaluateCompressor(self, data, prompt_template):
|
404
403
|
results = []
|
405
|
-
|
404
|
+
dataframe = pd.DataFrame(data)
|
406
405
|
try:
|
407
406
|
socketID = self.socket.connect(timeout=150)
|
408
407
|
max_wait_secs = 20
|
@@ -411,7 +410,9 @@ class LlumoClient:
|
|
411
410
|
time.sleep(0.1)
|
412
411
|
waited_secs += 0.1
|
413
412
|
if waited_secs >= max_wait_secs:
|
414
|
-
raise RuntimeError(
|
413
|
+
raise RuntimeError(
|
414
|
+
"Timeout waiting for server 'connection-established' event."
|
415
|
+
)
|
415
416
|
|
416
417
|
try:
|
417
418
|
self.validateApiKey()
|
@@ -422,8 +423,14 @@ class LlumoClient:
|
|
422
423
|
print(f"Response content: {e.response.text[:500]}...")
|
423
424
|
raise
|
424
425
|
|
425
|
-
userHits = checkUserHits(
|
426
|
-
|
426
|
+
userHits = checkUserHits(
|
427
|
+
self.workspaceID,
|
428
|
+
self.hasSubscribed,
|
429
|
+
self.trialEndDate,
|
430
|
+
self.subscriptionEndDate,
|
431
|
+
self.hitsAvailable,
|
432
|
+
len(dataframe),
|
433
|
+
)
|
427
434
|
|
428
435
|
if not userHits["success"]:
|
429
436
|
raise LlumoAIError.InsufficientCredits(userHits["message"])
|
@@ -444,14 +451,22 @@ class LlumoClient:
|
|
444
451
|
if not all([ky in dataframe.columns for ky in keys]):
|
445
452
|
raise LlumoAIError.InvalidPromptTemplate()
|
446
453
|
|
447
|
-
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
454
|
+
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
455
|
+
"-", ""
|
456
|
+
)
|
448
457
|
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
449
458
|
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
450
459
|
|
451
|
-
compressed_prompt_id =
|
452
|
-
|
460
|
+
compressed_prompt_id = (
|
461
|
+
f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
462
|
+
)
|
463
|
+
compressed_prompt_output_id = (
|
464
|
+
f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
465
|
+
)
|
453
466
|
cost_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
454
|
-
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
467
|
+
cost_saving_id = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
468
|
+
"-", ""
|
469
|
+
)
|
455
470
|
|
456
471
|
rowDataDict = {}
|
457
472
|
for col in dataframe.columns:
|
@@ -471,7 +486,7 @@ class LlumoClient:
|
|
471
486
|
"compressed_prompt": compressed_prompt_id,
|
472
487
|
"compressed_prompt_output": compressed_prompt_output_id,
|
473
488
|
"cost": cost_id,
|
474
|
-
"cost_saving": cost_saving_id
|
489
|
+
"cost_saving": cost_saving_id,
|
475
490
|
},
|
476
491
|
"processData": {
|
477
492
|
"rowData": rowDataDict,
|
@@ -484,12 +499,12 @@ class LlumoClient:
|
|
484
499
|
"compressed_prompt": compressed_prompt_id,
|
485
500
|
"compressed_prompt_output": compressed_prompt_output_id,
|
486
501
|
"cost": cost_id,
|
487
|
-
"cost_saving": cost_saving_id
|
488
|
-
}
|
502
|
+
"cost_saving": cost_saving_id,
|
503
|
+
},
|
489
504
|
},
|
490
505
|
"workspaceID": workspaceID,
|
491
506
|
"email": email,
|
492
|
-
"playgroundID": activePlayground
|
507
|
+
"playgroundID": activePlayground,
|
493
508
|
}
|
494
509
|
|
495
510
|
rowIdMapping[rowID] = index
|
@@ -514,7 +529,12 @@ class LlumoClient:
|
|
514
529
|
|
515
530
|
self.AllProcessMapping()
|
516
531
|
timeout = max(60, min(600, total_items * 10))
|
517
|
-
self.socket.listenForResults(
|
532
|
+
self.socket.listenForResults(
|
533
|
+
min_wait=20,
|
534
|
+
max_wait=timeout,
|
535
|
+
inactivity_timeout=30,
|
536
|
+
expected_results=None,
|
537
|
+
)
|
518
538
|
|
519
539
|
results = self.socket.getReceivedData()
|
520
540
|
# results = self.finalResp(eval_results)
|
@@ -533,7 +553,7 @@ class LlumoClient:
|
|
533
553
|
for records in results:
|
534
554
|
for compound_key, value in records.items():
|
535
555
|
# for compound_key, value in item['data'].items():
|
536
|
-
rowID = compound_key.split(
|
556
|
+
rowID = compound_key.split("-")[0]
|
537
557
|
# looking for the index of each rowID , in the original dataframe
|
538
558
|
if rowID in rowIdMapping:
|
539
559
|
index = rowIdMapping[rowID]
|
@@ -551,22 +571,259 @@ class LlumoClient:
|
|
551
571
|
# dataframe["cost_saving"] = cost_saving
|
552
572
|
|
553
573
|
return dataframe
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
)
|
574
|
+
|
575
|
+
def evaluateMultiple(
|
576
|
+
self,
|
577
|
+
data,
|
578
|
+
evals: list, # list of eval metric names
|
579
|
+
prompt_template="",
|
580
|
+
outputColName="output",
|
581
|
+
createExperiment: bool = False,
|
582
|
+
_tocheck=True,
|
583
|
+
):
|
584
|
+
dataframe = pd.DataFrame(data)
|
585
|
+
workspaceID = None
|
586
|
+
email = None
|
587
|
+
socketID = self.socket.connect(timeout=250)
|
588
|
+
self.allBatches = []
|
589
|
+
rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))
|
590
|
+
|
591
|
+
# Wait for socket connection
|
592
|
+
max_wait_secs = 20
|
593
|
+
waited_secs = 0
|
594
|
+
while not self.socket._connection_established.is_set():
|
595
|
+
time.sleep(0.1)
|
596
|
+
waited_secs += 0.1
|
597
|
+
if waited_secs >= max_wait_secs:
|
598
|
+
raise RuntimeError("Timeout waiting for server connection")
|
599
|
+
|
600
|
+
# Start listener thread
|
601
|
+
expectedResults = len(dataframe) * len(evals)
|
602
|
+
# print("expected result" ,expectedResults)
|
603
|
+
timeout = max(100, min(150, expectedResults * 10))
|
604
|
+
listener_thread = threading.Thread(
|
605
|
+
target=self.socket.listenForResults,
|
606
|
+
kwargs={
|
607
|
+
"min_wait": 40,
|
608
|
+
"max_wait": timeout,
|
609
|
+
"inactivity_timeout": 10,
|
610
|
+
"expected_results": expectedResults,
|
611
|
+
},
|
612
|
+
daemon=True,
|
613
|
+
)
|
614
|
+
listener_thread.start()
|
564
615
|
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
616
|
+
activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
|
617
|
+
"-", ""
|
618
|
+
)
|
619
|
+
for evalName in evals:
|
620
|
+
# print(f"\n======= Running evaluation for: {evalName} =======")
|
621
|
+
|
622
|
+
# Validate API and dependencies
|
623
|
+
self.validateApiKey(evalName=evalName)
|
624
|
+
metricDependencies = checkDependency(
|
625
|
+
evalName, list(dataframe.columns), tocheck=_tocheck
|
626
|
+
)
|
627
|
+
if not metricDependencies["status"]:
|
628
|
+
raise LlumoAIError.dependencyError(metricDependencies["message"])
|
629
|
+
|
630
|
+
evalDefinition = self.evalDefinition[evalName]["definition"]
|
631
|
+
model = "GPT_4"
|
632
|
+
provider = "OPENAI"
|
633
|
+
evalType = "LLM"
|
634
|
+
workspaceID = self.workspaceID
|
635
|
+
email = self.email
|
636
|
+
|
637
|
+
userHits = checkUserHits(
|
638
|
+
self.workspaceID,
|
639
|
+
self.hasSubscribed,
|
640
|
+
self.trialEndDate,
|
641
|
+
self.subscriptionEndDate,
|
642
|
+
self.hitsAvailable,
|
643
|
+
len(dataframe),
|
644
|
+
)
|
645
|
+
if not userHits["success"]:
|
646
|
+
raise LlumoAIError.InsufficientCredits(userHits["message"])
|
647
|
+
|
648
|
+
currentBatch = []
|
649
|
+
for index, row in dataframe.iterrows():
|
650
|
+
tools = [row["tools"]] if "tools" in dataframe.columns else []
|
651
|
+
groundTruth = row.get("groundTruth", "")
|
652
|
+
messageHistory = (
|
653
|
+
[row["messageHistory"]]
|
654
|
+
if "messageHistory" in dataframe.columns
|
655
|
+
else []
|
656
|
+
)
|
657
|
+
promptTemplate = prompt_template
|
658
|
+
keys = re.findall(r"{{(.*?)}}", promptTemplate)
|
659
|
+
|
660
|
+
if not all([ky in dataframe.columns for ky in keys]):
|
661
|
+
raise LlumoAIError.InvalidPromptTemplate()
|
662
|
+
|
663
|
+
inputDict = {key: row[key] for key in keys if key in row}
|
664
|
+
output = row.get(outputColName, "")
|
665
|
+
|
666
|
+
|
667
|
+
rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
668
|
+
columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
|
669
|
+
|
670
|
+
compoundKey = f"{rowID}-{columnID}-{columnID}"
|
671
|
+
rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
|
672
|
+
|
673
|
+
templateData = {
|
674
|
+
"processID": getProcessID(),
|
675
|
+
"socketID": socketID,
|
676
|
+
"source": "SDK",
|
677
|
+
"processData": {
|
678
|
+
"executionDependency": {
|
679
|
+
"query": "",
|
680
|
+
"context": "",
|
681
|
+
"output": output,
|
682
|
+
"tools": tools,
|
683
|
+
"groundTruth": groundTruth,
|
684
|
+
"messageHistory": messageHistory,
|
685
|
+
},
|
686
|
+
"definition": evalDefinition,
|
687
|
+
"model": model,
|
688
|
+
"provider": provider,
|
689
|
+
"analytics": evalName,
|
690
|
+
},
|
691
|
+
"workspaceID": workspaceID,
|
692
|
+
"type": "EVAL",
|
693
|
+
"evalType": evalType,
|
694
|
+
"kpi": evalName,
|
695
|
+
"columnID": columnID,
|
696
|
+
"rowID": rowID,
|
697
|
+
"playgroundID": activePlayground,
|
698
|
+
"processType": "EVAL",
|
699
|
+
"email": email,
|
700
|
+
}
|
701
|
+
|
702
|
+
query = ""
|
703
|
+
context = ""
|
704
|
+
for key, value in inputDict.items():
|
705
|
+
if isinstance(value, str):
|
706
|
+
length = len(value.split()) * 1.5
|
707
|
+
if length > 50:
|
708
|
+
context += f" {key}: {value}, "
|
709
|
+
else:
|
710
|
+
if promptTemplate:
|
711
|
+
tempObj = {key: value}
|
712
|
+
promptTemplate = getInputPopulatedPrompt(
|
713
|
+
promptTemplate, tempObj
|
714
|
+
)
|
715
|
+
else:
|
716
|
+
query += f" {key}: {value}, "
|
717
|
+
|
718
|
+
if not context.strip():
|
719
|
+
for key, value in inputDict.items():
|
720
|
+
context += f" {key}: {value}, "
|
721
|
+
|
722
|
+
templateData["processData"]["executionDependency"][
|
723
|
+
"context"
|
724
|
+
] = context.strip()
|
725
|
+
templateData["processData"]["executionDependency"][
|
726
|
+
"query"
|
727
|
+
] = query.strip()
|
728
|
+
if promptTemplate and not query.strip():
|
729
|
+
templateData["processData"]["executionDependency"][
|
730
|
+
"query"
|
731
|
+
] = promptTemplate
|
569
732
|
|
733
|
+
currentBatch.append(templateData)
|
734
|
+
if len(currentBatch) == 10:
|
735
|
+
self.allBatches.append(currentBatch)
|
736
|
+
currentBatch = []
|
737
|
+
|
738
|
+
if currentBatch:
|
739
|
+
self.allBatches.append(currentBatch)
|
740
|
+
|
741
|
+
for batch in tqdm(
|
742
|
+
self.allBatches,
|
743
|
+
desc="Processing Batches",
|
744
|
+
unit="batch",
|
745
|
+
colour="magenta",
|
746
|
+
ascii=False,
|
747
|
+
):
|
748
|
+
try:
|
749
|
+
self.postBatch(batch=batch, workspaceID=workspaceID)
|
750
|
+
time.sleep(3)
|
751
|
+
except Exception as e:
|
752
|
+
print(f"Error posting batch: {e}")
|
753
|
+
raise
|
754
|
+
|
755
|
+
# Wait for results
|
756
|
+
time.sleep(3)
|
757
|
+
listener_thread.join()
|
758
|
+
|
759
|
+
|
760
|
+
rawResults = self.socket.getReceivedData()
|
761
|
+
|
762
|
+
# print("data from db #####################",dataFromDb)
|
763
|
+
# Fix here: keep full keys, do not split keys
|
764
|
+
receivedRowIDs = {key for item in rawResults for key in item.keys()}
|
765
|
+
expectedRowIDs = set(rowIdMapping.keys())
|
766
|
+
missingRowIDs = expectedRowIDs - receivedRowIDs
|
767
|
+
# print("All expected keys:", expected_rowIDs)
|
768
|
+
# print("All received keys:", received_rowIDs)
|
769
|
+
# print("Missing keys:", len(missingRowIDs))
|
770
|
+
missingRowIDs=list(missingRowIDs)
|
771
|
+
|
772
|
+
if len(missingRowIDs) > 0:
|
773
|
+
dataFromDb=fetchData(workspaceID,activePlayground,missingRowIDs)
|
774
|
+
rawResults.extend(dataFromDb)
|
775
|
+
|
776
|
+
|
777
|
+
|
778
|
+
|
779
|
+
|
780
|
+
|
781
|
+
# Initialize dataframe columns for each eval
|
782
|
+
for eval in evals:
|
783
|
+
dataframe[eval] = None
|
784
|
+
dataframe[f"{eval} Reason"] = None
|
785
|
+
|
786
|
+
# Map results to dataframe rows
|
787
|
+
for item in rawResults:
|
788
|
+
for compound_key, value in item.items():
|
789
|
+
if compound_key in rowIdMapping:
|
790
|
+
index = rowIdMapping[compound_key]["index"]
|
791
|
+
evalName = rowIdMapping[compound_key]["eval"]
|
792
|
+
dataframe.at[index, evalName] = value.get("value")
|
793
|
+
dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
|
794
|
+
|
795
|
+
self.socket.disconnect()
|
796
|
+
|
797
|
+
|
798
|
+
if createExperiment:
|
799
|
+
pd.set_option("future.no_silent_downcasting", True)
|
800
|
+
df = dataframe.fillna("Some error occured").astype(object)
|
801
|
+
if createPlayground(
|
802
|
+
email,
|
803
|
+
workspaceID,
|
804
|
+
df,
|
805
|
+
promptText=prompt_template,
|
806
|
+
definationMapping=self.definationMapping,
|
807
|
+
outputColName=outputColName,
|
808
|
+
):
|
809
|
+
print(
|
810
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
811
|
+
)
|
812
|
+
else:
|
813
|
+
return dataframe
|
814
|
+
|
815
|
+
def run_sweep(
|
816
|
+
self,
|
817
|
+
templates: List[str],
|
818
|
+
dataset: Dict[str, List[str]],
|
819
|
+
model_aliases: List[AVAILABLEMODELS],
|
820
|
+
apiKey: str,
|
821
|
+
evals=["Response Correctness"],
|
822
|
+
toEvaluate: bool = False,
|
823
|
+
createExperiment: bool = False,
|
824
|
+
) -> pd.DataFrame:
|
825
|
+
|
826
|
+
self.validateApiKey(evalName=" ")
|
570
827
|
workspaceID = self.workspaceID
|
571
828
|
email = self.email
|
572
829
|
executor = ModelExecutor(apiKey)
|
@@ -590,7 +847,9 @@ class LlumoClient:
|
|
590
847
|
for i, model in enumerate(model_aliases, 1):
|
591
848
|
try:
|
592
849
|
provider = getProviderFromModel(model)
|
593
|
-
response = executor.execute(
|
850
|
+
response = executor.execute(
|
851
|
+
provider, model.value, prompt, apiKey
|
852
|
+
)
|
594
853
|
outputKey = f"output_{i}"
|
595
854
|
row[outputKey] = response
|
596
855
|
except Exception as e:
|
@@ -598,25 +857,23 @@ class LlumoClient:
|
|
598
857
|
|
599
858
|
results.append(row)
|
600
859
|
|
601
|
-
|
602
|
-
|
603
860
|
df = pd.DataFrame(results)
|
604
861
|
|
605
|
-
|
606
|
-
if toEvaluate==True:
|
862
|
+
if toEvaluate == True:
|
607
863
|
dfWithEvals = df.copy()
|
608
|
-
for i, model in enumerate(model_aliases,1):
|
864
|
+
for i, model in enumerate(model_aliases, 1):
|
609
865
|
outputColName = f"output_{i}"
|
610
866
|
try:
|
611
867
|
res = self.evaluateMultiple(
|
612
|
-
df,
|
613
|
-
|
868
|
+
df.to_dict("records"),
|
869
|
+
evals=evals,
|
614
870
|
prompt_template=str(templates[0]),
|
615
871
|
outputColName=outputColName,
|
872
|
+
_tocheck=False,
|
616
873
|
)
|
617
874
|
|
618
875
|
# Rename all new columns with _i+1 (e.g., _1, _2)
|
619
|
-
for evalMetric in
|
876
|
+
for evalMetric in evals:
|
620
877
|
scoreCol = f"{evalMetric}"
|
621
878
|
reasonCol = f"{evalMetric} Reason"
|
622
879
|
if scoreCol in res.columns:
|
@@ -625,7 +882,9 @@ class LlumoClient:
|
|
625
882
|
res = res.rename(columns={reasonCol: f"{reasonCol}_{i}"})
|
626
883
|
|
627
884
|
# Drop duplicated columns from df (like prompt, variables, etc.)
|
628
|
-
newCols = [
|
885
|
+
newCols = [
|
886
|
+
col for col in res.columns if col not in dfWithEvals.columns
|
887
|
+
]
|
629
888
|
dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
|
630
889
|
|
631
890
|
except Exception as e:
|
@@ -634,26 +893,35 @@ class LlumoClient:
|
|
634
893
|
if createExperiment:
|
635
894
|
pd.set_option("future.no_silent_downcasting", True)
|
636
895
|
dfWithEvals = dfWithEvals.fillna("Some error occurred")
|
637
|
-
if createPlayground(
|
638
|
-
|
639
|
-
|
896
|
+
if createPlayground(
|
897
|
+
email,
|
898
|
+
workspaceID,
|
899
|
+
dfWithEvals,
|
900
|
+
promptText=templates[0],
|
901
|
+
definationMapping=self.definationMapping,
|
902
|
+
):
|
903
|
+
|
904
|
+
print(
|
905
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
906
|
+
)
|
640
907
|
else:
|
641
908
|
return dfWithEvals
|
642
909
|
else:
|
643
|
-
if createExperiment==True:
|
910
|
+
if createExperiment == True:
|
644
911
|
pd.set_option("future.no_silent_downcasting", True)
|
645
912
|
df = df.fillna("Some error occurred")
|
646
913
|
|
647
914
|
if createPlayground(email, workspaceID, df, promptText=templates[0]):
|
648
|
-
print(
|
649
|
-
|
915
|
+
print(
|
916
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
917
|
+
)
|
918
|
+
else:
|
650
919
|
return df
|
651
920
|
|
652
|
-
|
653
921
|
# this function generates an output using llm and tools and evaluate that output
|
654
922
|
def evaluateAgents(
|
655
923
|
self,
|
656
|
-
|
924
|
+
data,
|
657
925
|
model,
|
658
926
|
agents,
|
659
927
|
model_api_key=None,
|
@@ -664,31 +932,33 @@ class LlumoClient:
|
|
664
932
|
if model.lower() not in ["openai", "google"]:
|
665
933
|
raise ValueError("Model must be 'openai' or 'google'")
|
666
934
|
|
935
|
+
# converting into pandas dataframe object
|
936
|
+
dataframe = pd.DataFrame(data)
|
937
|
+
|
667
938
|
# Run unified agent execution
|
668
939
|
toolResponseDf = LlumoAgentExecutor.run(
|
669
940
|
dataframe, agents, model=model, model_api_key=model_api_key
|
670
941
|
)
|
671
|
-
|
672
|
-
|
673
|
-
# evals = [
|
674
|
-
# "Tool Reliability",
|
675
|
-
# "Stepwise Progression",
|
676
|
-
# "Tool Selection Accuracy",
|
677
|
-
# "Final Task Alignment",
|
678
|
-
# ]
|
679
942
|
|
680
|
-
for eval in evals:
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
943
|
+
# for eval in evals:
|
944
|
+
# Perform evaluation
|
945
|
+
# toolResponseDf = self.evaluate(
|
946
|
+
# toolResponseDf.to_dict(orient = "records"),
|
947
|
+
# eval=eval,
|
948
|
+
# prompt_template=prompt_template,
|
949
|
+
# createExperiment=False,
|
950
|
+
# )
|
951
|
+
toolResponseDf = self.evaluateMultiple(
|
952
|
+
toolResponseDf.to_dict(orient="records"),
|
953
|
+
evals=evals,
|
954
|
+
prompt_template=prompt_template,
|
955
|
+
createExperiment=False,
|
956
|
+
)
|
957
|
+
|
688
958
|
if createExperiment:
|
689
959
|
pd.set_option("future.no_silent_downcasting", True)
|
690
960
|
df = toolResponseDf.fillna("Some error occured")
|
691
|
-
if createPlayground(self.email, self.workspaceID, df):
|
961
|
+
if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
|
692
962
|
print(
|
693
963
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
694
964
|
)
|
@@ -698,47 +968,49 @@ class LlumoClient:
|
|
698
968
|
# this function evaluate that tools output given by the user
|
699
969
|
def evaluateAgentResponses(
|
700
970
|
self,
|
701
|
-
|
971
|
+
data,
|
702
972
|
evals=["Final Task Alignment"],
|
703
973
|
outputColName="output",
|
704
974
|
createExperiment: bool = False,
|
705
975
|
):
|
976
|
+
dataframe = pd.DataFrame(data)
|
977
|
+
|
706
978
|
try:
|
707
979
|
if "query" and "messageHistory" and "tools" not in dataframe.columns:
|
708
980
|
raise ValueError(
|
709
981
|
"DataFrame must contain 'query', 'messageHistory','output' ,and 'tools' columns. Make sure the columns names are same as mentioned here."
|
710
982
|
)
|
711
|
-
|
712
|
-
|
713
|
-
# evals = [
|
714
|
-
# "Tool Reliability",
|
715
|
-
# "Stepwise Progression",
|
716
|
-
# "Tool Selection Accuracy",
|
717
|
-
# "Final Task Alignment",
|
718
|
-
# ]
|
983
|
+
|
719
984
|
toolResponseDf = dataframe.copy()
|
720
|
-
for eval in evals:
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
985
|
+
# for eval in evals:
|
986
|
+
# # Perform evaluation
|
987
|
+
# toolResponseDf = self.evaluate(
|
988
|
+
# toolResponseDf.to_dict(orient = "records"), eval=eval, prompt_template="Give answer for the given query: {{query}}",outputColName=outputColName
|
989
|
+
# )
|
990
|
+
toolResponseDf = self.evaluateMultiple(
|
991
|
+
toolResponseDf.to_dict(orient="records"),
|
992
|
+
evals=evals,
|
993
|
+
prompt_template="Give answer for the given query: {{query}}",
|
994
|
+
outputColName=outputColName,
|
995
|
+
createExperiment=createExperiment
|
996
|
+
)
|
997
|
+
if createExperiment:
|
998
|
+
pass
|
999
|
+
else:
|
1000
|
+
return toolResponseDf
|
728
1001
|
|
729
1002
|
except Exception as e:
|
730
1003
|
raise e
|
731
1004
|
|
732
|
-
|
733
1005
|
def runDataStream(
|
734
1006
|
self,
|
735
|
-
|
1007
|
+
data,
|
736
1008
|
streamName: str,
|
737
1009
|
queryColName: str = "query",
|
738
1010
|
createExperiment: bool = False,
|
739
1011
|
):
|
740
1012
|
results = {}
|
741
|
-
|
1013
|
+
dataframe = pd.DataFrame(data)
|
742
1014
|
try:
|
743
1015
|
socketID = self.socket.connect(timeout=150)
|
744
1016
|
# Ensure full connection before proceeding
|
@@ -753,16 +1025,11 @@ class LlumoClient:
|
|
753
1025
|
)
|
754
1026
|
# print(f"Connected with socket ID: {socketID}")
|
755
1027
|
rowIdMapping = {}
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
print(f"Error during API key validation: {str(e)}")
|
762
|
-
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
763
|
-
print(f"Status code: {e.response.status_code}")
|
764
|
-
print(f"Response content: {e.response.text[:500]}...")
|
765
|
-
raise
|
1028
|
+
|
1029
|
+
# print(f"Validating API key...")
|
1030
|
+
self.validateApiKey()
|
1031
|
+
# print(f"API key validation successful. Hits available: {self.hitsAvailable}")
|
1032
|
+
|
766
1033
|
# check for available hits and trial limit
|
767
1034
|
userHits = checkUserHits(
|
768
1035
|
self.workspaceID,
|
@@ -890,7 +1157,13 @@ class LlumoClient:
|
|
890
1157
|
pd.set_option("future.no_silent_downcasting", True)
|
891
1158
|
df = dataframe.fillna("Some error occured").astype(object)
|
892
1159
|
|
893
|
-
if createPlayground(
|
1160
|
+
if createPlayground(
|
1161
|
+
email,
|
1162
|
+
workspaceID,
|
1163
|
+
df,
|
1164
|
+
queryColName=queryColName,
|
1165
|
+
dataStreamName=streamId,
|
1166
|
+
):
|
894
1167
|
print(
|
895
1168
|
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
896
1169
|
)
|
@@ -910,6 +1183,49 @@ class LlumoClient:
|
|
910
1183
|
except Exception as e:
|
911
1184
|
raise "Some error ocuured please check your API key"
|
912
1185
|
|
1186
|
+
def upload(self, file_path):
|
1187
|
+
|
1188
|
+
workspaceID = None
|
1189
|
+
email = None
|
1190
|
+
|
1191
|
+
|
1192
|
+
try:
|
1193
|
+
self.validateApiKey()
|
1194
|
+
except Exception as e:
|
1195
|
+
if hasattr(e, "response") and getattr(e, "response", None) is not None:
|
1196
|
+
pass
|
1197
|
+
raise
|
1198
|
+
|
1199
|
+
# Get file extension
|
1200
|
+
_, ext = os.path.splitext(file_path)
|
1201
|
+
ext = ext.lower()
|
1202
|
+
|
1203
|
+
# Supported formats
|
1204
|
+
try:
|
1205
|
+
if ext == ".csv":
|
1206
|
+
df = pd.read_csv(file_path)
|
1207
|
+
elif ext in [".xlsx", ".xls"]:
|
1208
|
+
df = pd.read_excel(file_path)
|
1209
|
+
elif ext == ".json":
|
1210
|
+
df = pd.read_json(file_path, orient="records")
|
1211
|
+
elif ext == ".parquet":
|
1212
|
+
df = pd.read_parquet(file_path)
|
1213
|
+
else:
|
1214
|
+
raise ValueError(f"Unsupported file format: {ext}")
|
1215
|
+
|
1216
|
+
# If successfully loaded, call createPlayground
|
1217
|
+
df = df.astype(str)
|
1218
|
+
if createPlayground(self.email, self.workspaceID, df):
|
1219
|
+
|
1220
|
+
print(
|
1221
|
+
"Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results."
|
1222
|
+
)
|
1223
|
+
|
1224
|
+
return True
|
1225
|
+
|
1226
|
+
except Exception as e:
|
1227
|
+
print(f"Error: {e}")
|
1228
|
+
|
913
1229
|
|
914
1230
|
class SafeDict(dict):
|
915
1231
|
def __missing__(self, key):
|
llumo/exceptions.py
CHANGED
@@ -46,6 +46,10 @@ class LlumoAIError(Exception):
|
|
46
46
|
def modelHitsExhausted(details = "Your credits for the selected model exhausted."):
|
47
47
|
return LlumoAIError(details)
|
48
48
|
|
49
|
+
@staticmethod
|
50
|
+
def dependencyError(details):
|
51
|
+
return LlumoAIError(details)
|
52
|
+
|
49
53
|
# @staticmethod
|
50
54
|
# def dateNotFound():
|
51
55
|
# return LlumoAIError("Trial end date or subscription end date not found for the given user.")
|
llumo/helpingFuntions.py
CHANGED
@@ -212,7 +212,6 @@ def deleteColumnListInPlayground(workspaceID: str, playgroundID: str):
|
|
212
212
|
print("❌ Error:", response.status_code, response.text)
|
213
213
|
return None
|
214
214
|
|
215
|
-
|
216
215
|
def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColName=None,outputColName= "output",dataStreamName=None,definationMapping=None):
|
217
216
|
if len(dataframe) > 100:
|
218
217
|
dataframe = dataframe.head(100)
|
@@ -238,7 +237,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
238
237
|
columnIDMapping[col] = columnID
|
239
238
|
|
240
239
|
|
241
|
-
if col.startswith('output'):
|
240
|
+
if col.startswith('output') and promptText!=None:
|
242
241
|
# For output columns, create the prompt template with promptText
|
243
242
|
if promptText:
|
244
243
|
# Extract variables from promptText and set them as dependencies
|
@@ -277,7 +276,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
277
276
|
"order": indx,
|
278
277
|
}
|
279
278
|
|
280
|
-
elif col.startswith('Data '):
|
279
|
+
elif col.startswith('Data ') :
|
281
280
|
if queryColName and dataStreamName:
|
282
281
|
dependencies = []
|
283
282
|
dependencies.append(columnIDMapping[queryColName])
|
@@ -291,7 +290,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
291
290
|
"type": "DATA_STREAM",
|
292
291
|
"order": indx}
|
293
292
|
|
294
|
-
elif col in allEvals:
|
293
|
+
elif col in allEvals and promptText!=None:
|
295
294
|
|
296
295
|
dependencies = []
|
297
296
|
variables = re.findall(r'{{(.*?)}}', promptText)
|
@@ -303,8 +302,8 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
303
302
|
dependencies.append(columnIDMapping[varName])
|
304
303
|
|
305
304
|
dependencies.append(columnIDMapping[outputColName]) # Add the output column ID
|
306
|
-
longDef = definationMapping.get(col, {}).get(
|
307
|
-
shortDef =
|
305
|
+
longDef = definationMapping.get(col, {}).get('definition', "")
|
306
|
+
shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
|
308
307
|
enum = col.upper().replace(" ","_")
|
309
308
|
|
310
309
|
template = {
|
@@ -312,7 +311,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
312
311
|
col.lower().replace(" ","_")
|
313
312
|
],
|
314
313
|
"evaluationMetric": "ALL",
|
315
|
-
"evaluationModel": "
|
314
|
+
"evaluationModel": "LLUMO_EVALLM",
|
316
315
|
"selectPrompt": columnIDMapping[outputColName],
|
317
316
|
"scoreCondition": "GREATER_THAN",
|
318
317
|
"scoreValue": "50",
|
@@ -336,12 +335,13 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
336
335
|
"analyticsENUM": enum,
|
337
336
|
"prompt": shortDef,
|
338
337
|
"analyticsName": col,
|
339
|
-
"columnID":
|
338
|
+
"columnID": columnID,
|
340
339
|
"label": col,
|
341
340
|
"order": indx
|
342
341
|
}
|
343
342
|
|
344
|
-
|
343
|
+
elif col.endswith(' Reason') and promptText!=None:
|
344
|
+
continue
|
345
345
|
|
346
346
|
|
347
347
|
else:
|
@@ -371,9 +371,25 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
|
|
371
371
|
|
372
372
|
# For each column, we need to map the column ID to the corresponding value in the row
|
373
373
|
for col in dataframe.columns:
|
374
|
-
columnID = columnIDMapping[col]
|
375
|
-
|
376
|
-
|
374
|
+
columnID = columnIDMapping[col]
|
375
|
+
|
376
|
+
if col in allEvals and promptText!=None:
|
377
|
+
row_dict[columnID] = {
|
378
|
+
|
379
|
+
"value": row[col],
|
380
|
+
"type": "EVAL",
|
381
|
+
"isValid": True,
|
382
|
+
"reasoning": row[col+" Reason"],
|
383
|
+
"edgeCase": "minorHallucinationDetailNotInContext",
|
384
|
+
"kpi": col
|
385
|
+
|
386
|
+
}
|
387
|
+
elif col.endswith(' Reason') and promptText!=None:
|
388
|
+
continue
|
389
|
+
else:# Get the columnID from the mapping
|
390
|
+
row_dict[columnID] = row[col]
|
391
|
+
|
392
|
+
# row_dict[columnID] = row[col] # Directly map the column ID to the row value
|
377
393
|
# Add the row index (if necessary)
|
378
394
|
row_dict["pIndex"] = indx
|
379
395
|
rowTemplate["dataToUploadList"].append(row_dict)
|
@@ -430,6 +446,8 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
|
|
430
446
|
payload1, payload2 = createColumn(
|
431
447
|
workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName
|
432
448
|
)
|
449
|
+
|
450
|
+
# Debugging line to check the payload2 structure
|
433
451
|
deleteExistingRows = deleteColumnListInPlayground(
|
434
452
|
workspaceID=workspaceID, playgroundID=playgroundId
|
435
453
|
)
|
@@ -441,6 +459,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
|
|
441
459
|
|
442
460
|
|
443
461
|
|
462
|
+
|
444
463
|
def getPlaygroundInsights(workspaceID: str, activePlayground: str):
|
445
464
|
headers = {
|
446
465
|
|
@@ -490,4 +509,100 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
|
|
490
509
|
else:
|
491
510
|
print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
|
492
511
|
return None
|
512
|
+
def checkDependency(selectedEval, columns, tocheck=True):
    """
    Check that every input column required by an evaluation metric is present.

    This function never raises; the outcome is always reported through the
    returned dictionary so the caller can decide how to surface a failure.

    Parameters:
    - selectedEval (str): The name of the selected evaluation metric.
    - columns: Collection of column names present in the dataset. Only
      membership tests (``name in columns``) are performed on it.
    - tocheck (bool): When falsy, the check is skipped entirely and a
      success result is returned.

    Returns:
    - dict: ``{"status": True, "message": "success"}`` when the check passes
      or is skipped; otherwise ``{"status": False, "message": <reason>}``,
      where the reason is either an unknown-metric notice or a description
      of the required columns and all of the columns that are missing.
    """
    # Guard clause: the caller explicitly asked to bypass validation.
    if not tocheck:
        return {"status": True, "message": "success"}

    # Required input columns for each supported evaluation metric.
    metricDependencies = {
        'Response Completeness': ['context', 'query', 'output'],
        'Response Bias': ['output'],
        'Response Harmfulness': ['output'],
        'Input Toxicity': ['query'],
        'Input Harmfulness': ['query'],
        'Context Utilization': ['output', 'context'],
        'Relevance Retention': ['context', 'query'],
        'Semantic Cohesion': ['context'],
        'Final Task Alignment': ['messageHistory'],
        'Tool Reliability': ['messageHistory'],
        'Response Correctness': ['output', 'query', 'context'],
        'Response Toxicity': ['output'],
        'Input Bias': ['query'],
        'Input Relevancy': ['context', 'query'],
        'Redundancy Reduction': ['context'],
        'Response Sentiment': ['output'],
        'Tool Selection Accuracy': ['tools', 'messageHistory'],
        'Stepwise Progression': ['tools', 'messageHistory'],
        'Hallucination': ['query', 'context', 'output'],
        'Groundedness': ['groundTruth', 'output'],
        'Memory Utilization': ['context', 'messageHistory'],
        'Input Relevancy (Multi-turn)': ['context', 'query'],
    }

    # Unknown metrics are reported as a failed check rather than an exception.
    if selectedEval not in metricDependencies:
        return {"status": False, "message": f"Unknown evaluation metric: {selectedEval}"}

    columnsRequired = metricDependencies[selectedEval]

    # Collect every missing column (in declaration order) so the user can fix
    # the dataset in one pass instead of discovering the gaps one at a time.
    missing = [requirement for requirement in columnsRequired if requirement not in columns]
    if missing:
        missingText = ", ".join(f"'{requirement}'" for requirement in missing)
        return {
            "status": False,
            "message": f"'{selectedEval}' requires columns: {columnsRequired}. "
                       f"Missing: {missingText}. Please ensure your data includes all required columns.",
        }

    return {"status": True, "message": "success"}
|
567
|
+
|
568
|
+
|
569
|
+
def fetchData(workspaceID, playgroundID, missingList: list, timeout=30):
    """
    Fetch awaited evaluation results for the given rows from the Llumo API.

    Sends a POST request carrying ``workspaceID``, ``playgroundID`` and
    ``missingList`` as a JSON body, then reshapes the ``data`` object of the
    response into a list of single-key dictionaries.

    Parameters:
    - workspaceID: Workspace identifier forwarded to the API.
    - playgroundID: Playground identifier forwarded to the API.
    - missingList (list): Forwarded to the API as ``missingList``
      (presumably the identifiers whose results are still missing;
      verify against the caller).
    - timeout: Seconds to wait for the server before giving up. Without a
      timeout ``requests`` may block indefinitely on an unresponsive server.

    Returns:
    - list: One ``{key: {"value", "reasoning", "edgeCase", "kpi"}}`` dict
      per usable entry in the response. An empty list is returned on a
      non-200 status or on any error; this function does not raise.
    """
    socket_data_url = "https://app.llumo.ai/api/eval/get-awaited"
    payload = {
        "workspaceID": workspaceID,
        "playgroundID": playgroundID,
        "missingList": missingList,
    }

    try:
        response = requests.post(socket_data_url, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"Failed to fetch data. Status Code: {response.status_code}")
            return []

        # `or {}` also covers an explicit JSON null for "data".
        data = response.json().get("data") or {}

        # Entries that are not JSON objects are skipped so a single malformed
        # item does not discard the rest of the batch.
        # NOTE(review): assumes each entry is an object with these four keys;
        # confirm against the API contract.
        return [
            {
                key: {
                    "value": value.get("value"),
                    "reasoning": value.get("reasoning"),
                    "edgeCase": value.get("edgeCase"),
                    "kpi": value.get("kpi"),
                }
            }
            for key, value in data.items()
            if isinstance(value, dict)
        ]

    except Exception as e:
        # Deliberate best-effort boundary: network, timeout and parsing
        # failures are reported and turned into an empty result.
        print(f"An error occurred: {e}")
        return []
|
llumo/sockets.py
CHANGED
@@ -17,15 +17,16 @@ class LlumoSocketClient:
|
|
17
17
|
|
18
18
|
# Initialize client
|
19
19
|
self.sio = socketio.Client(
|
20
|
-
|
21
|
-
|
20
|
+
logger=False,
|
21
|
+
engineio_logger=False,
|
22
22
|
reconnection=True,
|
23
|
-
reconnection_attempts=
|
23
|
+
reconnection_attempts=1,
|
24
24
|
reconnection_delay=1,
|
25
25
|
)
|
26
26
|
|
27
27
|
@self.sio.on("connect")
|
28
28
|
def on_connect():
|
29
|
+
self.sio.emit("ready")
|
29
30
|
# print("Socket connection established")
|
30
31
|
self._connected = True
|
31
32
|
# Don't set connection_established yet - wait for server confirmation
|
@@ -37,21 +38,27 @@ class LlumoSocketClient:
|
|
37
38
|
# f"Server acknowledged connection with 'connection-established' event: {data}"
|
38
39
|
# )
|
39
40
|
if isinstance(data, dict) and "socketId" in data:
|
41
|
+
self.sio.emit("ready")
|
40
42
|
self.server_socket_id = data["socketId"]
|
41
43
|
# print(f"Received server socket ID: {self.server_socket_id}")
|
42
44
|
self._connection_established.set()
|
43
45
|
|
44
46
|
@self.sio.on("result-update")
|
45
|
-
def on_result_update(data):
|
47
|
+
def on_result_update(data, callback=None):
|
46
48
|
with self._lock:
|
47
49
|
# print(f"Received result-update event: {data}")
|
48
50
|
self._received_data.append(data)
|
49
51
|
self._last_update_time = time.time()
|
50
52
|
|
51
53
|
# ✅ Stop if all expected results are received
|
52
|
-
if
|
54
|
+
if (
|
55
|
+
self._expected_results
|
56
|
+
and len(self._received_data) >= self._expected_results
|
57
|
+
):
|
53
58
|
# print("✅ All expected results received.")
|
54
59
|
self._listening_done.set()
|
60
|
+
if callback:
|
61
|
+
callback(True)
|
55
62
|
|
56
63
|
@self.sio.on("disconnect")
|
57
64
|
def on_disconnect():
|
@@ -81,13 +88,17 @@ class LlumoSocketClient:
|
|
81
88
|
start = time.time()
|
82
89
|
while not self.sio.connected:
|
83
90
|
if time.time() - start > timeout:
|
84
|
-
raise RuntimeError(
|
91
|
+
raise RuntimeError(
|
92
|
+
"Timed out waiting for low-level socket connection."
|
93
|
+
)
|
85
94
|
time.sleep(0.1)
|
86
95
|
# print("[DEBUG] SocketIO low-level connection established.")
|
87
96
|
|
88
97
|
# Wait for server "connection-established" event
|
89
98
|
if not self._connection_established.wait(timeout):
|
90
|
-
raise RuntimeError(
|
99
|
+
raise RuntimeError(
|
100
|
+
"Timed out waiting for connection-established event."
|
101
|
+
)
|
91
102
|
|
92
103
|
self._connected = True
|
93
104
|
self._last_update_time = time.time()
|
@@ -100,10 +111,13 @@ class LlumoSocketClient:
|
|
100
111
|
self._connected = False
|
101
112
|
raise RuntimeError(f"WebSocket connection failed: {e}")
|
102
113
|
|
103
|
-
def listenForResults(
|
114
|
+
def listenForResults(
|
115
|
+
self, min_wait=30, max_wait=300, inactivity_timeout=50, expected_results=None
|
116
|
+
):
|
104
117
|
# if not self._connected:
|
105
118
|
# raise RuntimeError("WebSocket is not connected. Call connect() first.")
|
106
119
|
|
120
|
+
# total records
|
107
121
|
self._expected_results = expected_results # NEW
|
108
122
|
start_time = time.time()
|
109
123
|
self._last_update_time = time.time()
|
@@ -128,14 +142,13 @@ class LlumoSocketClient:
|
|
128
142
|
self._listening_done.set()
|
129
143
|
break
|
130
144
|
|
131
|
-
time.sleep(3)
|
132
|
-
|
133
145
|
timeout_thread = threading.Thread(target=timeout_watcher, daemon=True)
|
134
146
|
timeout_thread.start()
|
135
147
|
self._listening_done.wait()
|
136
148
|
|
137
149
|
def getReceivedData(self):
|
138
150
|
with self._lock:
|
151
|
+
# print("Total received:", len(self._received_data)) # DEBUG
|
139
152
|
return self._received_data.copy()
|
140
153
|
|
141
154
|
def disconnect(self):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: llumo
|
3
|
-
Version: 0.2.
|
3
|
+
Version: 0.2.14
|
4
4
|
Summary: Python SDK for interacting with the Llumo ai API.
|
5
5
|
Home-page: https://www.llumo.ai/
|
6
6
|
Author: Llumo
|
@@ -21,6 +21,7 @@ Requires-Dist: requests>=2.0.0
|
|
21
21
|
Requires-Dist: python-socketio
|
22
22
|
Requires-Dist: python-dotenv
|
23
23
|
Requires-Dist: openai==1.75.0
|
24
|
+
Requires-Dist: tqdm==4.67.1
|
24
25
|
Requires-Dist: google-generativeai==0.8.5
|
25
26
|
Dynamic: author
|
26
27
|
Dynamic: author-email
|
@@ -0,0 +1,13 @@
|
|
1
|
+
llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
|
2
|
+
llumo/client.py,sha256=HpvUyucrGPbcPQMz_cTRDcEsBFpmNt8jfW1zJU4Nyss,46781
|
3
|
+
llumo/exceptions.py,sha256=i3Qv4_g7XjRuho7-b7ybjw2bwSh_NhvICR6ZAgiLQX8,1944
|
4
|
+
llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
|
5
|
+
llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
|
6
|
+
llumo/helpingFuntions.py,sha256=RgWok8DoE1R-Tc0kJ9B5En6LEUEk5EvQU8iJiGPbUsw,21911
|
7
|
+
llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
|
8
|
+
llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
|
9
|
+
llumo-0.2.14.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
|
10
|
+
llumo-0.2.14.dist-info/METADATA,sha256=B7NLMFRj8018jkWxEzKSSGlb2CS3d45rDtnywDh_4kc,1519
|
11
|
+
llumo-0.2.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
+
llumo-0.2.14.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
|
13
|
+
llumo-0.2.14.dist-info/RECORD,,
|
llumo-0.2.13b1.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
|
2
|
-
llumo/client.py,sha256=pzmJkz5LRF3h1WgjmezNnJEUAZ9_5nF47eW489F9-y4,36026
|
3
|
-
llumo/exceptions.py,sha256=iCj7HhtO_ckC2EaVBdXbAudNpuMDsYmmMEV5lwynZ-E,1854
|
4
|
-
llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
|
5
|
-
llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
|
6
|
-
llumo/helpingFuntions.py,sha256=ah0FUQcRV3gfguvjQQ_aZzq59hpJttqAPJdjJVNYdFc,17110
|
7
|
-
llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
|
8
|
-
llumo/sockets.py,sha256=0BCcdCaiXDR7LO_9NIYA6urtpgdmyWW2M1US67G9Eus,5583
|
9
|
-
llumo-0.2.13b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
|
10
|
-
llumo-0.2.13b1.dist-info/METADATA,sha256=Kyb0OFYTsOosmZ6Rcok4LNgWqVsUldzjeXmnw2vOnGA,1493
|
11
|
-
llumo-0.2.13b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
-
llumo-0.2.13b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
|
13
|
-
llumo-0.2.13b1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|