llumo 0.2.13b1__py3-none-any.whl → 0.2.14b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/client.py +79 -26
- llumo/exceptions.py +4 -0
- llumo/helpingFuntions.py +80 -8
- {llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/METADATA +1 -1
- llumo-0.2.14b1.dist-info/RECORD +13 -0
- llumo-0.2.13b1.dist-info/RECORD +0 -13
- {llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/WHEEL +0 -0
- {llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -80,17 +80,16 @@ class LlumoClient:
             raise LlumoAIError.InvalidApiResponse()

         try:
-            self.hitsAvailable = data[
+            self.hitsAvailable = data['data']["data"].get("remainingHits", 0)
             self.workspaceID = data["data"]["data"].get("workspaceID")
-            self.evalDefinition = data["data"]["data"]
+            self.evalDefinition = data["data"]["data"]["analyticsMapping"]
             self.socketToken = data["data"]["data"].get("token")
             self.hasSubscribed = data["data"]["data"].get("hasSubscribed", False)
             self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
             self.subscriptionEndDate = data["data"]["data"].get("subscriptionEndDate", None)
             self.email = data["data"]["data"].get("email", None)

-            self.definationMapping[evalName] =
-
+            self.definationMapping[evalName] = data["data"]["data"]["analyticsMapping"][evalName]
         except Exception as e:
             # print(f"Error extracting data from response: {str(e)}")
             raise LlumoAIError.UnexpectedError(detail=str(e))
@@ -159,12 +158,21 @@ class LlumoClient:
     # this function allows the users to run exactl one eval at a time
     def evaluate(
         self,
-
+        data,
         eval="Response Completeness",
         prompt_template="",
         outputColName="output",
         createExperiment: bool = False,
+        _tocheck = True,
     ):
+
+        # converting it into a pandas dataframe object
+        dataframe = pd.DataFrame(data)
+
+        # check for dependencies for the selected eval metric
+        metricDependencies = checkDependency(eval,columns=list(dataframe.columns),tocheck=_tocheck)
+        if metricDependencies["status"] == False:
+            raise LlumoAIError.dependencyError(metricDependencies["message"])

         results = {}
         try:
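
For orientation, a minimal usage sketch of the reworked evaluate: it now takes a list of dicts rather than a DataFrame and runs checkDependency before scoring. The import path and the `client` variable are assumptions (this diff does not show how LlumoClient is constructed); the column names follow the dependency table added in helpingFuntions.py.

from llumo.exceptions import LlumoAIError

# `client` is assumed to be an already-authenticated LlumoClient instance.
rows = [
    {
        "query": "What is RAG?",
        "context": "RAG combines retrieval with generation.",
        "output": "RAG retrieves documents and then generates an answer.",
    }
]

try:
    # "Response Completeness" requires context, query and output columns,
    # so the dependency check passes for the rows above.
    scored = client.evaluate(rows, eval="Response Completeness")
except LlumoAIError as err:
    # Raised via LlumoAIError.dependencyError when a required column is missing.
    print(err)
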
@@ -206,7 +214,7 @@ class LlumoClient:
         # if self.hitsAvailable == 0 or len(dataframe) > self.hitsAvailable:
         #     raise LlumoAIError.InsufficientCredits()

-        evalDefinition = self.evalDefinition[eval]
+        evalDefinition = self.evalDefinition[eval].get("definition")
         model = "GPT_4"
         provider = "OPENAI"
         evalType = "LLM"
@@ -377,32 +385,71 @@ class LlumoClient:
     # this function allows the users to run multiple evals at once
     def evaluateMultiple(
         self,
-
+        data,
         eval=["Response Completeness"],
-        prompt_template="",
+        prompt_template="Give answer to the given query:{{query}} , using the given context: {{context}}",
         outputColName="output",
         createExperiment: bool = False,
+        _tocheck = True,
     ):
-
+        """
+        Runs multiple evaluation metrics on the same input dataset.
+
+        Parameters:
+            data (list of dict): Input data, where each dict represents a row.
+            eval (list of str): List of evaluation metric names to run.
+            prompt_template (str): Optional prompt template used in evaluation.
+            outputColName (str): Column name in data that holds the model output.
+            createExperiment (bool): Whether to log the results to Llumo playground.
+
+        Returns:
+            pandas.DataFrame: Final dataframe with all evaluation results.
+        """
+
+        # Convert input dict list into a DataFrame
+        dataframe = pd.DataFrame(data)
+
+        # Copy to hold final results
+        resultdf = dataframe.copy()
+
+        # Run each evaluation metric one by one
         for evalName in eval:
-            time.sleep(2)
-
+            time.sleep(2)  # small delay to avoid overload or rate limits
+
+            # Call evaluate (assumes evaluate takes dict, not dataframe)
+            resultdf = self.evaluate(
+                data=resultdf.to_dict(orient="records"),  # convert df back to dict list
+                eval=evalName,
+                prompt_template=prompt_template,
+                outputColName=outputColName,
+                createExperiment=False,
+                _tocheck=_tocheck,
+            )

+        # Save to playground if requested
         if createExperiment:
             pd.set_option("future.no_silent_downcasting", True)
             df = resultdf.fillna("Some error occured").astype(object)

-            if createPlayground(
+            if createPlayground(
+                self.email,
+                self.workspaceID,
+                df,
+                definationMapping=self.definationMapping,
+                outputColName=outputColName,
+                promptText=prompt_template
+            ):
                 print(
-                    "Your data has been saved in the Llumo Experiment.
+                    "Your data has been saved in the Llumo Experiment. "
+                    "Visit https://app.llumo.ai/evallm to see the results. "
+                    "Please rerun the experiment to see the results on playground."
                 )
             else:
                 return resultdf

-
-    def evaluateCompressor(self, dataframe, prompt_template):
+    def evaluateCompressor(self, data, prompt_template):
         results = []
-
+        dataframe = pd.DataFrame(data)
         try:
             socketID = self.socket.connect(timeout=150)
             max_wait_secs = 20
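
A hedged sketch of the new evaluateMultiple flow, under the same assumption about `client`: each metric is pushed through evaluate sequentially, and the scores accumulate as extra columns on the returned DataFrame.

rows = [
    {
        "query": "Summarize the refund policy.",
        "context": "Refunds are accepted within 30 days of purchase.",
        "output": "You can get a refund within 30 days.",
    }
]

resultDf = client.evaluateMultiple(
    rows,
    eval=["Response Completeness", "Response Correctness"],
    outputColName="output",
    createExperiment=False,  # set True to push results to the Llumo playground
)
print(resultDf.columns.tolist())
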
@@ -609,10 +656,11 @@ class LlumoClient:
             outputColName = f"output_{i}"
             try:
                 res = self.evaluateMultiple(
-                    df,
+                    df.to_dict("records"),
                     eval=eval,
                     prompt_template=str(templates[0]),
                     outputColName=outputColName,
+                    _tocheck=False,
                 )

                 # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -634,7 +682,7 @@ class LlumoClient:
         if createExperiment:
             pd.set_option("future.no_silent_downcasting", True)
             dfWithEvals = dfWithEvals.fillna("Some error occurred")
-            if createPlayground(email, workspaceID, dfWithEvals, promptText=templates[0]):
+            if createPlayground(email, workspaceID, dfWithEvals, promptText=templates[0],definationMapping=self.definationMapping):

                 print("Your data has been saved in the Llumo Experiment. Visit https://app.llumo.ai/evallm to see the results.")
             else:
@@ -653,7 +701,7 @@ class LlumoClient:
     # this function generates an output using llm and tools and evaluate that output
     def evaluateAgents(
         self,
-
+        data,
         model,
         agents,
         model_api_key=None,
@@ -664,6 +712,9 @@ class LlumoClient:
         if model.lower() not in ["openai", "google"]:
             raise ValueError("Model must be 'openai' or 'google'")

+        # converting into pandas dataframe object
+        dataframe = pd.DataFrame(data)
+
         # Run unified agent execution
         toolResponseDf = LlumoAgentExecutor.run(
             dataframe, agents, model=model, model_api_key=model_api_key
@@ -680,7 +731,7 @@ class LlumoClient:
         for eval in evals:
             # Perform evaluation
             toolResponseDf = self.evaluate(
-                toolResponseDf,
+                toolResponseDf.to_dict(orient = "records"),
                 eval=eval,
                 prompt_template=prompt_template,
                 createExperiment=False,
@@ -698,32 +749,34 @@ class LlumoClient:
     # this function evaluate that tools output given by the user
     def evaluateAgentResponses(
         self,
-
+        data,
         evals=["Final Task Alignment"],
         outputColName="output",
         createExperiment: bool = False,
     ):
+        dataframe = pd.DataFrame(data)
+
         try:
             if "query" and "messageHistory" and "tools" not in dataframe.columns:
                 raise ValueError(
                     "DataFrame must contain 'query', 'messageHistory','output' ,and 'tools' columns. Make sure the columns names are same as mentioned here."
                 )
-
-
+
+
             # evals = [
             #     "Tool Reliability",
             #     "Stepwise Progression",
             #     "Tool Selection Accuracy",
             #     "Final Task Alignment",
             # ]
+
             toolResponseDf = dataframe.copy()
             for eval in evals:
                 # Perform evaluation
                 toolResponseDf = self.evaluate(
-                    toolResponseDf, eval=eval, prompt_template=
+                    toolResponseDf.to_dict(orient = "records"), eval=eval, prompt_template="Give answer for the given query: {{query}}",outputColName=outputColName
                 )
-
-
+
             return toolResponseDf

         except Exception as e:
llumo/exceptions.py
CHANGED
@@ -46,6 +46,10 @@ class LlumoAIError(Exception):
     def modelHitsExhausted(details = "Your credits for the selected model exhausted."):
         return LlumoAIError(details)

+    @staticmethod
+    def dependencyError(details):
+        return LlumoAIError(details)
+
     # @staticmethod
     # def dateNotFound():
     #     return LlumoAIError("Trial end date or subscription end date not found for the given user.")
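
dependencyError follows the same static-factory pattern as the surrounding methods: it wraps a message in a LlumoAIError instance that the caller raises. A small sketch, with an illustrative message:

from llumo.exceptions import LlumoAIError

check = {"status": False, "message": "'Hallucination' requires columns: ['query', 'context', 'output']. Missing: 'context'."}
if check["status"] is False:
    # Raises (and can be caught as) a plain LlumoAIError carrying the message.
    raise LlumoAIError.dependencyError(check["message"])
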
llumo/helpingFuntions.py
CHANGED
@@ -303,8 +303,8 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
             dependencies.append(columnIDMapping[varName])

         dependencies.append(columnIDMapping[outputColName])  # Add the output column ID
-        longDef = definationMapping.get(col, {}).get(
-        shortDef =
+        longDef = definationMapping.get(col, {}).get('definition', "")
+        shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
         enum = col.upper().replace(" ","_")

         template = {
@@ -312,7 +312,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                 col.lower().replace(" ","_")
             ],
             "evaluationMetric": "ALL",
-            "evaluationModel": "
+            "evaluationModel": "LLUMO_EVALLM",
             "selectPrompt": columnIDMapping[outputColName],
             "scoreCondition": "GREATER_THAN",
             "scoreValue": "50",
@@ -336,7 +336,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
             "analyticsENUM": enum,
             "prompt": shortDef,
             "analyticsName": col,
-            "columnID":
+            "columnID": columnID,
             "label": col,
             "order": indx
         }
@@ -371,9 +371,24 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN

         # For each column, we need to map the column ID to the corresponding value in the row
         for col in dataframe.columns:
-            columnID = columnIDMapping[col]
-
-
+            columnID = columnIDMapping[col]
+
+            if col in allEvals:
+                row_dict[columnID] = {
+                    "value": row[col],
+                    "type": "EVAL",
+                    "isValid": True,
+                    "reasoning": row[col+" Reason"],
+                    "edgeCase": "minorHallucinationDetailNotInContext",
+                    "kpi": col
+                }
+            else:  # Get the columnID from the mapping
+                row_dict[columnID] = {
+                    "value": row[col],
+                    "type": "VARIABLE"  # Map the columnID to the value in the row
+                }
         # Add the row index (if necessary)
         row_dict["pIndex"] = indx
         rowTemplate["dataToUploadList"].append(row_dict)
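
For readability, the two cell payloads this loop now builds look roughly like the following. The field names and the hard-coded edgeCase string are taken from the added lines; the example values are illustrative only.

# Column that holds an eval score: the matching "<metric> Reason" column is attached as reasoning.
evalCell = {
    "value": 85,
    "type": "EVAL",
    "isValid": True,
    "reasoning": "The answer covers every part of the query.",
    "edgeCase": "minorHallucinationDetailNotInContext",
    "kpi": "Response Completeness",
}

# Ordinary input/output column: only the raw value is uploaded.
variableCell = {
    "value": "What is RAG?",
    "type": "VARIABLE",
}
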
@@ -430,6 +445,9 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
     payload1, payload2 = createColumn(
         workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName
     )
+
+
+
     deleteExistingRows = deleteColumnListInPlayground(
         workspaceID=workspaceID, playgroundID=playgroundId
     )
@@ -490,4 +508,58 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
     else:
         print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
         return None
-
+def checkDependency(selectedEval, columns,tocheck=True):
+    """
+    Checks if all the required input columns for the selected evaluation metric are present.
+
+    Parameters:
+    - selectedEval (str): The name of the selected evaluation metric.
+    - columns (list): List of column names present in the dataset.
+
+    Raises:
+    - LlumoAIError.dependencyError: If any required column is missing.
+    """
+    if tocheck:
+        # Define required dependencies for each evaluation metric
+        metricDependencies = {
+            'Response Completeness': ['context', 'query', 'output'],
+            'Response Bias': ['output'],
+            'Response Harmfulness': ['output'],
+            'Input Toxicity': ['query'],
+            'Input Harmfulness': ['query'],
+            'Context Utilization': ['output', 'context'],
+            'Relevance Retention': ['context', 'query'],
+            'Semantic Cohesion': ['context'],
+            'Final Task Alignment': ['messageHistory'],
+            'Tool Reliability': ['messageHistory'],
+            'Response Correctness': ['output', 'query', 'context'],
+            'Response Toxicity': ['output'],
+            'Input Bias': ['query'],
+            'Input Relevancy': ['context', 'query'],
+            'Redundancy Reduction': ['context'],
+            'Response Sentiment': ['output'],
+            'Tool Selection Accuracy': ['tools', 'messageHistory'],
+            'Stepwise Progression': ['tools', 'messageHistory'],
+            'Hallucination': ['query', 'context', 'output'],
+            'Groundedness': ['groundTruth', 'output'],
+            'Memory Utilization': ['context', 'messageHistory'],
+            'Input Relevancy (Multi-turn)': ['context', 'query']
+        }
+
+        # Check if the selected evaluation metric is known
+        if selectedEval not in metricDependencies:
+            return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
+
+        # Get the required columns for the selected evaluation
+        columnsRequired = metricDependencies[selectedEval]
+
+        # Check if each required column is present in the provided columns
+        for requirement in columnsRequired:
+            if requirement not in columns:
+                return {"status":False,
+                        "message":f"'{selectedEval}' requires columns: {columnsRequired}. "
+                                  f"Missing: '{requirement}'. Please ensure your data includes all required columns."
+                        }
+        return {"status":True,"message":"success"}
+    else:
+        return {"status":True,"message":"success"}
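
A quick sketch of calling the new helper directly; the return values follow the dictionaries defined in the function above.

from llumo.helpingFuntions import checkDependency

# Missing a required column -> status False with an explanatory message.
print(checkDependency("Response Completeness", columns=["query", "output"]))

# All required columns present -> {"status": True, "message": "success"}.
print(checkDependency("Response Sentiment", columns=["output"]))

# tocheck=False skips validation entirely (the client passes _tocheck=False for some internal calls).
print(checkDependency("Hallucination", columns=[], tocheck=False))
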
llumo-0.2.14b1.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
+llumo/client.py,sha256=KbvBokbeINsEv2bJ9S3Zys6GYmpA7BIsjchmYZJx5C0,38133
+llumo/exceptions.py,sha256=i3Qv4_g7XjRuho7-b7ybjw2bwSh_NhvICR6ZAgiLQX8,1944
+llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/helpingFuntions.py,sha256=FOHDxnofS316u6fnuPDVzM-fIrTzAGkMTAjoiadlKVw,20230
+llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
+llumo/sockets.py,sha256=0BCcdCaiXDR7LO_9NIYA6urtpgdmyWW2M1US67G9Eus,5583
+llumo-0.2.14b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.14b1.dist-info/METADATA,sha256=TxKTceUWuxS1UkZT1vd_3yVbR98_ntHSEU1yq2HW_bg,1493
+llumo-0.2.14b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.14b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.14b1.dist-info/RECORD,,
llumo-0.2.13b1.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
-llumo/client.py,sha256=pzmJkz5LRF3h1WgjmezNnJEUAZ9_5nF47eW489F9-y4,36026
-llumo/exceptions.py,sha256=iCj7HhtO_ckC2EaVBdXbAudNpuMDsYmmMEV5lwynZ-E,1854
-llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/helpingFuntions.py,sha256=ah0FUQcRV3gfguvjQQ_aZzq59hpJttqAPJdjJVNYdFc,17110
-llumo/models.py,sha256=YH-qAMnShmUpmKE2LQAzQdpRsaXkFSlOqMxHwU4zBUI,1560
-llumo/sockets.py,sha256=0BCcdCaiXDR7LO_9NIYA6urtpgdmyWW2M1US67G9Eus,5583
-llumo-0.2.13b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.13b1.dist-info/METADATA,sha256=Kyb0OFYTsOosmZ6Rcok4LNgWqVsUldzjeXmnw2vOnGA,1493
-llumo-0.2.13b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.13b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.13b1.dist-info/RECORD,,
{llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/WHEEL
File without changes
{llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/licenses/LICENSE
File without changes
{llumo-0.2.13b1.dist-info → llumo-0.2.14b1.dist-info}/top_level.txt
File without changes