llumo 0.2.15b1__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +2 -1
- llumo/chains.py +73 -0
- llumo/client.py +259 -70
- llumo/execution.py +4 -5
- llumo/google.py +66 -0
- llumo/helpingFuntions.py +160 -116
- llumo/openai.py +78 -0
- {llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/METADATA +1 -1
- llumo-0.2.16.dist-info/RECORD +16 -0
- llumo-0.2.15b1.dist-info/RECORD +0 -13
- {llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/WHEEL +0 -0
- {llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/top_level.txt +0 -0
llumo/__init__.py
CHANGED
llumo/chains.py
ADDED
@@ -0,0 +1,73 @@
+import pandas as pd
+from .helpingFuntions import *
+
+class LlumoDataFrameResults(pd.DataFrame):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)
+
+    @property
+    def _constructor(self):
+        # Needed so slicing operations return the same type
+        return LlumoDataFrameResults
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData)  # print(rawResults)
+
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+class LlumoDictResults(list):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)  # This will handle list[dict]
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData)  # print(rawResults)
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+for _cls in (LlumoDataFrameResults, LlumoDictResults):
+    _cls.__name__ = "LlumoResults"
+    _cls.__qualname__ = "LlumoResults"
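Note: the two wrapper classes added above are what `evaluateMultiple()` now returns: a `LlumoDataFrameResults` (a pandas DataFrame subclass) when `getDataFrame=True`, otherwise a `LlumoDictResults` (a list of row dicts). Both expose `insights()`, which groups the raw eval logs by edge case and calls the insight endpoint. A minimal usage sketch based on this diff; the API key and input row are placeholders, and it assumes `LlumoClient` is still exported from the package root:

```python
from llumo import LlumoClient  # assumes the client is exported at the package root

client = LlumoClient(api_key="LLUMO-API-KEY")  # placeholder key

rows = [{"query": "What is RAG?", "context": "RAG retrieves documents before generating.", "output": "..."}]

# getDataFrame=True -> LlumoDataFrameResults; getDataFrame=False -> LlumoDictResults.
results = client.evaluateMultiple(rows, evals=["Response Correctness"], getDataFrame=True)

# Both wrappers keep the raw eval logs, so insights() can summarise recurring failure classes.
print(results.insights())
```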
llumo/client.py
CHANGED
@@ -5,20 +5,23 @@ import time
 import re
 import json
 import uuid
-
+import warnings
 import os
 import itertools
 import pandas as pd
 from typing import List, Dict
-from .models import AVAILABLEMODELS, getProviderFromModel
+from .models import AVAILABLEMODELS, getProviderFromModel, Provider
 from .execution import ModelExecutor
 from .exceptions import LlumoAIError
 from .helpingFuntions import *
 from .sockets import LlumoSocketClient
 from .functionCalling import LlumoAgentExecutor
+from .chains import LlumoDataFrameResults,LlumoDictResults
 import threading
 from tqdm import tqdm

+pd.set_option('future.no_silent_downcasting', True)
+
 postUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
 )
@@ -38,7 +41,8 @@ class LlumoClient:

     def __init__(self, api_key):
         self.apiKey = api_key
-
+        self.evalData=[]
+        self.evals=[]
         self.processMapping = {}
         self.definationMapping = {}

@@ -50,7 +54,7 @@ class LlumoClient:
         reqBody = {"analytics": [evalName]}

         try:
-
+
             response = requests.post(url=validateUrl, json=reqBody, headers=headers)

         except requests.exceptions.RequestException as e:
@@ -393,27 +397,51 @@ class LlumoClient:
                 outputColName=outputColName,
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
                 )
         else:
             return dataframe

     # this function allows the users to run multiple evals at once

-    def
+    def compressor(self, data, prompt_template):
         results = []
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         dataframe = pd.DataFrame(data)
+
         try:
-
+            self.socket = LlumoSocketClient(socketUrl)
+            dataframe = pd.DataFrame(data).astype(str)
+            socketID = self.socket.connect(timeout=250)
+
+            # Wait for socket connection
             max_wait_secs = 20
             waited_secs = 0
             while not self.socket._connection_established.is_set():
                 time.sleep(0.1)
                 waited_secs += 0.1
                 if waited_secs >= max_wait_secs:
-                    raise RuntimeError(
-
-
+                    raise RuntimeError("Timeout waiting for server connection")
+
+            # Start listener thread
+            expectedResults = len(dataframe)
+            # print("expected result" ,expectedResults)
+            timeout = max(100, min(150, expectedResults * 10))
+            listener_thread = threading.Thread(
+                target=self.socket.listenForResults,
+                kwargs={
+                    "min_wait": 40,
+                    "max_wait": timeout,
+                    "inactivity_timeout": 10,
+                    "expected_results": expectedResults,
+                },
+                daemon=True,
+            )
+            listener_thread.start()

             try:
                 self.validateApiKey()
@@ -508,7 +536,7 @@ class LlumoClient:
                 "playgroundID": activePlayground,
             }

-            rowIdMapping[rowID] = index
+            rowIdMapping[f'{rowID}-{columnID}-{columnID}'] = index
             # print("__________________________TEMPLATE__________________________________")
             # print(templateData)

@@ -537,7 +565,19 @@ class LlumoClient:
                 expected_results=None,
             )

-
+            rawResults = self.socket.getReceivedData()
+            receivedRowIDs = {key for item in rawResults for key in item.keys()}
+            expectedRowIDs = set(rowIdMapping.keys())
+            missingRowIDs = expectedRowIDs - receivedRowIDs
+            # print("All expected keys:", expected_rowIDs)
+            # print("All received keys:", received_rowIDs)
+            # print("Missing keys:", len(missingRowIDs))
+            missingRowIDs = list(missingRowIDs)
+
+            if len(missingRowIDs) > 0:
+                dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
+                rawResults.extend(dataFromDb)
+
         # results = self.finalResp(eval_results)
         # print(f"======= Completed evaluation: {eval} =======\n")

@@ -551,10 +591,10 @@ class LlumoClient:
                 print(f"Error disconnecting socket: {e}")

         dataframe["Compressed Input"] = None
-        for records in
+        for records in rawResults:
             for compound_key, value in records.items():
                 # for compound_key, value in item['data'].items():
-                rowID = compound_key
+                rowID = compound_key
                 # looking for the index of each rowID , in the original dataframe
                 if rowID in rowIdMapping:
                     index = rowIdMapping[rowID]
@@ -577,16 +617,24 @@ class LlumoClient:
         self,
         data,
         evals: list,  # list of eval metric names
-        prompt_template="",
+        prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
         outputColName="output",
         createExperiment: bool = False,
+        getDataFrame:bool =False,
         _tocheck=True,
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         self.socket = LlumoSocketClient(socketUrl)
         dataframe = pd.DataFrame(data).astype(str)
         workspaceID = None
         email = None
         socketID = self.socket.connect(timeout=250)
+        self.evalData=[]
+        self.evals=evals
         self.allBatches = []
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))

@@ -614,10 +662,14 @@ class LlumoClient:
             daemon=True,
         )
         listener_thread.start()
-
-
-
-
+        self.validateApiKey(evalName=evals[0])
+        if createExperiment:
+            activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
+
+        else:
+            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                "-", ""
+            )
         for evalName in evals:
             # print(f"\n======= Running evaluation for: {evalName} =======")

@@ -776,10 +828,8 @@ class LlumoClient:
                 rawResults.extend(dataFromDb)


+        self.evalData = rawResults

-
-
-
         # Initialize dataframe columns for each eval
         for eval in evals:
             dataframe[eval] = None
@@ -799,7 +849,12 @@ class LlumoClient:

         if createExperiment:
             pd.set_option("future.no_silent_downcasting", True)
-            df = dataframe.fillna("Some error occured").astype(object)
+            # df = dataframe.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = dataframe.fillna("Some error occurred").astype(str)
+
+            df = dataframe.fillna("Some error occured").infer_objects(copy=False)
             if createPlayground(
                 email,
                 workspaceID,
@@ -807,13 +862,19 @@ class LlumoClient:
                 promptText=prompt_template,
                 definationMapping=self.definationMapping,
                 outputColName=outputColName,
+                activePlayground= activePlayground
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
         else:
-
+            if getDataFrame:
+                return LlumoDataFrameResults(dataframe,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
+            else:
+                data=dataframe.to_dict(orient="records")
+                return LlumoDictResults(data,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)

+
     def promptSweep(
         self,
         templates: List[str],
@@ -823,9 +884,14 @@ class LlumoClient:
         evals=["Response Correctness"],
         toEvaluate: bool = False,
         createExperiment: bool = False,
+        getDataFrame = False


     ) -> pd.DataFrame:
+        if isinstance(dataset, dict):
+            dataset = [dataset]
+        elif not isinstance(dataset, list):
+            raise ValueError("Data should be a dict or a list of dicts.")

         modelStatus = validateModels(model_aliases=model_aliases)
         if modelStatus["status"]== False:
@@ -878,6 +944,8 @@ class LlumoClient:
                 prompt_template=str(templates[0]),
                 outputColName=outputColName,
                 _tocheck=False,
+                getDataFrame=True,
+                createExperiment = False
             )

             # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -910,10 +978,17 @@ class LlumoClient:
             ):

                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
            else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(dfWithEvals, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = dfWithEvals.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,definationMapping=self.definationMapping)
+
+
         else:
             if createExperiment == True:
                 pd.set_option("future.no_silent_downcasting", True)
@@ -921,10 +996,18 @@ class LlumoClient:

                 if createPlayground(email, workspaceID, df, promptText=templates[0]):
                     print(
-                        "
+                        "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                     )
             else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(df, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = df.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                            definationMapping=self.definationMapping)
+
+

     # this function generates an output using llm and tools and evaluate that output
     def evaluateAgents(
@@ -936,8 +1019,14 @@ class LlumoClient:
         evals=["Final Task Alignment"],
         prompt_template="Give answer for the given query: {{query}}",
         createExperiment: bool = False,
+        getDataFrame:bool = False

     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         if model.lower() not in ["openai", "google"]:
             raise ValueError("Model must be 'openai' or 'google'")

@@ -961,27 +1050,37 @@ class LlumoClient:
             toolResponseDf.to_dict(orient="records"),
             evals=evals,
             prompt_template=prompt_template,
-            createExperiment=
+            createExperiment=createExperiment,
+            getDataFrame=getDataFrame
+
         )

-
-
-
-
-
-
-
-
-
+        return toolResponseDf
+        # if createExperiment:
+        #     pd.set_option("future.no_silent_downcasting", True)
+        #     df = toolResponseDf.fillna("Some error occured")
+        #     if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
+        #         print(
+        #             "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+        #         )
+        #     else:
+        #         return toolResponseDf

     # this function evaluate that tools output given by the user
     def evaluateAgentResponses(
         self,
         data,
         evals=["Final Task Alignment"],
-        outputColName="output",
         createExperiment: bool = False,
+        getDataFrame = False,
+        outputColName="output"
+
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         dataframe = pd.DataFrame(data)

         try:
@@ -1001,7 +1100,9 @@ class LlumoClient:
                 evals=evals,
                 prompt_template="Give answer for the given query: {{query}}",
                 outputColName=outputColName,
-                createExperiment=createExperiment
+                createExperiment=createExperiment,
+                getDataFrame = getDataFrame
+
             )
             if createExperiment:
                 pass
@@ -1022,8 +1123,14 @@ class LlumoClient:
         prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
         evals=["Context Utilization"],
         toEvaluate=False,
-        generateOutput=True
+        generateOutput=True,
+        getDataFrame = False
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         # Validate required parameters
         if generateOutput:
             if not modelAliases:
@@ -1147,23 +1254,25 @@ class LlumoClient:
         outputEvalMapping = None
         if toEvaluate:
             for evalName in evals:
-
                 # Validate API and dependencies
                 self.validateApiKey(evalName=evalName)
                 metricDependencies = checkDependency(
-                    evalName, list(working_df.columns), tocheck=
+                    evalName, list(working_df.columns), tocheck=False
                 )
                 if not metricDependencies["status"]:
                     raise LlumoAIError.dependencyError(metricDependencies["message"])

-            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template)
-
+            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template,generateOutput)

+
         self.socket.disconnect()

         # Create experiment if required
         if createExperiment:
-            df = working_df.fillna("Some error occured").astype(object)
+            # df = working_df.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = working_df.fillna("Some error occurred").astype(str)
             if createPlayground(
                 email, workspaceID, df,
                 queryColName=queryColName,
@@ -1173,10 +1282,22 @@ class LlumoClient:
                 evalOutputMap=outputEvalMapping
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
         else:
-
-
+            if getDataFrame == True and toEvaluate == True:
+                return LlumoDataFrameResults(working_df, evals=self.evals, evalData=self.evalData,
+                                             definationMapping=self.definationMapping)
+
+            elif getDataFrame == False and toEvaluate == True:
+                data = working_df.to_dict(orient="records")
+                return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                        definationMapping=self.definationMapping)
+
+            elif getDataFrame== True and toEvaluate == False:
+                return working_df
+
+            elif getDataFrame == False and toEvaluate == False :
+                return working_df.to_dict(orient = "records")

     def _outputForStream(self, df, modelAliases, prompt_template, apiKey):
         executor = ModelExecutor(apiKey)
@@ -1189,45 +1310,81 @@ class LlumoClient:
             inputDict = {key: row[key] for key in inputVariables}
             for i, model in enumerate(modelAliases, 1):
                 try:
+
                     provider = getProviderFromModel(model)
+                    if provider == Provider.OPENAI:
+                        validateOpenaiKey(apiKey)
+                    elif provider == Provider.GOOGLE:
+                        validateGoogleKey(apiKey)
+
                     filled_template = getInputPopulatedPrompt(prompt_template, inputDict)
                     response = executor.execute(provider, model.value, filled_template, apiKey)
                     df.at[indx, f"output_{i}"] = response
+
                 except Exception as e:
-                    df.at[indx, f"output_{i}"] = str(e)
+                    # df.at[indx, f"output_{i}"] = str(e)
+                    raise e
+
         return df

-    def _evaluateForStream(self, df, evals, modelAliases, prompt_template):
+    def _evaluateForStream(self, df, evals, modelAliases, prompt_template, generateOutput):
         dfWithEvals = df.copy()
-
         outputColMapping = {}

-
-
-
+        if generateOutput:
+            # Evaluate per model output
+            for i, model in enumerate(modelAliases, 1):
+                outputColName = f"output_{i}"
+                try:
+                    res = self.evaluateMultiple(
+                        dfWithEvals.to_dict("records"),
+                        evals=evals,
+                        prompt_template=prompt_template,
+                        outputColName=outputColName,
+                        _tocheck=False,
+                        getDataFrame=True,
+                        createExperiment=False
+                    )
+
+                    for evalMetric in evals:
+                        scoreCol = f"{evalMetric}"
+                        reasonCol = f"{evalMetric} Reason"
+                        if scoreCol in res.columns:
+                            res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
+                        if reasonCol in res.columns:
+                            res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})

+                        outputColMapping[f"{scoreCol}_{i}"] = outputColName
+
+                    newCols = [col for col in res.columns if col not in dfWithEvals.columns]
+                    dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
+
+                except Exception as e:
+                    print(f"Evaluation failed for model {model.value}: {str(e)}")
+
+        else:
+            # Evaluate only once on "output" column
+            try:
+                outputColName = "output"
                 res = self.evaluateMultiple(
                     dfWithEvals.to_dict("records"),
                     evals=evals,
                     prompt_template=prompt_template,
                     outputColName=outputColName,
                     _tocheck=False,
+                    getDataFrame=True,
+                    createExperiment=False
                 )
                 for evalMetric in evals:
                     scoreCol = f"{evalMetric}"
                     reasonCol = f"{evalMetric} Reason"
-
-                    if scoreCol in res.columns:
-                        res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
-                    if reasonCol in res.columns:
-                        res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
-
-                    outputColMapping[f"{scoreCol}_{i}"] = outputColName
+                    outputColMapping[scoreCol] = "output"

                 newCols = [col for col in res.columns if col not in dfWithEvals.columns]
                 dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
             except Exception as e:
-                print(f"Evaluation failed
+                print(f"Evaluation failed: {str(e)}")
+
         return dfWithEvals, outputColMapping

     def runDataStream(
@@ -1236,8 +1393,13 @@ class LlumoClient:
         streamName: str,
         queryColName: str = "query",
         createExperiment: bool = False,
+        getDataFrame = False
     ):

+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")

         # Copy the original dataframe
         original_df = pd.DataFrame(data)
@@ -1354,10 +1516,16 @@ class LlumoClient:
                 definationMapping=self.definationMapping,
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
         else:
-
-
+            if getDataFrame:
+                return working_df
+
+            else:
+                data = working_df.to_dict(orient="records")
+                return data
+        # self.latestDataframe = working_df
+        # return working_df


     def createExperiment(self, dataframe):
@@ -1367,12 +1535,12 @@ class LlumoClient:
             flag = createPlayground(self.email, self.workspaceID, dataframe)
             if flag:
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
         except Exception as e:
             raise "Some error ocuured please check your API key"

-    def
+    def uploadfile(self, file_path):

         workspaceID = None
         email = None
@@ -1407,13 +1575,34 @@ class LlumoClient:
             if createPlayground(self.email, self.workspaceID, df):

                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )

             return True

         except Exception as e:
             print(f"Error: {e}")
+
+    def upload(self,data):
+        try:
+            if isinstance(data, dict):
+                data = [data]
+            # Check if data is now a list of dictionaries
+            if isinstance(data, list) and all(isinstance(item, dict) for item in data):
+                dataframe = pd.DataFrame(data).astype(str)
+            else:
+                raise ValueError("Data must be a dictionary or a list of dictionaries.")
+            self.validateApiKey()
+            if createPlayground(self.email, self.workspaceID, dataframe):
+                print(
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+                )
+            return True
+
+        except Exception as e:
+            print(f"Error: {e}")
+            return False
+


 class SafeDict(dict):
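Note: across the client changes above, every public entry point (`evaluateMultiple`, `promptSweep`, `evaluateAgents`, `evaluateAgentResponses`, `runDataStream`) now accepts a single dict as well as a list of dicts and takes a `getDataFrame` flag, and a new `upload()` helper pushes raw rows to a playground. A short sketch of the new surface; the key and rows are placeholders, and the behaviour is read from the diff above rather than from separate documentation:

```python
from llumo import LlumoClient  # assumes the client is exported at the package root

client = LlumoClient(api_key="LLUMO-API-KEY")  # placeholder key

# upload() accepts a dict or a list of dicts and creates a playground from it.
client.upload({"query": "What is RAG?", "output": "Retrieval-augmented generation..."})

# A single dict is coerced to [dict]; getDataFrame picks the result wrapper.
res = client.evaluateMultiple(
    {"query": "What is RAG?", "context": "RAG retrieves documents first.", "output": "..."},
    evals=["Response Completeness"],
    getDataFrame=False,  # returns LlumoDictResults (a list of row dicts)
)
```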
llumo/execution.py
CHANGED
@@ -25,15 +25,14 @@ class ModelExecutor:
         return response.choices[0].message.content

     def _executeGoogle(self, modelName: str, prompt: str,api_key) -> str:
-
+
         # Configure GenAI with API Key
         genai.configure(api_key=api_key)
-
+
         # Select Generative Model
         model = genai.GenerativeModel("gemini-2.0-flash-lite")
         # Generate Response
         response = model.generate_content(prompt)
         return response.text
-
-
-
+
+
llumo/google.py
ADDED
@@ -0,0 +1,66 @@
+from google import generativeai as _genai
+from .client import LlumoClient
+
+
+def evaluate_multiple(data, api_key=None, evals=["Response Correctness"]):
+    client = LlumoClient(api_key=api_key)
+    results = client.evaluateMultiple(
+        data,
+        evals=evals,
+        createExperiment=False,
+        prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
+        getDataFrame=False
+    )
+    return results
+
+
+class ChatCompletionWithEval:
+    def __init__(self, response, evaluation):
+        self._response = response
+        self.evaluation = evaluation
+
+    def __getattr__(self, name):
+        return getattr(self._response, name)
+
+    def __getitem__(self, key):
+        return self._response[key]
+
+    def __repr__(self):
+        return repr(self._response)
+
+
+class genai:
+    class GenerativeModel:
+        def __init__(self, api_key: str, model: str = "gemini-2.5-flash"):
+            _genai.configure(api_key=api_key)
+            self._api_key = api_key
+            self._model_name = model
+            self._model_instance = _genai.GenerativeModel(model_name=model)
+
+        def generate_content(self, contents: str | list[str], **kwargs):
+            context = kwargs.pop("context", None)
+            evals = kwargs.pop("evals", [])
+            llumo_key = kwargs.pop("llumo_key", None)
+
+            # Run Gemini generation
+            response = self._model_instance.generate_content(contents=contents, **kwargs)
+            output = response.text
+
+            eval_input = [{
+                "query": contents,
+                "context": context or contents,
+                "output": output,
+            }]
+
+            evaluation = None
+            try:
+                evaluation = evaluate_multiple(data=eval_input, evals=evals, api_key=llumo_key)
+            except Exception as e:
+                evaluation = None
+
+            if evaluation is None:
+                print("Cannot process your request for evaluation, please check your api and try again later.")
+                return response
+
+
+            return ChatCompletionWithEval(response, evaluation)
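Note: the new `llumo/google.py` module wraps `google.generativeai` so that each `generate_content()` call also runs Llumo evals and attaches them to the response via `ChatCompletionWithEval`. A usage sketch based on the code above; both keys are placeholders:

```python
from llumo.google import genai

model = genai.GenerativeModel(api_key="GOOGLE-API-KEY", model="gemini-2.5-flash")  # placeholder key

resp = model.generate_content(
    "Summarise the context in two lines.",
    context="LLUMO evaluates LLM outputs against metrics like Context Utilization.",
    evals=["Context Utilization"],
    llumo_key="LLUMO-API-KEY",  # placeholder key
)

print(resp.text)        # the underlying Gemini response, proxied by ChatCompletionWithEval
print(resp.evaluation)  # Llumo eval results; on eval failure the plain response is returned instead
```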
llumo/helpingFuntions.py
CHANGED
@@ -8,6 +8,9 @@ import json
 import base64
 import os
 import re
+import openai
+import google.generativeai as genai
+from collections import defaultdict


 from .models import _MODEL_METADATA, AVAILABLEMODELS
@@ -21,8 +24,7 @@ uploadColList = (
 uploadRowList = (
     "https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
 )
-createInsightUrl
-getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
+createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"


 def getProcessID():
@@ -227,7 +229,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     }
     allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']

-
+    evalDependencies = checkDependency(_returnDepMapping=True)

     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
@@ -240,46 +242,46 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         columnIDMapping[col] = columnID


-        if col.startswith('output') and promptText!=None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # if col.startswith('output') and promptText!=None:
+        #     # For output columns, create the prompt template with promptText
+        #     if promptText:
+        #         # Extract variables from promptText and set them as dependencies
+        #         dependencies = []
+        #
+        #         # Find variables inside {{variable}}
+        #         variables = re.findall(r'{{(.*?)}}', promptText)
+        #
+        #         # Loop through each variable and check if it exists as a column name
+        #         for var in variables:
+        #             varName = var.strip()
+        #             if varName in columnIDMapping:  # Check if the variable is a column name
+        #                 dependencies.append(columnIDMapping[varName])  # Add its columnID
+        #
+        #         # Now update the template for the output column
+        #
+        #         template={
+        #             "provider": "OPENAI",
+        #             "model": "GPT_4o",
+        #             "promptText": promptText,
+        #             "modelOptions": {
+        #                 "temperature": 0,
+        #                 "frequencyPenalty": 0,
+        #                 "presencePenalty": 0,
+        #                 "maxToken": 8192
+        #             },
+        #             "toolConfig": "none",
+        #             "concurrency": "",
+        #             "outputType": "STRING",
+        #             "isPromptSelected": True,
+        #             "isSmartPromptSelected": False,
+        #             "dependency": dependencies,  # Use the dependencies extracted from promptText
+        #             "columnID": columnID,  # Use the generated column ID
+        #             "label": col,
+        #             "type": "PROMPT",
+        #             "order": indx,
+        #         }
+
+        if col.startswith('context') and dataStreamName != None :
             if queryColName and dataStreamName:
                 dependencies = []
                 dependencies.append(columnIDMapping[queryColName])
@@ -310,9 +312,9 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN

             dependencies.append(columnIDMapping[outputColName])  # Add the output column ID

-            longDef = definationMapping.get(col, {}).get('definition', "")
-            shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
-            enum =
+            longDef = definationMapping.get(col.rsplit("_",1)[0], {}).get('definition', "")
+            shortDef =definationMapping.get(col.rsplit("_",1)[0], {}).get('briefDefinition', "")
+            enum = col.rsplit("_",1)[0].upper().replace(" ","_")

             template = {
                 "analytics": [
@@ -320,20 +322,23 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                 ],
                 "evaluationMetric": "ALL",
                 "evaluationModel": "LLUMO_EVALLM",
-                "selectPrompt": columnIDMapping[
+                "selectPrompt": None if "output" not in columnIDMapping.keys() else columnIDMapping["output"],
                 "scoreCondition": "GREATER_THAN",
                 "scoreValue": "50",
                 "scoreResult": "PASS",
-                "llmKpi": col,
+                "llmKpi": col.rsplit("_",1)[0],
                 "setRules": True,
                 "type": "EVAL",
                 "evalType": "LLM",
                 "similarityMetric": None,
                 "embeddingModel": None,
-                "groundTruth": None,
+                "groundTruth": None if "groundTruth" not in columnIDMapping.keys() else columnIDMapping["groundTruth"],
                 "dataStream": None,
-                "context":
-                "dependency":
+                "context":None if "context" not in columnIDMapping.keys() else columnIDMapping["context"],
+                "dependency":[ columnIDMapping[dep] for dep in evalDependencies[ col.rsplit("_",1)[0]]],
+                "query": None if "query" not in columnIDMapping.keys() else columnIDMapping["query"],
+                "tools":None if "tools" not in columnIDMapping.keys() else columnIDMapping["tools"],
+                "messageHistory":None if "messageHistory" not in columnIDMapping.keys() else columnIDMapping["messageHistory"],
                 "hallucinationFields": {
                     "query": None,
                     "context": None,
@@ -342,7 +347,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                 "definition": longDef,
                 "analyticsENUM": enum,
                 "prompt": shortDef,
-                "analyticsName": col,
+                "analyticsName": col.rsplit("_",1)[0],
                 "columnID": columnID,
                 "label": col,
                 "order": indx
@@ -378,12 +383,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         row_dict = {}

         # For each column, we need to map the column ID to the corresponding value in the row
-
+
         for col in dataframe.columns:
             columnID = columnIDMapping[col]

             if any(col.startswith(eval + "_") or col == eval for eval in allEvals) and not " Reason" in col and promptText!=None:
-
+
                 row_dict[columnID] = {

                     "value": row[col],
@@ -450,9 +455,12 @@ def uploadRowsInDBPlayground(payload):
         return None


-def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None):
+def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):

-
+    if activePlayground != None:
+        playgroundId=activePlayground
+    else:
+        playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
     payload1, payload2 = createColumn(
         workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
     )
@@ -470,7 +478,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d



-def getPlaygroundInsights(
+def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: list):
     headers = {

         "Content-Type": "application/json",
@@ -478,48 +486,29 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):

     # Initial request to generate playground insights
     payload = {
-        "
-        "
+        "uniqueClassesString": uniqueClassesString,
+        "reasonList": reasonList,
+        "definition": defination,
     }

     urlGenerate = createInsightUrl
-
-
-
-    if responseGenerate.status_code == 200:
-        responseJson = responseGenerate.json()
-
-        insightStatus = responseJson.get("data", {}).get("insight", False)
+    try:
+        responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)

-        if
-
-            urlGetAll = getPlaygroundInsightsUrl
+        if responseGenerate.status_code == 200:
+            responseJson = responseGenerate.json()

-
-
-
-
-
-
-
-            # Extract insight and solution
-            insights = []
-            for item in data:
-                insight = item.get("insight", "")
-                solution = item.get("solution", "")
-                insights.append({"insight": insight, "solution": solution})
-
-            return insights
-        else:
-            print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
-            return None
-    else:
-        print("No insight generated.")
-        return None
+            filteredResponse = {key: value for key, value in responseJson.items() if key in ['analysis', 'nextStep']}
+
+            return filteredResponse
+    except Exception as e:
+        print(f"Exception occurred while generating insight: {e}")
+        return None
+
     else:
         print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
         return None
-def checkDependency(selectedEval, columns,tocheck=True):
+def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
     """
     Checks if all the required input columns for the selected evaluation metric are present.

@@ -530,33 +519,35 @@ def checkDependency(selectedEval, columns,tocheck=True):
     Raises:
         - LlumoAIError.dependencyError: If any required column is missing.
     """
-    if tocheck:
         # Define required dependencies for each evaluation metric
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    metricDependencies = {
+        'Response Completeness': ['context', 'query', 'output'],
+        'Response Bias': ['output'],
+        'Response Harmfulness': ['output'],
+        'Input Toxicity': ['query'],
+        'Input Harmfulness': ['query'],
+        'Context Utilization': ['output', 'context'],
+        'Relevance Retention': ['context', 'query'],
+        'Semantic Cohesion': ['context'],
+        'Final Task Alignment': ['messageHistory'],
+        'Tool Reliability': ['messageHistory'],
+        'Response Correctness': ['output', 'query', 'context'],
+        'Response Toxicity': ['output'],
+        'Input Bias': ['query'],
+        'Input Relevancy': ['context', 'query'],
+        'Redundancy Reduction': ['context'],
+        'Response Sentiment': ['output'],
+        'Tool Selection Accuracy': ['tools', 'messageHistory'],
+        'Stepwise Progression': ['tools', 'messageHistory'],
+        'Hallucination': ['query', 'context', 'output'],
+        'Groundedness': ['groundTruth', 'output'],
+        'Memory Utilization': ['context', 'messageHistory'],
+        'Input Relevancy (Multi-turn)': ['context', 'query']
+    }
+    if _returnDepMapping == True:
+        return metricDependencies

+    if tocheck == True:
         # Check if the selected evaluation metric is known
         if selectedEval not in metricDependencies:
             return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
@@ -632,3 +623,56 @@ def validateModels(model_aliases):



+def validateOpenaiKey(api_key):
+    try:
+        client = openai.OpenAI(api_key=api_key)
+        _ = client.models.list()  # Light call to list models
+    except openai.AuthenticationError:
+        raise ValueError("❌ Invalid OpenAI API key.")
+    except Exception as e:
+        raise RuntimeError(f"⚠️ Error validating OpenAI key: {e}")
+
+def validateGoogleKey(api_key):
+    try:
+        genai.configure(api_key=api_key)
+        _ = genai.GenerativeModel("gemini-2.0-flash-lite").generate_content("test")
+    except Exception as e:
+        if "PERMISSION_DENIED" in str(e) or "API key not valid" in str(e):
+            raise ValueError("❌ Invalid Google API key.")
+        raise RuntimeError(f"⚠️ Error validating Gemini key: {e}")
+
+def groupLogsByClass(logs, max_logs=2):
+    # Initialize the final result structures (no defaultdict)
+    groupedLogs = {}
+    uniqueEdgeCases = {}  # This will store unique edge cases for each eval_name
+
+    # Iterate through the logs
+    for log in logs:
+        log_details = list(log.values())[0]  # Get the details dictionary
+        eval_name = log_details.get("kpi", "unmarked")
+        edge_case = log_details.get("edgeCase", "unmarked")
+        reasoning = log_details.get("reasoning", "")
+
+        if eval_name != "unmarked" and edge_case != "unmarked":
+            # Ensure that the eval_name and edge_case exist in the dictionary
+            if eval_name not in groupedLogs:
+                groupedLogs[eval_name] = {}
+                uniqueEdgeCases[eval_name] = set()  # Initialize the set for unique edge cases
+
+            if edge_case not in groupedLogs[eval_name]:
+                groupedLogs[eval_name][edge_case] = []
+
+            # Append the reasoning to the correct place
+            groupedLogs[eval_name][edge_case].append(reasoning)
+            uniqueEdgeCases[eval_name].add(edge_case)  # Add the edge case to the set
+
+    # Limit the number of reasons to max_logs
+    for eval_name in groupedLogs:
+        for edge_case in groupedLogs[eval_name]:
+            groupedLogs[eval_name][edge_case] = groupedLogs[eval_name][edge_case][:max_logs]
+
+    # Convert the set of unique edge cases to a list for easier reading
+    for eval_name in uniqueEdgeCases:
+        uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])
+
+    return groupedLogs, uniqueEdgeCases
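Note: `checkDependency()` above now doubles as the single source of the eval-to-required-columns mapping (via the new `_returnDepMapping=True` switch), which `createColumn()` uses to wire each eval column's `dependency` list. A small sketch of both call styles, taken from the signatures in this diff; the success-path return shape is inferred from how the client reads `["status"]`:

```python
from llumo.helpingFuntions import checkDependency

# New switch: return the full eval -> required-columns mapping.
depMap = checkDependency(_returnDepMapping=True)
print(depMap["Response Completeness"])  # ['context', 'query', 'output']

# Existing validation path: check a dataset's columns against one metric.
status = checkDependency("Hallucination", ["query", "context", "output"])
print(status["status"])  # expected True when every required column is present
```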
llumo/openai.py
ADDED
@@ -0,0 +1,78 @@
+from openai import OpenAI as OpenAIClient
+from .client import LlumoClient
+
+# Dummy evaluation function that uses LlumoClient
+def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
+    client = LlumoClient(api_key=api_key)
+    results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
+    return results
+
+# Wrapper around ChatCompletion to allow custom fields like `.evaluation`
+class ChatCompletionWithEval:
+    def __init__(self, response, evaluation):
+        self._response = response
+        self.evaluation = evaluation
+
+    def __getattr__(self, name):
+        return getattr(self._response, name)
+
+    def __getitem__(self, key):
+        return self._response[key]
+
+    def __repr__(self):
+        return repr(self._response)
+
+class openai(OpenAIClient):
+    def __init__(self, api_key: str):
+        super().__init__(api_key=api_key)
+
+        original_create = self.chat.completions.create
+
+        class ChatCompletionsWrapper:
+            @staticmethod
+            def create(*args, **kwargs):
+                context = kwargs.pop("context", None)
+                evals = kwargs.pop("evals", [])
+                llumo_key = kwargs.pop("llumo_key", None)
+
+                messages = kwargs.get("messages", [])
+                user_message = next(
+                    (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
+                    "",
+                )
+
+                # If context is None or empty or whitespace-only, set it to user_message
+                if not context or context.strip() == "":
+                    context = user_message
+
+                response = original_create(*args, **kwargs)
+
+                try:
+                    output_text = response.choices[0].message.content
+                except Exception:
+                    output_text = ""
+
+                eval_input = [{
+                    "query": user_message,
+                    "context": context,
+                    "output": output_text,
+                }]
+
+                # Safely call evaluate_multiple, if error return None
+                evaluation = None
+                try:
+                    evaluation = evaluate_multiple(eval_input, api_key=llumo_key,evals=evals)
+                except Exception as e:
+                    # You can optionally log the error here if you want
+                    # print(f"Evaluation failed, skipping: {e}")
+                    evaluation = None
+
+                # If evaluation is None, just return normal response
+                if evaluation is None:
+                    print("Cannot process your request for evaluation, please check your api and try again later.")
+                    return response
+
+                # Otherwise wrap with evaluation attached
+                return ChatCompletionWithEval(response, evaluation)
+
+        self.chat.completions.create = ChatCompletionsWrapper.create
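Note: the new `llumo/openai.py` module subclasses the OpenAI client and swaps `chat.completions.create` for a wrapper that strips the extra `context`, `evals`, and `llumo_key` kwargs, runs Llumo evals on the reply, and returns a `ChatCompletionWithEval` proxy. A usage sketch based on the code above; the keys are placeholders and the model name is only an example:

```python
from llumo.openai import openai

client = openai(api_key="OPENAI-API-KEY")  # placeholder key

completion = client.chat.completions.create(
    model="gpt-4o-mini",  # example model name
    messages=[{"role": "user", "content": "What is RAG?"}],
    context="RAG retrieves supporting documents before generation.",  # consumed by the wrapper
    evals=["Response Correctness"],
    llumo_key="LLUMO-API-KEY",  # placeholder key
)

print(completion.choices[0].message.content)  # proxied OpenAI response
print(completion.evaluation)                  # Llumo eval results (present only when evaluation succeeded)
```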
llumo-0.2.16.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
+llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+llumo/client.py,sha256=20xn-RVaWwEwEi2pFaf_ZWF6OJhctQoxc707NAzTcQk,62532
+llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
+llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
+llumo/helpingFuntions.py,sha256=fqwtTNuDYiERB1_ljme-Ldc7WduU6g4Zl54O2INW7dQ,25241
+llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+llumo/openai.py,sha256=DGhEwQIJIIycGpw3hYQnyxdj6RFVpZ-gay-fZGqtkhU,3013
+llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
+llumo-0.2.16.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.16.dist-info/METADATA,sha256=GXu8pBXbYQ_EWRRPlAfAqaqqSl1Lb6raDCceRo8f8Qs,1519
+llumo-0.2.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.16.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.16.dist-info/RECORD,,
llumo-0.2.15b1.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
-llumo/client.py,sha256=XljwD5mZxjyrXHhu8YhN0cGsd-O_LyKbPzrhS8zbqZo,53778
-llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
-llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/helpingFuntions.py,sha256=0-ZwG0fnbfP4DP1JTMewM8LdXzz_-p1gRqhPsX0Zmpk,22785
-llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
-llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
-llumo-0.2.15b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.15b1.dist-info/METADATA,sha256=yDLkiD46Qq44PA3ylKK2dzsXZmnuE23yxH0RmoqizOk,1521
-llumo-0.2.15b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.15b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.15b1.dist-info/RECORD,,
{llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/WHEEL
File without changes
{llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/licenses/LICENSE
File without changes
{llumo-0.2.15b1.dist-info → llumo-0.2.16.dist-info}/top_level.txt
File without changes