llumo 0.2.15b2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +2 -1
- llumo/chains.py +73 -0
- llumo/client.py +248 -67
- llumo/google.py +66 -0
- llumo/helpingFuntions.py +140 -115
- llumo/openai.py +78 -0
- {llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/METADATA +1 -1
- llumo-0.2.16.dist-info/RECORD +16 -0
- llumo-0.2.15b2.dist-info/RECORD +0 -13
- {llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/WHEEL +0 -0
- {llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/top_level.txt +0 -0
llumo/__init__.py
CHANGED
llumo/chains.py
ADDED
@@ -0,0 +1,73 @@
+import pandas as pd
+from .helpingFuntions import *
+
+class LlumoDataFrameResults(pd.DataFrame):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)
+
+    @property
+    def _constructor(self):
+        # Needed so slicing operations return the same type
+        return LlumoDataFrameResults
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
+
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+class LlumoDictResults(list):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)  # This will handle list[dict]
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+for _cls in (LlumoDataFrameResults, LlumoDictResults):
+    _cls.__name__ = "LlumoResults"
+    _cls.__qualname__ = "LlumoResults"
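The two wrappers above are what evaluateMultiple() now hands back: a DataFrame subclass when getDataFrame=True and a list subclass otherwise, each carrying the eval metadata that insights() needs. A minimal usage sketch, assuming LlumoClient is still exported from the package root as in earlier releases and using made-up data and a placeholder API key:

```python
from llumo import LlumoClient  # assumed export, as in prior versions

client = LlumoClient(api_key="YOUR_LLUMO_API_KEY")  # placeholder key

results = client.evaluateMultiple(
    [{"query": "What is RAG?", "context": "RAG combines retrieval with generation.", "output": "..."}],
    evals=["Response Correctness"],
    getDataFrame=True,  # new flag: return LlumoDataFrameResults instead of plain records
)

# Both result types expose insights(), which groups eval reasoning by edge case
# via groupLogsByClass() and asks the insight endpoint for analysis/next steps.
print(results.insights())
```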
llumo/client.py
CHANGED
@@ -5,7 +5,7 @@ import time
 import re
 import json
 import uuid
-
+import warnings
 import os
 import itertools
 import pandas as pd
@@ -16,9 +16,12 @@ from .exceptions import LlumoAIError
 from .helpingFuntions import *
 from .sockets import LlumoSocketClient
 from .functionCalling import LlumoAgentExecutor
+from .chains import LlumoDataFrameResults,LlumoDictResults
 import threading
 from tqdm import tqdm
 
+pd.set_option('future.no_silent_downcasting', True)
+
 postUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
 )
@@ -38,7 +41,8 @@ class LlumoClient:
 
     def __init__(self, api_key):
         self.apiKey = api_key
-
+        self.evalData=[]
+        self.evals=[]
         self.processMapping = {}
         self.definationMapping = {}
 
@@ -393,27 +397,51 @@ class LlumoClient:
                 outputColName=outputColName,
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
                 )
         else:
             return dataframe
 
     # this function allows the users to run multiple evals at once
 
-    def
+    def compressor(self, data, prompt_template):
         results = []
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         dataframe = pd.DataFrame(data)
+
         try:
-
+            self.socket = LlumoSocketClient(socketUrl)
+            dataframe = pd.DataFrame(data).astype(str)
+            socketID = self.socket.connect(timeout=250)
+
+            # Wait for socket connection
             max_wait_secs = 20
             waited_secs = 0
             while not self.socket._connection_established.is_set():
                 time.sleep(0.1)
                 waited_secs += 0.1
                 if waited_secs >= max_wait_secs:
-                    raise RuntimeError(
-
-
+                    raise RuntimeError("Timeout waiting for server connection")
+
+            # Start listener thread
+            expectedResults = len(dataframe)
+            # print("expected result" ,expectedResults)
+            timeout = max(100, min(150, expectedResults * 10))
+            listener_thread = threading.Thread(
+                target=self.socket.listenForResults,
+                kwargs={
+                    "min_wait": 40,
+                    "max_wait": timeout,
+                    "inactivity_timeout": 10,
+                    "expected_results": expectedResults,
+                },
+                daemon=True,
+            )
+            listener_thread.start()
 
             try:
                 self.validateApiKey()
@@ -508,7 +536,7 @@ class LlumoClient:
                     "playgroundID": activePlayground,
                 }
 
-                rowIdMapping[rowID] = index
+                rowIdMapping[f'{rowID}-{columnID}-{columnID}'] = index
                 # print("__________________________TEMPLATE__________________________________")
                 # print(templateData)
 
@@ -537,7 +565,19 @@ class LlumoClient:
                     expected_results=None,
                 )
 
-
+                rawResults = self.socket.getReceivedData()
+                receivedRowIDs = {key for item in rawResults for key in item.keys()}
+                expectedRowIDs = set(rowIdMapping.keys())
+                missingRowIDs = expectedRowIDs - receivedRowIDs
+                # print("All expected keys:", expected_rowIDs)
+                # print("All received keys:", received_rowIDs)
+                # print("Missing keys:", len(missingRowIDs))
+                missingRowIDs = list(missingRowIDs)
+
+                if len(missingRowIDs) > 0:
+                    dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
+                    rawResults.extend(dataFromDb)
+
                 # results = self.finalResp(eval_results)
                 # print(f"======= Completed evaluation: {eval} =======\n")
 
@@ -551,10 +591,10 @@ class LlumoClient:
             print(f"Error disconnecting socket: {e}")
 
         dataframe["Compressed Input"] = None
-        for records in
+        for records in rawResults:
            for compound_key, value in records.items():
                 # for compound_key, value in item['data'].items():
-                rowID = compound_key
+                rowID = compound_key
                 # looking for the index of each rowID , in the original dataframe
                 if rowID in rowIdMapping:
                     index = rowIdMapping[rowID]
@@ -577,16 +617,24 @@ class LlumoClient:
         self,
         data,
         evals: list, # list of eval metric names
-        prompt_template="",
+        prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
         outputColName="output",
         createExperiment: bool = False,
+        getDataFrame:bool =False,
         _tocheck=True,
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         self.socket = LlumoSocketClient(socketUrl)
         dataframe = pd.DataFrame(data).astype(str)
         workspaceID = None
         email = None
         socketID = self.socket.connect(timeout=250)
+        self.evalData=[]
+        self.evals=evals
         self.allBatches = []
         rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))
 
@@ -614,10 +662,14 @@ class LlumoClient:
             daemon=True,
         )
         listener_thread.start()
-
-
-
-
+        self.validateApiKey(evalName=evals[0])
+        if createExperiment:
+            activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
+
+        else:
+            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                "-", ""
+            )
         for evalName in evals:
             # print(f"\n======= Running evaluation for: {evalName} =======")
 
@@ -776,10 +828,8 @@ class LlumoClient:
                 rawResults.extend(dataFromDb)
 
 
+        self.evalData = rawResults
 
-
-
-
         # Initialize dataframe columns for each eval
         for eval in evals:
             dataframe[eval] = None
@@ -799,7 +849,12 @@ class LlumoClient:
 
         if createExperiment:
             pd.set_option("future.no_silent_downcasting", True)
-            df = dataframe.fillna("Some error occured").astype(object)
+            # df = dataframe.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = dataframe.fillna("Some error occurred").astype(str)
+
+            df = dataframe.fillna("Some error occured").infer_objects(copy=False)
             if createPlayground(
                 email,
                 workspaceID,
@@ -807,13 +862,19 @@ class LlumoClient:
                 promptText=prompt_template,
                 definationMapping=self.definationMapping,
                 outputColName=outputColName,
+                activePlayground= activePlayground
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
         else:
-
+            if getDataFrame:
+                return LlumoDataFrameResults(dataframe,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
+            else:
+                data=dataframe.to_dict(orient="records")
+                return LlumoDictResults(data,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
 
+
     def promptSweep(
         self,
         templates: List[str],
@@ -823,9 +884,14 @@ class LlumoClient:
         evals=["Response Correctness"],
         toEvaluate: bool = False,
         createExperiment: bool = False,
+        getDataFrame = False
 
 
     ) -> pd.DataFrame:
+        if isinstance(dataset, dict):
+            dataset = [dataset]
+        elif not isinstance(dataset, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
 
         modelStatus = validateModels(model_aliases=model_aliases)
         if modelStatus["status"]== False:
@@ -878,6 +944,8 @@ class LlumoClient:
                 prompt_template=str(templates[0]),
                 outputColName=outputColName,
                 _tocheck=False,
+                getDataFrame=True,
+                createExperiment = False
             )
 
             # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -910,10 +978,17 @@ class LlumoClient:
             ):
 
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
             else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(dfWithEvals, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = dfWithEvals.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,definationMapping=self.definationMapping)
+
+
         else:
             if createExperiment == True:
                 pd.set_option("future.no_silent_downcasting", True)
@@ -921,10 +996,18 @@ class LlumoClient:
 
             if createPlayground(email, workspaceID, df, promptText=templates[0]):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
             else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(df, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = df.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                            definationMapping=self.definationMapping)
+
+
 
     # this function generates an output using llm and tools and evaluate that output
     def evaluateAgents(
@@ -936,8 +1019,14 @@ class LlumoClient:
         evals=["Final Task Alignment"],
         prompt_template="Give answer for the given query: {{query}}",
         createExperiment: bool = False,
+        getDataFrame:bool = False
 
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         if model.lower() not in ["openai", "google"]:
             raise ValueError("Model must be 'openai' or 'google'")
 
@@ -961,27 +1050,37 @@ class LlumoClient:
                 toolResponseDf.to_dict(orient="records"),
                 evals=evals,
                 prompt_template=prompt_template,
-                createExperiment=
+                createExperiment=createExperiment,
+                getDataFrame=getDataFrame
+
             )
 
-
-
-
-
-
-
-
-
-
+            return toolResponseDf
+            # if createExperiment:
+            #     pd.set_option("future.no_silent_downcasting", True)
+            #     df = toolResponseDf.fillna("Some error occured")
+            #     if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
+            #         print(
+            #             "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+            #         )
+            #     else:
+            #         return toolResponseDf
 
     # this function evaluate that tools output given by the user
     def evaluateAgentResponses(
         self,
         data,
         evals=["Final Task Alignment"],
-        outputColName="output",
         createExperiment: bool = False,
+        getDataFrame = False,
+        outputColName="output"
+
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         dataframe = pd.DataFrame(data)
 
         try:
@@ -1001,7 +1100,9 @@ class LlumoClient:
                 evals=evals,
                 prompt_template="Give answer for the given query: {{query}}",
                 outputColName=outputColName,
-                createExperiment=createExperiment
+                createExperiment=createExperiment,
+                getDataFrame = getDataFrame
+
             )
             if createExperiment:
                 pass
@@ -1022,8 +1123,14 @@ class LlumoClient:
         prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
         evals=["Context Utilization"],
         toEvaluate=False,
-        generateOutput=True
+        generateOutput=True,
+        getDataFrame = False
     ):
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
+
         # Validate required parameters
         if generateOutput:
             if not modelAliases:
@@ -1147,7 +1254,6 @@ class LlumoClient:
         outputEvalMapping = None
         if toEvaluate:
             for evalName in evals:
-
                 # Validate API and dependencies
                 self.validateApiKey(evalName=evalName)
                 metricDependencies = checkDependency(
@@ -1156,14 +1262,17 @@ class LlumoClient:
                 if not metricDependencies["status"]:
                     raise LlumoAIError.dependencyError(metricDependencies["message"])
 
-            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template)
-
+            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template,generateOutput)
 
+
         self.socket.disconnect()
 
         # Create experiment if required
         if createExperiment:
-            df = working_df.fillna("Some error occured").astype(object)
+            # df = working_df.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = working_df.fillna("Some error occurred").astype(str)
             if createPlayground(
                 email, workspaceID, df,
                 queryColName=queryColName,
@@ -1173,10 +1282,22 @@ class LlumoClient:
                 evalOutputMap=outputEvalMapping
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
         else:
-
-
+            if getDataFrame == True and toEvaluate == True:
+                return LlumoDataFrameResults(working_df, evals=self.evals, evalData=self.evalData,
+                                             definationMapping=self.definationMapping)
+
+            elif getDataFrame == False and toEvaluate == True:
+                data = working_df.to_dict(orient="records")
+                return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                        definationMapping=self.definationMapping)
+
+            elif getDataFrame== True and toEvaluate == False:
+                return working_df
+
+            elif getDataFrame == False and toEvaluate == False :
+                return working_df.to_dict(orient = "records")
 
     def _outputForStream(self, df, modelAliases, prompt_template, apiKey):
         executor = ModelExecutor(apiKey)
@@ -1192,50 +1313,78 @@ class LlumoClient:
 
                 provider = getProviderFromModel(model)
                 if provider == Provider.OPENAI:
-
+                    validateOpenaiKey(apiKey)
                 elif provider == Provider.GOOGLE:
                     validateGoogleKey(apiKey)
 
                 filled_template = getInputPopulatedPrompt(prompt_template, inputDict)
                 response = executor.execute(provider, model.value, filled_template, apiKey)
                 df.at[indx, f"output_{i}"] = response
+
             except Exception as e:
                 # df.at[indx, f"output_{i}"] = str(e)
                 raise e
 
        return df
 
-    def _evaluateForStream(self, df, evals, modelAliases, prompt_template):
+    def _evaluateForStream(self, df, evals, modelAliases, prompt_template, generateOutput):
        dfWithEvals = df.copy()
-
        outputColMapping = {}
 
-
-
-
+        if generateOutput:
+            # Evaluate per model output
+            for i, model in enumerate(modelAliases, 1):
+                outputColName = f"output_{i}"
+                try:
+                    res = self.evaluateMultiple(
+                        dfWithEvals.to_dict("records"),
+                        evals=evals,
+                        prompt_template=prompt_template,
+                        outputColName=outputColName,
+                        _tocheck=False,
+                        getDataFrame=True,
+                        createExperiment=False
+                    )
+
+                    for evalMetric in evals:
+                        scoreCol = f"{evalMetric}"
+                        reasonCol = f"{evalMetric} Reason"
+                        if scoreCol in res.columns:
+                            res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
+                        if reasonCol in res.columns:
+                            res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
+
+                        outputColMapping[f"{scoreCol}_{i}"] = outputColName
 
+                    newCols = [col for col in res.columns if col not in dfWithEvals.columns]
+                    dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
+
+                except Exception as e:
+                    print(f"Evaluation failed for model {model.value}: {str(e)}")
+
+        else:
+            # Evaluate only once on "output" column
+            try:
+                outputColName = "output"
                 res = self.evaluateMultiple(
                     dfWithEvals.to_dict("records"),
                     evals=evals,
                     prompt_template=prompt_template,
                     outputColName=outputColName,
                     _tocheck=False,
+                    getDataFrame=True,
+                    createExperiment=False
                 )
                 for evalMetric in evals:
                     scoreCol = f"{evalMetric}"
                     reasonCol = f"{evalMetric} Reason"
-
-                    if scoreCol in res.columns:
-                        res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
-                    if reasonCol in res.columns:
-                        res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
-
-                    outputColMapping[f"{scoreCol}_{i}"] = outputColName
+                    outputColMapping[scoreCol] = "output"
 
                 newCols = [col for col in res.columns if col not in dfWithEvals.columns]
                 dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
             except Exception as e:
-                print(f"Evaluation failed
+                print(f"Evaluation failed: {str(e)}")
+
         return dfWithEvals, outputColMapping
 
     def runDataStream(
@@ -1244,8 +1393,13 @@ class LlumoClient:
         streamName: str,
         queryColName: str = "query",
         createExperiment: bool = False,
+        getDataFrame = False
     ):
 
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            raise ValueError("Data should be a dict or a list of dicts.")
 
         # Copy the original dataframe
         original_df = pd.DataFrame(data)
@@ -1362,10 +1516,16 @@ class LlumoClient:
                 definationMapping=self.definationMapping,
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
         else:
-
-
+            if getDataFrame:
+                return working_df
+
+            else:
+                data = working_df.to_dict(orient="records")
+                return data
+            # self.latestDataframe = working_df
+            # return working_df
 
 
     def createExperiment(self, dataframe):
@@ -1375,12 +1535,12 @@ class LlumoClient:
             flag = createPlayground(self.email, self.workspaceID, dataframe)
             if flag:
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
         except Exception as e:
             raise "Some error ocuured please check your API key"
 
-    def
+    def uploadfile(self, file_path):
 
         workspaceID = None
         email = None
@@ -1415,13 +1575,34 @@ class LlumoClient:
             if createPlayground(self.email, self.workspaceID, df):
 
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                 )
 
                 return True
 
         except Exception as e:
             print(f"Error: {e}")
+
+    def upload(self,data):
+        try:
+            if isinstance(data, dict):
+                data = [data]
+            # Check if data is now a list of dictionaries
+            if isinstance(data, list) and all(isinstance(item, dict) for item in data):
+                dataframe = pd.DataFrame(data).astype(str)
+            else:
+                raise ValueError("Data must be a dictionary or a list of dictionaries.")
+            self.validateApiKey()
+            if createPlayground(self.email, self.workspaceID, dataframe):
+                print(
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+                )
+                return True
+
+        except Exception as e:
+            print(f"Error: {e}")
+            return False
+
 
 
 class SafeDict(dict):
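Besides the new getDataFrame flag on the evaluation methods, the client gains an upload() helper that pushes a dict or a list of dicts straight into a playground. A rough usage sketch with placeholder data and key, assuming the same root export as above:

```python
from llumo import LlumoClient  # assumed export

client = LlumoClient(api_key="YOUR_LLUMO_API_KEY")  # placeholder key

# upload() normalises a single dict to a one-row table, builds a string DataFrame,
# validates the key, and creates a playground; it returns True on success, False on error.
ok = client.upload({"query": "What is RAG?", "output": "Retrieval-augmented generation ..."})
print(ok)
```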
llumo/google.py
ADDED
@@ -0,0 +1,66 @@
+from google import generativeai as _genai
+from .client import LlumoClient
+
+
+def evaluate_multiple(data, api_key=None, evals=["Response Correctness"]):
+    client = LlumoClient(api_key=api_key)
+    results = client.evaluateMultiple(
+        data,
+        evals=evals,
+        createExperiment=False,
+        prompt_template="Give answer to the query: {{query}}, using context: {{context}}",
+        getDataFrame=False
+    )
+    return results
+
+
+class ChatCompletionWithEval:
+    def __init__(self, response, evaluation):
+        self._response = response
+        self.evaluation = evaluation
+
+    def __getattr__(self, name):
+        return getattr(self._response, name)
+
+    def __getitem__(self, key):
+        return self._response[key]
+
+    def __repr__(self):
+        return repr(self._response)
+
+
+class genai:
+    class GenerativeModel:
+        def __init__(self, api_key: str, model: str = "gemini-2.5-flash"):
+            _genai.configure(api_key=api_key)
+            self._api_key = api_key
+            self._model_name = model
+            self._model_instance = _genai.GenerativeModel(model_name=model)
+
+        def generate_content(self, contents: str | list[str], **kwargs):
+            context = kwargs.pop("context", None)
+            evals = kwargs.pop("evals", [])
+            llumo_key = kwargs.pop("llumo_key", None)
+
+            # Run Gemini generation
+            response = self._model_instance.generate_content(contents=contents, **kwargs)
+            output = response.text
+
+            eval_input = [{
+                "query": contents,
+                "context": context or contents,
+                "output": output,
+            }]
+
+            evaluation = None
+            try:
+                evaluation = evaluate_multiple(data=eval_input, evals=evals, api_key=llumo_key)
+            except Exception as e:
+                evaluation = None
+
+            if evaluation is None:
+                print("Cannot process your request for evaluation, please check your api and try again later.")
+                return response
+
+
+            return ChatCompletionWithEval(response, evaluation)
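The new module wraps google.generativeai so a Gemini call can come back with a LLUMO evaluation attached. A hypothetical usage sketch based only on the code above (model name and keys are placeholders):

```python
from llumo.google import genai

model = genai.GenerativeModel(api_key="GEMINI_API_KEY", model="gemini-2.5-flash")

response = model.generate_content(
    "Answer in one line: what does LLUMO evaluate?",
    context="LLUMO scores LLM outputs against metrics such as Response Correctness.",
    evals=["Response Correctness"],
    llumo_key="YOUR_LLUMO_API_KEY",
)

print(response.text)        # normal Gemini response, proxied through the wrapper
print(response.evaluation)  # eval results attached when evaluate_multiple succeeds
```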
llumo/helpingFuntions.py
CHANGED
@@ -10,6 +10,7 @@ import os
 import re
 import openai
 import google.generativeai as genai
+from collections import defaultdict
 
 
 from .models import _MODEL_METADATA, AVAILABLEMODELS
@@ -23,8 +24,7 @@ uploadColList = (
 uploadRowList = (
     "https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
 )
-createInsightUrl
-getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
+createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"
 
 
 def getProcessID():
@@ -229,7 +229,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     }
     allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']
 
-
+    evalDependencies = checkDependency(_returnDepMapping=True)
 
     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
@@ -242,46 +242,46 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         columnIDMapping[col] = columnID
 
 
-        if col.startswith('output') and promptText!=None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # if col.startswith('output') and promptText!=None:
+        #     # For output columns, create the prompt template with promptText
+        #     if promptText:
+        #         # Extract variables from promptText and set them as dependencies
+        #         dependencies = []
+        #
+        #         # Find variables inside {{variable}}
+        #         variables = re.findall(r'{{(.*?)}}', promptText)
+        #
+        #         # Loop through each variable and check if it exists as a column name
+        #         for var in variables:
+        #             varName = var.strip()
+        #             if varName in columnIDMapping:  # Check if the variable is a column name
+        #                 dependencies.append(columnIDMapping[varName])  # Add its columnID
+        #
+        #         # Now update the template for the output column
+        #
+        #         template={
+        #             "provider": "OPENAI",
+        #             "model": "GPT_4o",
+        #             "promptText": promptText,
+        #             "modelOptions": {
+        #                 "temperature": 0,
+        #                 "frequencyPenalty": 0,
+        #                 "presencePenalty": 0,
+        #                 "maxToken": 8192
+        #             },
+        #             "toolConfig": "none",
+        #             "concurrency": "",
+        #             "outputType": "STRING",
+        #             "isPromptSelected": True,
+        #             "isSmartPromptSelected": False,
+        #             "dependency": dependencies,  # Use the dependencies extracted from promptText
+        #             "columnID": columnID,  # Use the generated column ID
+        #             "label": col,
+        #             "type": "PROMPT",
+        #             "order": indx,
+        #         }
+
+        if col.startswith('context') and dataStreamName != None :
             if queryColName and dataStreamName:
                 dependencies = []
                 dependencies.append(columnIDMapping[queryColName])
@@ -312,9 +312,9 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
 
             dependencies.append(columnIDMapping[outputColName]) # Add the output column ID
 
-            longDef = definationMapping.get(col, {}).get('definition', "")
-            shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
-            enum =
+            longDef = definationMapping.get(col.rsplit("_",1)[0], {}).get('definition', "")
+            shortDef =definationMapping.get(col.rsplit("_",1)[0], {}).get('briefDefinition', "")
+            enum = col.rsplit("_",1)[0].upper().replace(" ","_")
 
             template = {
                 "analytics": [
@@ -322,20 +322,23 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                 ],
                 "evaluationMetric": "ALL",
                 "evaluationModel": "LLUMO_EVALLM",
-                "selectPrompt": columnIDMapping[
+                "selectPrompt": None if "output" not in columnIDMapping.keys() else columnIDMapping["output"],
                 "scoreCondition": "GREATER_THAN",
                 "scoreValue": "50",
                 "scoreResult": "PASS",
-                "llmKpi": col,
+                "llmKpi": col.rsplit("_",1)[0],
                 "setRules": True,
                 "type": "EVAL",
                 "evalType": "LLM",
                 "similarityMetric": None,
                 "embeddingModel": None,
-                "groundTruth": None,
+                "groundTruth": None if "groundTruth" not in columnIDMapping.keys() else columnIDMapping["groundTruth"],
                 "dataStream": None,
-                "context":
-                "dependency":
+                "context":None if "context" not in columnIDMapping.keys() else columnIDMapping["context"],
+                "dependency":[ columnIDMapping[dep] for dep in evalDependencies[ col.rsplit("_",1)[0]]],
+                "query": None if "query" not in columnIDMapping.keys() else columnIDMapping["query"],
+                "tools":None if "tools" not in columnIDMapping.keys() else columnIDMapping["tools"],
+                "messageHistory":None if "messageHistory" not in columnIDMapping.keys() else columnIDMapping["messageHistory"],
                 "hallucinationFields": {
                     "query": None,
                     "context": None,
@@ -344,7 +347,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                 "definition": longDef,
                 "analyticsENUM": enum,
                 "prompt": shortDef,
-                "analyticsName": col,
+                "analyticsName": col.rsplit("_",1)[0],
                 "columnID": columnID,
                 "label": col,
                 "order": indx
@@ -452,9 +455,12 @@ def uploadRowsInDBPlayground(payload):
         return None
 
 
-def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None):
+def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):
 
-
+    if activePlayground != None:
+        playgroundId=activePlayground
+    else:
+        playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
     payload1, payload2 = createColumn(
         workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
     )
@@ -472,7 +478,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
 
 
 
-def getPlaygroundInsights(
+def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: list):
     headers = {
 
         "Content-Type": "application/json",
@@ -480,48 +486,29 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
 
     # Initial request to generate playground insights
     payload = {
-        "
-        "
+        "uniqueClassesString": uniqueClassesString,
+        "reasonList": reasonList,
+        "definition": defination,
     }
 
     urlGenerate = createInsightUrl
-
-
-
-    if responseGenerate.status_code == 200:
-        responseJson = responseGenerate.json()
-
-        insightStatus = responseJson.get("data", {}).get("insight", False)
+    try:
+        responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)
 
-        if
-
-        urlGetAll = getPlaygroundInsightsUrl
+        if responseGenerate.status_code == 200:
+            responseJson = responseGenerate.json()
 
-
-
-
-
-
-
-
-        # Extract insight and solution
-        insights = []
-        for item in data:
-            insight = item.get("insight", "")
-            solution = item.get("solution", "")
-            insights.append({"insight": insight, "solution": solution})
-
-        return insights
-        else:
-            print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
-            return None
-    else:
-        print("No insight generated.")
-        return None
+            filteredResponse = {key: value for key, value in responseJson.items() if key in ['analysis', 'nextStep']}
+
+            return filteredResponse
+    except Exception as e:
+        print(f"Exception occurred while generating insight: {e}")
+        return None
+
     else:
         print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
         return None
-def checkDependency(selectedEval, columns,tocheck=True):
+def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
     """
     Checks if all the required input columns for the selected evaluation metric are present.
 
@@ -532,33 +519,35 @@ def checkDependency(selectedEval, columns,tocheck=True):
     Raises:
         - LlumoAIError.dependencyError: If any required column is missing.
     """
-    if tocheck:
         # Define required dependencies for each evaluation metric
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    metricDependencies = {
+        'Response Completeness': ['context', 'query', 'output'],
+        'Response Bias': ['output'],
+        'Response Harmfulness': ['output'],
+        'Input Toxicity': ['query'],
+        'Input Harmfulness': ['query'],
+        'Context Utilization': ['output', 'context'],
+        'Relevance Retention': ['context', 'query'],
+        'Semantic Cohesion': ['context'],
+        'Final Task Alignment': ['messageHistory'],
+        'Tool Reliability': ['messageHistory'],
+        'Response Correctness': ['output', 'query', 'context'],
+        'Response Toxicity': ['output'],
+        'Input Bias': ['query'],
+        'Input Relevancy': ['context', 'query'],
+        'Redundancy Reduction': ['context'],
+        'Response Sentiment': ['output'],
+        'Tool Selection Accuracy': ['tools', 'messageHistory'],
+        'Stepwise Progression': ['tools', 'messageHistory'],
+        'Hallucination': ['query', 'context', 'output'],
+        'Groundedness': ['groundTruth', 'output'],
+        'Memory Utilization': ['context', 'messageHistory'],
+        'Input Relevancy (Multi-turn)': ['context', 'query']
+    }
+    if _returnDepMapping == True:
+        return metricDependencies
 
+    if tocheck == True:
         # Check if the selected evaluation metric is known
         if selectedEval not in metricDependencies:
             return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
@@ -646,8 +635,44 @@ def validateOpenaiKey(api_key):
 def validateGoogleKey(api_key):
     try:
         genai.configure(api_key=api_key)
-        _ = genai.GenerativeModel("gemini-2.0").generate_content("test")
+        _ = genai.GenerativeModel("gemini-2.0-flash-lite").generate_content("test")
     except Exception as e:
         if "PERMISSION_DENIED" in str(e) or "API key not valid" in str(e):
             raise ValueError("❌ Invalid Google API key.")
         raise RuntimeError(f"⚠️ Error validating Gemini key: {e}")
+
+def groupLogsByClass(logs, max_logs=2):
+    # Initialize the final result structures (no defaultdict)
+    groupedLogs = {}
+    uniqueEdgeCases = {}  # This will store unique edge cases for each eval_name
+
+    # Iterate through the logs
+    for log in logs:
+        log_details = list(log.values())[0]  # Get the details dictionary
+        eval_name = log_details.get("kpi", "unmarked")
+        edge_case = log_details.get("edgeCase", "unmarked")
+        reasoning = log_details.get("reasoning", "")
+
+        if eval_name != "unmarked" and edge_case != "unmarked":
+            # Ensure that the eval_name and edge_case exist in the dictionary
+            if eval_name not in groupedLogs:
+                groupedLogs[eval_name] = {}
+                uniqueEdgeCases[eval_name] = set()  # Initialize the set for unique edge cases
+
+            if edge_case not in groupedLogs[eval_name]:
+                groupedLogs[eval_name][edge_case] = []
+
+            # Append the reasoning to the correct place
+            groupedLogs[eval_name][edge_case].append(reasoning)
+            uniqueEdgeCases[eval_name].add(edge_case)  # Add the edge case to the set
+
+    # Limit the number of reasons to max_logs
+    for eval_name in groupedLogs:
+        for edge_case in groupedLogs[eval_name]:
+            groupedLogs[eval_name][edge_case] = groupedLogs[eval_name][edge_case][:max_logs]
+
+    # Convert the set of unique edge cases to a list for easier reading
+    for eval_name in uniqueEdgeCases:
+        uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])
+
+    return groupedLogs, uniqueEdgeCases
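checkDependency() now doubles as the single source of the metric-to-required-columns mapping through the new _returnDepMapping switch, which createColumn() uses when wiring eval dependencies. A small sketch of both call styles, assuming the helper is imported directly from the module and that the missing-column path reports a status dict as the client code expects:

```python
from llumo.helpingFuntions import checkDependency  # assumed direct import

# Full mapping of metric name -> required input columns.
deps = checkDependency(_returnDepMapping=True)
print(deps["Hallucination"])  # ['query', 'context', 'output']

# Normal validation path: check whether the data has every column the metric needs.
result = checkDependency("Hallucination", columns=["query", "context"])
print(result)  # expected: {'status': False, 'message': ...} since 'output' is absent
```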
llumo/openai.py
ADDED
@@ -0,0 +1,78 @@
+from openai import OpenAI as OpenAIClient
+from .client import LlumoClient
+
+# Dummy evaluation function that uses LlumoClient
+def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
+    client = LlumoClient(api_key=api_key)
+    results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
+    return results
+
+# Wrapper around ChatCompletion to allow custom fields like `.evaluation`
+class ChatCompletionWithEval:
+    def __init__(self, response, evaluation):
+        self._response = response
+        self.evaluation = evaluation
+
+    def __getattr__(self, name):
+        return getattr(self._response, name)
+
+    def __getitem__(self, key):
+        return self._response[key]
+
+    def __repr__(self):
+        return repr(self._response)
+
+class openai(OpenAIClient):
+    def __init__(self, api_key: str):
+        super().__init__(api_key=api_key)
+
+        original_create = self.chat.completions.create
+
+        class ChatCompletionsWrapper:
+            @staticmethod
+            def create(*args, **kwargs):
+                context = kwargs.pop("context", None)
+                evals = kwargs.pop("evals", [])
+                llumo_key = kwargs.pop("llumo_key", None)
+
+                messages = kwargs.get("messages", [])
+                user_message = next(
+                    (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
+                    "",
+                )
+
+                # If context is None or empty or whitespace-only, set it to user_message
+                if not context or context.strip() == "":
+                    context = user_message
+
+                response = original_create(*args, **kwargs)
+
+                try:
+                    output_text = response.choices[0].message.content
+                except Exception:
+                    output_text = ""
+
+                eval_input = [{
+                    "query": user_message,
+                    "context": context,
+                    "output": output_text,
+                }]
+
+                # Safely call evaluate_multiple, if error return None
+                evaluation = None
+                try:
+                    evaluation = evaluate_multiple(eval_input, api_key=llumo_key,evals=evals)
+                except Exception as e:
+                    # You can optionally log the error here if you want
+                    # print(f"Evaluation failed, skipping: {e}")
+                    evaluation = None
+
+                # If evaluation is None, just return normal response
+                if evaluation is None:
+                    print("Cannot process your request for evaluation, please check your api and try again later.")
+                    return response
+
+                # Otherwise wrap with evaluation attached
+                return ChatCompletionWithEval(response, evaluation)
+
+        self.chat.completions.create = ChatCompletionsWrapper.create
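The wrapper subclasses the OpenAI client and swaps chat.completions.create for a version that strips the extra context, evals, and llumo_key kwargs, runs the completion, and attaches the evaluation. A hypothetical usage sketch with placeholder keys and model name:

```python
from llumo.openai import openai

client = openai(api_key="OPENAI_API_KEY")  # placeholder key

resp = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model
    messages=[{"role": "user", "content": "What is retrieval-augmented generation?"}],
    context="Optional grounding context; defaults to the last user message.",
    evals=["Response Correctness"],
    llumo_key="YOUR_LLUMO_API_KEY",
)

print(resp.choices[0].message.content)
print(resp.evaluation)  # attached by ChatCompletionWithEval when the eval succeeds
```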
llumo-0.2.16.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
+llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+llumo/client.py,sha256=20xn-RVaWwEwEi2pFaf_ZWF6OJhctQoxc707NAzTcQk,62532
+llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
+llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
+llumo/helpingFuntions.py,sha256=fqwtTNuDYiERB1_ljme-Ldc7WduU6g4Zl54O2INW7dQ,25241
+llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+llumo/openai.py,sha256=DGhEwQIJIIycGpw3hYQnyxdj6RFVpZ-gay-fZGqtkhU,3013
+llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
+llumo-0.2.16.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.16.dist-info/METADATA,sha256=GXu8pBXbYQ_EWRRPlAfAqaqqSl1Lb6raDCceRo8f8Qs,1519
+llumo-0.2.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.16.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.16.dist-info/RECORD,,
llumo-0.2.15b2.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
-llumo/client.py,sha256=60RSxhk-9wzK9KgBz8dfbUd3-AaKiljxqbHI5UL8GIw,54021
-llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
-llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/helpingFuntions.py,sha256=BZfUIgTO0PJchppHn0wDRF1wcYSuMST5ry95HBPN5SQ,23534
-llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
-llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
-llumo-0.2.15b2.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.15b2.dist-info/METADATA,sha256=vbXwSwhuxnO0CSMz4uJ45AepuwVMl7irZlHmYkqRYbY,1521
-llumo-0.2.15b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.15b2.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.15b2.dist-info/RECORD,,
{llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/WHEEL
File without changes
{llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/licenses/LICENSE
File without changes
{llumo-0.2.15b2.dist-info → llumo-0.2.16.dist-info}/top_level.txt
File without changes