llumo 0.2.15b1__py3-none-any.whl → 0.2.16b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +2 -1
- llumo/chains.py +73 -0
- llumo/client.py +204 -68
- llumo/execution.py +4 -5
- llumo/google.py +34 -0
- llumo/helpingFuntions.py +162 -117
- llumo/openai.py +79 -0
- {llumo-0.2.15b1.dist-info → llumo-0.2.16b1.dist-info}/METADATA +1 -1
- llumo-0.2.16b1.dist-info/RECORD +16 -0
- llumo-0.2.15b1.dist-info/RECORD +0 -13
- {llumo-0.2.15b1.dist-info → llumo-0.2.16b1.dist-info}/WHEEL +0 -0
- {llumo-0.2.15b1.dist-info → llumo-0.2.16b1.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.15b1.dist-info → llumo-0.2.16b1.dist-info}/top_level.txt +0 -0
llumo/__init__.py
CHANGED
llumo/chains.py
ADDED
@@ -0,0 +1,73 @@
+import pandas as pd
+from .helpingFuntions import *
+
+class LlumoDataFrameResults(pd.DataFrame):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)
+
+    @property
+    def _constructor(self):
+        # Needed so slicing operations return the same type
+        return LlumoDataFrameResults
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
+
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+class LlumoDictResults(list):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)  # This will handle list[dict]
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+for _cls in (LlumoDataFrameResults, LlumoDictResults):
+    _cls.__name__ = "LlumoResults"
+    _cls.__qualname__ = "LlumoResults"
llumo/client.py
CHANGED
@@ -5,20 +5,23 @@ import time
 import re
 import json
 import uuid
-
+import warnings
 import os
 import itertools
 import pandas as pd
 from typing import List, Dict
-from .models import AVAILABLEMODELS, getProviderFromModel
+from .models import AVAILABLEMODELS, getProviderFromModel, Provider
 from .execution import ModelExecutor
 from .exceptions import LlumoAIError
 from .helpingFuntions import *
 from .sockets import LlumoSocketClient
 from .functionCalling import LlumoAgentExecutor
+from .chains import LlumoDataFrameResults,LlumoDictResults
 import threading
 from tqdm import tqdm

+pd.set_option('future.no_silent_downcasting', True)
+
 postUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
 )
@@ -38,7 +41,8 @@ class LlumoClient:

     def __init__(self, api_key):
         self.apiKey = api_key
-
+        self.evalData=[]
+        self.evals=[]
         self.processMapping = {}
         self.definationMapping = {}

@@ -50,7 +54,7 @@
         reqBody = {"analytics": [evalName]}

         try:
-
+
             response = requests.post(url=validateUrl, json=reqBody, headers=headers)

         except requests.exceptions.RequestException as e:
@@ -393,27 +397,46 @@
                 outputColName=outputColName,
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
                 )
             else:
                 return dataframe

     # this function allows the users to run multiple evals at once

-    def
+    def compressor(self, data, prompt_template):
        results = []
        dataframe = pd.DataFrame(data)
+
        try:
-
+            self.socket = LlumoSocketClient(socketUrl)
+            dataframe = pd.DataFrame(data).astype(str)
+            socketID = self.socket.connect(timeout=250)
+
+            # Wait for socket connection
            max_wait_secs = 20
            waited_secs = 0
            while not self.socket._connection_established.is_set():
                time.sleep(0.1)
                waited_secs += 0.1
                if waited_secs >= max_wait_secs:
-                    raise RuntimeError(
-
-
+                    raise RuntimeError("Timeout waiting for server connection")
+
+            # Start listener thread
+            expectedResults = len(dataframe)
+            # print("expected result" ,expectedResults)
+            timeout = max(100, min(150, expectedResults * 10))
+            listener_thread = threading.Thread(
+                target=self.socket.listenForResults,
+                kwargs={
+                    "min_wait": 40,
+                    "max_wait": timeout,
+                    "inactivity_timeout": 10,
+                    "expected_results": expectedResults,
+                },
+                daemon=True,
+            )
+            listener_thread.start()

            try:
                self.validateApiKey()
@@ -508,7 +531,7 @@
                    "playgroundID": activePlayground,
                }

-                rowIdMapping[rowID] = index
+                rowIdMapping[f'{rowID}-{columnID}-{columnID}'] = index
                # print("__________________________TEMPLATE__________________________________")
                # print(templateData)

@@ -537,7 +560,19 @@
                expected_results=None,
            )

-
+            rawResults = self.socket.getReceivedData()
+            receivedRowIDs = {key for item in rawResults for key in item.keys()}
+            expectedRowIDs = set(rowIdMapping.keys())
+            missingRowIDs = expectedRowIDs - receivedRowIDs
+            # print("All expected keys:", expected_rowIDs)
+            # print("All received keys:", received_rowIDs)
+            # print("Missing keys:", len(missingRowIDs))
+            missingRowIDs = list(missingRowIDs)
+
+            if len(missingRowIDs) > 0:
+                dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
+                rawResults.extend(dataFromDb)
+
            # results = self.finalResp(eval_results)
            # print(f"======= Completed evaluation: {eval} =======\n")

@@ -551,10 +586,10 @@
            print(f"Error disconnecting socket: {e}")

        dataframe["Compressed Input"] = None
-        for records in
+        for records in rawResults:
            for compound_key, value in records.items():
                # for compound_key, value in item['data'].items():
-                rowID = compound_key
+                rowID = compound_key
                # looking for the index of each rowID , in the original dataframe
                if rowID in rowIdMapping:
                    index = rowIdMapping[rowID]
@@ -580,6 +615,7 @@
        prompt_template="",
        outputColName="output",
        createExperiment: bool = False,
+        getDataFrame:bool =False,
        _tocheck=True,
    ):
        self.socket = LlumoSocketClient(socketUrl)
@@ -587,6 +623,8 @@
        workspaceID = None
        email = None
        socketID = self.socket.connect(timeout=250)
+        self.evalData=[]
+        self.evals=evals
        self.allBatches = []
        rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))

@@ -614,10 +652,14 @@
            daemon=True,
        )
        listener_thread.start()
-
-
-
-
+        self.validateApiKey(evalName=evals[0])
+        if createExperiment:
+            activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
+
+        else:
+            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                "-", ""
+            )
        for evalName in evals:
            # print(f"\n======= Running evaluation for: {evalName} =======")

@@ -776,10 +818,8 @@
            rawResults.extend(dataFromDb)


+        self.evalData = rawResults

-
-
-
        # Initialize dataframe columns for each eval
        for eval in evals:
            dataframe[eval] = None
@@ -799,7 +839,12 @@

        if createExperiment:
            pd.set_option("future.no_silent_downcasting", True)
-            df = dataframe.fillna("Some error occured").astype(object)
+            # df = dataframe.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = dataframe.fillna("Some error occurred").astype(str)
+
+            df = dataframe.fillna("Some error occured").infer_objects(copy=False)
            if createPlayground(
                email,
                workspaceID,
@@ -807,13 +852,19 @@
                promptText=prompt_template,
                definationMapping=self.definationMapping,
                outputColName=outputColName,
+                activePlayground= activePlayground
            ):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                )
        else:
-
+            if getDataFrame:
+                return LlumoDataFrameResults(dataframe,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
+            else:
+                data=dataframe.to_dict(orient="records")
+                return LlumoDictResults(data,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)

+
    def promptSweep(
        self,
        templates: List[str],
@@ -823,6 +874,7 @@
        evals=["Response Correctness"],
        toEvaluate: bool = False,
        createExperiment: bool = False,
+        getDataFrame = False


    ) -> pd.DataFrame:
@@ -878,6 +930,8 @@
                prompt_template=str(templates[0]),
                outputColName=outputColName,
                _tocheck=False,
+                getDataFrame=True,
+                createExperiment = False
            )

            # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -910,10 +964,17 @@
            ):

                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                )
            else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(dfWithEvals, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = dfWithEvals.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,definationMapping=self.definationMapping)
+
+
        else:
            if createExperiment == True:
                pd.set_option("future.no_silent_downcasting", True)
@@ -921,10 +982,18 @@

                if createPlayground(email, workspaceID, df, promptText=templates[0]):
                    print(
-                        "
+                        "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                    )
            else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(df, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = df.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                            definationMapping=self.definationMapping)
+
+

    # this function generates an output using llm and tools and evaluate that output
    def evaluateAgents(
@@ -936,6 +1005,7 @@
        evals=["Final Task Alignment"],
        prompt_template="Give answer for the given query: {{query}}",
        createExperiment: bool = False,
+        getDataFrame:bool = False

    ):
        if model.lower() not in ["openai", "google"]:
@@ -961,27 +1031,33 @@
            toolResponseDf.to_dict(orient="records"),
            evals=evals,
            prompt_template=prompt_template,
-            createExperiment=
+            createExperiment=createExperiment,
+            getDataFrame=getDataFrame
+
        )

-
-
-
-
-
-
-
-
-
+        return toolResponseDf
+        # if createExperiment:
+        #     pd.set_option("future.no_silent_downcasting", True)
+        #     df = toolResponseDf.fillna("Some error occured")
+        #     if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
+        #         print(
+        #             "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+        #         )
+        #     else:
+        #         return toolResponseDf

    # this function evaluate that tools output given by the user
    def evaluateAgentResponses(
        self,
        data,
        evals=["Final Task Alignment"],
-        outputColName="output",
        createExperiment: bool = False,
+        getDataFrame = False,
+        outputColName="output"
+
    ):
+
        dataframe = pd.DataFrame(data)

        try:
@@ -1001,7 +1077,9 @@
                evals=evals,
                prompt_template="Give answer for the given query: {{query}}",
                outputColName=outputColName,
-                createExperiment=createExperiment
+                createExperiment=createExperiment,
+                getDataFrame = getDataFrame
+
            )
            if createExperiment:
                pass
@@ -1022,7 +1100,8 @@
        prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
        evals=["Context Utilization"],
        toEvaluate=False,
-        generateOutput=True
+        generateOutput=True,
+        getDataFrame = False
    ):
        # Validate required parameters
        if generateOutput:
@@ -1147,23 +1226,25 @@
        outputEvalMapping = None
        if toEvaluate:
            for evalName in evals:
-
                # Validate API and dependencies
                self.validateApiKey(evalName=evalName)
                metricDependencies = checkDependency(
-                    evalName, list(working_df.columns), tocheck=
+                    evalName, list(working_df.columns), tocheck=False
                )
                if not metricDependencies["status"]:
                    raise LlumoAIError.dependencyError(metricDependencies["message"])

-            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template)
-
+            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template,generateOutput)

+
        self.socket.disconnect()

        # Create experiment if required
        if createExperiment:
-            df = working_df.fillna("Some error occured").astype(object)
+            # df = working_df.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = working_df.fillna("Some error occurred").astype(str)
            if createPlayground(
                email, workspaceID, df,
                queryColName=queryColName,
@@ -1173,10 +1254,22 @@
                evalOutputMap=outputEvalMapping
            ):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
        else:
-
-
+            if getDataFrame == True and toEvaluate == True:
+                return LlumoDataFrameResults(working_df, evals=self.evals, evalData=self.evalData,
+                                             definationMapping=self.definationMapping)
+
+            elif getDataFrame == False and toEvaluate == True:
+                data = working_df.to_dict(orient="records")
+                return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                        definationMapping=self.definationMapping)
+
+            elif getDataFrame== True and toEvaluate == False:
+                return working_df
+
+            elif getDataFrame == False and toEvaluate == False :
+                return working_df.to_dict(orient = "records")

    def _outputForStream(self, df, modelAliases, prompt_template, apiKey):
        executor = ModelExecutor(apiKey)
@@ -1189,45 +1282,81 @@
            inputDict = {key: row[key] for key in inputVariables}
            for i, model in enumerate(modelAliases, 1):
                try:
+
                    provider = getProviderFromModel(model)
+                    if provider == Provider.OPENAI:
+                        validateOpenaiKey(apiKey)
+                    elif provider == Provider.GOOGLE:
+                        validateGoogleKey(apiKey)
+
                    filled_template = getInputPopulatedPrompt(prompt_template, inputDict)
                    response = executor.execute(provider, model.value, filled_template, apiKey)
                    df.at[indx, f"output_{i}"] = response
+
                except Exception as e:
-                    df.at[indx, f"output_{i}"] = str(e)
+                    # df.at[indx, f"output_{i}"] = str(e)
+                    raise e
+
        return df

-    def _evaluateForStream(self, df, evals, modelAliases, prompt_template):
+    def _evaluateForStream(self, df, evals, modelAliases, prompt_template, generateOutput):
        dfWithEvals = df.copy()
-
        outputColMapping = {}

-
-
-
+        if generateOutput:
+            # Evaluate per model output
+            for i, model in enumerate(modelAliases, 1):
+                outputColName = f"output_{i}"
+                try:
+                    res = self.evaluateMultiple(
+                        dfWithEvals.to_dict("records"),
+                        evals=evals,
+                        prompt_template=prompt_template,
+                        outputColName=outputColName,
+                        _tocheck=False,
+                        getDataFrame=True,
+                        createExperiment=False
+                    )

+                    for evalMetric in evals:
+                        scoreCol = f"{evalMetric}"
+                        reasonCol = f"{evalMetric} Reason"
+                        if scoreCol in res.columns:
+                            res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
+                        if reasonCol in res.columns:
+                            res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
+
+                        outputColMapping[f"{scoreCol}_{i}"] = outputColName
+
+                    newCols = [col for col in res.columns if col not in dfWithEvals.columns]
+                    dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
+
+                except Exception as e:
+                    print(f"Evaluation failed for model {model.value}: {str(e)}")
+
+        else:
+            # Evaluate only once on "output" column
+            try:
+                outputColName = "output"
                res = self.evaluateMultiple(
                    dfWithEvals.to_dict("records"),
                    evals=evals,
                    prompt_template=prompt_template,
                    outputColName=outputColName,
                    _tocheck=False,
+                    getDataFrame=True,
+                    createExperiment=False
                )
                for evalMetric in evals:
                    scoreCol = f"{evalMetric}"
                    reasonCol = f"{evalMetric} Reason"
-
-                    if scoreCol in res.columns:
-                        res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
-                    if reasonCol in res.columns:
-                        res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
-
-                    outputColMapping[f"{scoreCol}_{i}"] = outputColName
+                    outputColMapping[scoreCol] = "output"

                newCols = [col for col in res.columns if col not in dfWithEvals.columns]
                dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
            except Exception as e:
-                print(f"Evaluation failed
+                print(f"Evaluation failed: {str(e)}")
+
        return dfWithEvals, outputColMapping

    def runDataStream(
@@ -1236,6 +1365,7 @@
        streamName: str,
        queryColName: str = "query",
        createExperiment: bool = False,
+        getDataFrame = False
    ):


@@ -1354,10 +1484,16 @@
                definationMapping=self.definationMapping,
            ):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
        else:
-
-
+            if getDataFrame:
+                return working_df
+
+            else:
+                data = working_df.to_dict(orient="records")
+                return data
+            # self.latestDataframe = working_df
+            # return working_df


    def createExperiment(self, dataframe):
@@ -1367,7 +1503,7 @@
            flag = createPlayground(self.email, self.workspaceID, dataframe)
            if flag:
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                )
        except Exception as e:
            raise "Some error ocuured please check your API key"
@@ -1407,7 +1543,7 @@
        if createPlayground(self.email, self.workspaceID, df):

            print(
-                "
+                "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
            )

        return True
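Taken together, the client changes add a getDataFrame flag to evaluateMultiple(), promptSweep(), evaluateAgents(), evaluateAgentResponses() and runDataStream(), and route non-experiment results through the wrappers from chains.py. A hedged sketch of the two return modes, reusing the client object from the sketch after chains.py; the row contents and template are illustrative, not from the package:

rows = [{"query": "q1", "context": "c1", "output": "o1"}]

as_dicts = client.evaluateMultiple(
    rows, evals=["Response Correctness"],
    prompt_template="Answer {{query}} using {{context}}",
)   # default: LlumoDictResults, a list of per-row dicts that still exposes .insights()

as_frame = client.evaluateMultiple(
    rows, evals=["Response Correctness"],
    prompt_template="Answer {{query}} using {{context}}",
    getDataFrame=True,
)   # LlumoDataFrameResults, a pandas DataFrame subclass with the same .insights()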
llumo/execution.py
CHANGED
@@ -25,15 +25,14 @@ class ModelExecutor:
         return response.choices[0].message.content

     def _executeGoogle(self, modelName: str, prompt: str,api_key) -> str:
-
+
         # Configure GenAI with API Key
         genai.configure(api_key=api_key)
-
+
         # Select Generative Model
         model = genai.GenerativeModel("gemini-2.0-flash-lite")
         # Generate Response
         response = model.generate_content(prompt)
         return response.text
-
-
-
+
+
llumo/google.py
ADDED
@@ -0,0 +1,34 @@
+from google import generativeai as _genai
+
+class genai:
+    """
+    Top-level wrapper module to mimic:
+        >>> from google import genai
+        >>> client = genai.Client(api_key=...)
+    """
+
+    class Client:
+        def __init__(self, api_key: str, default_model: str = "gemini-2.5-flash"):
+            _genai.configure(api_key=api_key)
+            self._defaultModel = default_model
+            self._defaultModelInstance = _genai.GenerativeModel(model_name=default_model)
+
+            class Models:
+                def __init__(self, outer):
+                    self._outer = outer
+
+                def generate_content(self, contents: str | list[str], model: str = None, **kwargs):
+                    model_name = model or self._outer._defaultModel
+                    model_instance = _genai.GenerativeModel(model_name=model_name)
+                    return model_instance.generate_content(contents=contents, **kwargs)
+
+            self.models = Models(self)
+
+        def generate(self, prompt: str | list[str], **kwargs):
+            """Convenience shortcut for single-line generation."""
+            return self._defaultModelInstance.generate_content(prompt, **kwargs)
+
+        def setDefaultModel(self, model_name: str):
+            """Change the default model at runtime."""
+            self._defaultModel = model_name
+            self._defaultModelInstance = _genai.GenerativeModel(model_name=model_name)
llumo/helpingFuntions.py
CHANGED
@@ -8,6 +8,9 @@ import json
 import base64
 import os
 import re
+import openai
+import google.generativeai as genai
+from collections import defaultdict


 from .models import _MODEL_METADATA, AVAILABLEMODELS
@@ -21,8 +24,7 @@ uploadColList = (
 uploadRowList = (
     "https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
 )
-createInsightUrl
-getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
+createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"


 def getProcessID():
@@ -227,11 +229,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     }
     allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']

-
+    evalDependencies = checkDependency(_returnDepMapping=True)

     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
-
+    print("Def Mapping: ")
+    print(definationMapping)
     # Iterate over each column in the dataframe
     for indx, col in enumerate(dataframe.columns):
         # Generate a unique column ID using uuid
@@ -240,46 +243,46 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
         columnIDMapping[col] = columnID


-        if col.startswith('output') and promptText!=None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # if col.startswith('output') and promptText!=None:
+        #     # For output columns, create the prompt template with promptText
+        #     if promptText:
+        #         # Extract variables from promptText and set them as dependencies
+        #         dependencies = []
+        #
+        #         # Find variables inside {{variable}}
+        #         variables = re.findall(r'{{(.*?)}}', promptText)
+        #
+        #         # Loop through each variable and check if it exists as a column name
+        #         for var in variables:
+        #             varName = var.strip()
+        #             if varName in columnIDMapping:  # Check if the variable is a column name
+        #                 dependencies.append(columnIDMapping[varName])  # Add its columnID
+        #
+        #         # Now update the template for the output column
+        #
+        #         template={
+        #             "provider": "OPENAI",
+        #             "model": "GPT_4o",
+        #             "promptText": promptText,
+        #             "modelOptions": {
+        #                 "temperature": 0,
+        #                 "frequencyPenalty": 0,
+        #                 "presencePenalty": 0,
+        #                 "maxToken": 8192
+        #             },
+        #             "toolConfig": "none",
+        #             "concurrency": "",
+        #             "outputType": "STRING",
+        #             "isPromptSelected": True,
+        #             "isSmartPromptSelected": False,
+        #             "dependency": dependencies,  # Use the dependencies extracted from promptText
+        #             "columnID": columnID,  # Use the generated column ID
+        #             "label": col,
+        #             "type": "PROMPT",
+        #             "order": indx,
+        #         }
+
+        if col.startswith('context') and dataStreamName != None :
            if queryColName and dataStreamName:
                dependencies = []
                dependencies.append(columnIDMapping[queryColName])
@@ -310,9 +313,9 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN

            dependencies.append(columnIDMapping[outputColName])  # Add the output column ID

-            longDef = definationMapping.get(col, {}).get('definition', "")
-            shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
-            enum =
+            longDef = definationMapping.get(col.rsplit("_",1)[0], {}).get('definition', "")
+            shortDef =definationMapping.get(col.rsplit("_",1)[0], {}).get('briefDefinition', "")
+            enum = col.rsplit("_",1)[0].upper().replace(" ","_")

            template = {
                "analytics": [
@@ -320,20 +323,23 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                ],
                "evaluationMetric": "ALL",
                "evaluationModel": "LLUMO_EVALLM",
-                "selectPrompt": columnIDMapping[
+                "selectPrompt": None if "output" not in columnIDMapping.keys() else columnIDMapping["output"],
                "scoreCondition": "GREATER_THAN",
                "scoreValue": "50",
                "scoreResult": "PASS",
-                "llmKpi": col,
+                "llmKpi": col.rsplit("_",1)[0],
                "setRules": True,
                "type": "EVAL",
                "evalType": "LLM",
                "similarityMetric": None,
                "embeddingModel": None,
-                "groundTruth": None,
+                "groundTruth": None if "groundTruth" not in columnIDMapping.keys() else columnIDMapping["groundTruth"],
                "dataStream": None,
-                "context":
-                "dependency":
+                "context":None if "context" not in columnIDMapping.keys() else columnIDMapping["context"],
+                "dependency":[ columnIDMapping[dep] for dep in evalDependencies[ col.rsplit("_",1)[0]]],
+                "query": None if "query" not in columnIDMapping.keys() else columnIDMapping["query"],
+                "tools":None if "tools" not in columnIDMapping.keys() else columnIDMapping["tools"],
+                "messageHistory":None if "messageHistory" not in columnIDMapping.keys() else columnIDMapping["messageHistory"],
                "hallucinationFields": {
                    "query": None,
                    "context": None,
@@ -342,7 +348,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                "definition": longDef,
                "analyticsENUM": enum,
                "prompt": shortDef,
-                "analyticsName": col,
+                "analyticsName": col.rsplit("_",1)[0],
                "columnID": columnID,
                "label": col,
                "order": indx
@@ -378,12 +384,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
        row_dict = {}

        # For each column, we need to map the column ID to the corresponding value in the row
-
+
        for col in dataframe.columns:
            columnID = columnIDMapping[col]

            if any(col.startswith(eval + "_") or col == eval for eval in allEvals) and not " Reason" in col and promptText!=None:
-
+
                row_dict[columnID] = {

                    "value": row[col],
@@ -450,9 +456,12 @@ def uploadRowsInDBPlayground(payload):
        return None


-def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None):
+def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):

-
+    if activePlayground != None:
+        playgroundId=activePlayground
+    else:
+        playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
    payload1, payload2 = createColumn(
        workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
    )
@@ -470,7 +479,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d



-def getPlaygroundInsights(
+def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: list):
    headers = {

        "Content-Type": "application/json",
@@ -478,48 +487,29 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):

    # Initial request to generate playground insights
    payload = {
-        "
-        "
+        "uniqueClassesString": uniqueClassesString,
+        "reasonList": reasonList,
+        "definition": defination,
    }

    urlGenerate = createInsightUrl
-
-
-
-    if responseGenerate.status_code == 200:
-        responseJson = responseGenerate.json()
-
-        insightStatus = responseJson.get("data", {}).get("insight", False)
+    try:
+        responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)

-        if
-
-            urlGetAll = getPlaygroundInsightsUrl
+        if responseGenerate.status_code == 200:
+            responseJson = responseGenerate.json()

-
-
-
-
-
-
-
-            # Extract insight and solution
-            insights = []
-            for item in data:
-                insight = item.get("insight", "")
-                solution = item.get("solution", "")
-                insights.append({"insight": insight, "solution": solution})
-
-            return insights
-        else:
-            print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
-            return None
-    else:
-        print("No insight generated.")
-        return None
+            filteredResponse = {key: value for key, value in responseJson.items() if key in ['analysis', 'nextStep']}
+
+            return filteredResponse
+    except Exception as e:
+        print(f"Exception occurred while generating insight: {e}")
+        return None
+
    else:
        print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
        return None
-def checkDependency(selectedEval, columns,tocheck=True):
+def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
    """
    Checks if all the required input columns for the selected evaluation metric are present.

@@ -530,33 +520,35 @@ def checkDependency(selectedEval, columns,tocheck=True):
    Raises:
        - LlumoAIError.dependencyError: If any required column is missing.
    """
-    if tocheck:
    # Define required dependencies for each evaluation metric
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    metricDependencies = {
+        'Response Completeness': ['context', 'query', 'output'],
+        'Response Bias': ['output'],
+        'Response Harmfulness': ['output'],
+        'Input Toxicity': ['query'],
+        'Input Harmfulness': ['query'],
+        'Context Utilization': ['output', 'context'],
+        'Relevance Retention': ['context', 'query'],
+        'Semantic Cohesion': ['context'],
+        'Final Task Alignment': ['messageHistory'],
+        'Tool Reliability': ['messageHistory'],
+        'Response Correctness': ['output', 'query', 'context'],
+        'Response Toxicity': ['output'],
+        'Input Bias': ['query'],
+        'Input Relevancy': ['context', 'query'],
+        'Redundancy Reduction': ['context'],
+        'Response Sentiment': ['output'],
+        'Tool Selection Accuracy': ['tools', 'messageHistory'],
+        'Stepwise Progression': ['tools', 'messageHistory'],
+        'Hallucination': ['query', 'context', 'output'],
+        'Groundedness': ['groundTruth', 'output'],
+        'Memory Utilization': ['context', 'messageHistory'],
+        'Input Relevancy (Multi-turn)': ['context', 'query']
+    }
+    if _returnDepMapping == True:
+        return metricDependencies

+    if tocheck == True:
        # Check if the selected evaluation metric is known
        if selectedEval not in metricDependencies:
            return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
@@ -632,3 +624,56 @@ def validateModels(model_aliases):



+def validateOpenaiKey(api_key):
+    try:
+        client = openai.OpenAI(api_key=api_key)
+        _ = client.models.list()  # Light call to list models
+    except openai.AuthenticationError:
+        raise ValueError("❌ Invalid OpenAI API key.")
+    except Exception as e:
+        raise RuntimeError(f"⚠️ Error validating OpenAI key: {e}")
+
+def validateGoogleKey(api_key):
+    try:
+        genai.configure(api_key=api_key)
+        _ = genai.GenerativeModel("gemini-2.0-flash-lite").generate_content("test")
+    except Exception as e:
+        if "PERMISSION_DENIED" in str(e) or "API key not valid" in str(e):
+            raise ValueError("❌ Invalid Google API key.")
+        raise RuntimeError(f"⚠️ Error validating Gemini key: {e}")
+
+def groupLogsByClass(logs, max_logs=2):
+    # Initialize the final result structures (no defaultdict)
+    groupedLogs = {}
+    uniqueEdgeCases = {}  # This will store unique edge cases for each eval_name
+
+    # Iterate through the logs
+    for log in logs:
+        log_details = list(log.values())[0]  # Get the details dictionary
+        eval_name = log_details.get("kpi", "unmarked")
+        edge_case = log_details.get("edgeCase", "unmarked")
+        reasoning = log_details.get("reasoning", "")
+
+        if eval_name != "unmarked" and edge_case != "unmarked":
+            # Ensure that the eval_name and edge_case exist in the dictionary
+            if eval_name not in groupedLogs:
+                groupedLogs[eval_name] = {}
+                uniqueEdgeCases[eval_name] = set()  # Initialize the set for unique edge cases
+
+            if edge_case not in groupedLogs[eval_name]:
+                groupedLogs[eval_name][edge_case] = []
+
+            # Append the reasoning to the correct place
+            groupedLogs[eval_name][edge_case].append(reasoning)
+            uniqueEdgeCases[eval_name].add(edge_case)  # Add the edge case to the set
+
+    # Limit the number of reasons to max_logs
+    for eval_name in groupedLogs:
+        for edge_case in groupedLogs[eval_name]:
+            groupedLogs[eval_name][edge_case] = groupedLogs[eval_name][edge_case][:max_logs]
+
+    # Convert the set of unique edge cases to a list for easier reading
+    for eval_name in uniqueEdgeCases:
+        uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])
+
+    return groupedLogs, uniqueEdgeCases
llumo/openai.py
ADDED
@@ -0,0 +1,79 @@
+from openai import OpenAI as OpenAIClient
+from .client import LlumoClient
+
+# Dummy evaluation function that uses LlumoClient
+def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
+    client = LlumoClient(api_key=api_key)
+    results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
+    print(results)
+    return results
+
+# Wrapper around ChatCompletion to allow custom fields like `.evaluation`
+class ChatCompletionWithEval:
+    def __init__(self, response, evaluation):
+        self._response = response
+        self.evaluation = evaluation
+
+    def __getattr__(self, name):
+        return getattr(self._response, name)
+
+    def __getitem__(self, key):
+        return self._response[key]
+
+    def __repr__(self):
+        return repr(self._response)
+
+class openai(OpenAIClient):
+    def __init__(self, api_key: str):
+        super().__init__(api_key=api_key)
+
+        original_create = self.chat.completions.create
+
+        class ChatCompletionsWrapper:
+            @staticmethod
+            def create(*args, **kwargs):
+                context = kwargs.pop("context", None)
+                evals = kwargs.pop("evals", [])
+                llumo_key = kwargs.pop("llumo_key", None)
+
+                messages = kwargs.get("messages", [])
+                user_message = next(
+                    (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
+                    "",
+                )
+
+                # If context is None or empty or whitespace-only, set it to user_message
+                if not context or context.strip() == "":
+                    context = user_message
+
+                response = original_create(*args, **kwargs)
+
+                try:
+                    output_text = response.choices[0].message.content
+                except Exception:
+                    output_text = ""
+
+                eval_input = [{
+                    "query": user_message,
+                    "context": context,
+                    "output": output_text,
+                }]
+
+                # Safely call evaluate_multiple, if error return None
+                evaluation = None
+                try:
+                    evaluation = evaluate_multiple(eval_input, api_key=llumo_key,evals=evals)
+                except Exception as e:
+                    # You can optionally log the error here if you want
+                    # print(f"Evaluation failed, skipping: {e}")
+                    evaluation = None
+
+                # If evaluation is None, just return normal response
+                if evaluation is None:
+                    print("All server are busy for evaluation ")
+                    return response
+
+                # Otherwise wrap with evaluation attached
+                return ChatCompletionWithEval(response, evaluation)
+
+        self.chat.completions.create = ChatCompletionsWrapper.create
llumo-0.2.16b1.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
+llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+llumo/client.py,sha256=rOTbw8QGi5CnQ77QKS4rKh-dSBSVoyVAORrK1i_b5EQ,60339
+llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
+llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/google.py,sha256=5AVAqxPN20UuHIqi4yuHHSTf49LI96krtbztJ5qt8L0,1413
+llumo/helpingFuntions.py,sha256=0W2JNdLyOV92lgESgB_JyJmOUvW5ooRdZyjN5LKDSX0,25296
+llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+llumo/openai.py,sha256=BEmsOdHiQzDpKv6b4L62JaUMq7DbpICNPqyfMNRWi2I,2981
+llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
+llumo-0.2.16b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.16b1.dist-info/METADATA,sha256=_e94VIPrn02CP0X9gdkICA210Te_inzaSPcfH0p-Hlk,1521
+llumo-0.2.16b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.16b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.16b1.dist-info/RECORD,,
llumo-0.2.15b1.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
-llumo/client.py,sha256=XljwD5mZxjyrXHhu8YhN0cGsd-O_LyKbPzrhS8zbqZo,53778
-llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
-llumo/execution.py,sha256=x88wQV8eL99wNN5YtjFaAMCIfN1PdfQVlAZQb4vzgQ0,1413
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/helpingFuntions.py,sha256=0-ZwG0fnbfP4DP1JTMewM8LdXzz_-p1gRqhPsX0Zmpk,22785
-llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
-llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
-llumo-0.2.15b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.15b1.dist-info/METADATA,sha256=yDLkiD46Qq44PA3ylKK2dzsXZmnuE23yxH0RmoqizOk,1521
-llumo-0.2.15b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.15b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.15b1.dist-info/RECORD,,
File without changes
File without changes
File without changes