llumo 0.2.15b2__py3-none-any.whl → 0.2.16b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llumo/__init__.py +2 -1
- llumo/chains.py +73 -0
- llumo/client.py +193 -65
- llumo/google.py +34 -0
- llumo/helpingFuntions.py +142 -116
- llumo/openai.py +79 -0
- {llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/METADATA +1 -1
- llumo-0.2.16b1.dist-info/RECORD +16 -0
- llumo-0.2.15b2.dist-info/RECORD +0 -13
- {llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/WHEEL +0 -0
- {llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/top_level.txt +0 -0
llumo/__init__.py
CHANGED
llumo/chains.py
ADDED
@@ -0,0 +1,73 @@
+import pandas as pd
+from .helpingFuntions import *
+
+class LlumoDataFrameResults(pd.DataFrame):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)
+
+    @property
+    def _constructor(self):
+        # Needed so slicing operations return the same type
+        return LlumoDataFrameResults
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
+
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+class LlumoDictResults(list):
+    _metadata=["evals","evalData","definationMapping"]
+
+    def __init__(self, *args,evals=None,evalData=None,definationMapping=None,**kwargs):
+        self.evals = evals or []
+        self.evalData= evalData or []
+        self.definationMapping= definationMapping or {}
+        super().__init__(*args, **kwargs)  # This will handle list[dict]
+
+    def insights(self):
+
+        if not self.evalData:
+            print("No raw data available. Please run evaluateMultiple() first.")
+            return None
+        try:
+            insights=[]
+            reasonData,uniqueEdgecase=groupLogsByClass(self.evalData) # print(rawResults)
+            for evalname in self.evals:
+                uniqueclassesstring = ",".join(uniqueEdgecase.get(evalname, []))
+                allReasons = []
+                for edgeCase in reasonData[evalname]:
+                    allReasons.extend(reasonData[evalname][edgeCase])
+                evalDefinition = self.definationMapping.get(evalname, {}).get("definition", "")
+                insights.append(getPlaygroundInsights(evalDefinition,uniqueclassesstring,allReasons))
+            return insights
+        except Exception as e:
+            print("Can not genrate insights for this eval, please try again later.")
+
+
+for _cls in (LlumoDataFrameResults, LlumoDictResults):
+    _cls.__name__ = "LlumoResults"
+    _cls.__qualname__ = "LlumoResults"
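The two wrappers above are what the client now hands back from evaluateMultiple(): a DataFrame-like result (getDataFrame=True) or a list-of-dicts result (getDataFrame=False), each carrying the eval metadata that insights() needs. A minimal usage sketch, assuming LlumoClient is exported from the package root as in earlier releases; the keys and sample row are placeholders:

    from llumo import LlumoClient

    client = LlumoClient(api_key="YOUR_LLUMO_KEY")  # placeholder key
    rows = [{"query": "What is RAG?",
             "context": "RAG pairs retrieval with generation.",
             "output": "RAG retrieves documents, then generates an answer."}]

    results = client.evaluateMultiple(rows, evals=["Response Correctness"], getDataFrame=True)
    print(results.head())      # behaves like a regular pandas DataFrame
    print(results.insights())  # extra method provided by LlumoDataFrameResults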
llumo/client.py
CHANGED
@@ -5,7 +5,7 @@ import time
 import re
 import json
 import uuid
-
+import warnings
 import os
 import itertools
 import pandas as pd
@@ -16,9 +16,12 @@ from .exceptions import LlumoAIError
 from .helpingFuntions import *
 from .sockets import LlumoSocketClient
 from .functionCalling import LlumoAgentExecutor
+from .chains import LlumoDataFrameResults,LlumoDictResults
 import threading
 from tqdm import tqdm
 
+pd.set_option('future.no_silent_downcasting', True)
+
 postUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
 )
@@ -38,7 +41,8 @@ class LlumoClient:
 
     def __init__(self, api_key):
         self.apiKey = api_key
-
+        self.evalData=[]
+        self.evals=[]
         self.processMapping = {}
         self.definationMapping = {}
 
@@ -393,27 +397,46 @@ class LlumoClient:
                 outputColName=outputColName,
             ):
                 print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.Please rerun the experiment to see the results on playground."
                 )
         else:
             return dataframe
 
    # this function allows the users to run multiple evals at once
 
-    def
+    def compressor(self, data, prompt_template):
        results = []
        dataframe = pd.DataFrame(data)
+
        try:
-
+            self.socket = LlumoSocketClient(socketUrl)
+            dataframe = pd.DataFrame(data).astype(str)
+            socketID = self.socket.connect(timeout=250)
+
+            # Wait for socket connection
            max_wait_secs = 20
            waited_secs = 0
            while not self.socket._connection_established.is_set():
                time.sleep(0.1)
                waited_secs += 0.1
                if waited_secs >= max_wait_secs:
-                    raise RuntimeError(
-
-
+                    raise RuntimeError("Timeout waiting for server connection")
+
+            # Start listener thread
+            expectedResults = len(dataframe)
+            # print("expected result" ,expectedResults)
+            timeout = max(100, min(150, expectedResults * 10))
+            listener_thread = threading.Thread(
+                target=self.socket.listenForResults,
+                kwargs={
+                    "min_wait": 40,
+                    "max_wait": timeout,
+                    "inactivity_timeout": 10,
+                    "expected_results": expectedResults,
+                },
+                daemon=True,
+            )
+            listener_thread.start()
 
            try:
                self.validateApiKey()
@@ -508,7 +531,7 @@ class LlumoClient:
                    "playgroundID": activePlayground,
                }
 
-                rowIdMapping[rowID] = index
+                rowIdMapping[f'{rowID}-{columnID}-{columnID}'] = index
                # print("__________________________TEMPLATE__________________________________")
                # print(templateData)
 
@@ -537,7 +560,19 @@ class LlumoClient:
                expected_results=None,
            )
 
-
+            rawResults = self.socket.getReceivedData()
+            receivedRowIDs = {key for item in rawResults for key in item.keys()}
+            expectedRowIDs = set(rowIdMapping.keys())
+            missingRowIDs = expectedRowIDs - receivedRowIDs
+            # print("All expected keys:", expected_rowIDs)
+            # print("All received keys:", received_rowIDs)
+            # print("Missing keys:", len(missingRowIDs))
+            missingRowIDs = list(missingRowIDs)
+
+            if len(missingRowIDs) > 0:
+                dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
+                rawResults.extend(dataFromDb)
+
            # results = self.finalResp(eval_results)
            # print(f"======= Completed evaluation: {eval} =======\n")
 
@@ -551,10 +586,10 @@ class LlumoClient:
            print(f"Error disconnecting socket: {e}")
 
        dataframe["Compressed Input"] = None
-        for records in
+        for records in rawResults:
            for compound_key, value in records.items():
                # for compound_key, value in item['data'].items():
-                rowID = compound_key
+                rowID = compound_key
                # looking for the index of each rowID , in the original dataframe
                if rowID in rowIdMapping:
                    index = rowIdMapping[rowID]
@@ -580,6 +615,7 @@ class LlumoClient:
        prompt_template="",
        outputColName="output",
        createExperiment: bool = False,
+        getDataFrame:bool =False,
        _tocheck=True,
    ):
        self.socket = LlumoSocketClient(socketUrl)
@@ -587,6 +623,8 @@ class LlumoClient:
        workspaceID = None
        email = None
        socketID = self.socket.connect(timeout=250)
+        self.evalData=[]
+        self.evals=evals
        self.allBatches = []
        rowIdMapping = {} # (rowID-columnID-columnID -> (index, evalName))
 
@@ -614,10 +652,14 @@ class LlumoClient:
            daemon=True,
        )
        listener_thread.start()
-
-
-
-
+        self.validateApiKey(evalName=evals[0])
+        if createExperiment:
+            activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
+
+        else:
+            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
+                "-", ""
+            )
        for evalName in evals:
            # print(f"\n======= Running evaluation for: {evalName} =======")
 
@@ -776,10 +818,8 @@ class LlumoClient:
                rawResults.extend(dataFromDb)
 
 
+        self.evalData = rawResults
 
-
-
-
        # Initialize dataframe columns for each eval
        for eval in evals:
            dataframe[eval] = None
@@ -799,7 +839,12 @@ class LlumoClient:
 
        if createExperiment:
            pd.set_option("future.no_silent_downcasting", True)
-            df = dataframe.fillna("Some error occured").astype(object)
+            # df = dataframe.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = dataframe.fillna("Some error occurred").astype(str)
+
+            df = dataframe.fillna("Some error occured").infer_objects(copy=False)
            if createPlayground(
                email,
                workspaceID,
@@ -807,13 +852,19 @@ class LlumoClient:
                promptText=prompt_template,
                definationMapping=self.definationMapping,
                outputColName=outputColName,
+                activePlayground= activePlayground
            ):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                )
        else:
-
+            if getDataFrame:
+                return LlumoDataFrameResults(dataframe,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
+            else:
+                data=dataframe.to_dict(orient="records")
+                return LlumoDictResults(data,evals=self.evals,evalData=self.evalData,definationMapping=self.definationMapping)
 
+
    def promptSweep(
        self,
        templates: List[str],
@@ -823,6 +874,7 @@ class LlumoClient:
        evals=["Response Correctness"],
        toEvaluate: bool = False,
        createExperiment: bool = False,
+        getDataFrame = False
 
 
    ) -> pd.DataFrame:
@@ -878,6 +930,8 @@ class LlumoClient:
                prompt_template=str(templates[0]),
                outputColName=outputColName,
                _tocheck=False,
+                getDataFrame=True,
+                createExperiment = False
            )
 
            # Rename all new columns with _i+1 (e.g., _1, _2)
@@ -910,10 +964,17 @@ class LlumoClient:
                ):
 
                    print(
-                        "
+                        "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                    )
            else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(dfWithEvals, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = dfWithEvals.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,definationMapping=self.definationMapping)
+
+
        else:
            if createExperiment == True:
                pd.set_option("future.no_silent_downcasting", True)
@@ -921,10 +982,18 @@ class LlumoClient:
 
            if createPlayground(email, workspaceID, df, promptText=templates[0]):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                )
            else:
-
+                if getDataFrame:
+                    return LlumoDataFrameResults(df, evals=self.evals, evalData=self.evalData,
+                                                 definationMapping=self.definationMapping)
+                else:
+                    data = df.to_dict(orient="records")
+                    return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                            definationMapping=self.definationMapping)
+
+
 
    # this function generates an output using llm and tools and evaluate that output
    def evaluateAgents(
@@ -936,6 +1005,7 @@ class LlumoClient:
        evals=["Final Task Alignment"],
        prompt_template="Give answer for the given query: {{query}}",
        createExperiment: bool = False,
+        getDataFrame:bool = False
 
    ):
        if model.lower() not in ["openai", "google"]:
@@ -961,27 +1031,33 @@ class LlumoClient:
            toolResponseDf.to_dict(orient="records"),
            evals=evals,
            prompt_template=prompt_template,
-            createExperiment=
+            createExperiment=createExperiment,
+            getDataFrame=getDataFrame
+
        )
 
-
-
-
-
-
-
-
-
-
+        return toolResponseDf
+        # if createExperiment:
+        #     pd.set_option("future.no_silent_downcasting", True)
+        #     df = toolResponseDf.fillna("Some error occured")
+        #     if createPlayground(self.email, self.workspaceID, df,promptText=prompt_template,definationMapping=self.definationMapping):
+        #         print(
+        #             "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
+        #         )
+        # else:
+        #     return toolResponseDf
 
    # this function evaluate that tools output given by the user
    def evaluateAgentResponses(
        self,
        data,
        evals=["Final Task Alignment"],
-        outputColName="output",
        createExperiment: bool = False,
+        getDataFrame = False,
+        outputColName="output"
+
    ):
+
        dataframe = pd.DataFrame(data)
 
        try:
@@ -1001,7 +1077,9 @@ class LlumoClient:
                evals=evals,
                prompt_template="Give answer for the given query: {{query}}",
                outputColName=outputColName,
-                createExperiment=createExperiment
+                createExperiment=createExperiment,
+                getDataFrame = getDataFrame
+
            )
            if createExperiment:
                pass
@@ -1022,7 +1100,8 @@ class LlumoClient:
        prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
        evals=["Context Utilization"],
        toEvaluate=False,
-        generateOutput=True
+        generateOutput=True,
+        getDataFrame = False
    ):
        # Validate required parameters
        if generateOutput:
@@ -1147,7 +1226,6 @@ class LlumoClient:
        outputEvalMapping = None
        if toEvaluate:
            for evalName in evals:
-
                # Validate API and dependencies
                self.validateApiKey(evalName=evalName)
                metricDependencies = checkDependency(
@@ -1156,14 +1234,17 @@ class LlumoClient:
                if not metricDependencies["status"]:
                    raise LlumoAIError.dependencyError(metricDependencies["message"])
 
-            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template)
-
+            working_df, outputEvalMapping = self._evaluateForStream(working_df, evals, modelAliases, prompt_template,generateOutput)
 
+
        self.socket.disconnect()
 
        # Create experiment if required
        if createExperiment:
-            df = working_df.fillna("Some error occured").astype(object)
+            # df = working_df.fillna("Some error occured").astype(object)
+            with warnings.catch_warnings():
+                warnings.simplefilter(action='ignore', category=FutureWarning)
+                df = working_df.fillna("Some error occurred").astype(str)
            if createPlayground(
                email, workspaceID, df,
                queryColName=queryColName,
@@ -1173,10 +1254,22 @@ class LlumoClient:
                evalOutputMap=outputEvalMapping
            ):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
        else:
-
-
+            if getDataFrame == True and toEvaluate == True:
+                return LlumoDataFrameResults(working_df, evals=self.evals, evalData=self.evalData,
+                                             definationMapping=self.definationMapping)
+
+            elif getDataFrame == False and toEvaluate == True:
+                data = working_df.to_dict(orient="records")
+                return LlumoDictResults(data, evals=self.evals, evalData=self.evalData,
+                                        definationMapping=self.definationMapping)
+
+            elif getDataFrame== True and toEvaluate == False:
+                return working_df
+
+            elif getDataFrame == False and toEvaluate == False :
+                return working_df.to_dict(orient = "records")
 
    def _outputForStream(self, df, modelAliases, prompt_template, apiKey):
        executor = ModelExecutor(apiKey)
@@ -1192,50 +1285,78 @@ class LlumoClient:
 
                provider = getProviderFromModel(model)
                if provider == Provider.OPENAI:
-
+                    validateOpenaiKey(apiKey)
                elif provider == Provider.GOOGLE:
                    validateGoogleKey(apiKey)
 
                filled_template = getInputPopulatedPrompt(prompt_template, inputDict)
                response = executor.execute(provider, model.value, filled_template, apiKey)
                df.at[indx, f"output_{i}"] = response
+
            except Exception as e:
                # df.at[indx, f"output_{i}"] = str(e)
                raise e
 
        return df
 
-    def _evaluateForStream(self, df, evals, modelAliases, prompt_template):
+    def _evaluateForStream(self, df, evals, modelAliases, prompt_template, generateOutput):
        dfWithEvals = df.copy()
-
        outputColMapping = {}
 
-
-
-
+        if generateOutput:
+            # Evaluate per model output
+            for i, model in enumerate(modelAliases, 1):
+                outputColName = f"output_{i}"
+                try:
+                    res = self.evaluateMultiple(
+                        dfWithEvals.to_dict("records"),
+                        evals=evals,
+                        prompt_template=prompt_template,
+                        outputColName=outputColName,
+                        _tocheck=False,
+                        getDataFrame=True,
+                        createExperiment=False
+                    )
 
+                    for evalMetric in evals:
+                        scoreCol = f"{evalMetric}"
+                        reasonCol = f"{evalMetric} Reason"
+                        if scoreCol in res.columns:
+                            res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
+                        if reasonCol in res.columns:
+                            res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
+
+                        outputColMapping[f"{scoreCol}_{i}"] = outputColName
+
+                    newCols = [col for col in res.columns if col not in dfWithEvals.columns]
+                    dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
+
+                except Exception as e:
+                    print(f"Evaluation failed for model {model.value}: {str(e)}")
+
+        else:
+            # Evaluate only once on "output" column
+            try:
+                outputColName = "output"
                res = self.evaluateMultiple(
                    dfWithEvals.to_dict("records"),
                    evals=evals,
                    prompt_template=prompt_template,
                    outputColName=outputColName,
                    _tocheck=False,
+                    getDataFrame=True,
+                    createExperiment=False
                )
                for evalMetric in evals:
                    scoreCol = f"{evalMetric}"
                    reasonCol = f"{evalMetric} Reason"
-
-                    if scoreCol in res.columns:
-                        res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
-                    if reasonCol in res.columns:
-                        res = res.rename(columns={reasonCol: f"{evalMetric}_{i} Reason"})
-
-                    outputColMapping[f"{scoreCol}_{i}"] = outputColName
+                    outputColMapping[scoreCol] = "output"
 
                newCols = [col for col in res.columns if col not in dfWithEvals.columns]
                dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)
            except Exception as e:
-                print(f"Evaluation failed
+                print(f"Evaluation failed: {str(e)}")
+
        return dfWithEvals, outputColMapping
 
    def runDataStream(
@@ -1244,6 +1365,7 @@ class LlumoClient:
        streamName: str,
        queryColName: str = "query",
        createExperiment: bool = False,
+        getDataFrame = False
    ):
 
 
@@ -1362,10 +1484,16 @@ class LlumoClient:
                definationMapping=self.definationMapping,
            ):
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results.")
        else:
-
-
+            if getDataFrame:
+                return working_df
+
+            else:
+                data = working_df.to_dict(orient="records")
+                return data
+            # self.latestDataframe = working_df
+            # return working_df
 
 
    def createExperiment(self, dataframe):
@@ -1375,7 +1503,7 @@ class LlumoClient:
            flag = createPlayground(self.email, self.workspaceID, dataframe)
            if flag:
                print(
-                    "
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
                )
        except Exception as e:
            raise "Some error ocuured please check your API key"
@@ -1415,7 +1543,7 @@ class LlumoClient:
        if createPlayground(self.email, self.workspaceID, df):
 
            print(
-                "
+                "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://llumo.ai/evallm to see the results."
            )
 
            return True
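Most public methods touched in this file (evaluateMultiple, promptSweep, evaluateAgents, evaluateAgentResponses, runDataStream) gain a getDataFrame flag that switches the return type between the two wrappers from chains.py. A short sketch of the intended call pattern, assuming an already constructed client and placeholder data; the eval names must exist in your LLUMO workspace:

    # default: list-of-dicts result (LlumoDictResults)
    res_records = client.evaluateMultiple(rows, evals=["Response Completeness"], getDataFrame=False)

    # DataFrame result (LlumoDataFrameResults)
    res_frame = client.evaluateMultiple(rows, evals=["Response Completeness"], getDataFrame=True)

    # both variants expose insights(), backed by the raw eval logs kept in self.evalData
    res_frame.insights()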
llumo/google.py
ADDED
@@ -0,0 +1,34 @@
+from google import generativeai as _genai
+
+class genai:
+    """
+    Top-level wrapper module to mimic:
+        >>> from google import genai
+        >>> client = genai.Client(api_key=...)
+    """
+
+    class Client:
+        def __init__(self, api_key: str, default_model: str = "gemini-2.5-flash"):
+            _genai.configure(api_key=api_key)
+            self._defaultModel = default_model
+            self._defaultModelInstance = _genai.GenerativeModel(model_name=default_model)
+
+            class Models:
+                def __init__(self, outer):
+                    self._outer = outer
+
+                def generate_content(self, contents: str | list[str], model: str = None, **kwargs):
+                    model_name = model or self._outer._defaultModel
+                    model_instance = _genai.GenerativeModel(model_name=model_name)
+                    return model_instance.generate_content(contents=contents, **kwargs)
+
+            self.models = Models(self)
+
+        def generate(self, prompt: str | list[str], **kwargs):
+            """Convenience shortcut for single-line generation."""
+            return self._defaultModelInstance.generate_content(prompt, **kwargs)
+
+        def setDefaultModel(self, model_name: str):
+            """Change the default model at runtime."""
+            self._defaultModel = model_name
+            self._defaultModelInstance = _genai.GenerativeModel(model_name=model_name)
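This wrapper mirrors the `from google import genai` / `genai.Client(...)` calling style on top of the google-generativeai SDK. A hedged usage sketch with a placeholder key; the model name is only the wrapper's default and depends on what your Google account can access:

    from llumo.google import genai

    gclient = genai.Client(api_key="YOUR_GEMINI_KEY")
    resp = gclient.models.generate_content("Summarize RAG in one sentence.")
    print(resp.text)
    resp2 = gclient.generate("Now in one word.")  # uses the default model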
llumo/helpingFuntions.py
CHANGED
@@ -10,6 +10,7 @@ import os
 import re
 import openai
 import google.generativeai as genai
+from collections import defaultdict
 
 
 from .models import _MODEL_METADATA, AVAILABLEMODELS
@@ -23,8 +24,7 @@ uploadColList = (
 uploadRowList = (
     "https://app.llumo.ai/api/New-Eval-API/new-upload-flow/uploadRowsInDBPlayground"
 )
-createInsightUrl
-getPlaygroundInsightsUrl="https://app.llumo.ai/api/New-Eval-API/insights-api/get-all-playground-insights"
+createInsightUrl="https://app.llumo.ai/api/external/generate-insight-from-eval-for-sdk"
 
 
 def getProcessID():
@@ -229,11 +229,12 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
    }
    allEvals = ['Response Completeness', 'Response Bias', 'Response Harmfulness', 'Input Toxicity', 'Input Harmfulness', 'Context Utilization', 'Relevance Retention', 'Semantic Cohesion', 'Final Task Alignment', 'Tool Reliability', 'Response Correctness', 'Response Toxicity', 'Input Bias', 'Input Relevancy', 'Redundancy Reduction', 'Response Sentiment', 'Tool Selection Accuracy', 'Stepwise Progression', 'Hallucination', 'Faithfulness', 'Answer Relevancy', 'Context Precision', 'Answer Similarity', 'Harmfulness', 'Maliciousness', 'Coherence', 'Answer Correctness', 'Context Recall', 'Context Entity Recall', 'Conciseness', 'customEvalColumn', 'Groundedness', 'Memory Utilization', 'Input Relevancy (Multi-turn)']
 
-
+    evalDependencies = checkDependency(_returnDepMapping=True)
 
    # Create a mapping of column names to unique column IDs
    columnIDMapping = {}
-
+    print("Def Mapping: ")
+    print(definationMapping)
    # Iterate over each column in the dataframe
    for indx, col in enumerate(dataframe.columns):
        # Generate a unique column ID using uuid
@@ -242,46 +243,46 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
        columnIDMapping[col] = columnID
 
 
-        if col.startswith('output') and promptText!=None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # if col.startswith('output') and promptText!=None:
+        #     # For output columns, create the prompt template with promptText
+        #     if promptText:
+        #         # Extract variables from promptText and set them as dependencies
+        #         dependencies = []
+        #
+        #         # Find variables inside {{variable}}
+        #         variables = re.findall(r'{{(.*?)}}', promptText)
+        #
+        #         # Loop through each variable and check if it exists as a column name
+        #         for var in variables:
+        #             varName = var.strip()
+        #             if varName in columnIDMapping:  # Check if the variable is a column name
+        #                 dependencies.append(columnIDMapping[varName])  # Add its columnID
+        #
+        #         # Now update the template for the output column
+        #
+        #         template={
+        #             "provider": "OPENAI",
+        #             "model": "GPT_4o",
+        #             "promptText": promptText,
+        #             "modelOptions": {
+        #                 "temperature": 0,
+        #                 "frequencyPenalty": 0,
+        #                 "presencePenalty": 0,
+        #                 "maxToken": 8192
+        #             },
+        #             "toolConfig": "none",
+        #             "concurrency": "",
+        #             "outputType": "STRING",
+        #             "isPromptSelected": True,
+        #             "isSmartPromptSelected": False,
+        #             "dependency": dependencies,  # Use the dependencies extracted from promptText
+        #             "columnID": columnID,  # Use the generated column ID
+        #             "label": col,
+        #             "type": "PROMPT",
+        #             "order": indx,
+        #         }
+
+        if col.startswith('context') and dataStreamName != None :
            if queryColName and dataStreamName:
                dependencies = []
                dependencies.append(columnIDMapping[queryColName])
@@ -312,9 +313,9 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
 
            dependencies.append(columnIDMapping[outputColName])  # Add the output column ID
 
-            longDef = definationMapping.get(col, {}).get('definition', "")
-            shortDef =definationMapping.get(col, {}).get('briefDefinition', "")
-            enum =
+            longDef = definationMapping.get(col.rsplit("_",1)[0], {}).get('definition', "")
+            shortDef =definationMapping.get(col.rsplit("_",1)[0], {}).get('briefDefinition', "")
+            enum = col.rsplit("_",1)[0].upper().replace(" ","_")
 
            template = {
                "analytics": [
@@ -322,20 +323,23 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                ],
                "evaluationMetric": "ALL",
                "evaluationModel": "LLUMO_EVALLM",
-                "selectPrompt": columnIDMapping[
+                "selectPrompt": None if "output" not in columnIDMapping.keys() else columnIDMapping["output"],
                "scoreCondition": "GREATER_THAN",
                "scoreValue": "50",
                "scoreResult": "PASS",
-                "llmKpi": col,
+                "llmKpi": col.rsplit("_",1)[0],
                "setRules": True,
                "type": "EVAL",
                "evalType": "LLM",
                "similarityMetric": None,
                "embeddingModel": None,
-                "groundTruth": None,
+                "groundTruth": None if "groundTruth" not in columnIDMapping.keys() else columnIDMapping["groundTruth"],
                "dataStream": None,
-                "context":
-                "dependency":
+                "context":None if "context" not in columnIDMapping.keys() else columnIDMapping["context"],
+                "dependency":[ columnIDMapping[dep] for dep in evalDependencies[ col.rsplit("_",1)[0]]],
+                "query": None if "query" not in columnIDMapping.keys() else columnIDMapping["query"],
+                "tools":None if "tools" not in columnIDMapping.keys() else columnIDMapping["tools"],
+                "messageHistory":None if "messageHistory" not in columnIDMapping.keys() else columnIDMapping["messageHistory"],
                "hallucinationFields": {
                    "query": None,
                    "context": None,
@@ -344,7 +348,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
                "definition": longDef,
                "analyticsENUM": enum,
                "prompt": shortDef,
-                "analyticsName": col,
+                "analyticsName": col.rsplit("_",1)[0],
                "columnID": columnID,
                "label": col,
                "order": indx
@@ -452,9 +456,12 @@ def uploadRowsInDBPlayground(payload):
        return None
 
 
-def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None):
+def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,dataStreamName=None,definationMapping=None,outputColName="output",evalOutputMap = None,activePlayground=None):
 
-
+    if activePlayground != None:
+        playgroundId=activePlayground
+    else:
+        playgroundId = str(createEvalPlayground(email=email, workspaceID=workspaceID))
    payload1, payload2 = createColumn(
        workspaceID=workspaceID, dataframe=df, playgroundID=playgroundId, promptText=promptText,queryColName=queryColName,dataStreamName=dataStreamName,definationMapping=definationMapping,outputColName=outputColName,evalOutputMap=evalOutputMap
    )
@@ -472,7 +479,7 @@ def createPlayground(email, workspaceID, df, promptText=None,queryColName=None,d
 
 
 
-def getPlaygroundInsights(
+def getPlaygroundInsights(defination:str,uniqueClassesString: str, reasonList: list):
    headers = {
 
        "Content-Type": "application/json",
@@ -480,48 +487,29 @@ def getPlaygroundInsights(workspaceID: str, activePlayground: str):
 
    # Initial request to generate playground insights
    payload = {
-        "
-        "
+        "uniqueClassesString": uniqueClassesString,
+        "reasonList": reasonList,
+        "definition": defination,
    }
 
    urlGenerate = createInsightUrl
-
-
-
-    if responseGenerate.status_code == 200:
-        responseJson = responseGenerate.json()
-
-        insightStatus = responseJson.get("data", {}).get("insight", False)
+    try:
+        responseGenerate = requests.post(urlGenerate, json=payload, headers=headers)
 
-        if
-
-        urlGetAll = getPlaygroundInsightsUrl
+        if responseGenerate.status_code == 200:
+            responseJson = responseGenerate.json()
 
-
-
-
-
-
-
-
-        # Extract insight and solution
-        insights = []
-        for item in data:
-            insight = item.get("insight", "")
-            solution = item.get("solution", "")
-            insights.append({"insight": insight, "solution": solution})
-
-        return insights
-    else:
-        print(f"Error fetching all insights: {responseGetAll.status_code} - {responseGetAll.text}")
-        return None
-    else:
-        print("No insight generated.")
-        return None
+            filteredResponse = {key: value for key, value in responseJson.items() if key in ['analysis', 'nextStep']}
+
+            return filteredResponse
+    except Exception as e:
+        print(f"Exception occurred while generating insight: {e}")
+        return None
+
    else:
        print(f"Error generating insight: {responseGenerate.status_code} - {responseGenerate.text}")
        return None
-def checkDependency(selectedEval, columns,tocheck=True):
+def checkDependency(selectedEval:list = [], columns:list = [],tocheck=True,_returnDepMapping = False):
    """
    Checks if all the required input columns for the selected evaluation metric are present.
 
@@ -532,33 +520,35 @@ def checkDependency(selectedEval, columns,tocheck=True):
    Raises:
        - LlumoAIError.dependencyError: If any required column is missing.
    """
-    if tocheck:
    # Define required dependencies for each evaluation metric
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    metricDependencies = {
+        'Response Completeness': ['context', 'query', 'output'],
+        'Response Bias': ['output'],
+        'Response Harmfulness': ['output'],
+        'Input Toxicity': ['query'],
+        'Input Harmfulness': ['query'],
+        'Context Utilization': ['output', 'context'],
+        'Relevance Retention': ['context', 'query'],
+        'Semantic Cohesion': ['context'],
+        'Final Task Alignment': ['messageHistory'],
+        'Tool Reliability': ['messageHistory'],
+        'Response Correctness': ['output', 'query', 'context'],
+        'Response Toxicity': ['output'],
+        'Input Bias': ['query'],
+        'Input Relevancy': ['context', 'query'],
+        'Redundancy Reduction': ['context'],
+        'Response Sentiment': ['output'],
+        'Tool Selection Accuracy': ['tools', 'messageHistory'],
+        'Stepwise Progression': ['tools', 'messageHistory'],
+        'Hallucination': ['query', 'context', 'output'],
+        'Groundedness': ['groundTruth', 'output'],
+        'Memory Utilization': ['context', 'messageHistory'],
+        'Input Relevancy (Multi-turn)': ['context', 'query']
+    }
+    if _returnDepMapping == True:
+        return metricDependencies
 
+    if tocheck == True:
        # Check if the selected evaluation metric is known
        if selectedEval not in metricDependencies:
            return {"status": False,"message":f"Unknown evaluation metric: {selectedEval}"}
@@ -646,8 +636,44 @@ def validateOpenaiKey(api_key):
 def validateGoogleKey(api_key):
    try:
        genai.configure(api_key=api_key)
-        _ = genai.GenerativeModel("gemini-2.0").generate_content("test")
+        _ = genai.GenerativeModel("gemini-2.0-flash-lite").generate_content("test")
    except Exception as e:
        if "PERMISSION_DENIED" in str(e) or "API key not valid" in str(e):
            raise ValueError("❌ Invalid Google API key.")
        raise RuntimeError(f"⚠️ Error validating Gemini key: {e}")
+
+def groupLogsByClass(logs, max_logs=2):
+    # Initialize the final result structures (no defaultdict)
+    groupedLogs = {}
+    uniqueEdgeCases = {}  # This will store unique edge cases for each eval_name
+
+    # Iterate through the logs
+    for log in logs:
+        log_details = list(log.values())[0]  # Get the details dictionary
+        eval_name = log_details.get("kpi", "unmarked")
+        edge_case = log_details.get("edgeCase", "unmarked")
+        reasoning = log_details.get("reasoning", "")
+
+        if eval_name != "unmarked" and edge_case != "unmarked":
+            # Ensure that the eval_name and edge_case exist in the dictionary
+            if eval_name not in groupedLogs:
+                groupedLogs[eval_name] = {}
+                uniqueEdgeCases[eval_name] = set()  # Initialize the set for unique edge cases
+
+            if edge_case not in groupedLogs[eval_name]:
+                groupedLogs[eval_name][edge_case] = []
+
+            # Append the reasoning to the correct place
+            groupedLogs[eval_name][edge_case].append(reasoning)
+            uniqueEdgeCases[eval_name].add(edge_case)  # Add the edge case to the set
+
+    # Limit the number of reasons to max_logs
+    for eval_name in groupedLogs:
+        for edge_case in groupedLogs[eval_name]:
+            groupedLogs[eval_name][edge_case] = groupedLogs[eval_name][edge_case][:max_logs]
+
+    # Convert the set of unique edge cases to a list for easier reading
+    for eval_name in uniqueEdgeCases:
+        uniqueEdgeCases[eval_name] = list(uniqueEdgeCases[eval_name])
+
+    return groupedLogs, uniqueEdgeCases
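checkDependency() now doubles as the source of the metric-to-required-columns table (via _returnDepMapping), and groupLogsByClass() is what feeds insights() in chains.py. A small sketch of how these helpers appear to be intended to be called, using a metric name taken from the table above:

    # full mapping of eval metric -> required input columns
    depMap = checkDependency(_returnDepMapping=True)
    print(depMap["Hallucination"])   # ['query', 'context', 'output']

    # validate a single metric against the columns actually present in the data
    check = checkDependency("Hallucination", columns=["query", "context", "output"])
    print(check["status"])           # False plus a message when a required column is missing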
llumo/openai.py
ADDED
@@ -0,0 +1,79 @@
+from openai import OpenAI as OpenAIClient
+from .client import LlumoClient
+
+# Dummy evaluation function that uses LlumoClient
+def evaluate_multiple(data, api_key=None,evals=["Response Correctness"]):
+    client = LlumoClient(api_key=api_key)
+    results= client.evaluateMultiple(data, evals=evals,createExperiment=False,prompt_template="Give answer to the query: {{query}}, using context: {{context}}",getDataFrame=False)
+    print(results)
+    return results
+
+# Wrapper around ChatCompletion to allow custom fields like `.evaluation`
+class ChatCompletionWithEval:
+    def __init__(self, response, evaluation):
+        self._response = response
+        self.evaluation = evaluation
+
+    def __getattr__(self, name):
+        return getattr(self._response, name)
+
+    def __getitem__(self, key):
+        return self._response[key]
+
+    def __repr__(self):
+        return repr(self._response)
+
+class openai(OpenAIClient):
+    def __init__(self, api_key: str):
+        super().__init__(api_key=api_key)
+
+        original_create = self.chat.completions.create
+
+        class ChatCompletionsWrapper:
+            @staticmethod
+            def create(*args, **kwargs):
+                context = kwargs.pop("context", None)
+                evals = kwargs.pop("evals", [])
+                llumo_key = kwargs.pop("llumo_key", None)
+
+                messages = kwargs.get("messages", [])
+                user_message = next(
+                    (m.get("content") for m in reversed(messages) if m.get("role") == "user"),
+                    "",
+                )
+
+                # If context is None or empty or whitespace-only, set it to user_message
+                if not context or context.strip() == "":
+                    context = user_message
+
+                response = original_create(*args, **kwargs)
+
+                try:
+                    output_text = response.choices[0].message.content
+                except Exception:
+                    output_text = ""
+
+                eval_input = [{
+                    "query": user_message,
+                    "context": context,
+                    "output": output_text,
+                }]
+
+                # Safely call evaluate_multiple, if error return None
+                evaluation = None
+                try:
+                    evaluation = evaluate_multiple(eval_input, api_key=llumo_key,evals=evals)
+                except Exception as e:
+                    # You can optionally log the error here if you want
+                    # print(f"Evaluation failed, skipping: {e}")
+                    evaluation = None
+
+                # If evaluation is None, just return normal response
+                if evaluation is None:
+                    print("All server are busy for evaluation ")
+                    return response
+
+                # Otherwise wrap with evaluation attached
+                return ChatCompletionWithEval(response, evaluation)
+
+        self.chat.completions.create = ChatCompletionsWrapper.create
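The class above acts as a drop-in replacement for the OpenAI client: it strips the extra evals, llumo_key, and context keyword arguments, runs a LLUMO evaluation on the completion, and attaches the result as .evaluation when it succeeds. A usage sketch with placeholder keys and an illustrative model name:

    from llumo.openai import openai

    client = openai(api_key="YOUR_OPENAI_KEY")
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "What is RAG?"}],
        evals=["Response Correctness"],   # extra kwargs consumed by the wrapper
        llumo_key="YOUR_LLUMO_KEY",
    )
    print(resp.choices[0].message.content)
    print(resp.evaluation)  # present only when the LLUMO evaluation succeeded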
llumo-0.2.16b1.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+llumo/__init__.py,sha256=YVBkF1fiXFBd_zzySi9BDWgX8MJuLBJ-oF8538MrnDU,256
+llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+llumo/client.py,sha256=rOTbw8QGi5CnQ77QKS4rKh-dSBSVoyVAORrK1i_b5EQ,60339
+llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
+llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/google.py,sha256=5AVAqxPN20UuHIqi4yuHHSTf49LI96krtbztJ5qt8L0,1413
+llumo/helpingFuntions.py,sha256=0W2JNdLyOV92lgESgB_JyJmOUvW5ooRdZyjN5LKDSX0,25296
+llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+llumo/openai.py,sha256=BEmsOdHiQzDpKv6b4L62JaUMq7DbpICNPqyfMNRWi2I,2981
+llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
+llumo-0.2.16b1.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.16b1.dist-info/METADATA,sha256=_e94VIPrn02CP0X9gdkICA210Te_inzaSPcfH0p-Hlk,1521
+llumo-0.2.16b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.16b1.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.16b1.dist-info/RECORD,,
llumo-0.2.15b2.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-llumo/__init__.py,sha256=O04b4yW1BnOvcHzxWFddAKhtdBEhBNhLdb6xgnpHH_Q,205
-llumo/client.py,sha256=60RSxhk-9wzK9KgBz8dfbUd3-AaKiljxqbHI5UL8GIw,54021
-llumo/exceptions.py,sha256=Vp_MnanHbnd1Yjuoi6WLrKiwwZbJL3znCox2URMmGU4,2032
-llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/helpingFuntions.py,sha256=BZfUIgTO0PJchppHn0wDRF1wcYSuMST5ry95HBPN5SQ,23534
-llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
-llumo/sockets.py,sha256=I2JO_eNEctRo_ikgvFVp5zDd-m0VDu04IEUhhsa1Tic,5950
-llumo-0.2.15b2.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.15b2.dist-info/METADATA,sha256=vbXwSwhuxnO0CSMz4uJ45AepuwVMl7irZlHmYkqRYbY,1521
-llumo-0.2.15b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.15b2.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.15b2.dist-info/RECORD,,
{llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/WHEEL
File without changes
{llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/licenses/LICENSE
File without changes
{llumo-0.2.15b2.dist-info → llumo-0.2.16b1.dist-info}/top_level.txt
File without changes