llumo 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- llumo/__init__.py +1 -1
- llumo/callback.py +1 -1
- llumo/client.py +362 -207
- llumo/llumoLogger.py +1 -1
- llumo/llumoSessionContext.py +3 -1
- llumo/openai.py +45 -45
- {llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/METADATA +1 -1
- llumo-0.2.27.dist-info/RECORD +20 -0
- llumo-0.2.25.dist-info/RECORD +0 -20
- {llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/WHEEL +0 -0
- {llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/top_level.txt +0 -0
llumo/__init__.py
CHANGED
llumo/callback.py
CHANGED
@@ -4,7 +4,7 @@ from langchain_core.messages import BaseMessage
 from langchain_core.outputs import LLMResult
 from langchain_core.agents import AgentAction, AgentFinish
 import json
-from llumo.llumoLogger import
+from llumo.llumoLogger import LlumoLogger
 from llumo.llumoSessionContext import LlumoSessionContext
 import time
 import re
llumo/client.py
CHANGED
@@ -16,11 +16,11 @@ from .exceptions import LlumoAIError
 from .helpingFuntions import *
 from .sockets import LlumoSocketClient
 from .functionCalling import LlumoAgentExecutor
-from .chains import LlumoDataFrameResults,LlumoDictResults
+from .chains import LlumoDataFrameResults, LlumoDictResults
 import threading
 from tqdm import tqdm

-pd.set_option(
+pd.set_option("future.no_silent_downcasting", True)

 postUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/process-playground"
@@ -39,10 +39,11 @@ socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"

 class LlumoClient:

-    def __init__(self, api_key):
+    def __init__(self, api_key, playground_id=None):
         self.apiKey = api_key
-        self.
-        self.
+        self.playgroundID = playground_id
+        self.evalData = []
+        self.evals = []
         self.processMapping = {}
         self.definationMapping = {}

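The constructor now takes an optional playground ID and pre-initializes the evaluation state that later methods append to. A minimal usage sketch (the import path matches the package layout; the key and playground values are illustrative):

from llumo.client import LlumoClient

# playground_id is optional; evaluateMultiple() below uses it as the
# active playground instead of creating or deriving one per call.
client = LlumoClient(api_key="YOUR_LLUMO_API_KEY", playground_id="pg-1234")
assert client.evalData == [] and client.evals == []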
@@ -54,7 +55,7 @@ class LlumoClient:
         reqBody = {"analytics": [evalName]}

         try:
-
+
             response = requests.post(url=validateUrl, json=reqBody, headers=headers)

         except requests.exceptions.RequestException as e:
@@ -99,20 +100,90 @@ class LlumoClient:
             )
             self.email = data["data"]["data"].get("email", None)

-            self.definationMapping[evalName] =
-
-
-
-
-
-            self.
-
-
-
-
-
-
-
+            self.definationMapping[evalName] = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+            )
+            self.categories = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("categories", {})
+            )
+            self.evaluationStrictness = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("evaluationStrictness", {})
+            )
+            self.grammarCheckOutput = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("grammarCheckOutput", {})
+            )
+            self.insightsLength = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("insightsLength", {})
+            )
+            self.insightsLevel = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("insightsLevel", {})
+            )
+            self.executionDependency = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("executionDependency", {})
+            )
+            self.sampleData = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("sampleData", {})
+            )
+            self.numJudges = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("numJudges", {})
+            )
+            self.penaltyBonusInstructions = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("penaltyBonusInstructions", [])
+            )
+            self.probableEdgeCases = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("probableEdgeCases", [])
+            )
+            self.fieldMapping = (
+                data.get("data", {})
+                .get("data", {})
+                .get("analyticsMapping", {})
+                .get(evalName, "")
+                .get("fieldMapping", [])
+            )
+

         except Exception as e:
             # print(f"Error extracting data from response: {str(e)}")
             raise LlumoAIError.UnexpectedError(detail=evalName)
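All twelve attributes repeat the same four-level descent into the validation response. A sketch of how the repetition could be collapsed (illustrative only, not part of the package):

# Hypothetical refactor: resolve the per-eval mapping once, then read fields.
mapping = (
    data.get("data", {})
    .get("data", {})
    .get("analyticsMapping", {})
    .get(evalName, {})
)
self.categories = mapping.get("categories", {})
self.numJudges = mapping.get("numJudges", {})

Note that the shipped code defaults the evalName lookup to "" rather than {}; if that default is ever hit, the chained .get() call that follows it would raise AttributeError on a string.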
@@ -548,7 +619,7 @@ class LlumoClient:
                     "playgroundID": activePlayground,
                 }

-                rowIdMapping[f
+                rowIdMapping[f"{rowID}-{columnID}-{columnID}"] = index
                 # print("__________________________TEMPLATE__________________________________")
                 # print(templateData)

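The compound key written here is the same rowID-columnID-columnID string that the socket listener later reports results under, which is how each result is matched back to its dataframe row. A small sketch of the round trip (values are hypothetical):

rowIdMapping = {}
key = f"{rowID}-{columnID}-{columnID}"  # key format used above
rowIdMapping[key] = index               # remember the dataframe row
# ...later, when a result arrives under the same key:
row_index = rowIdMapping[key]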
@@ -628,15 +699,15 @@ class LlumoClient:
     def evaluateMultiple(
         self,
         data,
-        evals: list,
-        session,  # Add session parameter
+        evals: list,
         prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
         outputColName="output",
         createExperiment: bool = False,
-        getDataFrame:bool =False,
-        playgroundID: str = None,
+        getDataFrame: bool = False,
         _tocheck=True,
     ):
+        if hasattr(self, "startLlumoRun"):
+            self.startLlumoRun(runName="evaluateMultiple")
         if isinstance(data, dict):
             data = [data]
         elif not isinstance(data, list):
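With the session and playgroundID parameters gone, callers pass only data and eval names; the active playground now comes from the client itself (self.playgroundID, used below). A hedged call sketch against the new signature:

results = client.evaluateMultiple(
    data=[{"query": "What is RAG?", "context": "...", "output": "..."}],
    evals=["Response Correctness"],
    getDataFrame=False,  # True returns LlumoDataFrameResults instead of LlumoDictResults
)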
@@ -649,10 +720,10 @@ class LlumoClient:
         try:
             socketID = self.socket.connect(timeout=250)
         except Exception as e:
-            socketID="DummySocketID"
-
-        self.evalData=[]
-        self.evals=evals
+            socketID = "DummySocketID"
+
+        self.evalData = []
+        self.evals = evals
         self.allBatches = []
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))

@@ -681,21 +752,18 @@ class LlumoClient:
             )
             listener_thread.start()
             self.validateApiKey(evalName=evals[0])
-
-            if playgroundID:
-                activePlayground = playgroundID
-            else:
-                activePlayground = str(createEvalPlayground(email=self.email, workspaceID=self.workspaceID))
-        else:
-            activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+        activePlayground = self.playgroundID
         for evalName in evals:
             # print(f"\n======= Running evaluation for: {evalName} =======")

             # Validate API and dependencies
             self.validateApiKey(evalName=evalName)
-            customAnalytics=getCustomAnalytics(self.workspaceID)
+            customAnalytics = getCustomAnalytics(self.workspaceID)
             metricDependencies = checkDependency(
-                evalName,
+                evalName,
+                list(dataframe.columns),
+                tocheck=_tocheck,
+                customevals=customAnalytics,
             )
             if not metricDependencies["status"]:
                 raise LlumoAIError.dependencyError(metricDependencies["message"])
@@ -706,15 +774,14 @@ class LlumoClient:
             evalType = "LLM"
             workspaceID = self.workspaceID
             email = self.email
-            categories=self.categories
-            evaluationStrictness=self.evaluationStrictness
-            grammarCheckOutput=self.grammarCheckOutput
-            insightLength=self.insightsLength
-            numJudges=self.numJudges
-            penaltyBonusInstructions=self.penaltyBonusInstructions
-            probableEdgeCases=self.probableEdgeCases
-            fieldMapping=self.fieldMapping
-
+            categories = self.categories
+            evaluationStrictness = self.evaluationStrictness
+            grammarCheckOutput = self.grammarCheckOutput
+            insightLength = self.insightsLength
+            numJudges = self.numJudges
+            penaltyBonusInstructions = self.penaltyBonusInstructions
+            probableEdgeCases = self.probableEdgeCases
+            fieldMapping = self.fieldMapping

             userHits = checkUserHits(
                 self.workspaceID,
@@ -746,7 +813,6 @@ class LlumoClient:
                 output = row.get(outputColName, "")
                 intermediateSteps = row.get("intermediateSteps", "")

-
                 rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
                 columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")

@@ -776,7 +842,7 @@ class LlumoClient:
                         "MessageHistory": messageHistory,
                         "IntermediateSteps": intermediateSteps,
                     },
-                    "categories":categories,
+                    "categories": categories,
                     "evaluationStrictness": evaluationStrictness,
                     "grammarCheckOutput": grammarCheckOutput,
                     "insightLength": insightLength,
@@ -788,7 +854,7 @@ class LlumoClient:
                     },
                     "type": "EVAL",
                     "kpi": evalName,
-                    "fieldMappig":fieldMapping,
+                    "fieldMappig": fieldMapping,
                 }

                 query = ""
@@ -848,25 +914,23 @@ class LlumoClient:
         time.sleep(3)
         listener_thread.join()

-
         rawResults = self.socket.getReceivedData()
-
+
         # print("data from db #####################",dataFromDb)
         # Fix here: keep full keys, do not split keys
         receivedRowIDs = {key for item in rawResults for key in item.keys()}
         expectedRowIDs = set(rowIdMapping.keys())
-        missingRowIDs = expectedRowIDs - receivedRowIDs
+        missingRowIDs = expectedRowIDs - receivedRowIDs
         # print("All expected keys:", expected_rowIDs)
         # print("All received keys:", received_rowIDs)
         # print("Missing keys:", len(missingRowIDs))
-        missingRowIDs=list(missingRowIDs)
+        missingRowIDs = list(missingRowIDs)
         if len(missingRowIDs) > 0:
-            dataFromDb=fetchData(workspaceID,activePlayground,missingRowIDs)
+            dataFromDb = fetchData(workspaceID, activePlayground, missingRowIDs)
             rawResults.extend(dataFromDb)
-
-
+
         self.evalData = rawResults
-
+
         # Initialize dataframe columns for each eval
         for eval in evals:
             dataframe[eval] = None
@@ -882,10 +946,10 @@ class LlumoClient:
                     dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")

                     # Log the evaluation step
-                    if
+                    if hasattr(self, "logEvalStep"):
                         try:
                             start_time = time.time()
-
+                            self.logEvalStep(
                                 stepName=f"EVAL-{evalName}",
                                 output=value.get("value"),
                                 context=row.get("context", ""),
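The hasattr guard makes the run-logging hooks opt-in: a bare LlumoClient silently skips them, while a subclass that defines logEvalStep (such as the session context further down, which now inherits from LlumoClient) gets per-eval logging with no changes here. A minimal, self-contained sketch of the pattern:

class Base:
    def run(self):
        if hasattr(self, "logEvalStep"):  # hook fires only if a subclass defines it
            self.logEvalStep("EVAL-demo")

class WithLogging(Base):
    def logEvalStep(self, name):
        print(f"logged {name}")

Base().run()         # no output; the guard short-circuits
WithLogging().run()  # prints "logged EVAL-demo"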
@@ -906,13 +970,14 @@ class LlumoClient:
                             print(f"Error logging eval step: {e}")

         self.socket.disconnect()
-
+        if hasattr(self, "endLlumoRun"):
+            self.endLlumoRun()

         if createExperiment:
             pd.set_option("future.no_silent_downcasting", True)
             # df = dataframe.fillna("Some error occured").astype(object)
             with warnings.catch_warnings():
-                warnings.simplefilter(action=
+                warnings.simplefilter(action="ignore", category=FutureWarning)
                 df = dataframe.fillna("Some error occurred").astype(str)

                 df = dataframe.fillna("Some error occured").infer_objects(copy=False)
@@ -923,42 +988,60 @@ class LlumoClient:
                     promptText=prompt_template,
                     definationMapping=self.definationMapping,
                     outputColName=outputColName,
-                    activePlayground=
-                    customAnalytics=customAnalytics
+                    activePlayground=activePlayground,
+                    customAnalytics=customAnalytics,
                 ):
                     print(
                         "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
                     )
             if getDataFrame:
-                return LlumoDataFrameResults(
+                return LlumoDataFrameResults(
+                    dataframe,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )
             else:
-                data=dataframe.to_dict(orient="records")
-                return LlumoDictResults(
+                data = dataframe.to_dict(orient="records")
+                return LlumoDictResults(
+                    data,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

         else:
             if getDataFrame:
-                return LlumoDataFrameResults(
+                return LlumoDataFrameResults(
+                    dataframe,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )
             else:
-                data=dataframe.to_dict(orient="records")
-                return LlumoDictResults(
+                data = dataframe.to_dict(orient="records")
+                return LlumoDictResults(
+                    data,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

-
     def promptSweep(
         self,
         templates: List[str],
         data,
         model_aliases: List[AVAILABLEMODELS],
-        apiKey: str,
         evals=["Response Correctness"],
         toEvaluate: bool = False,
         createExperiment: bool = False,
-        getDataFrame=False
+        getDataFrame=False,
     ) -> pd.DataFrame:
         if isinstance(data, dict):
-
-
+            data = [data]
+        # Check if data is now a list of dictionaries
         if isinstance(data, list) and all(isinstance(item, dict) for item in data):
-            working_df= pd.DataFrame(data).astype(str)
+            working_df = pd.DataFrame(data).astype(str)
         else:
             raise ValueError("Data must be a dictionary or a list of dictionaries.")
         modelStatus = validateModels(model_aliases=model_aliases)
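promptSweep no longer takes a separate apiKey argument; it reuses the key the client was constructed with (the ModelExecutor(self.apiKey) change in the next hunk). A hedged call sketch; the enum member name is illustrative:

from llumo.models import AVAILABLEMODELS  # assumed home of the model aliases

sweep = client.promptSweep(
    templates=["Answer the query: {{query}}"],
    data=[{"query": "What is RAG?"}],
    model_aliases=[AVAILABLEMODELS.GPT_4o],  # illustrative member name
    toEvaluate=True,
)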
@@ -968,10 +1051,10 @@ class LlumoClient:
         self.validateApiKey()
         workspaceID = self.workspaceID
         email = self.email
-        executor = ModelExecutor(apiKey)
+        executor = ModelExecutor(self.apiKey)
         prompt_template = templates[0]
-
-        working_df = self._outputForStream(working_df, model_aliases, prompt_template
+
+        working_df = self._outputForStream(working_df, model_aliases, prompt_template)

         # Optional evaluation
         outputEvalMapping = None
@@ -985,40 +1068,49 @@ class LlumoClient:
             if not metricDependencies["status"]:
                 raise LlumoAIError.dependencyError(metricDependencies["message"])

-            working_df, outputEvalMapping = self._evaluateForStream(
+            working_df, outputEvalMapping = self._evaluateForStream(
+                working_df, evals, model_aliases, prompt_template, generateOutput=True
+            )
         if createExperiment:
             # df = working_df.fillna("Some error occured").astype(object)
             with warnings.catch_warnings():
-                warnings.simplefilter(action=
+                warnings.simplefilter(action="ignore", category=FutureWarning)
                 df = working_df.fillna("Some error occurred").astype(str)
             if createPlayground(
-
-
-
-
+                email,
+                workspaceID,
+                df,
+                promptText=prompt_template,
+                definationMapping=self.definationMapping,
+                evalOutputMap=outputEvalMapping,
             ):
                 print(
-                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
-
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
+                )
+
         else:
             if getDataFrame == True and toEvaluate == True:
-                return LlumoDataFrameResults(
-
+                return LlumoDataFrameResults(
+                    working_df,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

             elif getDataFrame == False and toEvaluate == True:
                 data = working_df.to_dict(orient="records")
-                return LlumoDictResults(
-
+                return LlumoDictResults(
+                    data,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

-            elif getDataFrame== True and toEvaluate == False:
+            elif getDataFrame == True and toEvaluate == False:
                 return working_df

-            elif getDataFrame == False and toEvaluate == False
-                return working_df.to_dict(orient
-
-
-
+            elif getDataFrame == False and toEvaluate == False:
+                return working_df.to_dict(orient="records")

     # this function generates an output using llm and tools and evaluate that output
     def evaluateAgents(
@@ -1030,8 +1122,7 @@ class LlumoClient:
         evals=["Final Task Alignment"],
         prompt_template="Give answer for the given query: {{query}}",
         createExperiment: bool = False,
-        getDataFrame:bool = False
-
+        getDataFrame: bool = False,
     ):
         if isinstance(data, dict):
             data = [data]
@@ -1062,8 +1153,7 @@ class LlumoClient:
             evals=evals,
             prompt_template=prompt_template,
             createExperiment=createExperiment,
-            getDataFrame=getDataFrame
-
+            getDataFrame=getDataFrame,
         )

         return toolResponseDf
@@ -1083,9 +1173,8 @@ class LlumoClient:
         data,
         evals=["Final Task Alignment"],
         createExperiment: bool = False,
-        getDataFrame
-        outputColName="output"
-
+        getDataFrame=False,
+        outputColName="output",
     ):
         if isinstance(data, dict):
             data = [data]
@@ -1112,8 +1201,7 @@ class LlumoClient:
                 prompt_template="Give answer for the given query: {{query}}",
                 outputColName=outputColName,
                 createExperiment=createExperiment,
-                getDataFrame
-
+                getDataFrame=getDataFrame,
             )
             if createExperiment:
                 pass
@@ -1124,18 +1212,17 @@ class LlumoClient:
             raise e

     def ragSweep(
-
-
-
-
-
-
-
-
-
-
-        getDataFrame = False
+        self,
+        data,
+        streamName: str,
+        queryColName: str = "query",
+        createExperiment: bool = False,
+        modelAliases=[],
+        prompt_template="Give answer to the given: {{query}} using the context:{{context}}",
+        evals=["Context Utilization"],
+        toEvaluate=False,
+        generateOutput=True,
+        getDataFrame=False,
     ):
         if isinstance(data, dict):
             data = [data]
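The ragSweep signature is now spelled out one parameter per line. A hedged call sketch (stream name and alias are illustrative; modelAliases and a valid client API key are required whenever generateOutput is True, as the validation below enforces):

result = client.ragSweep(
    data=[{"query": "What is RAG?"}],
    streamName="my-knowledge-base",         # illustrative stream name
    modelAliases=[AVAILABLEMODELS.GPT_4o],  # illustrative member, as above
    toEvaluate=True,
    generateOutput=True,
    getDataFrame=False,
)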
@@ -1145,13 +1232,21 @@ class LlumoClient:
         # Validate required parameters
         if generateOutput:
             if not modelAliases:
-                raise ValueError(
-
-
+                raise ValueError(
+                    "Model aliases must be provided when generateOutput is True."
+                )
+            if (
+                not self.apiKey
+                or not isinstance(self.apiKey, str)
+                or self.apiKey.strip() == ""
+            ):
+                raise ValueError(
+                    "Valid API key must be provided when generateOutput is True."
+                )

         modelStatus = validateModels(model_aliases=modelAliases)
-        if modelStatus["status"]== False:
-            if len(modelAliases) ==
+        if modelStatus["status"] == False:
+            if len(modelAliases) == 0:
                 raise LlumoAIError.providerError("No model selected.")
             else:
                 raise LlumoAIError.providerError(modelStatus["message"])
@@ -1165,7 +1260,7 @@ class LlumoClient:
         try:
             socketID = self.socket.connect(timeout=150)
         except Exception as e:
-            socketID="DummySocketID"
+            socketID = "DummySocketID"
         # waited_secs = 0
         # while not self.socket._connection_established.is_set():
         #     time.sleep(0.1)
@@ -1177,8 +1272,12 @@ class LlumoClient:

         # Check user credits
         userHits = checkUserHits(
-            self.workspaceID,
-            self.
+            self.workspaceID,
+            self.hasSubscribed,
+            self.trialEndDate,
+            self.subscriptionEndDate,
+            self.hitsAvailable,
+            len(working_df),
         )
         if not userHits["success"]:
             raise LlumoAIError.InsufficientCredits(userHits["message"])
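The credit check now passes the full subscription state plus the number of rows about to be processed. From this call site alone, checkUserHits' positional parameters can be inferred; a hedged reconstruction, not the actual definition:

# Shape inferred from the call site only; the real definition lives in
# helpingFuntions.py and may differ. Parameter names are guesses.
def checkUserHits(workspaceID, hasSubscribed, trialEndDate,
                  subscriptionEndDate, hitsAvailable, rowCount):
    """Returns {"success": bool, "message": str}; the caller raises
    LlumoAIError.InsufficientCredits when success is False."""
    ...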
@@ -1204,7 +1303,7 @@ class LlumoClient:
                 "inactivity_timeout": 10,
                 "expected_results": expectedResults,
             },
-            daemon=True
+            daemon=True,
         )
         listener_thread.start()

@@ -1233,7 +1332,13 @@ class LlumoClient:
                 self.allBatches.append(currentBatch)
                 currentBatch = []

-        for batch in tqdm(
+        for batch in tqdm(
+            self.allBatches,
+            desc="Processing Batches",
+            unit="batch",
+            colour="magenta",
+            ncols=80,
+        ):
             try:
                 self.postDataStream(batch=batch, workspaceID=workspaceID)
                 time.sleep(3)
@@ -1262,7 +1367,9 @@ class LlumoClient:

         # Output generation
         if generateOutput == True:
-            working_df = self._outputForStream(
+            working_df = self._outputForStream(
+                working_df, modelAliases, prompt_template
+            )

         # Optional evaluation
         outputEvalMapping = None
@@ -1276,58 +1383,78 @@ class LlumoClient:
             if not metricDependencies["status"]:
                 raise LlumoAIError.dependencyError(metricDependencies["message"])

-            working_df, outputEvalMapping = self._evaluateForStream(
+            working_df, outputEvalMapping = self._evaluateForStream(
+                working_df, evals, modelAliases, prompt_template, generateOutput
+            )

-
         self.socket.disconnect()
         # Create experiment if required
         if createExperiment:
             # df = working_df.fillna("Some error occured").astype(object)
             with warnings.catch_warnings():
-                warnings.simplefilter(action=
+                warnings.simplefilter(action="ignore", category=FutureWarning)
                 df = working_df.fillna("Some error occurred").astype(str)
             if createPlayground(
-
-
-
-
-
-
+                email,
+                workspaceID,
+                df,
+                queryColName=queryColName,
+                dataStreamName=streamId,
+                promptText=prompt_template,
+                definationMapping=self.definationMapping,
+                evalOutputMap=outputEvalMapping,
             ):
                 print(
-                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
+                )
             if getDataFrame == True and toEvaluate == True:
-                return LlumoDataFrameResults(
-
+                return LlumoDataFrameResults(
+                    working_df,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

             elif getDataFrame == False and toEvaluate == True:
                 data = working_df.to_dict(orient="records")
-                return LlumoDictResults(
-
+                return LlumoDictResults(
+                    data,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

-            elif getDataFrame== True and toEvaluate == False:
+            elif getDataFrame == True and toEvaluate == False:
                 return working_df

-            elif getDataFrame == False and toEvaluate == False
-                return working_df.to_dict(orient
+            elif getDataFrame == False and toEvaluate == False:
+                return working_df.to_dict(orient="records")
         else:
             if getDataFrame == True and toEvaluate == True:
-                return LlumoDataFrameResults(
-
+                return LlumoDataFrameResults(
+                    working_df,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

             elif getDataFrame == False and toEvaluate == True:
                 data = working_df.to_dict(orient="records")
-                return LlumoDictResults(
-
+                return LlumoDictResults(
+                    data,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )

-            elif getDataFrame== True and toEvaluate == False:
+            elif getDataFrame == True and toEvaluate == False:
                 return working_df

-            elif getDataFrame == False and toEvaluate == False
-                return working_df.to_dict(orient
+            elif getDataFrame == False and toEvaluate == False:
+                return working_df.to_dict(orient="records")

-    def _outputForStream(self, df, modelAliases, prompt_template
-        executor = ModelExecutor(apiKey)
+    def _outputForStream(self, df, modelAliases, prompt_template):
+        executor = ModelExecutor(self.apiKey)

         for indx, row in df.iterrows():
             inputVariables = re.findall(r"{{(.*?)}}", prompt_template)
@@ -1340,21 +1467,25 @@ class LlumoClient:

                 provider = getProviderFromModel(model)
                 if provider == Provider.OPENAI:
-                    validateOpenaiKey(apiKey)
+                    validateOpenaiKey(self.apiKey)
                 elif provider == Provider.GOOGLE:
-                    validateGoogleKey(apiKey)
+                    validateGoogleKey(self.apiKey)

-                filled_template = getInputPopulatedPrompt(
-
+                filled_template = getInputPopulatedPrompt(
+                    prompt_template, inputDict
+                )
+                response = executor.execute(provider, model.value, filled_template)
                 df.at[indx, f"output_{i}"] = response
-
+
             except Exception as e:
                 # df.at[indx, f"output_{i}"] = str(e)
                 raise e

         return df

-    def _evaluateForStream(
+    def _evaluateForStream(
+        self, df, evals, modelAliases, prompt_template, generateOutput
+    ):
         dfWithEvals = df.copy()
         outputColMapping = {}

@@ -1370,7 +1501,7 @@ class LlumoClient:
                     outputColName=outputColName,
                     _tocheck=False,
                     getDataFrame=True,
-                    createExperiment=False
+                    createExperiment=False,
                 )

                 for evalMetric in evals:
@@ -1379,11 +1510,15 @@ class LlumoClient:
                     if scoreCol in res.columns:
                         res = res.rename(columns={scoreCol: f"{scoreCol}_{i}"})
                     if reasonCol in res.columns:
-                        res = res.rename(
+                        res = res.rename(
+                            columns={reasonCol: f"{evalMetric}_{i} Reason"}
+                        )

                     outputColMapping[f"{scoreCol}_{i}"] = outputColName

-                newCols = [
+                newCols = [
+                    col for col in res.columns if col not in dfWithEvals.columns
+                ]
                 dfWithEvals = pd.concat([dfWithEvals, res[newCols]], axis=1)

             except Exception as e:
@@ -1400,7 +1535,7 @@ class LlumoClient:
                 outputColName=outputColName,
                 _tocheck=False,
                 getDataFrame=True,
-                createExperiment=False
+                createExperiment=False,
             )
             for evalMetric in evals:
                 scoreCol = f"{evalMetric}"
@@ -1415,13 +1550,13 @@ class LlumoClient:
         return dfWithEvals, outputColMapping

     def runDataStream(
-
-
-
-
-
-
-
+        self,
+        data,
+        streamName: str,
+        queryColName: str = "query",
+        createExperiment: bool = False,
+        getDataFrame=False,
+    ):

         if isinstance(data, dict):
             data = [data]
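runDataStream keeps the same ingestion shape as ragSweep: a list of record dicts plus a stream name, with results matched back via compound row keys. A hedged call sketch:

df = client.runDataStream(
    data=[{"query": "What is RAG?"}],
    streamName="my-knowledge-base",  # illustrative
    queryColName="query",
    getDataFrame=True,               # False would return a list of records
)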
@@ -1437,7 +1572,7 @@ class LlumoClient:
         try:
             socketID = self.socket.connect(timeout=150)
         except Exception as e:
-            socketID="DummySocketID"
+            socketID = "DummySocketID"
         # waited_secs = 0
         # while not self.socket._connection_established.is_set():
         #     time.sleep(0.1)
@@ -1449,8 +1584,12 @@ class LlumoClient:

         # Check user credits
         userHits = checkUserHits(
-            self.workspaceID,
-            self.
+            self.workspaceID,
+            self.hasSubscribed,
+            self.trialEndDate,
+            self.subscriptionEndDate,
+            self.hitsAvailable,
+            len(working_df),
         )
         if not userHits["success"]:
             raise LlumoAIError.InsufficientCredits(userHits["message"])
@@ -1476,7 +1615,7 @@ class LlumoClient:
                 "inactivity_timeout": 10,
                 "expected_results": expectedResults,
             },
-            daemon=True
+            daemon=True,
         )
         listener_thread.start()

@@ -1505,7 +1644,13 @@ class LlumoClient:
                 self.allBatches.append(currentBatch)
                 currentBatch = []

-        for batch in tqdm(
+        for batch in tqdm(
+            self.allBatches,
+            desc="Processing Batches",
+            unit="batch",
+            colour="magenta",
+            ncols=80,
+        ):
             try:
                 self.postDataStream(batch=batch, workspaceID=workspaceID)
                 time.sleep(3)
@@ -1532,21 +1677,22 @@ class LlumoClient:
                 idx = rowIdMapping[compound_key]["index"]
                 working_df.at[idx, "context"] = value.get("value")

-
-
         self.socket.disconnect()

         # Create experiment if required
         if createExperiment:
             df = working_df.fillna("Some error occured").astype(object)
             if createPlayground(
-
-
-
-
+                email,
+                workspaceID,
+                df,
+                queryColName=queryColName,
+                dataStreamName=streamId,
+                definationMapping=self.definationMapping,
             ):
                 print(
-                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
+                    "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
+                )
         if getDataFrame:
             return working_df

@@ -1563,7 +1709,6 @@ class LlumoClient:
         # self.latestDataframe = working_df
         # return working_df

-
     def createExperiment(self, dataframe):
         try:
             self.validateApiKey()
@@ -1581,7 +1726,6 @@ class LlumoClient:
         workspaceID = None
         email = None

-
         try:
             self.validateApiKey()
         except Exception as e:
@@ -1609,17 +1753,17 @@ class LlumoClient:
             # If successfully loaded, call createPlayground
             df = df.astype(str)
             if createPlayground(self.email, self.workspaceID, df):
-
+
                 print(
                     "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
                 )
-
+
                 return True

         except Exception as e:
             print(f"Error: {e}")
-
-    def upload(self,data):
+
+    def upload(self, data):
         try:
             if isinstance(data, dict):
                 data = [data]
@@ -1639,7 +1783,6 @@ class LlumoClient:
             print(f"Error: {e}")
             return False

-
     def createExperimentWithEvals(
         self,
         data,
@@ -1647,7 +1790,7 @@ class LlumoClient:
         prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
         outputColName="output",
         createExperiment: bool = False,
-        getDataFrame:bool =False,
+        getDataFrame: bool = False,
         _tocheck=True,
     ):
         if isinstance(data, dict):
@@ -1657,8 +1800,8 @@ class LlumoClient:
         dataframe = pd.DataFrame(data).astype(str)
         workspaceID = None
         email = None
-        self.evalData=[]
-        self.evals=evals
+        self.evalData = []
+        self.evals = evals
         self.allBatches = []
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
         self.validateApiKey(evalName=evals[0])
@@ -1666,20 +1809,22 @@ class LlumoClient:
             if playgroundID:
                 activePlayground = playgroundID
             else:
-                activePlayground = str(
-
+                activePlayground = str(
+                    createEvalPlayground(email=self.email, workspaceID=self.workspaceID)
+                )
+        else:
             activePlayground = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace(
-
-
+                "-", ""
+            )
         for evalName in evals:
             self.validateApiKey(evalName=evalName)
-        self.evalData =dataframe.to_dict(orient="records")
+        self.evalData = dataframe.to_dict(orient="records")
         if createExperiment:
             print("heading to upload")
             pd.set_option("future.no_silent_downcasting", True)
             # df = dataframe.fillna("Some error occured").astype(object)
             with warnings.catch_warnings():
-                warnings.simplefilter(action=
+                warnings.simplefilter(action="ignore", category=FutureWarning)
                 df = dataframe.fillna("Some error occurred").astype(str)

                 df = dataframe.fillna("Some error occured").infer_objects(copy=False)
@@ -1690,20 +1835,30 @@ class LlumoClient:
                     promptText=prompt_template,
                     definationMapping=self.definationMapping,
                     outputColName=outputColName,
-                    activePlayground=
+                    activePlayground=activePlayground,
                 ):
                     print(
                         "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
                     )
-
+
         else:
             if getDataFrame:
-                return LlumoDataFrameResults(
+                return LlumoDataFrameResults(
+                    dataframe,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )
             else:
-                data=dataframe.to_dict(orient="records")
-                return LlumoDictResults(
+                data = dataframe.to_dict(orient="records")
+                return LlumoDictResults(
+                    data,
+                    evals=self.evals,
+                    evalData=self.evalData,
+                    definationMapping=self.definationMapping,
+                )
+

-
 class SafeDict(dict):
     def __missing__(self, key):
         return ""
llumo/llumoLogger.py
CHANGED
llumo/llumoSessionContext.py
CHANGED
@@ -3,6 +3,7 @@ import uuid
 from typing import Optional, List, Dict, Any
 from datetime import datetime, timezone
 import requests
+from .client import LlumoClient

 _ctxLogger = contextvars.ContextVar("ctxLogger")
 _ctxSessionID = contextvars.ContextVar("ctxSessionID")
@@ -21,8 +22,9 @@ def getLlumoRun():
     return _ctxLlumoRun.get()


-class LlumoSessionContext:
+class LlumoSessionContext(LlumoClient):
     def __init__(self, logger, sessionID: Optional[str] = None):
+        super().__init__(api_key=logger.apiKey, playground_id=logger.getPlaygroundID())
         self.sessionID = sessionID or str(uuid.uuid4().hex[:14])
         self.logger = logger
         self.apiKey = logger.apiKey
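Because LlumoSessionContext now subclasses LlumoClient and forwards the logger's API key and playground ID through super().__init__, a session can call client methods such as evaluateMultiple directly, and those methods find the session's run-logging hooks through the hasattr guards added in client.py. A hedged sketch (the logger constructor is illustrative):

logger = LlumoLogger(api_key="YOUR_LLUMO_API_KEY")  # illustrative constructor
session = LlumoSessionContext(logger)
# Inherited from LlumoClient; runs against the logger's playground:
session.evaluateMultiple(
    data=[{"query": "...", "context": "...", "output": "..."}],
    evals=["Response Correctness"],
)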
llumo/openai.py
CHANGED
@@ -2,7 +2,7 @@ import time
 from openai import OpenAI as OpenAIClient
 from .client import LlumoClient
 from .llumoSessionContext import LlumoSessionContext
-from .llumoLogger import
+from .llumoLogger import LlumoLogger


 # evaluation function that uses LlumoClient
@@ -90,26 +90,26 @@ class OpenAI(OpenAIClient):
                 latency = int((time.time() - start_time) * 1000)
                 # Access the first result object
                 bias_evaluation = bias_evaluation_result[0]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                message = "-".join(
+                    getattr(bias_evaluation, "edgeCases", {}).get("value", [])
+                )
+                self.session.logEvalStep(
+                    stepName=f"EVAL-Input Bias",
+                    output="",
+                    context=context,
+                    query=user_message,
+                    messageHistory="",
+                    tools="",
+                    intermediateSteps="",
+                    groundTruth="",
+                    analyticsScore=getattr(bias_evaluation, "analyticsScore", {}),
+                    reasoning=getattr(bias_evaluation, "reasoning", {}),
+                    classification=getattr(bias_evaluation, "classification", {}),
+                    evalLabel=getattr(bias_evaluation, "evalLabel", {}),
+                    latencyMs=latency,
+                    status="SUCCESS",
+                    message=message,
+                )
             except Exception as e:
                 print(f"Input Bias evaluation failed: {e}")
                 self.session.logEvalStep(
@@ -171,30 +171,30 @@ class OpenAI(OpenAIClient):
                 latency = int((time.time() - start_time) * 1000)
                 # Access the first result object
                 correctness_evaluation = correctness_evaluation_result[0]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                message = "-".join(
+                    getattr(correctness_evaluation, "edgeCases", {}).get("value", [])
+                )
+                self.session.logEvalStep(
+                    stepName=f"EVAL-Response Correctness",
+                    output=output_text,
+                    context=context,
+                    query=user_message,
+                    messageHistory="",
+                    tools="",
+                    intermediateSteps="",
+                    groundTruth="",
+                    analyticsScore=getattr(
+                        correctness_evaluation, "analyticsScore", {}
+                    ),
+                    reasoning=getattr(correctness_evaluation, "reasoning", {}),
+                    classification=getattr(
+                        correctness_evaluation, "classification", {}
+                    ),
+                    evalLabel=getattr(correctness_evaluation, "evalLabel", {}),
+                    latencyMs=latency,
+                    status="SUCCESS",
+                    message=message,
+                )
             except Exception as e:
                 print(f"Response Correctness evaluation failed: {e}")
                 correctness_evaluation = None
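Both handlers read every field off the evaluation result defensively, so a missing attribute or key degrades to an empty default instead of raising mid-request. A self-contained micro-example of the pattern:

class Result:  # stand-in for a llumo evaluation result object
    edgeCases = {"value": ["case-a", "case-b"]}

message = "-".join(getattr(Result(), "edgeCases", {}).get("value", []))
print(message)  # case-a-case-b
# getattr falls back to {} when the attribute is absent, and .get("value", [])
# falls back to [] when the key is absent, so join() always receives a list.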
llumo-0.2.27.dist-info/RECORD
ADDED

@@ -0,0 +1,20 @@
+llumo/__init__.py,sha256=kkuppu7ZPiVZFdnYzJ9BM3syMbYHOSZLpwKwAvGHsnY,311
+llumo/callback.py,sha256=dOsQ35Ro6IVec3TiJfkPx3H9PQtk8oWfJA1skFENTIM,20439
+llumo/callbacks-0.py,sha256=TEIOCWRvk2UYsTmBMBsnlgpqWvr-2y3a6d0w_e96NRM,8958
+llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
+llumo/client.py,sha256=keYx0GToNB-FXmGncXd0MOwwCGjxsIoDbOhTx2rCcMQ,71582
+llumo/exceptions.py,sha256=1OyhN9YL9LcyUPUsqYHq6Rret0udATZAwMVJaio2_Ec,2123
+llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
+llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
+llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
+llumo/helpingFuntions.py,sha256=B6FwUQ5f1v4FKrWCbYoGWMFdscOV_liuuhTgNQ3cdrk,27275
+llumo/llumoLogger.py,sha256=grdjhu6Ngxg7nhnrMOP5Pd5ALR7U2ROws48yhf_N7y0,1912
+llumo/llumoSessionContext.py,sha256=v1OPJFYWe5-mLLUohX5qY7dlzgwmxpuRZ0rDsXEv6f4,10506
+llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
+llumo/openai.py,sha256=c0pZ-yzm6LfUAbfVmOiVpY9pS5sAWZRb8_jAj0ir910,8450
+llumo/sockets.py,sha256=pBDo-U65hMIMwKMwZQl3iBkEjISEt-9BkXxZTWfSHF4,6116
+llumo-0.2.27.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
+llumo-0.2.27.dist-info/METADATA,sha256=1zlTWFpTBkmDtAYsP9HbGfN2OQ7Xuy3t9Z7FM73k8W0,1558
+llumo-0.2.27.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llumo-0.2.27.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
+llumo-0.2.27.dist-info/RECORD,,
llumo-0.2.25.dist-info/RECORD
DELETED
@@ -1,20 +0,0 @@
-llumo/__init__.py,sha256=ZdFeOT5aDM1iA4VzQ8ryc0rxF3ihjhPO8aCRuw8t0zk,342
-llumo/callback.py,sha256=Pzg9Smqsu5G900YZjoFwqMY0TTP4jUizxllaP0TjKgk,20439
-llumo/callbacks-0.py,sha256=TEIOCWRvk2UYsTmBMBsnlgpqWvr-2y3a6d0w_e96NRM,8958
-llumo/chains.py,sha256=6lCgLseh04RUgc6SahhmvQj82quay2Mi1j8gPUlx8Es,2923
-llumo/client.py,sha256=RKI8XIIafzMWX42gXBXAcMjtOzZngx1ebgGfXmNDa-w,69064
-llumo/exceptions.py,sha256=1OyhN9YL9LcyUPUsqYHq6Rret0udATZAwMVJaio2_Ec,2123
-llumo/execution.py,sha256=nWbJ7AvWuUPcOb6i-JzKRna_PvF-ewZTiK8skS-5n3w,1380
-llumo/functionCalling.py,sha256=D5jYapu1rIvdIJNUYPYMTyhQ1H-6nkwoOLMi6eekfUE,7241
-llumo/google.py,sha256=3S_aRtbtlctCXPGR0u4baLlkyFrsjd02vlUCkoRPA5U,2147
-llumo/helpingFuntions.py,sha256=B6FwUQ5f1v4FKrWCbYoGWMFdscOV_liuuhTgNQ3cdrk,27275
-llumo/llumoLogger.py,sha256=UW3eIQb5txneilx8FQnGf6t4LgP85NoIf5YECFDZShk,1912
-llumo/llumoSessionContext.py,sha256=J4oFCWcpksZ2sWgGJqzq2LjUwIG1OAQva_EAvNy9ACs,10373
-llumo/models.py,sha256=aVEZsOOoQx5LeNtwSyBxqvrINq0izH3QWu_YjsMPE6o,2910
-llumo/openai.py,sha256=QyNMXiYTppaU_YjU6vU5UB0At3OiNntoDTQ0dszLN0g,8538
-llumo/sockets.py,sha256=pBDo-U65hMIMwKMwZQl3iBkEjISEt-9BkXxZTWfSHF4,6116
-llumo-0.2.25.dist-info/licenses/LICENSE,sha256=tF9yAcfPV9xGT3ViWmC8hPvOo8BEk4ZICbUfcEo8Dlk,182
-llumo-0.2.25.dist-info/METADATA,sha256=HkIJH54gwQ-rAgxzgpiCWVvvn6tD0jNPYJqnTAoZOfA,1558
-llumo-0.2.25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llumo-0.2.25.dist-info/top_level.txt,sha256=d5zUTMI99llPtLRB8rtSrqELm_bOqX-bNC5IcwlDk88,6
-llumo-0.2.25.dist-info/RECORD,,
{llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/WHEEL
File without changes

{llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/licenses/LICENSE
File without changes

{llumo-0.2.25.dist-info → llumo-0.2.27.dist-info}/top_level.txt
File without changes