llumo 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registries.
- llumo/callback.py +123 -31
- llumo/client.py +213 -228
- llumo/google.py +16 -16
- llumo/helpingFuntions.py +2 -2
- llumo/llumoSessionContext.py +91 -29
- llumo/openai.py +148 -136
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/METADATA +1 -1
- llumo-0.2.30.dist-info/RECORD +20 -0
- llumo-0.2.28.dist-info/RECORD +0 -20
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/WHEEL +0 -0
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/licenses/LICENSE +0 -0
- {llumo-0.2.28.dist-info → llumo-0.2.30.dist-info}/top_level.txt +0 -0
llumo/client.py
CHANGED
@@ -29,10 +29,7 @@ fetchUrl = (
     "https://red-skull-service-392377961931.us-central1.run.app/api/get-cells-data"
 )
 socketDataUrl = "https://app.llumo.ai/api/eval/get-awaited"
-
-# "workspaceID":"c9191fdf33bdd7838328c1a0",
-# "playgroundID":"17496117244856b7815ac94004347b1c2e2f7e01600ec"
-# }
+
 validateUrl = "https://app.llumo.ai/api/workspace-details"
 socketUrl = "https://red-skull-service-392377961931.us-central1.run.app/"
 
@@ -79,6 +76,7 @@ class LlumoClient:
         # Try to parse JSON
         try:
             data = response.json()
+            # print(data)
         except ValueError as e:
             print(f"JSON parsing error: {str(e)}")
             # print(f"Response content that could not be parsed: {response.text[:1000]}...")
@@ -93,7 +91,9 @@ class LlumoClient:
         self.workspaceID = data["data"]["data"].get("workspaceID")
         self.evalDefinition = data["data"]["data"]["analyticsMapping"]
         self.socketToken = data["data"]["data"].get("token")
-        self.
+        # print(self.socketToken)
+        self.hasSubscribed = data["data"]["data"].get("hasSubscr"
+                                                      "ibed", False)
         self.trialEndDate = data["data"]["data"].get("trialEndDate", None)
         self.subscriptionEndDate = data["data"]["data"].get(
             "subscriptionEndDate", None
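A note on the hunk above: the new `hasSubscribed` assignment splits its key across two physical lines, and Python concatenates adjacent string literals at parse time, so the two fragments still resolve to the single key `"hasSubscribed"`. A minimal standalone check:

# Adjacent string literals are joined by the parser, so the split key in
# the hunk above is equivalent to the unbroken one.
key = "hasSubscr" "ibed"
assert key == "hasSubscribed"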
@@ -191,7 +191,7 @@ class LlumoClient:
     def postBatch(self, batch, workspaceID):
         payload = {
             "batch": json.dumps(batch),
-            "runType": "
+            "runType": "FULL_EVAL_RUN",
             "workspaceID": workspaceID,
         }
         # socketToken here if the "JWD" token
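For context, the body `postBatch` now posts is a flat dict whose `batch` field is a JSON-encoded string, with the run type pinned to `FULL_EVAL_RUN` in 0.2.30. A minimal sketch of the resulting payload (the batch entries and workspace ID below are invented for illustration):

import json

batch = [{"rowID": "r1", "columnID": "c1"}]   # illustrative batch entries
payload = {
    "batch": json.dumps(batch),    # the batch travels as a JSON string
    "runType": "FULL_EVAL_RUN",    # pinned in 0.2.30
    "workspaceID": "ws_123",       # hypothetical workspace ID
}
print(payload)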
@@ -204,6 +204,7 @@ class LlumoClient:
             response = requests.post(postUrl, json=payload, headers=headers)
             # print(f"Post API Status Code: {response.status_code}")
             # print(response.text)
+            # print(response.status_code)
 
         except Exception as e:
             print(f"Error in posting batch: {e}")
@@ -644,7 +645,7 @@ class LlumoClient:
         self.socket.listenForResults(
             min_wait=20,
             max_wait=timeout,
-            inactivity_timeout=
+            inactivity_timeout=50,
             expected_results=None,
         )
 
@@ -701,13 +702,11 @@ class LlumoClient:
         data,
         evals: list,
         prompt_template="Give answer to the given query: {{query}} using the given context: {{context}}.",
-        outputColName="output",
-        createExperiment: bool = False,
         getDataFrame: bool = False,
         _tocheck=True,
     ):
-        if hasattr(self, "startLlumoRun"):
-
+        # if hasattr(self, "startLlumoRun"):
+        #     self.startLlumoRun(runName="evaluateMultiple")
         if isinstance(data, dict):
             data = [data]
         elif not isinstance(data, list):
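The hunk above changes the public signature of `evaluateMultiple`: the `outputColName` and `createExperiment` keywords are removed in 0.2.30, and the method always reads the `output` column. A hedged usage sketch, assuming `LlumoClient` is exported from the package root and takes an API key; the key and data are invented:

from llumo import LlumoClient

client = LlumoClient(api_key="YOUR_API_KEY")   # hypothetical key
data = [{"query": "What is RAG?", "context": "Docs...", "output": "..."}]

# 0.2.28 accepted extra keywords:
#   client.evaluateMultiple(data, evals=["Response Correctness"],
#                           outputColName="output", createExperiment=False)
# 0.2.30 drops both, so the same call reduces to:
results = client.evaluateMultiple(data, evals=["Response Correctness"])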
@@ -743,9 +742,9 @@ class LlumoClient:
         listener_thread = threading.Thread(
             target=self.socket.listenForResults,
             kwargs={
-                "min_wait":
+                "min_wait": 20,
                 "max_wait": timeout,
-                "inactivity_timeout":
+                "inactivity_timeout": 35,
                 "expected_results": expectedResults,
             },
             daemon=True,
@@ -753,148 +752,152 @@ class LlumoClient:
         listener_thread.start()
         self.validateApiKey(evalName=evals[0])
         activePlayground = self.playgroundID
-        for evalName
-
+        # print(f"\n======= Running evaluation for: {evalName} =======")
+
+        # Validate API and dependencies
+        # self.validateApiKey(evalName=evals[0])
+        customAnalytics = getCustomAnalytics(self.workspaceID)
+        # metricDependencies = checkDependency(
+        #     evalName,
+        #     list(dataframe.columns),
+        #     tocheck=_tocheck,
+        #     customevals=customAnalytics,
+        # )
+        # if not metricDependencies["status"]:
+        #     raise LlumoAIError.dependencyError(metricDependencies["message"])
 
-
-
-
-
-
-
-
-
-
-
-
+        # evalDefinition = self.evalDefinition[evalName]["definition"]
+        model = "GPT_4"
+        provider = "OPENAI"
+        evalType = "LLM"
+        workspaceID = self.workspaceID
+        email = self.email
+        # categories = self.categories
+        # evaluationStrictness = self.evaluationStrictness
+        # grammarCheckOutput = self.grammarCheckOutput
+        # insightLength = self.insightsLength
+        # numJudges = self.numJudges
+        # penaltyBonusInstructions = self.penaltyBonusInstructions
+        # probableEdgeCases = self.probableEdgeCases
+        # fieldMapping = self.fieldMapping
 
-
-
-
-
-
-
-
-
-
-
-        numJudges = self.numJudges
-        penaltyBonusInstructions = self.penaltyBonusInstructions
-        probableEdgeCases = self.probableEdgeCases
-        fieldMapping = self.fieldMapping
+        userHits = checkUserHits(
+            self.workspaceID,
+            self.hasSubscribed,
+            self.trialEndDate,
+            self.subscriptionEndDate,
+            self.hitsAvailable,
+            len(dataframe),
+        )
+        if not userHits["success"]:
+            raise LlumoAIError.InsufficientCredits(userHits["message"])
 
-
-
-
-
-
-
-
+        currentBatch = []
+        for index, row in dataframe.iterrows():
+            tools = [row["tools"]] if "tools" in dataframe.columns else []
+            groundTruth = row.get("groundTruth", "")
+            messageHistory = (
+                [row["messageHistory"]]
+                if "messageHistory" in dataframe.columns
+                else []
             )
-
-
+            promptTemplate = prompt_template
+            keys = re.findall(r"{{(.*?)}}", promptTemplate)
 
-
-
-            tools = [row["tools"]] if "tools" in dataframe.columns else []
-            groundTruth = row.get("groundTruth", "")
-            messageHistory = (
-                [row["messageHistory"]]
-                if "messageHistory" in dataframe.columns
-                else []
-            )
-            promptTemplate = prompt_template
-            keys = re.findall(r"{{(.*?)}}", promptTemplate)
+            if not all([ky in dataframe.columns for ky in keys]):
+                raise LlumoAIError.InvalidPromptTemplate()
 
-
-
+            inputDict = {key: row[key] for key in keys if key in row}
+            # output = row.get(outputColName, "")
+            output = row.get("output","")
+            intermediateSteps = row.get("intermediateSteps", "")
 
-
-
-            intermediateSteps = row.get("intermediateSteps", "")
+            rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
+            columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
 
-
-
+            compoundKey = f"{rowID}-{columnID}-{columnID}"
+            # rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
+            rowIdMapping[compoundKey] = {"index": index}
 
-            compoundKey = f"{rowID}-{columnID}-{columnID}"
-            rowIdMapping[compoundKey] = {"index": index, "eval": evalName}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                },
-                "categories": categories,
-                "evaluationStrictness": evaluationStrictness,
-                "grammarCheckOutput": grammarCheckOutput,
-                "insightLength": insightLength,
-                "numJudges": numJudges,
-                "penaltyBonusInstructions": penaltyBonusInstructions,
-                "probableEdgeCases": probableEdgeCases,
-                "model": model,
-                "provider": provider,
+            templateData = {
+                "processID": getProcessID(),
+                "socketID": socketID,
+                "rowID": rowID,
+                "columnID": columnID,
+                "processType": "FULL_EVAL_RUN",
+                "evalType": "LLM",
+                "workspaceID": workspaceID,
+                "email": email,
+                "playgroundID": activePlayground,
+                "source": "SDK",
+                "processData": {
+                    # "analyticsName": evalName,
+                    # "definition": evalDefinition,
+                    "executionDependency": {
+                        "query": "",
+                        "context": "",
+                        "output": output,
+                        "tools": tools,
+                        "groundTruth": groundTruth,
+                        "messageHistory": messageHistory,
+                        "intermediateSteps": intermediateSteps,
                     },
-                "
-                "
-                "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    promptTemplate, tempObj
-                )
-            else:
-                query += f" {key}: {value}, "
+                    "evallist":evals,
+                    # "model": model,
+                    # "provider": provider,
+                    "sessionID":self.sessionID
+                    # "categories": categories,
+                    # "evaluationStrictness": evaluationStrictness,
+                    # "grammarCheckOutput": grammarCheckOutput,
+                    # "insightLength": insightLength,
+                    # "numJudges": numJudges,
+                    # "penaltyBonusInstructions": penaltyBonusInstructions,
+                    # "probableEdgeCases": probableEdgeCases,
+                },
+                "type": "FULL_EVAL_RUN",
+                # "kpi": evalName,
+                # "fieldMappig": fieldMapping,
+            }
 
-
-
+            query = ""
+            context = ""
+            for key, value in inputDict.items():
+                if isinstance(value, str):
+                    length = len(value.split()) * 1.5
+                    if length > 50:
                         context += f" {key}: {value}, "
+                    else:
+                        if promptTemplate:
+                            tempObj = {key: value}
+                            promptTemplate = getInputPopulatedPrompt(
+                                promptTemplate, tempObj
+                            )
+                        else:
+                            query += f" {key}: {value}, "
 
-
-
-
+            if not context.strip():
+                for key, value in inputDict.items():
+                    context += f" {key}: {value}, "
+
+            templateData["processData"]["executionDependency"][
+                "context"
+            ] = context.strip()
+            templateData["processData"]["executionDependency"][
+                "query"
+            ] = query.strip()
+            if promptTemplate and not query.strip():
                 templateData["processData"]["executionDependency"][
                     "query"
-                ] =
-            if promptTemplate and not query.strip():
-                templateData["processData"]["executionDependency"][
-                    "query"
-                ] = promptTemplate
-
-            currentBatch.append(templateData)
-            if len(currentBatch) == 10:
-                self.allBatches.append(currentBatch)
-                currentBatch = []
+                ] = promptTemplate
 
-
+            currentBatch.append(templateData)
+            if len(currentBatch) == 10:
                 self.allBatches.append(currentBatch)
+                currentBatch = []
+
+        if currentBatch:
+            self.allBatches.append(currentBatch)
 
         for batch in tqdm(
             self.allBatches,
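To make the new loop above easier to follow: every dataframe row gets a time-plus-UUID rowID/columnID pair, the rowID-columnID-columnID compound string is remembered in rowIdMapping so socket results can later be matched back to a row index, and batches are flushed every ten cells. A standalone sketch of just that bookkeeping (the sample rows are invented):

import time
import uuid

rows = [{"output": "a"}, {"output": "b"}, {"output": "c"}]  # stand-in rows
rowIdMapping = {}
allBatches, currentBatch = [], []

for index, row in enumerate(rows):
    rowID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
    columnID = f"{int(time.time() * 1000)}{uuid.uuid4()}".replace("-", "")
    # The compound key later identifies which row a socket result belongs to.
    compoundKey = f"{rowID}-{columnID}-{columnID}"
    rowIdMapping[compoundKey] = {"index": index}

    currentBatch.append({"rowID": rowID, "columnID": columnID})
    if len(currentBatch) == 10:   # flush a full batch of 10 cells
        allBatches.append(currentBatch)
        currentBatch = []

if currentBatch:                  # flush the remainder
    allBatches.append(currentBatch)

print(len(allBatches), "batch(es),", len(rowIdMapping), "tracked cells")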
@@ -905,7 +908,8 @@ class LlumoClient:
         ):
             try:
                 self.postBatch(batch=batch, workspaceID=workspaceID)
-                time.sleep(
+                time.sleep(2)
+                # print(batch)
             except Exception as e:
                 print(f"Error posting batch: {e}")
                 raise
@@ -921,8 +925,8 @@ class LlumoClient:
         receivedRowIDs = {key for item in rawResults for key in item.keys()}
         expectedRowIDs = set(rowIdMapping.keys())
         missingRowIDs = expectedRowIDs - receivedRowIDs
-        # print("All expected keys:",
-        # print("All received keys:",
+        # print("All expected keys:", expectedRowIDs)
+        # print("All received keys:", receivedRowIDs)
         # print("Missing keys:", len(missingRowIDs))
         missingRowIDs = list(missingRowIDs)
         if len(missingRowIDs) > 0:
@@ -930,102 +934,83 @@ class LlumoClient:
             rawResults.extend(dataFromDb)
 
         self.evalData = rawResults
+        # print("RAW RESULTS: ", self.evalData)
 
         # Initialize dataframe columns for each eval
-        for
-        dataframe[
-        dataframe[f"{
+        for ev_name in evals:
+            dataframe[ev_name] = ""
+            dataframe[f"{ev_name} Reason"] = ""
+            # dataframe[f"{ev_name} EdgeCase"] = None
 
         # Map results to dataframe rows
         for item in rawResults:
             for compound_key, value in item.items():
-                if compound_key in rowIdMapping:
-
-
-
-                    dataframe.at[index, f"{evalName} Reason"] = value.get("reasoning")
-
-                    # Log the evaluation step
-                    if hasattr(self, "logEvalStep"):
-                        try:
-                            start_time = time.time()
-                            self.logEvalStep(
-                                stepName=f"EVAL-{evalName}",
-                                output=value.get("value"),
-                                context=row.get("context", ""),
-                                query=row.get("query", ""),
-                                messageHistory=row.get("messageHistory", ""),
-                                tools=row.get("tools", ""),
-                                intermediateSteps=row.get("intermediateSteps", ""),
-                                groundTruth=row.get("groundTruth", ""),
-                                analyticsScore=value.get("analyticsScore", {}),
-                                reasoning=value.get("reasoning", {}),
-                                classification=value.get("classification", {}),
-                                evalLabel=value.get("evalLabel", {}),
-                                latencyMs=int((time.time() - start_time) * 1000),
-                                status="SUCCESS",
-                                message="",
-                            )
-                        except Exception as e:
-                            print(f"Error logging eval step: {e}")
+                if compound_key not in rowIdMapping:
+                    continue
+                index = rowIdMapping[compound_key]["index"]
+                rowID, columnID, _ = compound_key.split("-", 2)
 
-
-
-        self.endLlumoRun()
+                if hasattr(self, "startLlumoRun"):
+                    self.startLlumoRun(runName="evaluateMultiple",rowID = rowID, columnID = columnID)
 
-
-
-        # df = dataframe.fillna("Some error occured").astype(object)
-        with warnings.catch_warnings():
-            warnings.simplefilter(action="ignore", category=FutureWarning)
-            df = dataframe.fillna("Some error occurred").astype(str)
+                # get the dataframe row at this index
+                row = dataframe.iloc[index].to_dict()
 
-
-
-            email,
-            workspaceID,
-            df,
-            promptText=prompt_template,
-            definationMapping=self.definationMapping,
-            outputColName=outputColName,
-            activePlayground=activePlayground,
-            customAnalytics=customAnalytics,
-        ):
-            print(
-                "LLUMO’s intuitive UI is ready—start exploring and experimenting with your logs now. Visit https://app.llumo.ai/evallm to see the results."
-            )
-        if getDataFrame:
-            return LlumoDataFrameResults(
-                dataframe,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
-        else:
-            data = dataframe.to_dict(orient="records")
-            return LlumoDictResults(
-                data,
-                evals=self.evals,
-                evalData=self.evalData,
-                definationMapping=self.definationMapping,
-            )
+                if not value:
+                    continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                # Handle fullEval block
+                fullEval = value.get("fullEval") if isinstance(value, dict) else None
+                if fullEval:
+                    if "evalMetrics" in fullEval and isinstance(fullEval["evalMetrics"], list):
+                        for eval_item in fullEval["evalMetrics"]:
+                            evalName = eval_item.get("evalName") or eval_item.get("kpiName")
+                            score = str(eval_item.get("score")) or eval_item.get("value")
+                            reasoning = eval_item.get("reasoning")
+                            # edgeCase = eval_item.get("edgeCase")
+
+                            if evalName:
+                                dataframe.at[index, evalName] = score
+                                dataframe.at[index, f"{evalName} Reason"] = reasoning
+                                # dataframe.at[index, f"{evalName} EdgeCase"] = edgeCase
+
+                            # logEvalStep if available
+                            if hasattr(self, "logEvalStep"):
+                                try:
+                                    start_time = time.time()
+                                    self.logEvalStep(
+                                        stepName=f"EVAL-{evalName}",
+                                        output=row.get("output", ""),
+                                        context=row.get("context", ""),
+                                        query=row.get("query", ""),
+                                        messageHistory=row.get("messageHistory", ""),
+                                        tools=row.get("tools", ""),
+                                        intermediateSteps=row.get("intermediateSteps", ""),
+                                        groundTruth=row.get("groundTruth", ""),
+                                        analyticsScore=score,
+                                        reasoning=reasoning,
+                                        classification=eval_item.get("classification", {}),
+                                        evalLabel=eval_item.get("evalLabel", {}),
+                                        latencyMs=int((time.time() - start_time) * 1000),
+                                        status="SUCCESS",
+                                        message="",
+                                    )
+                                except Exception as e:
+                                    print(f"⚠️ logEvalStep failed: {e}")
+                            if hasattr(self, "endLlumoRun"):
+                                self.endEvalRun()
+
+        # Clean up and finish
+        try:
+            self.socket.disconnect()
+        except Exception:
+            pass
+
+        # if hasattr(self, "endLlumoRun"):
+        #     self.endEvalRun()
+        #
+        return dataframe
 
     def promptSweep(
         self,
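The rewritten mapping loop above expects each socket result to carry its scores inside a fullEval.evalMetrics list rather than a single value per eval. A standalone sketch of how that shape is unpacked into the dataframe columns (the payload below is invented to mirror the structure the code handles):

import pandas as pd

dataframe = pd.DataFrame([{"output": "Paris"}])
rowIdMapping = {"r1-c1-c1": {"index": 0}}
for ev_name in ["Response Correctness"]:
    dataframe[ev_name] = ""
    dataframe[f"{ev_name} Reason"] = ""

# Invented result payload mirroring the fullEval/evalMetrics shape.
item = {"r1-c1-c1": {"fullEval": {"evalMetrics": [
    {"evalName": "Response Correctness", "score": 95,
     "reasoning": "Matches the ground truth."},
]}}}

for compound_key, value in item.items():
    if compound_key not in rowIdMapping:
        continue
    index = rowIdMapping[compound_key]["index"]
    fullEval = value.get("fullEval") if isinstance(value, dict) else None
    if fullEval and isinstance(fullEval.get("evalMetrics"), list):
        for eval_item in fullEval["evalMetrics"]:
            evalName = eval_item.get("evalName") or eval_item.get("kpiName")
            if evalName:
                dataframe.at[index, evalName] = str(eval_item.get("score"))
                dataframe.at[index, f"{evalName} Reason"] = eval_item.get("reasoning")

print(dataframe)

One detail worth noting in the hunk itself: `str(eval_item.get("score")) or eval_item.get("value")` always yields the left operand, since even `str(None)` is the truthy string "None", so the `value` fallback never triggers.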
@@ -1806,8 +1791,8 @@ class LlumoClient:
         rowIdMapping = {}  # (rowID-columnID-columnID -> (index, evalName))
         self.validateApiKey(evalName=evals[0])
         if createExperiment:
-            if playgroundID:
-                activePlayground = playgroundID
+            if self.playgroundID:
+                activePlayground = self.playgroundID
             else:
                 activePlayground = str(
                     createEvalPlayground(email=self.email, workspaceID=self.workspaceID)
llumo/google.py
CHANGED
@@ -15,9 +15,9 @@ def evaluate_multiple(data, api_key=None, evals=["Response Correctness"]):
 
 
 class ChatCompletionWithEval:
-    def __init__(self, response, evaluation):
+    def __init__(self, response, evaluation=None):
         self._response = response
-        self.evaluation = evaluation
+        # self.evaluation = evaluation
 
     def __getattr__(self, name):
         return getattr(self._response, name)
@@ -46,21 +46,21 @@ class genai:
             response = self._model_instance.generate_content(contents=contents, **kwargs)
             output = response.text
 
-            eval_input = [{
-
-
-
-            }]
+            # eval_input = [{
+            #     "query": contents,
+            #     "context": context or contents,
+            #     "output": output,
+            # }]
 
-            evaluation = None
-            try:
-
-            except Exception as e:
-
+            # evaluation = None
+            # try:
+            #     evaluation = evaluate_multiple(data=eval_input, evals=evals, api_key=llumo_key)
+            # except Exception as e:
+            #     evaluation = None
 
-            if evaluation is None:
-
-
+            # if evaluation is None:
+            #     print("Cannot process your request for evaluation, please check your api and try again later.")
+            #     return response
 
 
-            return ChatCompletionWithEval(response, evaluation)
+            return ChatCompletionWithEval(response, evaluation=None)
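With the evaluation path commented out above, ChatCompletionWithEval is reduced to a transparent wrapper: __getattr__ only fires when normal attribute lookup fails, so every attribute of the underlying genai response passes straight through. A minimal standalone illustration of that delegation pattern (the response class is a stand-in, not the google-generativeai type):

class ChatCompletionWithEval:
    def __init__(self, response, evaluation=None):
        self._response = response

    def __getattr__(self, name):
        # Invoked only when normal lookup fails, so every attribute of the
        # wrapped response is forwarded unchanged.
        return getattr(self._response, name)


class FakeResponse:  # stand-in for a genai response object
    text = "Hello from Gemini"


wrapped = ChatCompletionWithEval(FakeResponse())
print(wrapped.text)  # -> "Hello from Gemini"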
llumo/helpingFuntions.py
CHANGED
@@ -130,7 +130,7 @@ def checkUserHits(
     response = json.loads(responseBody.text)
 
     proceed = response.get("execution", "")
-    print(proceed)
+    # print(proceed)
 
     if proceed:
         return {"success": True, "message": "Hits added and access granted."}
@@ -234,7 +234,7 @@ def createColumn(workspaceID, dataframe, playgroundID, promptText=None,queryColN
     except Exception as e:
         pass
     evalDependencies = checkDependency(_returnDepMapping=True,customevals=customAnalytics)
-    print(allEvals)
+    # print(allEvals)
     # Create a mapping of column names to unique column IDs
     columnIDMapping = {}
 